This repository has been archived by the owner on Sep 30, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfill-template
executable file
·185 lines (136 loc) · 5.5 KB
/
fill-template
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/env python3
"""
Fill a LaTeX document templated with Jinja2 and render to PDF for every row in
an input table.
Template parameters are read from the CSV/TSV file specified with --params and
are interpolated via \VAR{column_name} constructs in LaTeX. Other templating
is possible, refer to <https://pythonhosted.org/latex/>.
The rendered PDFs are saved to file paths specified by --output, which should
be a Python f-string making use of the template parameters to construct a
unique filename for every row of input data.
For example, given a TSV that looks like this:
\b
barcode birth_date pat_name
AAAAAAAA 1966-05-09 Kermit the Frog
BBBBBBBB 1969-07-21 Bert
running:
\b
fill-template \\
--template scan/report-en.tex \\
--params sesame-results.tsv \\
--output '{barcode}-{birth_date}-en.pdf'
will produce two files:
\b
AAAAAAAA-1966-05-09-en.pdf
BBBBBBBB-1969-07-21-en.pdf
"""
import click
import fsspec
import logging
import os
import pandas
from datetime import date
from itertools import cycle
from jinja2 import FileSystemLoader
from multiprocessing import Pool
from latex.build import PdfLatexBuilder
from latex.jinja2 import make_env
from os.path import basename, dirname
from sys import stdin, stderr
# Level for this script's own logger.  Taken from the LOG_LEVEL environment
# variable (upper-cased before use) and defaulting to DEBUG when unset.
LOG_LEVEL = os.environ.get("LOG_LEVEL", "debug").upper()

# The root logger is configured at ERROR and writes to stderr; only this
# module's logger (below) is given the LOG_LEVEL chosen above.
logging.basicConfig(
    level=logging.ERROR,
    format="[%(asctime)s] %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S%z",
    stream=stderr,
)

# Redirect messages issued through the warnings module into logging.
logging.captureWarnings(True)

log = logging.getLogger(__name__)
log.setLevel(LOG_LEVEL)

# XeLaTeX has the best Unicode support of the various LaTeX engines, which is
# important since we'll be preparing reports in 12 languages.
xelatex = PdfLatexBuilder("xelatex")

# Checked once, when the module is loaded, rather than on the first render.
# NOTE(review): an assert is skipped under `python -O`; consider an explicit
# raise if this check must always run.
assert xelatex.is_available(), "xelatex is not available"
def _default_worker_count():
    """
    Return the number of CPUs available to this process.

    Uses os.sched_getaffinity() where the platform provides it (it is only
    available on some Unix systems) and otherwise falls back to
    os.cpu_count(), or to 1 if the CPU count cannot be determined.
    """
    try:
        return len(os.sched_getaffinity(0))
    except AttributeError:
        return os.cpu_count() or 1


@click.command("fill-template", help=__doc__, no_args_is_help=True)
@click.option(
    "--template", "template_path",
    metavar="<report.tex>",
    help="Path to a LaTeX document templated with Jinja2; "
         "syntax described at <https://pythonhosted.org/latex/>",
    type=click.Path(exists=True, dir_okay=False, resolve_path=True),
    required=True)
@click.option(
    "--params", "params_path",
    metavar="<results.csv>",
    help="Path to a table (CSV or TSV) of template parameter values; "
         "each row produces one output file.  "
         "Default is stdin.",
    default="-")
@click.option(
    "--output", "output_path",
    metavar="<path.pdf>",
    help="Path to which PDFs are written; should use Python f-string "
         "syntax to produce a unique path for each input row.",
    required=True)
@click.option(
    "--filter", "filter_query",
    metavar="<condition>",
    help="A Pandas DataFrame query string for filtering the parameter table")
@click.option(
    "--workers", "worker_count",
    metavar="<n>",
    help="Number of parallel worker processes to start for PDF rendering",
    show_default=True,
    # A process pool needs at least one worker; reject 0 and negative values
    # as a usage error instead of letting Pool() raise ValueError.
    type=click.IntRange(min=1),
    default=_default_worker_count())
@click.pass_context
def __main__(ctx, *, template_path, params_path, output_path, filter_query, worker_count):
    """
    Render one PDF per row of the parameter table using a pool of workers.

    Parameters:
        ctx: The click context, used to set the process exit status.
        template_path: Path to the Jinja2-templated LaTeX document.
        params_path: Path of the parameter table, or "-" for stdin.
        output_path: Format string producing the output path for each row.
        filter_query: Optional DataFrame query used to filter the rows.
        worker_count: Number of worker processes in the pool.

    Exits with status 1, after logging the affected output names, if
    rendering failed for any row.
    """
    if params_path == "-":
        params_path = "/dev/stdin"

    # The pool is created before the parameter table is read -- presumably so
    # the workers are started before the table is loaded; confirm before
    # reordering.
    with Pool(worker_count) as workers:
        log.debug(f"Started pool of n={worker_count:,} workers")

        with fsspec.open(params_path) as file:
            param_table = read_params(file, filter_query)

        # One (template, output pattern, row) argument tuple per table row.
        jobs = [
            (str(template_path), output_path, param_row)
            for param_row in param_table
        ]
        results = workers.starmap(render, jobs)

    errors = [
        output_file
        for output_file, successful in results
        if not successful
    ]

    if errors:
        log.error(f"Errors were encountered (n={len(errors)}) during processing of: {errors!r}")
        ctx.exit(1)
def render(template_path, output_path, param_row):
    """
    Render the PDF for one parameter row and report whether it succeeded.

    Parameters:
        template_path: Path to the Jinja2-templated LaTeX document.
        output_path: Format string which is filled in with the values of
            param_row to produce the output file name.
        param_row: Dict of template parameters for this row.

    Returns:
        A tuple (output_name, successful).  Rendering errors are logged with
        a traceback and reported as successful == False rather than raised.

    Raises:
        Errors from formatting output_path (e.g. KeyError for a placeholder
        with no matching parameter) or from opening the output file are not
        caught here and propagate to the caller.
    """
    output_name = output_path.format(**param_row)

    # The output file is opened before rendering starts, so a failed render
    # may leave an empty or partial file behind at output_name.
    with fsspec.open(output_name, "wb") as output_file:
        try:
            log.info(f"Generating {output_name}")
            pdf_renderer(template_path)(param_row, output_file)
        # Catch Exception rather than everything: KeyboardInterrupt and
        # SystemExit must not be swallowed and recorded as a failed row.
        except Exception:
            log.exception(f"Error generating {output_name}")
            return output_name, False
        else:
            return output_name, True
def pdf_renderer(template_path):
    """
    Build a function which renders the given LaTeX template to PDF.

    The template is loaded through a Jinja2 environment rooted at the
    template's own directory.  The returned function takes a dict of
    template parameters and an output file, fills in the template, builds
    the PDF with XeLaTeX, and saves it to the output file.
    """
    template_dir, template_name = os.path.split(template_path)

    environment = make_env(loader=FileSystemLoader(template_dir))
    tex_template = environment.get_template(template_name)

    # Allow the template to reference files relative to itself.  The empty
    # string includes the default locations, as described in `man tex`.
    search_path = [template_dir, ""]

    def render_pdf(params, output_file):
        # Convenience globals useful for reports.  Note that this adds the
        # key to the caller's dict when it is not already present.
        params.setdefault("current_date", str(date.today()))

        latex_source = tex_template.render(params)
        pdf = xelatex.build_pdf(latex_source, texinputs=search_path)
        return pdf.save_to(output_file)

    return render_pdf
def read_params(file, filter_query):
    """
    Load the parameter table from an open file as a list of row dicts.

    The column delimiter is detected automatically (sep=None with the Python
    parsing engine) and every column is read with the "string" dtype.  When
    filter_query is given, it is applied as a DataFrame query and only the
    matching rows are returned.

    Returns a list with one dict per remaining row, keyed by column name.
    """
    table = pandas.read_csv(file, sep=None, dtype="string", engine="python")

    loaded = len(table)
    log.info(f"Loaded {loaded:,} parameter rows")

    # No filter requested: every loaded row is used.
    if not filter_query:
        return table.to_dict("records")

    kept = table.query(filter_query)
    dropped = loaded - len(kept)
    log.info(f"Filtered out {dropped:,} parameter rows where {filter_query}")

    return kept.to_dict("records")
# Run the command-line interface only when executed as a script, not when
# imported as a module.
if __name__ == "__main__":
    __main__()