/src/qpdf/libqpdf/Pl_Flate.cc
Line | Count | Source |
1 | | #include <qpdf/Pl_Flate.hh> |
2 | | |
3 | | #include <climits> |
4 | | #include <cstring> |
5 | | #include <zlib.h> |
6 | | |
7 | | #include <qpdf/QIntC.hh> |
8 | | #include <qpdf/QUtil.hh> |
9 | | #include <qpdf/Util.hh> |
10 | | #include <qpdf/qpdf-config.h> |
11 | | |
12 | | #ifdef ZOPFLI |
13 | | # include <zopfli.h> |
14 | | #endif |
15 | | |
16 | | using namespace qpdf; |
17 | | |
18 | | namespace |
19 | | { |
20 | | unsigned long long memory_limit_{0}; |
21 | | } // namespace |
22 | | |
23 | | int Pl_Flate::compression_level = Z_DEFAULT_COMPRESSION; |
24 | | |
25 | | Pl_Flate::Members::Members(size_t out_bufsize, action_e action) : |
26 | 14.3k | out_bufsize(out_bufsize), |
27 | 14.3k | action(action), |
28 | 14.3k | initialized(false), |
29 | 14.3k | zdata(nullptr) |
30 | 14.3k | { |
31 | 14.3k | this->outbuf = QUtil::make_shared_array<unsigned char>(out_bufsize); |
32 | | // Indirect through zdata to reach the z_stream so we don't have to include zlib.h in |
33 | | // Pl_Flate.hh. This means people using shared library versions of qpdf don't have to have zlib |
34 | | // development files available, which particularly helps in a Windows environment. |
35 | 14.3k | zdata = new z_stream; |
36 | | |
37 | 14.3k | util::no_ci_rt_error_if( |
38 | 14.3k | out_bufsize > UINT_MAX, |
39 | 14.3k | "Pl_Flate: zlib doesn't support buffer sizes larger than unsigned int"); |
40 | | |
41 | 14.3k | z_stream& zstream = *(static_cast<z_stream*>(this->zdata)); |
42 | 14.3k | zstream.zalloc = nullptr; |
43 | 14.3k | zstream.zfree = nullptr; |
44 | 14.3k | zstream.opaque = nullptr; |
45 | 14.3k | zstream.next_in = nullptr; |
46 | 14.3k | zstream.avail_in = 0; |
47 | 14.3k | zstream.next_out = this->outbuf.get(); |
48 | 14.3k | zstream.avail_out = QIntC::to_uint(out_bufsize); |
49 | | |
50 | 14.3k | if (action == a_deflate && Pl_Flate::zopfli_enabled()) { |
51 | 0 | zopfli_buf = std::make_unique<std::string>(); |
52 | 0 | } |
53 | 14.3k | } |
54 | | |
55 | | Pl_Flate::Members::~Members() |
56 | 14.3k | { |
57 | 14.3k | if (initialized) { |
58 | 3.75k | z_stream& zstream = *(static_cast<z_stream*>(zdata)); |
59 | 3.75k | if (action == a_deflate) { |
60 | 0 | deflateEnd(&zstream); |
61 | 3.75k | } else { |
62 | 3.75k | inflateEnd(&zstream); |
63 | 3.75k | } |
64 | 3.75k | } |
65 | | |
66 | 14.3k | delete static_cast<z_stream*>(this->zdata); |
67 | 14.3k | zdata = nullptr; |
68 | 14.3k | } |
69 | | |
70 | | Pl_Flate::Pl_Flate( |
71 | | char const* identifier, Pipeline* next, action_e action, unsigned int out_bufsize_int) : |
72 | 14.3k | Pipeline(identifier, next), |
73 | 14.3k | m(std::make_unique<Members>(QIntC::to_size(out_bufsize_int), action)) |
74 | 14.3k | { |
75 | 14.3k | util::assertion(next, "Attempt to create Pl_Flate with nullptr as next"); |
76 | 14.3k | } |
77 | | |
78 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
79 | 14.3k | Pl_Flate::~Pl_Flate() = default; |
80 | | |
81 | | unsigned long long |
82 | | Pl_Flate::memory_limit() |
83 | 7.00k | { |
84 | 7.00k | return memory_limit_; |
85 | 7.00k | } |
86 | | |
87 | | void |
88 | | Pl_Flate::memory_limit(unsigned long long limit) |
89 | 24.1k | { |
90 | 24.1k | memory_limit_ = limit; |
91 | 24.1k | } |
92 | | |
93 | | void |
94 | | Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback) |
95 | 15.7k | { |
96 | 15.7k | m->callback = callback; |
97 | 15.7k | } |
98 | | |
99 | | void |
100 | | Pl_Flate::warn(char const* msg, int code) |
101 | 2.85k | { |
102 | 2.85k | if (m->callback) { |
103 | 2.85k | m->callback(msg, code); |
104 | 2.85k | } |
105 | 2.85k | } |
106 | | |
107 | | void |
108 | | Pl_Flate::write(unsigned char const* data, size_t len) |
109 | 251k | { |
110 | 251k | util::assertion( |
111 | 251k | m->outbuf.get(), identifier + ": Pl_Flate: write() called after finish() called"); |
112 | 251k | if (m->zopfli_buf) { |
113 | 0 | m->zopfli_buf->append(reinterpret_cast<char const*>(data), len); |
114 | 0 | return; |
115 | 0 | } |
116 | | |
117 | | // Write in chunks in case len is too big to fit in an int. Assume int is at least 32 bits. |
118 | 251k | static size_t const max_bytes = 1 << 30; |
119 | 251k | size_t bytes_left = len; |
120 | 251k | unsigned char const* buf = data; |
121 | 502k | while (bytes_left > 0) { |
122 | 251k | size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left); |
123 | 251k | handleData(buf, bytes, (m->action == a_inflate ? Z_SYNC_FLUSH : Z_NO_FLUSH)); |
124 | 251k | bytes_left -= bytes; |
125 | 251k | buf += bytes; |
126 | 251k | } |
127 | 251k | } |
128 | | |
129 | | void |
130 | | Pl_Flate::handleData(unsigned char const* data, size_t len, int flush) |
131 | 263k | { |
132 | 263k | util::no_ci_rt_error_if( |
133 | 263k | len > UINT_MAX, "Pl_Flate: zlib doesn't support data blocks larger than int"); |
134 | 263k | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
135 | | // zlib is known not to modify the data pointed to by next_in but doesn't declare the field |
136 | | // value const unless compiled to do so. |
137 | 263k | zstream.next_in = const_cast<unsigned char*>(data); |
138 | 263k | zstream.avail_in = QIntC::to_uint(len); |
139 | | |
140 | 263k | if (!m->initialized) { |
141 | 12.6k | int err = Z_OK; |
142 | | |
143 | | // deflateInit and inflateInit are macros that use old-style casts. |
144 | 12.6k | #if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__)) |
145 | 12.6k | # pragma GCC diagnostic push |
146 | 12.6k | # pragma GCC diagnostic ignored "-Wold-style-cast" |
147 | 12.6k | #endif |
148 | 12.6k | if (m->action == a_deflate) { |
149 | 0 | err = deflateInit(&zstream, compression_level); |
150 | 12.6k | } else { |
151 | 12.6k | err = inflateInit(&zstream); |
152 | 12.6k | } |
153 | 12.6k | #if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__)) |
154 | 12.6k | # pragma GCC diagnostic pop |
155 | 12.6k | #endif |
156 | | |
157 | 12.6k | checkError("Init", err); |
158 | 12.6k | m->initialized = true; |
159 | 12.6k | } |
160 | | |
161 | 263k | int err = Z_OK; |
162 | | |
163 | 263k | bool done = false; |
164 | 519k | while (!done) { |
165 | 263k | if (m->action == a_deflate) { |
166 | 0 | err = deflate(&zstream, flush); |
167 | 263k | } else { |
168 | 263k | err = inflate(&zstream, flush); |
169 | 263k | } |
170 | 263k | if ((m->action == a_inflate) && (err != Z_OK) && zstream.msg && |
171 | 9.14k | (strcmp(zstream.msg, "incorrect data check") == 0)) { |
172 | | // Other PDF readers ignore this specific error. Combining this with Z_SYNC_FLUSH |
173 | | // enables qpdf to handle some broken zlib streams without losing data. |
174 | 2.66k | err = Z_STREAM_END; |
175 | 2.66k | } |
176 | 263k | switch (err) { |
177 | 2.85k | case Z_BUF_ERROR: |
178 | | // Probably shouldn't be able to happen, but possible as a boundary condition: if the |
179 | | // last call to inflate exactly filled the output buffer, it's possible that the next |
180 | | // call to inflate could have nothing to do. There are PDF files in the wild that have |
181 | | // this error (including at least one in qpdf's test suite). In some cases, we want to |
182 | | // know about this, because it indicates incorrect compression, so call a callback if |
183 | | // provided. |
184 | 2.85k | warn("input stream is complete but output may still be valid", err); |
185 | 2.85k | done = true; |
186 | 2.85k | break; |
187 | | |
188 | 12.7k | case Z_STREAM_END: |
189 | 12.7k | done = true; |
190 | | // fall through |
191 | | |
192 | 253k | case Z_OK: |
193 | 253k | { |
194 | 253k | if ((zstream.avail_in == 0) && (zstream.avail_out > 0)) { |
195 | | // There is nothing left to read, and there was sufficient buffer space to write |
196 | | // everything we needed, so we're done for now. |
197 | 251k | done = true; |
198 | 251k | } |
199 | 253k | uLong ready = QIntC::to_ulong(m->out_bufsize - zstream.avail_out); |
200 | 253k | if (ready > 0) { |
201 | 225k | if (memory_limit_ && m->action != a_deflate) { |
202 | 225k | m->written += ready; |
203 | 225k | if (m->written > memory_limit_) { |
204 | 53 | throw std::runtime_error("PL_Flate memory limit exceeded"); |
205 | 53 | } |
206 | 225k | } |
207 | 225k | next()->write(m->outbuf.get(), ready); |
208 | 225k | zstream.next_out = m->outbuf.get(); |
209 | 225k | zstream.avail_out = QIntC::to_uint(m->out_bufsize); |
210 | 225k | } |
211 | 253k | } |
212 | 253k | break; |
213 | | |
214 | 253k | default: |
215 | 6.88k | checkError("data", err); |
216 | 263k | } |
217 | 263k | } |
218 | 263k | } |
219 | | |
220 | | void |
221 | | Pl_Flate::finish() |
222 | 13.3k | { |
223 | 13.3k | if (m->written > memory_limit_) { |
224 | 51 | throw std::runtime_error("PL_Flate memory limit exceeded"); |
225 | 51 | } |
226 | 13.2k | try { |
227 | 13.2k | if (m->zopfli_buf) { |
228 | 0 | finish_zopfli(); |
229 | 13.2k | } else if (m->outbuf.get()) { |
230 | 13.2k | if (m->initialized) { |
231 | 11.8k | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
232 | 11.8k | unsigned char buf[1]; |
233 | 11.8k | buf[0] = '\0'; |
234 | 11.8k | handleData(buf, 0, Z_FINISH); |
235 | 11.8k | int err = Z_OK; |
236 | 11.8k | if (m->action == a_deflate) { |
237 | 0 | err = deflateEnd(&zstream); |
238 | 11.8k | } else { |
239 | 11.8k | err = inflateEnd(&zstream); |
240 | 11.8k | } |
241 | 11.8k | m->initialized = false; |
242 | 11.8k | checkError("End", err); |
243 | 11.8k | } |
244 | | |
245 | 13.2k | m->outbuf = nullptr; |
246 | 13.2k | } |
247 | 13.2k | } catch (std::exception& e) { |
248 | 2.91k | try { |
249 | 2.91k | next()->finish(); |
250 | 2.91k | } catch (...) { |
251 | | // ignore secondary exception |
252 | 7 | } |
253 | 2.91k | throw std::runtime_error(e.what()); |
254 | 2.91k | } |
255 | 10.3k | next()->finish(); |
256 | 10.3k | } |
257 | | |
258 | | void |
259 | | Pl_Flate::setCompressionLevel(int level) |
260 | 0 | { |
261 | 0 | compression_level = level; |
262 | 0 | } |
263 | | |
264 | | void |
265 | | Pl_Flate::checkError(char const* prefix, int error_code) |
266 | 28.5k | { |
267 | 28.5k | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
268 | 28.5k | if (error_code != Z_OK) { |
269 | 6.88k | char const* action_str = (m->action == a_deflate ? "deflate" : "inflate"); |
270 | 6.88k | std::string msg = identifier + ": " + action_str + ": " + prefix + ": "; |
271 | | |
272 | 6.88k | if (zstream.msg) { |
273 | 6.47k | msg += zstream.msg; |
274 | 6.47k | } else { |
275 | 408 | switch (error_code) { |
276 | 0 | case Z_ERRNO: |
277 | 0 | msg += "zlib system error"; |
278 | 0 | break; |
279 | | |
280 | 0 | case Z_STREAM_ERROR: |
281 | 0 | msg += "zlib stream error"; |
282 | 0 | break; |
283 | | |
284 | 0 | case Z_DATA_ERROR: |
285 | 0 | msg += "zlib data error"; |
286 | 0 | break; |
287 | | |
288 | 0 | case Z_MEM_ERROR: |
289 | 0 | msg += "zlib memory error"; |
290 | 0 | break; |
291 | | |
292 | 0 | case Z_BUF_ERROR: |
293 | 0 | msg += "zlib buffer error"; |
294 | 0 | break; |
295 | | |
296 | 0 | case Z_VERSION_ERROR: |
297 | 0 | msg += "zlib version error"; |
298 | 0 | break; |
299 | | |
300 | 408 | default: |
301 | 408 | msg += std::string("zlib unknown error (") + std::to_string(error_code) + ")"; |
302 | 408 | break; |
303 | 408 | } |
304 | 408 | } |
305 | | |
306 | 6.88k | throw std::runtime_error(msg); |
307 | 6.88k | } |
308 | 28.5k | } |
309 | | |
310 | | void |
311 | | Pl_Flate::finish_zopfli() |
312 | 0 | { |
313 | | #ifdef ZOPFLI |
314 | | if (!m->zopfli_buf) { |
315 | | return; |
316 | | } |
317 | | auto buf = std::move(*m->zopfli_buf.release()); |
318 | | ZopfliOptions z_opt; |
319 | | ZopfliInitOptions(&z_opt); |
320 | | unsigned char* out{nullptr}; |
321 | | size_t out_size{0}; |
322 | | ZopfliCompress( |
323 | | &z_opt, |
324 | | ZOPFLI_FORMAT_ZLIB, |
325 | | reinterpret_cast<unsigned char const*>(buf.c_str()), |
326 | | buf.size(), |
327 | | &out, |
328 | | &out_size); |
329 | | std::unique_ptr<unsigned char, decltype(&free)> p(out, &free); |
330 | | next()->write(out, out_size); |
331 | | // next()->finish is called by finish() |
332 | | #endif |
333 | 0 | } |
334 | | |
335 | | bool |
336 | | Pl_Flate::zopfli_supported() |
337 | 0 | { |
338 | | #ifdef ZOPFLI |
339 | | return true; |
340 | | #else |
341 | 0 | return false; |
342 | 0 | #endif |
343 | 0 | } |
344 | | |
345 | | bool |
346 | | Pl_Flate::zopfli_enabled() |
347 | 0 | { |
348 | 0 | if (zopfli_supported()) { |
349 | 0 | std::string value; |
350 | 0 | static bool enabled = QUtil::get_env("QPDF_ZOPFLI", &value) && value != "disabled"; |
351 | 0 | return enabled; |
352 | 0 | } else { |
353 | 0 | return false; |
354 | 0 | } |
355 | 0 | } |
356 | | |
357 | | bool |
358 | | Pl_Flate::zopfli_check_env(QPDFLogger* logger) |
359 | 0 | { |
360 | 0 | if (Pl_Flate::zopfli_supported()) { |
361 | 0 | return true; |
362 | 0 | } |
363 | 0 | std::string value; |
364 | 0 | auto is_set = QUtil::get_env("QPDF_ZOPFLI", &value); |
365 | 0 | if (!is_set || value == "disabled" || value == "silent") { |
366 | 0 | return true; |
367 | 0 | } |
368 | 0 | if (!logger) { |
369 | 0 | logger = QPDFLogger::defaultLogger().get(); |
370 | 0 | } |
371 | | |
372 | | // This behavior is known in QPDFJob (for the --zopfli argument), Pl_Flate.hh, README.md, |
373 | | // and the manual. Do a case-insensitive search for zopfli if changing the behavior. |
374 | 0 | if (value == "force") { |
375 | 0 | throw std::runtime_error("QPDF_ZOPFLI=force, and zopfli support is not enabled"); |
376 | 0 | } |
377 | 0 | logger->warn("QPDF_ZOPFLI is set, but libqpdf was not built with zopfli support\n"); |
378 | 0 | logger->warn( |
379 | 0 | "Set QPDF_ZOPFLI=silent to suppress this warning and use zopfli when available.\n"); |
380 | 0 | return false; |
381 | 0 | } |