/src/qpdf/libqpdf/Pl_Flate.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/Pl_Flate.hh> |
2 | | |
3 | | #include <climits> |
4 | | #include <cstring> |
5 | | #include <zlib.h> |
6 | | |
7 | | #include <qpdf/QIntC.hh> |
8 | | #include <qpdf/QUtil.hh> |
9 | | #include <qpdf/qpdf-config.h> |
10 | | |
11 | | #ifdef ZOPFLI |
12 | | # include <zopfli.h> |
13 | | #endif |
14 | | |
15 | | namespace |
16 | | { |
17 | | unsigned long long memory_limit_{0}; |
18 | | } // namespace |
19 | | |
20 | | int Pl_Flate::compression_level = Z_DEFAULT_COMPRESSION; |
21 | | |
22 | | Pl_Flate::Members::Members(size_t out_bufsize, action_e action) : |
23 | 0 | out_bufsize(out_bufsize), |
24 | 0 | action(action), |
25 | 0 | initialized(false), |
26 | 0 | zdata(nullptr) |
27 | 0 | { |
28 | 0 | this->outbuf = QUtil::make_shared_array<unsigned char>(out_bufsize); |
29 | | // Indirect through zdata to reach the z_stream so we don't have to include zlib.h in |
30 | | // Pl_Flate.hh. This means people using shared library versions of qpdf don't have to have zlib |
31 | | // development files available, which particularly helps in a Windows environment. |
32 | 0 | zdata = new z_stream; |
33 | |
|
34 | 0 | if (out_bufsize > UINT_MAX) { |
35 | 0 | throw std::runtime_error( |
36 | 0 | "Pl_Flate: zlib doesn't support buffer sizes larger than unsigned int"); |
37 | 0 | } |
38 | | |
39 | 0 | z_stream& zstream = *(static_cast<z_stream*>(this->zdata)); |
40 | 0 | zstream.zalloc = nullptr; |
41 | 0 | zstream.zfree = nullptr; |
42 | 0 | zstream.opaque = nullptr; |
43 | 0 | zstream.next_in = nullptr; |
44 | 0 | zstream.avail_in = 0; |
45 | 0 | zstream.next_out = this->outbuf.get(); |
46 | 0 | zstream.avail_out = QIntC::to_uint(out_bufsize); |
47 | |
|
48 | 0 | if (action == a_deflate && Pl_Flate::zopfli_enabled()) { |
49 | 0 | zopfli_buf = std::make_unique<std::string>(); |
50 | 0 | } |
51 | 0 | } |
52 | | |
53 | | Pl_Flate::Members::~Members() |
54 | 0 | { |
55 | 0 | if (initialized) { |
56 | 0 | z_stream& zstream = *(static_cast<z_stream*>(zdata)); |
57 | 0 | if (action == a_deflate) { |
58 | 0 | deflateEnd(&zstream); |
59 | 0 | } else { |
60 | 0 | inflateEnd(&zstream); |
61 | 0 | } |
62 | 0 | } |
63 | |
|
64 | 0 | delete static_cast<z_stream*>(this->zdata); |
65 | 0 | zdata = nullptr; |
66 | 0 | } |
67 | | |
68 | | Pl_Flate::Pl_Flate( |
69 | | char const* identifier, Pipeline* next, action_e action, unsigned int out_bufsize_int) : |
70 | 0 | Pipeline(identifier, next), |
71 | 0 | m(std::make_unique<Members>(QIntC::to_size(out_bufsize_int), action)) |
72 | 0 | { |
73 | 0 | if (!next) { |
74 | 0 | throw std::logic_error("Attempt to create Pl_Flate with nullptr as next"); |
75 | 0 | } |
76 | 0 | } |
77 | | |
78 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
79 | 0 | Pl_Flate::~Pl_Flate() = default; |
80 | | |
81 | | unsigned long long |
82 | | Pl_Flate::memory_limit() |
83 | 0 | { |
84 | 0 | return memory_limit_; |
85 | 0 | } |
86 | | |
87 | | void |
88 | | Pl_Flate::memory_limit(unsigned long long limit) |
89 | 0 | { |
90 | 0 | memory_limit_ = limit; |
91 | 0 | } |
92 | | |
93 | | void |
94 | | Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback) |
95 | 0 | { |
96 | 0 | m->callback = callback; |
97 | 0 | } |
98 | | |
99 | | void |
100 | | Pl_Flate::warn(char const* msg, int code) |
101 | 0 | { |
102 | 0 | if (m->callback) { |
103 | 0 | m->callback(msg, code); |
104 | 0 | } |
105 | 0 | } |
106 | | |
107 | | void |
108 | | Pl_Flate::write(unsigned char const* data, size_t len) |
109 | 0 | { |
110 | 0 | if (!m->outbuf) { |
111 | 0 | throw std::logic_error( |
112 | 0 | this->identifier + ": Pl_Flate: write() called after finish() called"); |
113 | 0 | } |
114 | 0 | if (m->zopfli_buf) { |
115 | 0 | m->zopfli_buf->append(reinterpret_cast<char const*>(data), len); |
116 | 0 | return; |
117 | 0 | } |
118 | | |
119 | | // Write in chunks in case len is too big to fit in an int. Assume int is at least 32 bits. |
120 | 0 | static size_t const max_bytes = 1 << 30; |
121 | 0 | size_t bytes_left = len; |
122 | 0 | unsigned char const* buf = data; |
123 | 0 | while (bytes_left > 0) { |
124 | 0 | size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left); |
125 | 0 | handleData(buf, bytes, (m->action == a_inflate ? Z_SYNC_FLUSH : Z_NO_FLUSH)); |
126 | 0 | bytes_left -= bytes; |
127 | 0 | buf += bytes; |
128 | 0 | } |
129 | 0 | } |
130 | | |
131 | | void |
132 | | Pl_Flate::handleData(unsigned char const* data, size_t len, int flush) |
133 | 0 | { |
134 | 0 | if (len > UINT_MAX) { |
135 | 0 | throw std::runtime_error("Pl_Flate: zlib doesn't support data blocks larger than int"); |
136 | 0 | } |
137 | 0 | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
138 | | // zlib is known not to modify the data pointed to by next_in but doesn't declare the field |
139 | | // value const unless compiled to do so. |
140 | 0 | zstream.next_in = const_cast<unsigned char*>(data); |
141 | 0 | zstream.avail_in = QIntC::to_uint(len); |
142 | |
|
143 | 0 | if (!m->initialized) { |
144 | 0 | int err = Z_OK; |
145 | | |
146 | | // deflateInit and inflateInit are macros that use old-style casts. |
147 | 0 | #if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__)) |
148 | 0 | # pragma GCC diagnostic push |
149 | 0 | # pragma GCC diagnostic ignored "-Wold-style-cast" |
150 | 0 | #endif |
151 | 0 | if (m->action == a_deflate) { |
152 | 0 | err = deflateInit(&zstream, compression_level); |
153 | 0 | } else { |
154 | 0 | err = inflateInit(&zstream); |
155 | 0 | } |
156 | 0 | #if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__)) |
157 | 0 | # pragma GCC diagnostic pop |
158 | 0 | #endif |
159 | |
|
160 | 0 | checkError("Init", err); |
161 | 0 | m->initialized = true; |
162 | 0 | } |
163 | |
|
164 | 0 | int err = Z_OK; |
165 | |
|
166 | 0 | bool done = false; |
167 | 0 | while (!done) { |
168 | 0 | if (m->action == a_deflate) { |
169 | 0 | err = deflate(&zstream, flush); |
170 | 0 | } else { |
171 | 0 | err = inflate(&zstream, flush); |
172 | 0 | } |
173 | 0 | if ((m->action == a_inflate) && (err != Z_OK) && zstream.msg && |
174 | 0 | (strcmp(zstream.msg, "incorrect data check") == 0)) { |
175 | | // Other PDF readers ignore this specific error. Combining this with Z_SYNC_FLUSH |
176 | | // enables qpdf to handle some broken zlib streams without losing data. |
177 | 0 | err = Z_STREAM_END; |
178 | 0 | } |
179 | 0 | switch (err) { |
180 | 0 | case Z_BUF_ERROR: |
181 | | // Probably shouldn't be able to happen, but possible as a boundary condition: if the |
182 | | // last call to inflate exactly filled the output buffer, it's possible that the next |
183 | | // call to inflate could have nothing to do. There are PDF files in the wild that have |
184 | | // this error (including at least one in qpdf's test suite). In some cases, we want to |
185 | | // know about this, because it indicates incorrect compression, so call a callback if |
186 | | // provided. |
187 | 0 | warn("input stream is complete but output may still be valid", err); |
188 | 0 | done = true; |
189 | 0 | break; |
190 | | |
191 | 0 | case Z_STREAM_END: |
192 | 0 | done = true; |
193 | | // fall through |
194 | |
|
195 | 0 | case Z_OK: |
196 | 0 | { |
197 | 0 | if ((zstream.avail_in == 0) && (zstream.avail_out > 0)) { |
198 | | // There is nothing left to read, and there was sufficient buffer space to write |
199 | | // everything we needed, so we're done for now. |
200 | 0 | done = true; |
201 | 0 | } |
202 | 0 | uLong ready = QIntC::to_ulong(m->out_bufsize - zstream.avail_out); |
203 | 0 | if (ready > 0) { |
204 | 0 | if (memory_limit_ && m->action != a_deflate) { |
205 | 0 | m->written += ready; |
206 | 0 | if (m->written > memory_limit_) { |
207 | 0 | throw std::runtime_error("PL_Flate memory limit exceeded"); |
208 | 0 | } |
209 | 0 | } |
210 | 0 | next()->write(m->outbuf.get(), ready); |
211 | 0 | zstream.next_out = m->outbuf.get(); |
212 | 0 | zstream.avail_out = QIntC::to_uint(m->out_bufsize); |
213 | 0 | } |
214 | 0 | } |
215 | 0 | break; |
216 | | |
217 | 0 | default: |
218 | 0 | checkError("data", err); |
219 | 0 | break; |
220 | 0 | } |
221 | 0 | } |
222 | 0 | } |
223 | | |
224 | | void |
225 | | Pl_Flate::finish() |
226 | 0 | { |
227 | 0 | if (m->written > memory_limit_) { |
228 | 0 | throw std::runtime_error("PL_Flate memory limit exceeded"); |
229 | 0 | } |
230 | 0 | try { |
231 | 0 | if (m->zopfli_buf) { |
232 | 0 | finish_zopfli(); |
233 | 0 | } else if (m->outbuf.get()) { |
234 | 0 | if (m->initialized) { |
235 | 0 | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
236 | 0 | unsigned char buf[1]; |
237 | 0 | buf[0] = '\0'; |
238 | 0 | handleData(buf, 0, Z_FINISH); |
239 | 0 | int err = Z_OK; |
240 | 0 | if (m->action == a_deflate) { |
241 | 0 | err = deflateEnd(&zstream); |
242 | 0 | } else { |
243 | 0 | err = inflateEnd(&zstream); |
244 | 0 | } |
245 | 0 | m->initialized = false; |
246 | 0 | checkError("End", err); |
247 | 0 | } |
248 | |
|
249 | 0 | m->outbuf = nullptr; |
250 | 0 | } |
251 | 0 | } catch (std::exception& e) { |
252 | 0 | try { |
253 | 0 | next()->finish(); |
254 | 0 | } catch (...) { |
255 | | // ignore secondary exception |
256 | 0 | } |
257 | 0 | throw std::runtime_error(e.what()); |
258 | 0 | } |
259 | 0 | next()->finish(); |
260 | 0 | } |
261 | | |
262 | | void |
263 | | Pl_Flate::setCompressionLevel(int level) |
264 | 0 | { |
265 | 0 | compression_level = level; |
266 | 0 | } |
267 | | |
268 | | void |
269 | | Pl_Flate::checkError(char const* prefix, int error_code) |
270 | 0 | { |
271 | 0 | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
272 | 0 | if (error_code != Z_OK) { |
273 | 0 | char const* action_str = (m->action == a_deflate ? "deflate" : "inflate"); |
274 | 0 | std::string msg = identifier + ": " + action_str + ": " + prefix + ": "; |
275 | |
|
276 | 0 | if (zstream.msg) { |
277 | 0 | msg += zstream.msg; |
278 | 0 | } else { |
279 | 0 | switch (error_code) { |
280 | 0 | case Z_ERRNO: |
281 | 0 | msg += "zlib system error"; |
282 | 0 | break; |
283 | | |
284 | 0 | case Z_STREAM_ERROR: |
285 | 0 | msg += "zlib stream error"; |
286 | 0 | break; |
287 | | |
288 | 0 | case Z_DATA_ERROR: |
289 | 0 | msg += "zlib data error"; |
290 | 0 | break; |
291 | | |
292 | 0 | case Z_MEM_ERROR: |
293 | 0 | msg += "zlib memory error"; |
294 | 0 | break; |
295 | | |
296 | 0 | case Z_BUF_ERROR: |
297 | 0 | msg += "zlib buffer error"; |
298 | 0 | break; |
299 | | |
300 | 0 | case Z_VERSION_ERROR: |
301 | 0 | msg += "zlib version error"; |
302 | 0 | break; |
303 | | |
304 | 0 | default: |
305 | 0 | msg += std::string("zlib unknown error (") + std::to_string(error_code) + ")"; |
306 | 0 | break; |
307 | 0 | } |
308 | 0 | } |
309 | | |
310 | 0 | throw std::runtime_error(msg); |
311 | 0 | } |
312 | 0 | } |
313 | | |
314 | | void |
315 | | Pl_Flate::finish_zopfli() |
316 | 0 | { |
317 | | #ifdef ZOPFLI |
318 | | if (!m->zopfli_buf) { |
319 | | return; |
320 | | } |
321 | | auto buf = std::move(*m->zopfli_buf.release()); |
322 | | ZopfliOptions z_opt; |
323 | | ZopfliInitOptions(&z_opt); |
324 | | unsigned char* out{nullptr}; |
325 | | size_t out_size{0}; |
326 | | ZopfliCompress( |
327 | | &z_opt, |
328 | | ZOPFLI_FORMAT_ZLIB, |
329 | | reinterpret_cast<unsigned char const*>(buf.c_str()), |
330 | | buf.size(), |
331 | | &out, |
332 | | &out_size); |
333 | | std::unique_ptr<unsigned char, decltype(&free)> p(out, &free); |
334 | | next()->write(out, out_size); |
335 | | // next()->finish is called by finish() |
336 | | #endif |
337 | 0 | } |
338 | | |
339 | | bool |
340 | | Pl_Flate::zopfli_supported() |
341 | 0 | { |
342 | | #ifdef ZOPFLI |
343 | | return true; |
344 | | #else |
345 | 0 | return false; |
346 | 0 | #endif |
347 | 0 | } |
348 | | |
349 | | bool |
350 | | Pl_Flate::zopfli_enabled() |
351 | 0 | { |
352 | 0 | if (zopfli_supported()) { |
353 | 0 | std::string value; |
354 | 0 | static bool enabled = QUtil::get_env("QPDF_ZOPFLI", &value) && value != "disabled"; |
355 | 0 | return enabled; |
356 | 0 | } else { |
357 | 0 | return false; |
358 | 0 | } |
359 | 0 | } |
360 | | |
361 | | bool |
362 | | Pl_Flate::zopfli_check_env(QPDFLogger* logger) |
363 | 0 | { |
364 | 0 | if (Pl_Flate::zopfli_supported()) { |
365 | 0 | return true; |
366 | 0 | } |
367 | 0 | std::string value; |
368 | 0 | auto is_set = QUtil::get_env("QPDF_ZOPFLI", &value); |
369 | 0 | if (!is_set || value == "disabled" || value == "silent") { |
370 | 0 | return true; |
371 | 0 | } |
372 | 0 | if (!logger) { |
373 | 0 | logger = QPDFLogger::defaultLogger().get(); |
374 | 0 | } |
375 | | |
376 | | // This behavior is known in QPDFJob (for the --zopfli argument), Pl_Flate.hh, README.md, |
377 | | // and the manual. Do a case-insensitive search for zopfli if changing the behavior. |
378 | 0 | if (value == "force") { |
379 | 0 | throw std::runtime_error("QPDF_ZOPFLI=force, and zopfli support is not enabled"); |
380 | 0 | } |
381 | 0 | logger->warn("QPDF_ZOPFLI is set, but libqpdf was not built with zopfli support\n"); |
382 | 0 | logger->warn( |
383 | 0 | "Set QPDF_ZOPFLI=silent to suppress this warning and use zopfli when available.\n"); |
384 | 0 | return false; |
385 | 0 | } |