/src/qpdf/libqpdf/Pl_Flate.cc
Line | Count | Source |
1 | | #include <qpdf/Pl_Flate.hh> |
2 | | |
3 | | #include <climits> |
4 | | #include <cstring> |
5 | | #include <zlib.h> |
6 | | |
7 | | #include <qpdf/QIntC.hh> |
8 | | #include <qpdf/QUtil.hh> |
9 | | #include <qpdf/Util.hh> |
10 | | #include <qpdf/global_private.hh> |
11 | | #include <qpdf/qpdf-config.h> |
12 | | |
13 | | #ifdef ZOPFLI |
14 | | # include <zopfli.h> |
15 | | #endif |
16 | | |
17 | | using namespace qpdf; |
18 | | |
19 | | namespace |
20 | | { |
21 | | static unsigned long long const& memory_limit{global::Limits::flate_max_memory()}; |
22 | | } // namespace |
23 | | |
24 | | int Pl_Flate::compression_level = Z_DEFAULT_COMPRESSION; |
25 | | |
26 | | Pl_Flate::Members::Members(size_t out_bufsize, action_e action) : |
27 | 0 | out_bufsize(out_bufsize), |
28 | 0 | action(action), |
29 | 0 | initialized(false), |
30 | 0 | zdata(nullptr) |
31 | 0 | { |
32 | 0 | this->outbuf = QUtil::make_shared_array<unsigned char>(out_bufsize); |
33 | | // Indirect through zdata to reach the z_stream so we don't have to include zlib.h in |
34 | | // Pl_Flate.hh. This means people using shared library versions of qpdf don't have to have zlib |
35 | | // development files available, which particularly helps in a Windows environment. |
36 | 0 | zdata = new z_stream; |
37 | |
|
38 | 0 | util::no_ci_rt_error_if( |
39 | 0 | out_bufsize > UINT_MAX, |
40 | 0 | "Pl_Flate: zlib doesn't support buffer sizes larger than unsigned int"); |
41 | |
|
42 | 0 | z_stream& zstream = *(static_cast<z_stream*>(this->zdata)); |
43 | 0 | zstream.zalloc = nullptr; |
44 | 0 | zstream.zfree = nullptr; |
45 | 0 | zstream.opaque = nullptr; |
46 | 0 | zstream.next_in = nullptr; |
47 | 0 | zstream.avail_in = 0; |
48 | 0 | zstream.next_out = this->outbuf.get(); |
49 | 0 | zstream.avail_out = QIntC::to_uint(out_bufsize); |
50 | |
|
51 | 0 | if (action == a_deflate && Pl_Flate::zopfli_enabled()) { |
52 | 0 | zopfli_buf = std::make_unique<std::string>(); |
53 | 0 | } |
54 | 0 | } |
55 | | |
56 | | Pl_Flate::Members::~Members() |
57 | 0 | { |
58 | 0 | if (initialized) { |
59 | 0 | z_stream& zstream = *(static_cast<z_stream*>(zdata)); |
60 | 0 | if (action == a_deflate) { |
61 | 0 | deflateEnd(&zstream); |
62 | 0 | } else { |
63 | 0 | inflateEnd(&zstream); |
64 | 0 | } |
65 | 0 | } |
66 | |
|
67 | 0 | delete static_cast<z_stream*>(this->zdata); |
68 | 0 | zdata = nullptr; |
69 | 0 | } |
70 | | |
71 | | Pl_Flate::Pl_Flate( |
72 | | char const* identifier, Pipeline* next, action_e action, unsigned int out_bufsize_int) : |
73 | 0 | Pipeline(identifier, next), |
74 | 0 | m(std::make_unique<Members>(QIntC::to_size(out_bufsize_int), action)) |
75 | 0 | { |
76 | 0 | util::assertion(next, "Attempt to create Pl_Flate with nullptr as next"); |
77 | 0 | } |
78 | | |
79 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
80 | 0 | Pl_Flate::~Pl_Flate() = default; |
81 | | |
82 | | unsigned long long |
83 | | Pl_Flate::memory_limit() |
84 | 0 | { |
85 | 0 | return ::memory_limit; |
86 | 0 | } |
87 | | |
88 | | void |
89 | | Pl_Flate::memory_limit(unsigned long long limit) |
90 | 0 | { |
91 | 0 | global::Limits::flate_max_memory(limit); |
92 | 0 | } |
93 | | |
94 | | void |
95 | | Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback) |
96 | 0 | { |
97 | 0 | m->callback = callback; |
98 | 0 | } |
99 | | |
100 | | void |
101 | | Pl_Flate::warn(char const* msg, int code) |
102 | 0 | { |
103 | 0 | if (m->callback) { |
104 | 0 | m->callback(msg, code); |
105 | 0 | } |
106 | 0 | } |
107 | | |
108 | | void |
109 | | Pl_Flate::write(unsigned char const* data, size_t len) |
110 | 0 | { |
111 | 0 | util::assertion( |
112 | 0 | m->outbuf.get(), identifier + ": Pl_Flate: write() called after finish() called"); |
113 | 0 | if (m->zopfli_buf) { |
114 | 0 | m->zopfli_buf->append(reinterpret_cast<char const*>(data), len); |
115 | 0 | return; |
116 | 0 | } |
117 | | |
118 | | // Write in chunks in case len is too big to fit in an int. Assume int is at least 32 bits. |
119 | 0 | static size_t const max_bytes = 1 << 30; |
120 | 0 | size_t bytes_left = len; |
121 | 0 | unsigned char const* buf = data; |
122 | 0 | while (bytes_left > 0) { |
123 | 0 | size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left); |
124 | 0 | handleData(buf, bytes, (m->action == a_inflate ? Z_SYNC_FLUSH : Z_NO_FLUSH)); |
125 | 0 | bytes_left -= bytes; |
126 | 0 | buf += bytes; |
127 | 0 | } |
128 | 0 | } |
129 | | |
130 | | void |
131 | | Pl_Flate::handleData(unsigned char const* data, size_t len, int flush) |
132 | 0 | { |
133 | 0 | util::no_ci_rt_error_if( |
134 | 0 | len > UINT_MAX, "Pl_Flate: zlib doesn't support data blocks larger than int"); |
135 | 0 | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
136 | | // zlib is known not to modify the data pointed to by next_in but doesn't declare the field |
137 | | // value const unless compiled to do so. |
138 | 0 | zstream.next_in = const_cast<unsigned char*>(data); |
139 | 0 | zstream.avail_in = QIntC::to_uint(len); |
140 | |
|
141 | 0 | if (!m->initialized) { |
142 | 0 | int err = Z_OK; |
143 | | |
144 | | // deflateInit and inflateInit are macros that use old-style casts. |
145 | 0 | #if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__)) |
146 | 0 | # pragma GCC diagnostic push |
147 | 0 | # pragma GCC diagnostic ignored "-Wold-style-cast" |
148 | 0 | #endif |
149 | 0 | if (m->action == a_deflate) { |
150 | 0 | err = deflateInit(&zstream, compression_level); |
151 | 0 | } else { |
152 | 0 | err = inflateInit(&zstream); |
153 | 0 | } |
154 | 0 | #if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__)) |
155 | 0 | # pragma GCC diagnostic pop |
156 | 0 | #endif |
157 | |
|
158 | 0 | checkError("Init", err); |
159 | 0 | m->initialized = true; |
160 | 0 | } |
161 | |
|
162 | 0 | int err = Z_OK; |
163 | |
|
164 | 0 | bool done = false; |
165 | 0 | while (!done) { |
166 | 0 | if (m->action == a_deflate) { |
167 | 0 | err = deflate(&zstream, flush); |
168 | 0 | } else { |
169 | 0 | err = inflate(&zstream, flush); |
170 | 0 | } |
171 | 0 | if ((m->action == a_inflate) && (err != Z_OK) && zstream.msg && |
172 | 0 | (strcmp(zstream.msg, "incorrect data check") == 0)) { |
173 | | // Other PDF readers ignore this specific error. Combining this with Z_SYNC_FLUSH |
174 | | // enables qpdf to handle some broken zlib streams without losing data. |
175 | 0 | err = Z_STREAM_END; |
176 | 0 | } |
177 | 0 | switch (err) { |
178 | 0 | case Z_BUF_ERROR: |
179 | | // Probably shouldn't be able to happen, but possible as a boundary condition: if the |
180 | | // last call to inflate exactly filled the output buffer, it's possible that the next |
181 | | // call to inflate could have nothing to do. There are PDF files in the wild that have |
182 | | // this error (including at least one in qpdf's test suite). In some cases, we want to |
183 | | // know about this, because it indicates incorrect compression, so call a callback if |
184 | | // provided. |
185 | 0 | warn("input stream is complete but output may still be valid", err); |
186 | 0 | done = true; |
187 | 0 | break; |
188 | | |
189 | 0 | case Z_STREAM_END: |
190 | 0 | done = true; |
191 | | // fall through |
192 | |
|
193 | 0 | case Z_OK: |
194 | 0 | { |
195 | 0 | if ((zstream.avail_in == 0) && (zstream.avail_out > 0)) { |
196 | | // There is nothing left to read, and there was sufficient buffer space to write |
197 | | // everything we needed, so we're done for now. |
198 | 0 | done = true; |
199 | 0 | } |
200 | 0 | uLong ready = QIntC::to_ulong(m->out_bufsize - zstream.avail_out); |
201 | 0 | if (ready > 0) { |
202 | 0 | if (::memory_limit && m->action != a_deflate) { |
203 | 0 | m->written += ready; |
204 | 0 | if (m->written > ::memory_limit) { |
205 | 0 | throw std::runtime_error("PL_Flate memory limit exceeded"); |
206 | 0 | } |
207 | 0 | } |
208 | 0 | next()->write(m->outbuf.get(), ready); |
209 | 0 | zstream.next_out = m->outbuf.get(); |
210 | 0 | zstream.avail_out = QIntC::to_uint(m->out_bufsize); |
211 | 0 | } |
212 | 0 | } |
213 | 0 | break; |
214 | | |
215 | 0 | default: |
216 | 0 | checkError("data", err); |
217 | 0 | } |
218 | 0 | } |
219 | 0 | } |
220 | | |
221 | | void |
222 | | Pl_Flate::finish() |
223 | 0 | { |
224 | 0 | if (m->written > ::memory_limit) { |
225 | 0 | throw std::runtime_error("PL_Flate memory limit exceeded"); |
226 | 0 | } |
227 | 0 | try { |
228 | 0 | if (m->zopfli_buf) { |
229 | 0 | finish_zopfli(); |
230 | 0 | } else if (m->outbuf.get()) { |
231 | 0 | if (m->initialized) { |
232 | 0 | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
233 | 0 | unsigned char buf[1]; |
234 | 0 | buf[0] = '\0'; |
235 | 0 | handleData(buf, 0, Z_FINISH); |
236 | 0 | int err = Z_OK; |
237 | 0 | if (m->action == a_deflate) { |
238 | 0 | err = deflateEnd(&zstream); |
239 | 0 | } else { |
240 | 0 | err = inflateEnd(&zstream); |
241 | 0 | } |
242 | 0 | m->initialized = false; |
243 | 0 | checkError("End", err); |
244 | 0 | } |
245 | |
|
246 | 0 | m->outbuf = nullptr; |
247 | 0 | } |
248 | 0 | } catch (std::exception& e) { |
249 | 0 | try { |
250 | 0 | next()->finish(); |
251 | 0 | } catch (...) { |
252 | | // ignore secondary exception |
253 | 0 | } |
254 | 0 | throw std::runtime_error(e.what()); |
255 | 0 | } |
256 | 0 | next()->finish(); |
257 | 0 | } |
258 | | |
259 | | void |
260 | | Pl_Flate::setCompressionLevel(int level) |
261 | 0 | { |
262 | 0 | compression_level = level; |
263 | 0 | } |
264 | | |
265 | | void |
266 | | Pl_Flate::checkError(char const* prefix, int error_code) |
267 | 0 | { |
268 | 0 | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
269 | 0 | if (error_code != Z_OK) { |
270 | 0 | char const* action_str = (m->action == a_deflate ? "deflate" : "inflate"); |
271 | 0 | std::string msg = identifier + ": " + action_str + ": " + prefix + ": "; |
272 | |
|
273 | 0 | if (zstream.msg) { |
274 | 0 | msg += zstream.msg; |
275 | 0 | } else { |
276 | 0 | switch (error_code) { |
277 | 0 | case Z_ERRNO: |
278 | 0 | msg += "zlib system error"; |
279 | 0 | break; |
280 | | |
281 | 0 | case Z_STREAM_ERROR: |
282 | 0 | msg += "zlib stream error"; |
283 | 0 | break; |
284 | | |
285 | 0 | case Z_DATA_ERROR: |
286 | 0 | msg += "zlib data error"; |
287 | 0 | break; |
288 | | |
289 | 0 | case Z_MEM_ERROR: |
290 | 0 | msg += "zlib memory error"; |
291 | 0 | break; |
292 | | |
293 | 0 | case Z_BUF_ERROR: |
294 | 0 | msg += "zlib buffer error"; |
295 | 0 | break; |
296 | | |
297 | 0 | case Z_VERSION_ERROR: |
298 | 0 | msg += "zlib version error"; |
299 | 0 | break; |
300 | | |
301 | 0 | default: |
302 | 0 | msg += std::string("zlib unknown error (") + std::to_string(error_code) + ")"; |
303 | 0 | break; |
304 | 0 | } |
305 | 0 | } |
306 | | |
307 | 0 | throw std::runtime_error(msg); |
308 | 0 | } |
309 | 0 | } |
310 | | |
311 | | void |
312 | | Pl_Flate::finish_zopfli() |
313 | 0 | { |
314 | | #ifdef ZOPFLI |
315 | | if (!m->zopfli_buf) { |
316 | | return; |
317 | | } |
318 | | auto buf = std::move(*m->zopfli_buf.release()); |
319 | | ZopfliOptions z_opt; |
320 | | ZopfliInitOptions(&z_opt); |
321 | | unsigned char* out{nullptr}; |
322 | | size_t out_size{0}; |
323 | | ZopfliCompress( |
324 | | &z_opt, |
325 | | ZOPFLI_FORMAT_ZLIB, |
326 | | reinterpret_cast<unsigned char const*>(buf.c_str()), |
327 | | buf.size(), |
328 | | &out, |
329 | | &out_size); |
330 | | std::unique_ptr<unsigned char, decltype(&free)> p(out, &free); |
331 | | next()->write(out, out_size); |
332 | | // next()->finish is called by finish() |
333 | | #endif |
334 | 0 | } |
335 | | |
336 | | bool |
337 | | Pl_Flate::zopfli_supported() |
338 | 0 | { |
339 | | #ifdef ZOPFLI |
340 | | return true; |
341 | | #else |
342 | 0 | return false; |
343 | 0 | #endif |
344 | 0 | } |
345 | | |
346 | | bool |
347 | | Pl_Flate::zopfli_enabled() |
348 | 0 | { |
349 | 0 | if (zopfli_supported()) { |
350 | 0 | std::string value; |
351 | 0 | static bool enabled = QUtil::get_env("QPDF_ZOPFLI", &value) && value != "disabled"; |
352 | 0 | return enabled; |
353 | 0 | } else { |
354 | 0 | return false; |
355 | 0 | } |
356 | 0 | } |
357 | | |
358 | | bool |
359 | | Pl_Flate::zopfli_check_env(QPDFLogger* logger) |
360 | 0 | { |
361 | 0 | if (Pl_Flate::zopfli_supported()) { |
362 | 0 | return true; |
363 | 0 | } |
364 | 0 | std::string value; |
365 | 0 | auto is_set = QUtil::get_env("QPDF_ZOPFLI", &value); |
366 | 0 | if (!is_set || value == "disabled" || value == "silent") { |
367 | 0 | return true; |
368 | 0 | } |
369 | 0 | if (!logger) { |
370 | 0 | logger = QPDFLogger::defaultLogger().get(); |
371 | 0 | } |
372 | | |
373 | | // This behavior is known in QPDFJob (for the --zopfli argument), Pl_Flate.hh, README.md, |
374 | | // and the manual. Do a case-insensitive search for zopfli if changing the behavior. |
375 | 0 | if (value == "force") { |
376 | 0 | throw std::runtime_error("QPDF_ZOPFLI=force, and zopfli support is not enabled"); |
377 | 0 | } |
378 | 0 | logger->warn("QPDF_ZOPFLI is set, but libqpdf was not built with zopfli support\n"); |
379 | 0 | logger->warn( |
380 | 0 | "Set QPDF_ZOPFLI=silent to suppress this warning and use zopfli when available.\n"); |
381 | 0 | return false; |
382 | 0 | } |