/src/qpdf/libqpdf/Pl_Flate.cc
Line | Count | Source |
1 | | #include <qpdf/Pl_Flate.hh> |
2 | | |
3 | | #include <climits> |
4 | | #include <cstring> |
5 | | #include <zlib.h> |
6 | | |
7 | | #include <qpdf/QIntC.hh> |
8 | | #include <qpdf/QUtil.hh> |
9 | | #include <qpdf/Util.hh> |
10 | | #include <qpdf/global_private.hh> |
11 | | #include <qpdf/qpdf-config.h> |
12 | | |
13 | | #ifdef ZOPFLI |
14 | | # include <zopfli.h> |
15 | | #endif |
16 | | |
17 | | using namespace qpdf; |
18 | | |
19 | | namespace |
20 | | { |
21 | | static unsigned long long const& memory_limit{global::Limits::flate_max_memory()}; |
22 | | } // namespace |
23 | | |
24 | | int Pl_Flate::compression_level = Z_DEFAULT_COMPRESSION; |
25 | | |
26 | | Pl_Flate::Members::Members(size_t out_bufsize, action_e action) : |
27 | 75.6k | out_bufsize(out_bufsize), |
28 | 75.6k | action(action), |
29 | 75.6k | initialized(false), |
30 | 75.6k | zdata(nullptr) |
31 | 75.6k | { |
32 | 75.6k | this->outbuf = QUtil::make_shared_array<unsigned char>(out_bufsize); |
33 | | // Indirect through zdata to reach the z_stream so we don't have to include zlib.h in |
34 | | // Pl_Flate.hh. This means people using shared library versions of qpdf don't have to have zlib |
35 | | // development files available, which particularly helps in a Windows environment. |
36 | 75.6k | zdata = new z_stream; |
37 | | |
38 | 75.6k | util::no_ci_rt_error_if( |
39 | 75.6k | out_bufsize > UINT_MAX, |
40 | 75.6k | "Pl_Flate: zlib doesn't support buffer sizes larger than unsigned int"); |
41 | | |
42 | 75.6k | z_stream& zstream = *(static_cast<z_stream*>(this->zdata)); |
43 | 75.6k | zstream.zalloc = nullptr; |
44 | 75.6k | zstream.zfree = nullptr; |
45 | 75.6k | zstream.opaque = nullptr; |
46 | 75.6k | zstream.next_in = nullptr; |
47 | 75.6k | zstream.avail_in = 0; |
48 | 75.6k | zstream.next_out = this->outbuf.get(); |
49 | 75.6k | zstream.avail_out = QIntC::to_uint(out_bufsize); |
50 | | |
51 | 75.6k | if (action == a_deflate && Pl_Flate::zopfli_enabled()) { |
52 | 0 | zopfli_buf = std::make_unique<std::string>(); |
53 | 0 | } |
54 | 75.6k | } |
55 | | |
56 | | Pl_Flate::Members::~Members() |
57 | 75.6k | { |
58 | 75.6k | if (initialized) { |
59 | 3.24k | z_stream& zstream = *(static_cast<z_stream*>(zdata)); |
60 | 3.24k | if (action == a_deflate) { |
61 | 1.08k | deflateEnd(&zstream); |
62 | 2.15k | } else { |
63 | 2.15k | inflateEnd(&zstream); |
64 | 2.15k | } |
65 | 3.24k | } |
66 | | |
67 | 75.6k | delete static_cast<z_stream*>(this->zdata); |
68 | 75.6k | zdata = nullptr; |
69 | 75.6k | } |
70 | | |
71 | | Pl_Flate::Pl_Flate( |
72 | | char const* identifier, Pipeline* next, action_e action, unsigned int out_bufsize_int) : |
73 | 75.6k | Pipeline(identifier, next), |
74 | 75.6k | m(std::make_unique<Members>(QIntC::to_size(out_bufsize_int), action)) |
75 | 75.6k | { |
76 | 75.6k | util::assertion(next, "Attempt to create Pl_Flate with nullptr as next"); |
77 | 75.6k | } |
78 | | |
79 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
80 | 75.6k | Pl_Flate::~Pl_Flate() = default; |
81 | | |
82 | | unsigned long long |
83 | | Pl_Flate::memory_limit() |
84 | 3.99k | { |
85 | 3.99k | return ::memory_limit; |
86 | 3.99k | } |
87 | | |
88 | | void |
89 | | Pl_Flate::memory_limit(unsigned long long limit) |
90 | 0 | { |
91 | 0 | global::Limits::flate_max_memory(limit); |
92 | 0 | } |
93 | | |
94 | | void |
95 | | Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback) |
96 | 15.2k | { |
97 | 15.2k | m->callback = callback; |
98 | 15.2k | } |
99 | | |
100 | | void |
101 | | Pl_Flate::warn(char const* msg, int code) |
102 | 3.15k | { |
103 | 3.15k | if (m->callback) { |
104 | 3.15k | m->callback(msg, code); |
105 | 3.15k | } |
106 | 3.15k | } |
107 | | |
108 | | void |
109 | | Pl_Flate::write(unsigned char const* data, size_t len) |
110 | 3.91M | { |
111 | 3.91M | util::assertion( |
112 | 3.91M | m->outbuf.get(), identifier + ": Pl_Flate: write() called after finish() called"); |
113 | 3.91M | if (m->zopfli_buf) { |
114 | 0 | m->zopfli_buf->append(reinterpret_cast<char const*>(data), len); |
115 | 0 | return; |
116 | 0 | } |
117 | | |
118 | | // Write in chunks in case len is too big to fit in an int. Assume int is at least 32 bits. |
119 | 3.91M | static size_t const max_bytes = 1 << 30; |
120 | 3.91M | size_t bytes_left = len; |
121 | 3.91M | unsigned char const* buf = data; |
122 | 7.83M | while (bytes_left > 0) { |
123 | 3.91M | size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left); |
124 | 3.91M | handleData(buf, bytes, (m->action == a_inflate ? Z_SYNC_FLUSH : Z_NO_FLUSH)); |
125 | 3.91M | bytes_left -= bytes; |
126 | 3.91M | buf += bytes; |
127 | 3.91M | } |
128 | 3.91M | } |
129 | | |
130 | | void |
131 | | Pl_Flate::handleData(unsigned char const* data, size_t len, int flush) |
132 | 3.98M | { |
133 | 3.98M | util::no_ci_rt_error_if( |
134 | 3.98M | len > UINT_MAX, "Pl_Flate: zlib doesn't support data blocks larger than int"); |
135 | 3.98M | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
136 | | // zlib is known not to modify the data pointed to by next_in but doesn't declare the field |
137 | | // value const unless compiled to do so. |
138 | 3.98M | zstream.next_in = const_cast<unsigned char*>(data); |
139 | 3.98M | zstream.avail_in = QIntC::to_uint(len); |
140 | | |
141 | 3.98M | if (!m->initialized) { |
142 | 68.8k | int err = Z_OK; |
143 | | |
144 | | // deflateInit and inflateInit are macros that use old-style casts. |
145 | 68.8k | #if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__)) |
146 | 68.8k | # pragma GCC diagnostic push |
147 | 68.8k | # pragma GCC diagnostic ignored "-Wold-style-cast" |
148 | 68.8k | #endif |
149 | 68.8k | if (m->action == a_deflate) { |
150 | 54.6k | err = deflateInit(&zstream, compression_level); |
151 | 54.6k | } else { |
152 | 14.1k | err = inflateInit(&zstream); |
153 | 14.1k | } |
154 | 68.8k | #if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__)) |
155 | 68.8k | # pragma GCC diagnostic pop |
156 | 68.8k | #endif |
157 | | |
158 | 68.8k | checkError("Init", err); |
159 | 68.8k | m->initialized = true; |
160 | 68.8k | } |
161 | | |
162 | 3.98M | int err = Z_OK; |
163 | | |
164 | 3.98M | bool done = false; |
165 | 7.96M | while (!done) { |
166 | 3.98M | if (m->action == a_deflate) { |
167 | 3.79M | err = deflate(&zstream, flush); |
168 | 3.79M | } else { |
169 | 189k | err = inflate(&zstream, flush); |
170 | 189k | } |
171 | 3.98M | if ((m->action == a_inflate) && (err != Z_OK) && zstream.msg && |
172 | 16.7k | (strcmp(zstream.msg, "incorrect data check") == 0)) { |
173 | | // Other PDF readers ignore this specific error. Combining this with Z_SYNC_FLUSH |
174 | | // enables qpdf to handle some broken zlib streams without losing data. |
175 | 12.8k | err = Z_STREAM_END; |
176 | 12.8k | } |
177 | 3.98M | switch (err) { |
178 | 3.15k | case Z_BUF_ERROR: |
179 | | // Probably shouldn't be able to happen, but possible as a boundary condition: if the |
180 | | // last call to inflate exactly filled the output buffer, it's possible that the next |
181 | | // call to inflate could have nothing to do. There are PDF files in the wild that have |
182 | | // this error (including at least one in qpdf's test suite). In some cases, we want to |
183 | | // know about this, because it indicates incorrect compression, so call a callback if |
184 | | // provided. |
185 | 3.15k | warn("input stream is complete but output may still be valid", err); |
186 | 3.15k | done = true; |
187 | 3.15k | break; |
188 | | |
189 | 79.9k | case Z_STREAM_END: |
190 | 79.9k | done = true; |
191 | | // fall through |
192 | | |
193 | 3.97M | case Z_OK: |
194 | 3.97M | { |
195 | 3.97M | if ((zstream.avail_in == 0) && (zstream.avail_out > 0)) { |
196 | | // There is nothing left to read, and there was sufficient buffer space to write |
197 | | // everything we needed, so we're done for now. |
198 | 3.96M | done = true; |
199 | 3.96M | } |
200 | 3.97M | uLong ready = QIntC::to_ulong(m->out_bufsize - zstream.avail_out); |
201 | 3.97M | if (ready > 0) { |
202 | 264k | if (::memory_limit && m->action != a_deflate) { |
203 | 152k | m->written += ready; |
204 | 152k | if (m->written > ::memory_limit) { |
205 | 62 | throw std::runtime_error("PL_Flate memory limit exceeded"); |
206 | 62 | } |
207 | 152k | } |
208 | 264k | next()->write(m->outbuf.get(), ready); |
209 | 264k | zstream.next_out = m->outbuf.get(); |
210 | 264k | zstream.avail_out = QIntC::to_uint(m->out_bufsize); |
211 | 264k | } |
212 | 3.97M | } |
213 | 3.97M | break; |
214 | | |
215 | 3.97M | default: |
216 | 3.96k | checkError("data", err); |
217 | 3.98M | } |
218 | 3.98M | } |
219 | 3.98M | } |
220 | | |
221 | | void |
222 | | Pl_Flate::finish() |
223 | 70.6k | { |
224 | 70.6k | if (m->written > ::memory_limit) { |
225 | 60 | throw std::runtime_error("PL_Flate memory limit exceeded"); |
226 | 60 | } |
227 | 70.5k | try { |
228 | 70.5k | if (m->zopfli_buf) { |
229 | 0 | finish_zopfli(); |
230 | 70.5k | } else if (m->outbuf.get()) { |
231 | 70.5k | if (m->initialized) { |
232 | 67.1k | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
233 | 67.1k | unsigned char buf[1]; |
234 | 67.1k | buf[0] = '\0'; |
235 | 67.1k | handleData(buf, 0, Z_FINISH); |
236 | 67.1k | int err = Z_OK; |
237 | 67.1k | if (m->action == a_deflate) { |
238 | 53.5k | err = deflateEnd(&zstream); |
239 | 53.5k | } else { |
240 | 13.6k | err = inflateEnd(&zstream); |
241 | 13.6k | } |
242 | 67.1k | m->initialized = false; |
243 | 67.1k | checkError("End", err); |
244 | 67.1k | } |
245 | | |
246 | 70.5k | m->outbuf = nullptr; |
247 | 70.5k | } |
248 | 70.5k | } catch (std::exception& e) { |
249 | 1.57k | try { |
250 | 1.57k | next()->finish(); |
251 | 1.57k | } catch (...) { |
252 | | // ignore secondary exception |
253 | 3 | } |
254 | 1.57k | throw std::runtime_error(e.what()); |
255 | 1.57k | } |
256 | 69.0k | next()->finish(); |
257 | 69.0k | } |
258 | | |
259 | | void |
260 | | Pl_Flate::setCompressionLevel(int level) |
261 | 0 | { |
262 | 0 | compression_level = level; |
263 | 0 | } |
264 | | |
265 | | void |
266 | | Pl_Flate::checkError(char const* prefix, int error_code) |
267 | 138k | { |
268 | 138k | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
269 | 138k | if (error_code != Z_OK) { |
270 | 3.96k | char const* action_str = (m->action == a_deflate ? "deflate" : "inflate"); |
271 | 3.96k | std::string msg = identifier + ": " + action_str + ": " + prefix + ": "; |
272 | | |
273 | 3.96k | if (zstream.msg) { |
274 | 3.93k | msg += zstream.msg; |
275 | 3.93k | } else { |
276 | 24 | switch (error_code) { |
277 | 0 | case Z_ERRNO: |
278 | 0 | msg += "zlib system error"; |
279 | 0 | break; |
280 | | |
281 | 0 | case Z_STREAM_ERROR: |
282 | 0 | msg += "zlib stream error"; |
283 | 0 | break; |
284 | | |
285 | 0 | case Z_DATA_ERROR: |
286 | 0 | msg += "zlib data error"; |
287 | 0 | break; |
288 | | |
289 | 0 | case Z_MEM_ERROR: |
290 | 0 | msg += "zlib memory error"; |
291 | 0 | break; |
292 | | |
293 | 0 | case Z_BUF_ERROR: |
294 | 0 | msg += "zlib buffer error"; |
295 | 0 | break; |
296 | | |
297 | 0 | case Z_VERSION_ERROR: |
298 | 0 | msg += "zlib version error"; |
299 | 0 | break; |
300 | | |
301 | 24 | default: |
302 | 24 | msg += std::string("zlib unknown error (") + std::to_string(error_code) + ")"; |
303 | 24 | break; |
304 | 24 | } |
305 | 24 | } |
306 | | |
307 | 3.96k | throw std::runtime_error(msg); |
308 | 3.96k | } |
309 | 138k | } |
310 | | |
311 | | void |
312 | | Pl_Flate::finish_zopfli() |
313 | 0 | { |
314 | | #ifdef ZOPFLI |
315 | | if (!m->zopfli_buf) { |
316 | | return; |
317 | | } |
318 | | auto buf = std::move(*m->zopfli_buf.release()); |
319 | | ZopfliOptions z_opt; |
320 | | ZopfliInitOptions(&z_opt); |
321 | | unsigned char* out{nullptr}; |
322 | | size_t out_size{0}; |
323 | | ZopfliCompress( |
324 | | &z_opt, |
325 | | ZOPFLI_FORMAT_ZLIB, |
326 | | reinterpret_cast<unsigned char const*>(buf.c_str()), |
327 | | buf.size(), |
328 | | &out, |
329 | | &out_size); |
330 | | std::unique_ptr<unsigned char, decltype(&free)> p(out, &free); |
331 | | next()->write(out, out_size); |
332 | | // next()->finish is called by finish() |
333 | | #endif |
334 | 0 | } |
335 | | |
336 | | bool |
337 | | Pl_Flate::zopfli_supported() |
338 | 60.4k | { |
339 | | #ifdef ZOPFLI |
340 | | return true; |
341 | | #else |
342 | 60.4k | return false; |
343 | 60.4k | #endif |
344 | 60.4k | } |
345 | | |
346 | | bool |
347 | | Pl_Flate::zopfli_enabled() |
348 | 60.4k | { |
349 | 60.4k | if (zopfli_supported()) { |
350 | 0 | std::string value; |
351 | 0 | static bool enabled = QUtil::get_env("QPDF_ZOPFLI", &value) && value != "disabled"; |
352 | 0 | return enabled; |
353 | 60.4k | } else { |
354 | 60.4k | return false; |
355 | 60.4k | } |
356 | 60.4k | } |
357 | | |
358 | | bool |
359 | | Pl_Flate::zopfli_check_env(QPDFLogger* logger) |
360 | 0 | { |
361 | 0 | if (Pl_Flate::zopfli_supported()) { |
362 | 0 | return true; |
363 | 0 | } |
364 | 0 | std::string value; |
365 | 0 | auto is_set = QUtil::get_env("QPDF_ZOPFLI", &value); |
366 | 0 | if (!is_set || value == "disabled" || value == "silent") { |
367 | 0 | return true; |
368 | 0 | } |
369 | 0 | if (!logger) { |
370 | 0 | logger = QPDFLogger::defaultLogger().get(); |
371 | 0 | } |
372 | | |
373 | | // This behavior is known in QPDFJob (for the --zopfli argument), Pl_Flate.hh, README.md, |
374 | | // and the manual. Do a case-insensitive search for zopfli if changing the behavior. |
375 | 0 | if (value == "force") { |
376 | 0 | throw std::runtime_error("QPDF_ZOPFLI=force, and zopfli support is not enabled"); |
377 | 0 | } |
378 | 0 | logger->warn("QPDF_ZOPFLI is set, but libqpdf was not built with zopfli support\n"); |
379 | 0 | logger->warn( |
380 | 0 | "Set QPDF_ZOPFLI=silent to suppress this warning and use zopfli when available.\n"); |
381 | 0 | return false; |
382 | 0 | } |