/src/qpdf/libqpdf/Pl_Flate.cc
Line | Count | Source |
#include <qpdf/Pl_Flate.hh>

#include <climits>
#include <cstring>
#include <utility>
#include <zlib.h>

#include <qpdf/QIntC.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/Util.hh>
#include <qpdf/qpdf-config.h>

#ifdef ZOPFLI
# include <zopfli.h>
#endif
15 | | |
16 | | using namespace qpdf; |
17 | | |
namespace
{
    // File-local cap on the number of bytes inflate may emit through a Pl_Flate. It is read and
    // written through Pl_Flate::memory_limit(); a value of zero means the cap is not enforced
    // while data is being processed.
    unsigned long long memory_limit_ = 0;
} // namespace
22 | | |
23 | | int Pl_Flate::compression_level = Z_DEFAULT_COMPRESSION; |
24 | | |
25 | | Pl_Flate::Members::Members(size_t out_bufsize, action_e action) : |
26 | 0 | out_bufsize(out_bufsize), |
27 | 0 | action(action), |
28 | 0 | initialized(false), |
29 | 0 | zdata(nullptr) |
30 | 0 | { |
31 | 0 | this->outbuf = QUtil::make_shared_array<unsigned char>(out_bufsize); |
32 | | // Indirect through zdata to reach the z_stream so we don't have to include zlib.h in |
33 | | // Pl_Flate.hh. This means people using shared library versions of qpdf don't have to have zlib |
34 | | // development files available, which particularly helps in a Windows environment. |
35 | 0 | zdata = new z_stream; |
36 | |
|
37 | 0 | util::no_ci_rt_error_if( |
38 | 0 | out_bufsize > UINT_MAX, |
39 | 0 | "Pl_Flate: zlib doesn't support buffer sizes larger than unsigned int"); |
40 | |
|
41 | 0 | z_stream& zstream = *(static_cast<z_stream*>(this->zdata)); |
42 | 0 | zstream.zalloc = nullptr; |
43 | 0 | zstream.zfree = nullptr; |
44 | 0 | zstream.opaque = nullptr; |
45 | 0 | zstream.next_in = nullptr; |
46 | 0 | zstream.avail_in = 0; |
47 | 0 | zstream.next_out = this->outbuf.get(); |
48 | 0 | zstream.avail_out = QIntC::to_uint(out_bufsize); |
49 | |
|
50 | 0 | if (action == a_deflate && Pl_Flate::zopfli_enabled()) { |
51 | 0 | zopfli_buf = std::make_unique<std::string>(); |
52 | 0 | } |
53 | 0 | } |
54 | | |
55 | | Pl_Flate::Members::~Members() |
56 | 0 | { |
57 | 0 | if (initialized) { |
58 | 0 | z_stream& zstream = *(static_cast<z_stream*>(zdata)); |
59 | 0 | if (action == a_deflate) { |
60 | 0 | deflateEnd(&zstream); |
61 | 0 | } else { |
62 | 0 | inflateEnd(&zstream); |
63 | 0 | } |
64 | 0 | } |
65 | |
|
66 | 0 | delete static_cast<z_stream*>(this->zdata); |
67 | 0 | zdata = nullptr; |
68 | 0 | } |
69 | | |
70 | | Pl_Flate::Pl_Flate( |
71 | | char const* identifier, Pipeline* next, action_e action, unsigned int out_bufsize_int) : |
72 | 0 | Pipeline(identifier, next), |
73 | 0 | m(std::make_unique<Members>(QIntC::to_size(out_bufsize_int), action)) |
74 | 0 | { |
75 | 0 | util::assertion(next, "Attempt to create Pl_Flate with nullptr as next"); |
76 | 0 | } |
77 | | |
78 | | // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
79 | 0 | Pl_Flate::~Pl_Flate() = default; |
80 | | |
81 | | unsigned long long |
82 | | Pl_Flate::memory_limit() |
83 | 0 | { |
84 | 0 | return memory_limit_; |
85 | 0 | } |
86 | | |
87 | | void |
88 | | Pl_Flate::memory_limit(unsigned long long limit) |
89 | 0 | { |
90 | 0 | memory_limit_ = limit; |
91 | 0 | } |
92 | | |
93 | | void |
94 | | Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback) |
95 | 0 | { |
96 | 0 | m->callback = callback; |
97 | 0 | } |
98 | | |
99 | | void |
100 | | Pl_Flate::warn(char const* msg, int code) |
101 | 0 | { |
102 | 0 | if (m->callback) { |
103 | 0 | m->callback(msg, code); |
104 | 0 | } |
105 | 0 | } |
106 | | |
107 | | void |
108 | | Pl_Flate::write(unsigned char const* data, size_t len) |
109 | 0 | { |
110 | 0 | util::assertion( |
111 | 0 | m->outbuf.get(), identifier + ": Pl_Flate: write() called after finish() called"); |
112 | 0 | if (m->zopfli_buf) { |
113 | 0 | m->zopfli_buf->append(reinterpret_cast<char const*>(data), len); |
114 | 0 | return; |
115 | 0 | } |
116 | | |
117 | | // Write in chunks in case len is too big to fit in an int. Assume int is at least 32 bits. |
118 | 0 | static size_t const max_bytes = 1 << 30; |
119 | 0 | size_t bytes_left = len; |
120 | 0 | unsigned char const* buf = data; |
121 | 0 | while (bytes_left > 0) { |
122 | 0 | size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left); |
123 | 0 | handleData(buf, bytes, (m->action == a_inflate ? Z_SYNC_FLUSH : Z_NO_FLUSH)); |
124 | 0 | bytes_left -= bytes; |
125 | 0 | buf += bytes; |
126 | 0 | } |
127 | 0 | } |
128 | | |
129 | | void |
130 | | Pl_Flate::handleData(unsigned char const* data, size_t len, int flush) |
131 | 0 | { |
132 | 0 | util::no_ci_rt_error_if( |
133 | 0 | len > UINT_MAX, "Pl_Flate: zlib doesn't support data blocks larger than int"); |
134 | 0 | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
135 | | // zlib is known not to modify the data pointed to by next_in but doesn't declare the field |
136 | | // value const unless compiled to do so. |
137 | 0 | zstream.next_in = const_cast<unsigned char*>(data); |
138 | 0 | zstream.avail_in = QIntC::to_uint(len); |
139 | |
|
140 | 0 | if (!m->initialized) { |
141 | 0 | int err = Z_OK; |
142 | | |
143 | | // deflateInit and inflateInit are macros that use old-style casts. |
144 | 0 | #if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__)) |
145 | 0 | # pragma GCC diagnostic push |
146 | 0 | # pragma GCC diagnostic ignored "-Wold-style-cast" |
147 | 0 | #endif |
148 | 0 | if (m->action == a_deflate) { |
149 | 0 | err = deflateInit(&zstream, compression_level); |
150 | 0 | } else { |
151 | 0 | err = inflateInit(&zstream); |
152 | 0 | } |
153 | 0 | #if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__)) |
154 | 0 | # pragma GCC diagnostic pop |
155 | 0 | #endif |
156 | |
|
157 | 0 | checkError("Init", err); |
158 | 0 | m->initialized = true; |
159 | 0 | } |
160 | |
|
161 | 0 | int err = Z_OK; |
162 | |
|
163 | 0 | bool done = false; |
164 | 0 | while (!done) { |
165 | 0 | if (m->action == a_deflate) { |
166 | 0 | err = deflate(&zstream, flush); |
167 | 0 | } else { |
168 | 0 | err = inflate(&zstream, flush); |
169 | 0 | } |
170 | 0 | if ((m->action == a_inflate) && (err != Z_OK) && zstream.msg && |
171 | 0 | (strcmp(zstream.msg, "incorrect data check") == 0)) { |
172 | | // Other PDF readers ignore this specific error. Combining this with Z_SYNC_FLUSH |
173 | | // enables qpdf to handle some broken zlib streams without losing data. |
174 | 0 | err = Z_STREAM_END; |
175 | 0 | } |
176 | 0 | switch (err) { |
177 | 0 | case Z_BUF_ERROR: |
178 | | // Probably shouldn't be able to happen, but possible as a boundary condition: if the |
179 | | // last call to inflate exactly filled the output buffer, it's possible that the next |
180 | | // call to inflate could have nothing to do. There are PDF files in the wild that have |
181 | | // this error (including at least one in qpdf's test suite). In some cases, we want to |
182 | | // know about this, because it indicates incorrect compression, so call a callback if |
183 | | // provided. |
184 | 0 | warn("input stream is complete but output may still be valid", err); |
185 | 0 | done = true; |
186 | 0 | break; |
187 | | |
188 | 0 | case Z_STREAM_END: |
189 | 0 | done = true; |
190 | | // fall through |
191 | |
|
192 | 0 | case Z_OK: |
193 | 0 | { |
194 | 0 | if ((zstream.avail_in == 0) && (zstream.avail_out > 0)) { |
195 | | // There is nothing left to read, and there was sufficient buffer space to write |
196 | | // everything we needed, so we're done for now. |
197 | 0 | done = true; |
198 | 0 | } |
199 | 0 | uLong ready = QIntC::to_ulong(m->out_bufsize - zstream.avail_out); |
200 | 0 | if (ready > 0) { |
201 | 0 | if (memory_limit_ && m->action != a_deflate) { |
202 | 0 | m->written += ready; |
203 | 0 | if (m->written > memory_limit_) { |
204 | 0 | throw std::runtime_error("PL_Flate memory limit exceeded"); |
205 | 0 | } |
206 | 0 | } |
207 | 0 | next()->write(m->outbuf.get(), ready); |
208 | 0 | zstream.next_out = m->outbuf.get(); |
209 | 0 | zstream.avail_out = QIntC::to_uint(m->out_bufsize); |
210 | 0 | } |
211 | 0 | } |
212 | 0 | break; |
213 | | |
214 | 0 | default: |
215 | 0 | checkError("data", err); |
216 | 0 | } |
217 | 0 | } |
218 | 0 | } |
219 | | |
220 | | void |
221 | | Pl_Flate::finish() |
222 | 0 | { |
223 | 0 | if (m->written > memory_limit_) { |
224 | 0 | throw std::runtime_error("PL_Flate memory limit exceeded"); |
225 | 0 | } |
226 | 0 | try { |
227 | 0 | if (m->zopfli_buf) { |
228 | 0 | finish_zopfli(); |
229 | 0 | } else if (m->outbuf.get()) { |
230 | 0 | if (m->initialized) { |
231 | 0 | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
232 | 0 | unsigned char buf[1]; |
233 | 0 | buf[0] = '\0'; |
234 | 0 | handleData(buf, 0, Z_FINISH); |
235 | 0 | int err = Z_OK; |
236 | 0 | if (m->action == a_deflate) { |
237 | 0 | err = deflateEnd(&zstream); |
238 | 0 | } else { |
239 | 0 | err = inflateEnd(&zstream); |
240 | 0 | } |
241 | 0 | m->initialized = false; |
242 | 0 | checkError("End", err); |
243 | 0 | } |
244 | |
|
245 | 0 | m->outbuf = nullptr; |
246 | 0 | } |
247 | 0 | } catch (std::exception& e) { |
248 | 0 | try { |
249 | 0 | next()->finish(); |
250 | 0 | } catch (...) { |
251 | | // ignore secondary exception |
252 | 0 | } |
253 | 0 | throw std::runtime_error(e.what()); |
254 | 0 | } |
255 | 0 | next()->finish(); |
256 | 0 | } |
257 | | |
258 | | void |
259 | | Pl_Flate::setCompressionLevel(int level) |
260 | 0 | { |
261 | 0 | compression_level = level; |
262 | 0 | } |
263 | | |
264 | | void |
265 | | Pl_Flate::checkError(char const* prefix, int error_code) |
266 | 0 | { |
267 | 0 | z_stream& zstream = *(static_cast<z_stream*>(m->zdata)); |
268 | 0 | if (error_code != Z_OK) { |
269 | 0 | char const* action_str = (m->action == a_deflate ? "deflate" : "inflate"); |
270 | 0 | std::string msg = identifier + ": " + action_str + ": " + prefix + ": "; |
271 | |
|
272 | 0 | if (zstream.msg) { |
273 | 0 | msg += zstream.msg; |
274 | 0 | } else { |
275 | 0 | switch (error_code) { |
276 | 0 | case Z_ERRNO: |
277 | 0 | msg += "zlib system error"; |
278 | 0 | break; |
279 | | |
280 | 0 | case Z_STREAM_ERROR: |
281 | 0 | msg += "zlib stream error"; |
282 | 0 | break; |
283 | | |
284 | 0 | case Z_DATA_ERROR: |
285 | 0 | msg += "zlib data error"; |
286 | 0 | break; |
287 | | |
288 | 0 | case Z_MEM_ERROR: |
289 | 0 | msg += "zlib memory error"; |
290 | 0 | break; |
291 | | |
292 | 0 | case Z_BUF_ERROR: |
293 | 0 | msg += "zlib buffer error"; |
294 | 0 | break; |
295 | | |
296 | 0 | case Z_VERSION_ERROR: |
297 | 0 | msg += "zlib version error"; |
298 | 0 | break; |
299 | | |
300 | 0 | default: |
301 | 0 | msg += std::string("zlib unknown error (") + std::to_string(error_code) + ")"; |
302 | 0 | break; |
303 | 0 | } |
304 | 0 | } |
305 | | |
306 | 0 | throw std::runtime_error(msg); |
307 | 0 | } |
308 | 0 | } |
309 | | |
310 | | void |
311 | | Pl_Flate::finish_zopfli() |
312 | 0 | { |
313 | | #ifdef ZOPFLI |
314 | | if (!m->zopfli_buf) { |
315 | | return; |
316 | | } |
317 | | auto buf = std::move(*m->zopfli_buf.release()); |
318 | | ZopfliOptions z_opt; |
319 | | ZopfliInitOptions(&z_opt); |
320 | | unsigned char* out{nullptr}; |
321 | | size_t out_size{0}; |
322 | | ZopfliCompress( |
323 | | &z_opt, |
324 | | ZOPFLI_FORMAT_ZLIB, |
325 | | reinterpret_cast<unsigned char const*>(buf.c_str()), |
326 | | buf.size(), |
327 | | &out, |
328 | | &out_size); |
329 | | std::unique_ptr<unsigned char, decltype(&free)> p(out, &free); |
330 | | next()->write(out, out_size); |
331 | | // next()->finish is called by finish() |
332 | | #endif |
333 | 0 | } |
334 | | |
335 | | bool |
336 | | Pl_Flate::zopfli_supported() |
337 | 0 | { |
338 | | #ifdef ZOPFLI |
339 | | return true; |
340 | | #else |
341 | 0 | return false; |
342 | 0 | #endif |
343 | 0 | } |
344 | | |
345 | | bool |
346 | | Pl_Flate::zopfli_enabled() |
347 | 0 | { |
348 | 0 | if (zopfli_supported()) { |
349 | 0 | std::string value; |
350 | 0 | static bool enabled = QUtil::get_env("QPDF_ZOPFLI", &value) && value != "disabled"; |
351 | 0 | return enabled; |
352 | 0 | } else { |
353 | 0 | return false; |
354 | 0 | } |
355 | 0 | } |
356 | | |
357 | | bool |
358 | | Pl_Flate::zopfli_check_env(QPDFLogger* logger) |
359 | 0 | { |
360 | 0 | if (Pl_Flate::zopfli_supported()) { |
361 | 0 | return true; |
362 | 0 | } |
363 | 0 | std::string value; |
364 | 0 | auto is_set = QUtil::get_env("QPDF_ZOPFLI", &value); |
365 | 0 | if (!is_set || value == "disabled" || value == "silent") { |
366 | 0 | return true; |
367 | 0 | } |
368 | 0 | if (!logger) { |
369 | 0 | logger = QPDFLogger::defaultLogger().get(); |
370 | 0 | } |
371 | | |
372 | | // This behavior is known in QPDFJob (for the --zopfli argument), Pl_Flate.hh, README.md, |
373 | | // and the manual. Do a case-insensitive search for zopfli if changing the behavior. |
374 | 0 | if (value == "force") { |
375 | 0 | throw std::runtime_error("QPDF_ZOPFLI=force, and zopfli support is not enabled"); |
376 | 0 | } |
377 | 0 | logger->warn("QPDF_ZOPFLI is set, but libqpdf was not built with zopfli support\n"); |
378 | 0 | logger->warn( |
379 | 0 | "Set QPDF_ZOPFLI=silent to suppress this warning and use zopfli when available.\n"); |
380 | 0 | return false; |
381 | 0 | } |