/src/mupdf/source/pdf/pdf-stream.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2024 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "mupdf/pdf.h" |
25 | | |
26 | | #include <string.h> |
27 | | |
28 | | int |
29 | | pdf_obj_num_is_stream(fz_context *ctx, pdf_document *doc, int num) |
30 | 395k | { |
31 | 395k | pdf_xref_entry *entry; |
32 | | |
33 | 395k | if (num <= 0 || num >= pdf_xref_len(ctx, doc)) |
34 | 707 | return 0; |
35 | | |
36 | 790k | fz_try(ctx) |
37 | 790k | entry = pdf_cache_object(ctx, doc, num); |
38 | 790k | fz_catch(ctx) |
39 | 562 | { |
40 | 562 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
41 | 562 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
42 | 562 | fz_report_error(ctx); |
43 | 562 | return 0; |
44 | 562 | } |
45 | | |
46 | 394k | return entry->stm_ofs != 0 || entry->stm_buf; |
47 | 395k | } |
48 | | |
49 | | int |
50 | | pdf_is_stream(fz_context *ctx, pdf_obj *ref) |
51 | 120k | { |
52 | 120k | pdf_document *doc = pdf_get_indirect_document(ctx, ref); |
53 | 120k | if (doc) |
54 | 92.3k | return pdf_obj_num_is_stream(ctx, doc, pdf_to_num(ctx, ref)); |
55 | 28.3k | return 0; |
56 | 120k | } |
57 | | |
58 | | /* |
59 | | * Scan stream dictionary for an explicit /Crypt filter |
60 | | */ |
61 | | static int |
62 | | pdf_stream_has_crypt(fz_context *ctx, pdf_obj *stm) |
63 | 99.2k | { |
64 | 99.2k | pdf_obj *filters; |
65 | 99.2k | pdf_obj *obj; |
66 | 99.2k | int i; |
67 | | |
68 | 99.2k | filters = pdf_dict_geta(ctx, stm, PDF_NAME(Filter), PDF_NAME(F)); |
69 | 99.2k | if (filters) |
70 | 79.7k | { |
71 | 79.7k | if (pdf_name_eq(ctx, filters, PDF_NAME(Crypt))) |
72 | 0 | return 1; |
73 | 79.7k | if (pdf_is_array(ctx, filters)) |
74 | 1.73k | { |
75 | 1.73k | int n = pdf_array_len(ctx, filters); |
76 | 3.54k | for (i = 0; i < n; i++) |
77 | 1.81k | { |
78 | 1.81k | obj = pdf_array_get(ctx, filters, i); |
79 | 1.81k | if (pdf_name_eq(ctx, obj, PDF_NAME(Crypt))) |
80 | 0 | return 1; |
81 | 1.81k | } |
82 | 1.73k | } |
83 | 79.7k | } |
84 | 99.2k | return 0; |
85 | 99.2k | } |
86 | | |
87 | | static fz_jbig2_globals * |
88 | | pdf_load_jbig2_globals(fz_context *ctx, pdf_obj *dict) |
89 | 4 | { |
90 | 4 | fz_jbig2_globals *globals; |
91 | 4 | fz_buffer *buf = NULL; |
92 | | |
93 | 4 | fz_var(buf); |
94 | | |
95 | 4 | if ((globals = pdf_find_item(ctx, fz_drop_jbig2_globals_imp, dict)) != NULL) |
96 | 0 | return globals; |
97 | | |
98 | 4 | if (pdf_mark_obj(ctx, dict)) |
99 | 1 | fz_throw(ctx, FZ_ERROR_FORMAT, "cyclic reference when loading JBIG2 globals"); |
100 | | |
101 | 6 | fz_try(ctx) |
102 | 6 | { |
103 | 3 | buf = pdf_load_stream(ctx, dict); |
104 | 3 | globals = fz_load_jbig2_globals(ctx, buf); |
105 | 3 | if (globals) |
106 | 2 | pdf_store_item(ctx, dict, globals, fz_buffer_storage(ctx, buf, NULL)); |
107 | 3 | } |
108 | 6 | fz_always(ctx) |
109 | 3 | { |
110 | 3 | fz_drop_buffer(ctx, buf); |
111 | 3 | pdf_unmark_obj(ctx, dict); |
112 | 3 | } |
113 | 3 | fz_catch(ctx) |
114 | 1 | { |
115 | 1 | fz_rethrow(ctx); |
116 | 1 | } |
117 | | |
118 | 2 | return globals; |
119 | 3 | } |
120 | | |
121 | | static void |
122 | | build_compression_params(fz_context *ctx, pdf_obj *f, pdf_obj *p, fz_compression_params *params) |
123 | 92.1k | { |
124 | 92.1k | params->type = FZ_IMAGE_RAW; |
125 | | |
126 | 92.1k | if (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) || pdf_name_eq(ctx, f, PDF_NAME(CCF))) |
127 | 11.0k | { |
128 | 11.0k | params->type = FZ_IMAGE_FAX; |
129 | 11.0k | params->u.fax.k = pdf_dict_get_int_default(ctx, p, PDF_NAME(K), 0); |
130 | 11.0k | params->u.fax.end_of_line = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EndOfLine), 0); |
131 | 11.0k | params->u.fax.encoded_byte_align = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EncodedByteAlign), 0); |
132 | 11.0k | params->u.fax.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1728); |
133 | 11.0k | params->u.fax.rows = pdf_dict_get_int_default(ctx, p, PDF_NAME(Rows), 0); |
134 | 11.0k | params->u.fax.end_of_block = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EndOfBlock), 1); |
135 | 11.0k | params->u.fax.black_is_1 = pdf_dict_get_bool_default(ctx, p, PDF_NAME(BlackIs1), 0); |
136 | 11.0k | } |
137 | 81.0k | else if (pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) || pdf_name_eq(ctx, f, PDF_NAME(DCT))) |
138 | 539 | { |
139 | 539 | params->type = FZ_IMAGE_JPEG; |
140 | 539 | params->u.jpeg.color_transform = pdf_dict_get_int_default(ctx, p, PDF_NAME(ColorTransform), -1); |
141 | 539 | params->u.jpeg.invert_cmyk = 0; |
142 | 539 | } |
143 | 80.4k | else if (pdf_name_eq(ctx, f, PDF_NAME(RunLengthDecode)) || pdf_name_eq(ctx, f, PDF_NAME(RL))) |
144 | 16 | { |
145 | 16 | params->type = FZ_IMAGE_RLD; |
146 | 16 | } |
147 | 80.4k | else if (pdf_name_eq(ctx, f, PDF_NAME(FlateDecode)) || pdf_name_eq(ctx, f, PDF_NAME(Fl))) |
148 | 76.7k | { |
149 | 76.7k | params->type = FZ_IMAGE_FLATE; |
150 | 76.7k | params->u.flate.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1); |
151 | 76.7k | params->u.flate.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1); |
152 | 76.7k | params->u.flate.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1); |
153 | 76.7k | params->u.flate.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8); |
154 | 76.7k | } |
155 | 3.69k | else if (pdf_name_eq(ctx, f, PDF_NAME(LZWDecode)) || pdf_name_eq(ctx, f, PDF_NAME(LZW))) |
156 | 6 | { |
157 | 6 | params->type = FZ_IMAGE_LZW; |
158 | 6 | params->u.lzw.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1); |
159 | 6 | params->u.lzw.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1); |
160 | 6 | params->u.lzw.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1); |
161 | 6 | params->u.lzw.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8); |
162 | 6 | params->u.lzw.early_change = pdf_dict_get_int_default(ctx, p, PDF_NAME(EarlyChange), 1); |
163 | 6 | } |
164 | 3.68k | else if (pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode))) |
165 | 616 | { |
166 | 616 | pdf_obj *g = pdf_dict_get(ctx, p, PDF_NAME(JBIG2Globals)); |
167 | | |
168 | 616 | params->type = FZ_IMAGE_JBIG2; |
169 | 616 | params->u.jbig2.globals = NULL; |
170 | 616 | params->u.jbig2.embedded = 1; /* jbig2 streams are always embedded without file headers */ |
171 | 616 | if (g) |
172 | 5 | { |
173 | 5 | if (!pdf_is_stream(ctx, g)) |
174 | 1 | fz_warn(ctx, "jbig2 globals is not a stream, skipping globals"); |
175 | 4 | else |
176 | 4 | params->u.jbig2.globals = pdf_load_jbig2_globals(ctx, g); |
177 | 5 | } |
178 | 616 | } |
179 | 92.1k | } |
180 | | |
181 | | /* |
182 | | * Create a filter given a name and param dictionary. |
183 | | */ |
184 | | static fz_stream * |
185 | | build_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params, int might_be_image) |
186 | 92.1k | { |
187 | 92.1k | fz_compression_params local_params; |
188 | | |
189 | 92.1k | local_params.u.jbig2.globals = NULL; |
190 | 92.1k | if (params == NULL) |
191 | 73.9k | params = &local_params; |
192 | | |
193 | 92.1k | if (!might_be_image && |
194 | 92.1k | (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) || |
195 | 26.2k | pdf_name_eq(ctx, f, PDF_NAME(CCF)) || |
196 | 26.2k | pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) || |
197 | 26.2k | pdf_name_eq(ctx, f, PDF_NAME(DCT)) || |
198 | 26.2k | pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode)) || |
199 | 26.2k | pdf_name_eq(ctx, f, PDF_NAME(JPXDecode)))) |
200 | 6 | { |
201 | 6 | fz_warn(ctx, "Can't open image only stream for non-image purposes"); |
202 | 6 | return fz_open_memory(ctx, (unsigned char *)"", 0); |
203 | 6 | } |
204 | | |
205 | 92.1k | build_compression_params(ctx, f, p, params); |
206 | | |
207 | | /* If we were using params we were passed in, and we successfully |
208 | | * recognised the image type, we can use the existing filter and |
209 | | * shortstop here. */ |
210 | 92.1k | if (params != &local_params && params->type != FZ_IMAGE_RAW) |
211 | 16.7k | return fz_keep_stream(ctx, chain); /* nothing to do */ |
212 | | |
213 | 75.3k | else if (params->type == FZ_IMAGE_JBIG2) |
214 | 527 | { |
215 | 527 | fz_stream *stm; |
216 | 1.05k | fz_try(ctx) |
217 | 1.05k | stm = fz_open_image_decomp_stream(ctx, chain, params, NULL); |
218 | 1.05k | fz_always(ctx) |
219 | 527 | fz_drop_jbig2_globals(ctx, local_params.u.jbig2.globals); |
220 | 527 | fz_catch(ctx) |
221 | 0 | fz_rethrow(ctx); |
222 | 527 | return stm; |
223 | 527 | } |
224 | | |
225 | 74.8k | else if (params->type != FZ_IMAGE_RAW) |
226 | 71.7k | return fz_open_image_decomp_stream(ctx, chain, params, NULL); |
227 | | |
228 | 3.07k | else if (pdf_name_eq(ctx, f, PDF_NAME(ASCIIHexDecode)) || pdf_name_eq(ctx, f, PDF_NAME(AHx))) |
229 | 1.25k | return fz_open_ahxd(ctx, chain); |
230 | | |
231 | 1.82k | else if (pdf_name_eq(ctx, f, PDF_NAME(ASCII85Decode)) || pdf_name_eq(ctx, f, PDF_NAME(A85))) |
232 | 43 | return fz_open_a85d(ctx, chain); |
233 | | |
234 | 1.78k | else if (pdf_name_eq(ctx, f, PDF_NAME(JPXDecode))) |
235 | 1.31k | return fz_keep_stream(ctx, chain); /* JPX decoding is special cased in the image loading code */ |
236 | | |
237 | 469 | else if (pdf_name_eq(ctx, f, PDF_NAME(Crypt))) |
238 | 0 | { |
239 | 0 | if (!doc->crypt) |
240 | 0 | fz_warn(ctx, "crypt filter in unencrypted document"); |
241 | 0 | else |
242 | 0 | { |
243 | 0 | pdf_obj *name = pdf_dict_get(ctx, p, PDF_NAME(Name)); |
244 | 0 | if (pdf_is_name(ctx, name)) |
245 | 0 | return pdf_open_crypt_with_filter(ctx, chain, doc->crypt, name, num, gen); |
246 | 0 | } |
247 | 0 | } |
248 | | |
249 | 469 | else |
250 | 469 | fz_warn(ctx, "unknown filter name (%s)", pdf_to_name(ctx, f)); |
251 | | |
252 | 469 | return fz_keep_stream(ctx, chain); |
253 | 92.1k | } |
254 | | |
255 | | /* Build filter, and assume ownership of chain */ |
256 | | static fz_stream * |
257 | | build_filter_drop(fz_context *ctx, fz_stream *tail, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params, int might_be_image) |
258 | 3.05k | { |
259 | 3.05k | fz_stream *head; |
260 | 6.10k | fz_try(ctx) |
261 | 6.10k | head = build_filter(ctx, tail, doc, f, p, num, gen, params, might_be_image); |
262 | 6.10k | fz_always(ctx) |
263 | 3.05k | fz_drop_stream(ctx, tail); |
264 | 3.05k | fz_catch(ctx) |
265 | 0 | fz_rethrow(ctx); |
266 | 3.05k | return head; |
267 | 3.05k | } |
268 | | |
269 | | /* |
270 | | * Build a chain of filters given filter names and param dicts. |
271 | | * If chain is given, start filter chain with it. |
272 | | * Assume ownership of chain. |
273 | | */ |
274 | | static fz_stream * |
275 | | build_filter_chain_drop(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params, int might_be_image) |
276 | 2.96k | { |
277 | 2.96k | fz_var(chain); |
278 | 5.93k | fz_try(ctx) |
279 | 5.93k | { |
280 | 2.96k | int i, n = pdf_array_len(ctx, fs); |
281 | 6.02k | for (i = 0; i < n; i++) |
282 | 3.05k | { |
283 | 3.05k | pdf_obj *f = pdf_array_get(ctx, fs, i); |
284 | 3.05k | pdf_obj *p = pdf_array_get(ctx, ps, i); |
285 | 3.05k | chain = build_filter_drop(ctx, chain, doc, f, p, num, gen, (i == n-1 ? params : NULL), might_be_image); |
286 | 3.05k | } |
287 | 2.96k | } |
288 | 5.93k | fz_catch(ctx) |
289 | 0 | fz_rethrow(ctx); |
290 | 2.96k | return chain; |
291 | 2.96k | } |
292 | | |
293 | | static fz_stream * |
294 | | build_filter_chain(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params, int might_be_image) |
295 | 2.96k | { |
296 | 2.96k | return build_filter_chain_drop(ctx, fz_keep_stream(ctx, chain), doc, fs, ps, num, gen, params, might_be_image); |
297 | 2.96k | } |
298 | | |
299 | | /* |
300 | | * Build a filter for reading raw stream data. |
301 | | * This is a null filter to constrain reading to the stream length (and to |
302 | | * allow for other people accessing the file), followed by a decryption |
303 | | * filter. |
304 | | * |
305 | | * orig_num and orig_gen are used purely to seed the encryption. |
306 | | */ |
307 | | static fz_stream * |
308 | | pdf_open_raw_filter(fz_context *ctx, fz_stream *file_stm, pdf_document *doc, pdf_obj *stmobj, int num, int *orig_num, int *orig_gen, int64_t offset) |
309 | 106k | { |
310 | 106k | pdf_xref_entry *x = NULL; |
311 | 106k | fz_stream *null_stm, *crypt_stm; |
312 | 106k | int hascrypt; |
313 | 106k | int64_t len; |
314 | | |
315 | 106k | if (num > 0 && num < pdf_xref_len(ctx, doc)) |
316 | 106k | { |
317 | 106k | x = pdf_get_xref_entry(ctx, doc, num); |
318 | 106k | } |
319 | 106k | if (x == NULL) |
320 | 633 | { |
321 | | /* We only end up here when called from pdf_open_stream_with_offset to parse new format XRef sections. */ |
322 | | /* New style XRef sections must have generation number 0. */ |
323 | 633 | *orig_num = num; |
324 | 633 | *orig_gen = 0; |
325 | 633 | } |
326 | 105k | else |
327 | 105k | { |
328 | 105k | *orig_num = x->num; |
329 | 105k | *orig_gen = x->gen; |
330 | 105k | if (x->stm_buf) |
331 | 7.25k | return fz_open_buffer(ctx, x->stm_buf); |
332 | 105k | } |
333 | | |
334 | 99.2k | hascrypt = pdf_stream_has_crypt(ctx, stmobj); |
335 | 99.2k | len = pdf_dict_get_int64(ctx, stmobj, PDF_NAME(Length)); |
336 | 99.2k | if (len < 0) |
337 | 0 | len = 0; |
338 | 99.2k | null_stm = fz_open_endstream_filter(ctx, file_stm, (uint64_t)len, offset); |
339 | 99.2k | if (doc->crypt && !hascrypt) |
340 | 1.99k | { |
341 | 3.99k | fz_try(ctx) |
342 | 3.99k | crypt_stm = pdf_open_crypt(ctx, null_stm, doc->crypt, *orig_num, *orig_gen); |
343 | 3.99k | fz_always(ctx) |
344 | 1.99k | fz_drop_stream(ctx, null_stm); |
345 | 1.99k | fz_catch(ctx) |
346 | 0 | fz_rethrow(ctx); |
347 | 1.99k | return crypt_stm; |
348 | 1.99k | } |
349 | 97.2k | return null_stm; |
350 | 99.2k | } |
351 | | |
352 | | /* |
353 | | * Construct a filter to decode a stream, constraining |
354 | | * to stream length and decrypting. |
355 | | */ |
356 | | static fz_stream * |
357 | | pdf_open_filter(fz_context *ctx, pdf_document *doc, fz_stream *file_stm, pdf_obj *stmobj, int num, int64_t offset, fz_compression_params *imparams, int might_be_image) |
358 | 106k | { |
359 | 106k | pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F)); |
360 | 106k | pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP)); |
361 | 106k | int orig_num, orig_gen; |
362 | 106k | fz_stream *rstm, *fstm; |
363 | | |
364 | 106k | rstm = pdf_open_raw_filter(ctx, file_stm, doc, stmobj, num, &orig_num, &orig_gen, offset); |
365 | 213k | fz_try(ctx) |
366 | 213k | { |
367 | 106k | if (pdf_is_name(ctx, filters)) |
368 | 77.9k | fstm = build_filter(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams, might_be_image); |
369 | 28.5k | else if (pdf_array_len(ctx, filters) > 0) |
370 | 1.73k | fstm = build_filter_chain(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams, might_be_image); |
371 | 26.7k | else |
372 | 26.7k | { |
373 | 26.7k | if (imparams) |
374 | 353 | imparams->type = FZ_IMAGE_RAW; |
375 | 26.7k | fstm = fz_keep_stream(ctx, rstm); |
376 | 26.7k | } |
377 | 106k | } |
378 | 213k | fz_always(ctx) |
379 | 106k | fz_drop_stream(ctx, rstm); |
380 | 106k | fz_catch(ctx) |
381 | 2 | fz_rethrow(ctx); |
382 | | |
383 | 106k | return fstm; |
384 | 106k | } |
385 | | |
386 | | fz_stream * |
387 | | pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *file_stm, fz_compression_params *imparams) |
388 | 45.1k | { |
389 | 45.1k | pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F)); |
390 | 45.1k | pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP)); |
391 | | |
392 | 45.1k | if (pdf_is_name(ctx, filters)) |
393 | 11.0k | return build_filter(ctx, file_stm, doc, filters, params, 0, 0, imparams, 1); |
394 | 34.0k | else if (pdf_array_len(ctx, filters) > 0) |
395 | 1.23k | return build_filter_chain(ctx, file_stm, doc, filters, params, 0, 0, imparams, 1); |
396 | | |
397 | 32.7k | if (imparams) |
398 | 32.7k | imparams->type = FZ_IMAGE_RAW; |
399 | 32.7k | return fz_open_null_filter(ctx, file_stm, length, fz_tell(ctx, file_stm)); |
400 | 45.1k | } |
401 | | |
402 | | void |
403 | | pdf_load_compressed_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int length, fz_stream *file_stm, int indexed, fz_compressed_image *image) |
404 | 45.1k | { |
405 | 45.1k | fz_stream *istm = NULL, *leech = NULL, *decomp = NULL; |
406 | 45.1k | fz_pixmap *pixmap = NULL; |
407 | 45.1k | fz_compressed_buffer *bc; |
408 | 45.1k | int dummy_l2factor = 0; |
409 | | |
410 | 45.1k | fz_var(istm); |
411 | 45.1k | fz_var(leech); |
412 | 45.1k | fz_var(decomp); |
413 | 45.1k | fz_var(pixmap); |
414 | | |
415 | 45.1k | bc = fz_new_compressed_buffer(ctx); |
416 | 90.2k | fz_try(ctx) |
417 | 90.2k | { |
418 | 45.1k | bc->buffer = fz_new_buffer(ctx, 1024); |
419 | 45.1k | istm = pdf_open_inline_stream(ctx, doc, dict, length, file_stm, &bc->params); |
420 | 45.1k | leech = fz_open_leecher(ctx, istm, bc->buffer); |
421 | 45.1k | decomp = fz_open_image_decomp_stream(ctx, leech, &bc->params, &dummy_l2factor); |
422 | 45.1k | pixmap = fz_decomp_image_from_stream(ctx, decomp, image, NULL, indexed, 0, NULL); |
423 | 45.1k | fz_set_compressed_image_buffer(ctx, image, bc); |
424 | 45.1k | } |
425 | 90.2k | fz_always(ctx) |
426 | 45.1k | { |
427 | 45.1k | fz_drop_stream(ctx, istm); |
428 | 45.1k | fz_drop_stream(ctx, leech); |
429 | 45.1k | fz_drop_stream(ctx, decomp); |
430 | 45.1k | fz_drop_pixmap(ctx, pixmap); |
431 | 45.1k | } |
432 | 45.1k | fz_catch(ctx) |
433 | 9 | { |
434 | 9 | fz_drop_compressed_buffer(ctx, bc); |
435 | 9 | fz_rethrow(ctx); |
436 | 9 | } |
437 | 45.1k | } |
438 | | |
439 | | fz_stream * |
440 | | pdf_open_raw_stream_number(fz_context *ctx, pdf_document *doc, int num) |
441 | 0 | { |
442 | 0 | pdf_xref_entry *x; |
443 | 0 | int orig_num, orig_gen; |
444 | |
|
445 | 0 | x = pdf_cache_object(ctx, doc, num); |
446 | 0 | if (x->stm_ofs == 0) |
447 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); |
448 | | |
449 | 0 | return pdf_open_raw_filter(ctx, doc->file, doc, x->obj, num, &orig_num, &orig_gen, x->stm_ofs); |
450 | 0 | } |
451 | | |
452 | | static fz_stream * |
453 | | pdf_open_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int might_be_image) |
454 | 105k | { |
455 | 105k | pdf_xref_entry *x; |
456 | | |
457 | 105k | x = pdf_cache_object(ctx, doc, num); |
458 | 105k | if (x->stm_ofs == 0 && x->stm_buf == NULL) |
459 | 88 | fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); |
460 | | |
461 | 105k | return pdf_open_filter(ctx, doc, doc->file, x->obj, num, x->stm_ofs, params, might_be_image); |
462 | 105k | } |
463 | | |
464 | | fz_stream * |
465 | | pdf_open_stream_number(fz_context *ctx, pdf_document *doc, int num) |
466 | 40.2k | { |
467 | 40.2k | return pdf_open_image_stream(ctx, doc, num, NULL, 1); |
468 | 40.2k | } |
469 | | |
470 | | fz_stream * |
471 | | pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, pdf_obj *dict, int64_t stm_ofs) |
472 | 640 | { |
473 | 640 | if (stm_ofs == 0) |
474 | 2 | fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); |
475 | 638 | return pdf_open_filter(ctx, doc, doc->file, dict, num, stm_ofs, NULL, 1); |
476 | 640 | } |
477 | | |
478 | | fz_buffer * |
479 | | pdf_load_raw_stream_number(fz_context *ctx, pdf_document *doc, int num) |
480 | 0 | { |
481 | 0 | fz_stream *stm; |
482 | 0 | pdf_obj *dict; |
483 | 0 | int64_t len; |
484 | 0 | fz_buffer *buf = NULL; |
485 | 0 | pdf_xref_entry *x; |
486 | |
|
487 | 0 | if (num > 0 && num < pdf_xref_len(ctx, doc)) |
488 | 0 | { |
489 | 0 | x = pdf_get_xref_entry_no_null(ctx, doc, num); |
490 | 0 | if (x->stm_buf) |
491 | 0 | return fz_keep_buffer(ctx, x->stm_buf); |
492 | 0 | } |
493 | | |
494 | 0 | dict = pdf_load_object(ctx, doc, num); |
495 | |
|
496 | 0 | fz_try(ctx) |
497 | 0 | len = pdf_dict_get_int64(ctx, dict, PDF_NAME(Length)); |
498 | 0 | fz_always(ctx) |
499 | 0 | pdf_drop_obj(ctx, dict); |
500 | 0 | fz_catch(ctx) |
501 | 0 | fz_rethrow(ctx); |
502 | | |
503 | 0 | stm = pdf_open_raw_stream_number(ctx, doc, num); |
504 | |
|
505 | 0 | if (len < 0) |
506 | 0 | len = 1024; |
507 | |
|
508 | 0 | fz_try(ctx) |
509 | 0 | buf = fz_read_all(ctx, stm, (size_t)len); |
510 | 0 | fz_always(ctx) |
511 | 0 | fz_drop_stream(ctx, stm); |
512 | 0 | fz_catch(ctx) |
513 | 0 | fz_rethrow(ctx); |
514 | | |
515 | 0 | return buf; |
516 | 0 | } |
517 | | |
/*
 * Estimate the decoded size of 'len' bytes of data passed through the
 * filter called 'filter'.
 *
 * filter: NUL-terminated filter name; must not be NULL. Only the full names
 *	compared below are recognised; any other name leaves 'len' unchanged.
 * Returns: len/2 for ASCIIHexDecode, floor(4*len/5) for ASCII85Decode,
 *	len*3 for FlateDecode and RunLengthDecode, len*2 for LZWDecode.
 *	If a multiplication would not fit in size_t, 'len' is returned
 *	instead: the caller grows its buffer as needed, so a low estimate
 *	is acceptable.
 *
 * None of the arithmetic here can wrap.
 */
static size_t
pdf_guess_filter_length(size_t len, const char *filter)
{
	const size_t size_limit = (size_t)-1;
	size_t factor;

	if (!strcmp(filter, "ASCIIHexDecode"))
		return len / 2;

	/* floor(4*len/5) computed without forming 4*len, which could wrap
	 * for very large 'len'. With len = 5q + r the result is
	 * 4q + floor(4r/5). */
	if (!strcmp(filter, "ASCII85Decode"))
		return len / 5 * 4 + (len % 5) * 4 / 5;

	if (!strcmp(filter, "FlateDecode") || !strcmp(filter, "RunLengthDecode"))
		factor = 3;
	else if (!strcmp(filter, "LZWDecode"))
		factor = 2;
	else
		return len;

	/* Live with a bad estimate - we'll malloc up as we go, but
	 * it's probably destined to fail anyway. */
	if (len > size_limit / factor)
		return len;

	return len * factor;
}
543 | | |
544 | | /* Check if an entry has a cached stream and return whether it is directly |
545 | | * reusable. A buffer is directly reusable only if the stream is |
546 | | * uncompressed, or if it is compressed purely a compression method we can |
547 | | * return details of in fz_compression_params. |
548 | | * |
549 | | * If the stream is reusable return 1, and set params as required, otherwise |
550 | | * return 0. */ |
551 | | static int |
552 | | can_reuse_buffer(fz_context *ctx, pdf_xref_entry *entry, fz_compression_params *params) |
553 | 27.2k | { |
554 | 27.2k | pdf_obj *f; |
555 | 27.2k | pdf_obj *p; |
556 | | |
557 | 27.2k | if (!entry || !entry->obj || !entry->stm_buf) |
558 | 23.8k | return 0; |
559 | | |
560 | 3.33k | if (params) |
561 | 0 | params->type = FZ_IMAGE_RAW; |
562 | | |
563 | 3.33k | f = pdf_dict_geta(ctx, entry->obj, PDF_NAME(Filter), PDF_NAME(F)); |
564 | | /* If there are no filters, it's uncompressed, and we can use it */ |
565 | 3.33k | if (!f) |
566 | 3.33k | return 1; |
567 | | |
568 | 0 | p = pdf_dict_geta(ctx, entry->obj, PDF_NAME(DecodeParms), PDF_NAME(DP)); |
569 | 0 | if (pdf_is_array(ctx, f)) |
570 | 0 | { |
571 | 0 | int len = pdf_array_len(ctx, f); |
572 | | |
573 | | /* Empty array of filters. Its uncompressed. We can cope. */ |
574 | 0 | if (len == 0) |
575 | 0 | return 1; |
576 | | /* 1 filter is the most we can hope to cope with - if more,*/ |
577 | 0 | if (len != 1) |
578 | 0 | return 0; |
579 | 0 | p = pdf_array_get(ctx, p, 0); |
580 | 0 | } |
581 | 0 | if (pdf_is_null(ctx, f)) |
582 | 0 | return 1; /* Null filter is uncompressed */ |
583 | 0 | if (!pdf_is_name(ctx, f)) |
584 | 0 | return 0; |
585 | | |
586 | | /* There are filters, so unless we have the option of shortstopping, |
587 | | * we can't use the existing buffer. */ |
588 | 0 | if (!params) |
589 | 0 | return 0; |
590 | | |
591 | 0 | build_compression_params(ctx, f, p, params); |
592 | |
|
593 | 0 | return (params->type == FZ_IMAGE_RAW) ? 0 : 1; |
594 | 0 | } |
595 | | |
596 | | static fz_buffer * |
597 | | pdf_load_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int *truncated, size_t worst_case) |
598 | 27.2k | { |
599 | 27.2k | fz_stream *stm = NULL; |
600 | 27.2k | pdf_obj *dict, *obj; |
601 | 27.2k | int i, n; |
602 | 27.2k | size_t len; |
603 | 27.2k | fz_buffer *buf; |
604 | | |
605 | 27.2k | fz_var(buf); |
606 | | |
607 | 27.2k | if (num > 0 && num < pdf_xref_len(ctx, doc)) |
608 | 27.2k | { |
609 | 27.2k | pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num); |
610 | | /* Return ref to existing buffer, but only if uncompressed, |
611 | | * or shortstoppable */ |
612 | 27.2k | if (can_reuse_buffer(ctx, entry, params)) |
613 | 3.33k | return fz_keep_buffer(ctx, entry->stm_buf); |
614 | 27.2k | } |
615 | | |
616 | 23.8k | dict = pdf_load_object(ctx, doc, num); |
617 | 47.7k | fz_try(ctx) |
618 | 47.7k | { |
619 | 23.8k | int64_t ilen = pdf_dict_get_int64(ctx, dict, PDF_NAME(Length)); |
620 | 23.8k | if (ilen < 0) |
621 | 0 | ilen = 0; |
622 | 23.8k | len = (size_t)ilen; |
623 | | /* In 32 bit builds, we might find a length being too |
624 | | * large for a size_t. */ |
625 | 23.8k | if ((int64_t)len != ilen) |
626 | 0 | fz_throw(ctx, FZ_ERROR_LIMIT, "Stream too large"); |
627 | 23.8k | obj = pdf_dict_get(ctx, dict, PDF_NAME(Filter)); |
628 | 23.8k | len = pdf_guess_filter_length(len, pdf_to_name(ctx, obj)); |
629 | 23.8k | n = pdf_array_len(ctx, obj); |
630 | 24.8k | for (i = 0; i < n; i++) |
631 | 986 | len = pdf_guess_filter_length(len, pdf_array_get_name(ctx, obj, i)); |
632 | 23.8k | } |
633 | 47.7k | fz_always(ctx) |
634 | 23.8k | { |
635 | 23.8k | pdf_drop_obj(ctx, dict); |
636 | 23.8k | } |
637 | 23.8k | fz_catch(ctx) |
638 | 0 | { |
639 | 0 | fz_rethrow(ctx); |
640 | 0 | } |
641 | | |
642 | 23.8k | stm = pdf_open_image_stream(ctx, doc, num, params, 1); |
643 | | |
644 | 47.7k | fz_try(ctx) |
645 | 47.7k | { |
646 | 23.8k | buf = fz_read_best(ctx, stm, len, truncated, worst_case); |
647 | 23.8k | } |
648 | 47.7k | fz_always(ctx) |
649 | 23.8k | { |
650 | 23.8k | fz_drop_stream(ctx, stm); |
651 | 23.8k | } |
652 | 23.8k | fz_catch(ctx) |
653 | 3 | { |
654 | 3 | fz_rethrow(ctx); |
655 | 3 | } |
656 | | |
657 | 23.8k | return buf; |
658 | 23.8k | } |
659 | | |
660 | | fz_buffer * |
661 | | pdf_load_stream_number(fz_context *ctx, pdf_document *doc, int num) |
662 | 20.9k | { |
663 | 20.9k | return pdf_load_image_stream(ctx, doc, num, NULL, NULL, 0); |
664 | 20.9k | } |
665 | | |
666 | | fz_compressed_buffer * |
667 | | pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num, size_t worst_case) |
668 | 6.24k | { |
669 | 6.24k | fz_compressed_buffer *bc = fz_new_compressed_buffer(ctx); |
670 | | |
671 | 12.4k | fz_try(ctx) |
672 | 12.4k | { |
673 | 6.24k | bc->buffer = pdf_load_image_stream(ctx, doc, num, &bc->params, NULL, worst_case); |
674 | 6.24k | } |
675 | 12.4k | fz_catch(ctx) |
676 | 6 | { |
677 | 6 | fz_free(ctx, bc); |
678 | 6 | fz_rethrow(ctx); |
679 | 6 | } |
680 | 6.23k | return bc; |
681 | 6.24k | } |
682 | | |
683 | | static fz_stream * |
684 | | pdf_open_object_array(fz_context *ctx, pdf_document *doc, pdf_obj *list) |
685 | 677 | { |
686 | 677 | fz_stream *stm; |
687 | 677 | int i, n; |
688 | | |
689 | 677 | n = pdf_array_len(ctx, list); |
690 | 677 | stm = fz_open_concat(ctx, n, 1); |
691 | | |
692 | 3.75k | for (i = 0; i < n; i++) |
693 | 3.08k | { |
694 | 3.08k | pdf_obj *obj = pdf_array_get(ctx, list, i); |
695 | 6.16k | fz_try(ctx) |
696 | 6.16k | fz_concat_push_drop(ctx, stm, pdf_open_stream(ctx, obj)); |
697 | 6.16k | fz_catch(ctx) |
698 | 658 | { |
699 | 658 | if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM) |
700 | 0 | { |
701 | 0 | fz_drop_stream(ctx, stm); |
702 | 0 | fz_rethrow(ctx); |
703 | 0 | } |
704 | 658 | fz_report_error(ctx); |
705 | 658 | fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n); |
706 | 658 | } |
707 | 3.08k | } |
708 | | |
709 | 677 | return stm; |
710 | 677 | } |
711 | | |
712 | | fz_stream * |
713 | | pdf_open_contents_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj) |
714 | 43.4k | { |
715 | 43.4k | int num; |
716 | | |
717 | 43.4k | if (pdf_is_array(ctx, obj)) |
718 | 677 | return pdf_open_object_array(ctx, doc, obj); |
719 | | |
720 | 42.7k | num = pdf_to_num(ctx, obj); |
721 | 42.7k | if (pdf_is_stream(ctx, obj)) |
722 | 41.8k | return pdf_open_image_stream(ctx, doc, num, NULL, 0); |
723 | | |
724 | 884 | fz_warn(ctx, "content stream is not a stream (%d 0 R)", num); |
725 | 884 | return fz_open_memory(ctx, (unsigned char *)"", 0); |
726 | 42.7k | } |
727 | | |
728 | | fz_buffer *pdf_load_raw_stream(fz_context *ctx, pdf_obj *ref) |
729 | 0 | { |
730 | 0 | if (pdf_is_stream(ctx, ref)) |
731 | 0 | return pdf_load_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref)); |
732 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); |
733 | 0 | } |
734 | | |
735 | | fz_buffer *pdf_load_stream(fz_context *ctx, pdf_obj *ref) |
736 | 21.7k | { |
737 | 21.7k | if (pdf_is_stream(ctx, ref)) |
738 | 20.9k | return pdf_load_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref)); |
739 | 811 | fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); |
740 | 21.7k | } |
741 | | |
742 | | fz_stream *pdf_open_raw_stream(fz_context *ctx, pdf_obj *ref) |
743 | 0 | { |
744 | 0 | if (pdf_is_stream(ctx, ref)) |
745 | 0 | return pdf_open_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref)); |
746 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); |
747 | 0 | } |
748 | | |
749 | | fz_stream *pdf_open_stream(fz_context *ctx, pdf_obj *ref) |
750 | 11.6k | { |
751 | 11.6k | if (pdf_is_stream(ctx, ref)) |
752 | 10.9k | return pdf_open_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref)); |
753 | 675 | fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream"); |
754 | 11.6k | } |