Coverage Report

Created: 2025-01-28 06:17

/src/mupdf/source/pdf/pdf-stream.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2024 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <string.h>
27
28
int
29
pdf_obj_num_is_stream(fz_context *ctx, pdf_document *doc, int num)
30
395k
{
31
395k
  pdf_xref_entry *entry;
32
33
395k
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
34
707
    return 0;
35
36
790k
  fz_try(ctx)
37
790k
    entry = pdf_cache_object(ctx, doc, num);
38
790k
  fz_catch(ctx)
39
562
  {
40
562
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
41
562
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
42
562
    fz_report_error(ctx);
43
562
    return 0;
44
562
  }
45
46
394k
  return entry->stm_ofs != 0 || entry->stm_buf;
47
395k
}
48
49
int
50
pdf_is_stream(fz_context *ctx, pdf_obj *ref)
51
120k
{
52
120k
  pdf_document *doc = pdf_get_indirect_document(ctx, ref);
53
120k
  if (doc)
54
92.3k
    return pdf_obj_num_is_stream(ctx, doc, pdf_to_num(ctx, ref));
55
28.3k
  return 0;
56
120k
}
57
58
/*
59
 * Scan stream dictionary for an explicit /Crypt filter
60
 */
61
static int
62
pdf_stream_has_crypt(fz_context *ctx, pdf_obj *stm)
63
99.2k
{
64
99.2k
  pdf_obj *filters;
65
99.2k
  pdf_obj *obj;
66
99.2k
  int i;
67
68
99.2k
  filters = pdf_dict_geta(ctx, stm, PDF_NAME(Filter), PDF_NAME(F));
69
99.2k
  if (filters)
70
79.7k
  {
71
79.7k
    if (pdf_name_eq(ctx, filters, PDF_NAME(Crypt)))
72
0
      return 1;
73
79.7k
    if (pdf_is_array(ctx, filters))
74
1.73k
    {
75
1.73k
      int n = pdf_array_len(ctx, filters);
76
3.54k
      for (i = 0; i < n; i++)
77
1.81k
      {
78
1.81k
        obj = pdf_array_get(ctx, filters, i);
79
1.81k
        if (pdf_name_eq(ctx, obj, PDF_NAME(Crypt)))
80
0
          return 1;
81
1.81k
      }
82
1.73k
    }
83
79.7k
  }
84
99.2k
  return 0;
85
99.2k
}
86
87
static fz_jbig2_globals *
88
pdf_load_jbig2_globals(fz_context *ctx, pdf_obj *dict)
89
4
{
90
4
  fz_jbig2_globals *globals;
91
4
  fz_buffer *buf = NULL;
92
93
4
  fz_var(buf);
94
95
4
  if ((globals = pdf_find_item(ctx, fz_drop_jbig2_globals_imp, dict)) != NULL)
96
0
    return globals;
97
98
4
  if (pdf_mark_obj(ctx, dict))
99
1
    fz_throw(ctx, FZ_ERROR_FORMAT, "cyclic reference when loading JBIG2 globals");
100
101
6
  fz_try(ctx)
102
6
  {
103
3
    buf = pdf_load_stream(ctx, dict);
104
3
    globals = fz_load_jbig2_globals(ctx, buf);
105
3
    if (globals)
106
2
      pdf_store_item(ctx, dict, globals, fz_buffer_storage(ctx, buf, NULL));
107
3
  }
108
6
  fz_always(ctx)
109
3
  {
110
3
    fz_drop_buffer(ctx, buf);
111
3
    pdf_unmark_obj(ctx, dict);
112
3
  }
113
3
  fz_catch(ctx)
114
1
  {
115
1
    fz_rethrow(ctx);
116
1
  }
117
118
2
  return globals;
119
3
}
120
121
static void
122
build_compression_params(fz_context *ctx, pdf_obj *f, pdf_obj *p, fz_compression_params *params)
123
92.1k
{
124
92.1k
  params->type = FZ_IMAGE_RAW;
125
126
92.1k
  if (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) || pdf_name_eq(ctx, f, PDF_NAME(CCF)))
127
11.0k
  {
128
11.0k
    params->type = FZ_IMAGE_FAX;
129
11.0k
    params->u.fax.k = pdf_dict_get_int_default(ctx, p, PDF_NAME(K), 0);
130
11.0k
    params->u.fax.end_of_line = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EndOfLine), 0);
131
11.0k
    params->u.fax.encoded_byte_align = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EncodedByteAlign), 0);
132
11.0k
    params->u.fax.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1728);
133
11.0k
    params->u.fax.rows = pdf_dict_get_int_default(ctx, p, PDF_NAME(Rows), 0);
134
11.0k
    params->u.fax.end_of_block = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EndOfBlock), 1);
135
11.0k
    params->u.fax.black_is_1 = pdf_dict_get_bool_default(ctx, p, PDF_NAME(BlackIs1), 0);
136
11.0k
  }
137
81.0k
  else if (pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) || pdf_name_eq(ctx, f, PDF_NAME(DCT)))
138
539
  {
139
539
    params->type = FZ_IMAGE_JPEG;
140
539
    params->u.jpeg.color_transform = pdf_dict_get_int_default(ctx, p, PDF_NAME(ColorTransform), -1);
141
539
    params->u.jpeg.invert_cmyk = 0;
142
539
  }
143
80.4k
  else if (pdf_name_eq(ctx, f, PDF_NAME(RunLengthDecode)) || pdf_name_eq(ctx, f, PDF_NAME(RL)))
144
16
  {
145
16
    params->type = FZ_IMAGE_RLD;
146
16
  }
147
80.4k
  else if (pdf_name_eq(ctx, f, PDF_NAME(FlateDecode)) || pdf_name_eq(ctx, f, PDF_NAME(Fl)))
148
76.7k
  {
149
76.7k
    params->type = FZ_IMAGE_FLATE;
150
76.7k
    params->u.flate.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1);
151
76.7k
    params->u.flate.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1);
152
76.7k
    params->u.flate.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1);
153
76.7k
    params->u.flate.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8);
154
76.7k
  }
155
3.69k
  else if (pdf_name_eq(ctx, f, PDF_NAME(LZWDecode)) || pdf_name_eq(ctx, f, PDF_NAME(LZW)))
156
6
  {
157
6
    params->type = FZ_IMAGE_LZW;
158
6
    params->u.lzw.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1);
159
6
    params->u.lzw.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1);
160
6
    params->u.lzw.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1);
161
6
    params->u.lzw.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8);
162
6
    params->u.lzw.early_change = pdf_dict_get_int_default(ctx, p, PDF_NAME(EarlyChange), 1);
163
6
  }
164
3.68k
  else if (pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode)))
165
616
  {
166
616
    pdf_obj *g = pdf_dict_get(ctx, p, PDF_NAME(JBIG2Globals));
167
168
616
    params->type = FZ_IMAGE_JBIG2;
169
616
    params->u.jbig2.globals = NULL;
170
616
    params->u.jbig2.embedded = 1; /* jbig2 streams are always embedded without file headers */
171
616
    if (g)
172
5
    {
173
5
      if (!pdf_is_stream(ctx, g))
174
1
        fz_warn(ctx, "jbig2 globals is not a stream, skipping globals");
175
4
      else
176
4
        params->u.jbig2.globals = pdf_load_jbig2_globals(ctx, g);
177
5
    }
178
616
  }
179
92.1k
}
180
181
/*
182
 * Create a filter given a name and param dictionary.
183
 */
184
static fz_stream *
185
build_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params, int might_be_image)
186
92.1k
{
187
92.1k
  fz_compression_params local_params;
188
189
92.1k
  local_params.u.jbig2.globals = NULL;
190
92.1k
  if (params == NULL)
191
73.9k
    params = &local_params;
192
193
92.1k
  if (!might_be_image &&
194
92.1k
    (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) ||
195
26.2k
      pdf_name_eq(ctx, f, PDF_NAME(CCF)) ||
196
26.2k
      pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) ||
197
26.2k
      pdf_name_eq(ctx, f, PDF_NAME(DCT)) ||
198
26.2k
      pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode)) ||
199
26.2k
      pdf_name_eq(ctx, f, PDF_NAME(JPXDecode))))
200
6
  {
201
6
    fz_warn(ctx, "Can't open image only stream for non-image purposes");
202
6
    return fz_open_memory(ctx, (unsigned char *)"", 0);
203
6
  }
204
205
92.1k
  build_compression_params(ctx, f, p, params);
206
207
  /* If we were using params we were passed in, and we successfully
208
   * recognised the image type, we can use the existing filter and
209
   * shortstop here. */
210
92.1k
  if (params != &local_params && params->type != FZ_IMAGE_RAW)
211
16.7k
    return fz_keep_stream(ctx, chain); /* nothing to do */
212
213
75.3k
  else if (params->type == FZ_IMAGE_JBIG2)
214
527
  {
215
527
    fz_stream *stm;
216
1.05k
    fz_try(ctx)
217
1.05k
      stm = fz_open_image_decomp_stream(ctx, chain, params, NULL);
218
1.05k
    fz_always(ctx)
219
527
      fz_drop_jbig2_globals(ctx, local_params.u.jbig2.globals);
220
527
    fz_catch(ctx)
221
0
      fz_rethrow(ctx);
222
527
    return stm;
223
527
  }
224
225
74.8k
  else if (params->type != FZ_IMAGE_RAW)
226
71.7k
    return fz_open_image_decomp_stream(ctx, chain, params, NULL);
227
228
3.07k
  else if (pdf_name_eq(ctx, f, PDF_NAME(ASCIIHexDecode)) || pdf_name_eq(ctx, f, PDF_NAME(AHx)))
229
1.25k
    return fz_open_ahxd(ctx, chain);
230
231
1.82k
  else if (pdf_name_eq(ctx, f, PDF_NAME(ASCII85Decode)) || pdf_name_eq(ctx, f, PDF_NAME(A85)))
232
43
    return fz_open_a85d(ctx, chain);
233
234
1.78k
  else if (pdf_name_eq(ctx, f, PDF_NAME(JPXDecode)))
235
1.31k
    return fz_keep_stream(ctx, chain); /* JPX decoding is special cased in the image loading code */
236
237
469
  else if (pdf_name_eq(ctx, f, PDF_NAME(Crypt)))
238
0
  {
239
0
    if (!doc->crypt)
240
0
      fz_warn(ctx, "crypt filter in unencrypted document");
241
0
    else
242
0
    {
243
0
      pdf_obj *name = pdf_dict_get(ctx, p, PDF_NAME(Name));
244
0
      if (pdf_is_name(ctx, name))
245
0
        return pdf_open_crypt_with_filter(ctx, chain, doc->crypt, name, num, gen);
246
0
    }
247
0
  }
248
249
469
  else
250
469
    fz_warn(ctx, "unknown filter name (%s)", pdf_to_name(ctx, f));
251
252
469
  return fz_keep_stream(ctx, chain);
253
92.1k
}
254
255
/* Build filter, and assume ownership of chain */
256
static fz_stream *
257
build_filter_drop(fz_context *ctx, fz_stream *tail, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params, int might_be_image)
258
3.05k
{
259
3.05k
  fz_stream *head;
260
6.10k
  fz_try(ctx)
261
6.10k
    head = build_filter(ctx, tail, doc, f, p, num, gen, params, might_be_image);
262
6.10k
  fz_always(ctx)
263
3.05k
    fz_drop_stream(ctx, tail);
264
3.05k
  fz_catch(ctx)
265
0
    fz_rethrow(ctx);
266
3.05k
  return head;
267
3.05k
}
268
269
/*
270
 * Build a chain of filters given filter names and param dicts.
271
 * If chain is given, start filter chain with it.
272
 * Assume ownership of chain.
273
 */
274
static fz_stream *
275
build_filter_chain_drop(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params, int might_be_image)
276
2.96k
{
277
2.96k
  fz_var(chain);
278
5.93k
  fz_try(ctx)
279
5.93k
  {
280
2.96k
    int i, n = pdf_array_len(ctx, fs);
281
6.02k
    for (i = 0; i < n; i++)
282
3.05k
    {
283
3.05k
      pdf_obj *f = pdf_array_get(ctx, fs, i);
284
3.05k
      pdf_obj *p = pdf_array_get(ctx, ps, i);
285
3.05k
      chain = build_filter_drop(ctx, chain, doc, f, p, num, gen, (i == n-1 ? params : NULL), might_be_image);
286
3.05k
    }
287
2.96k
  }
288
5.93k
  fz_catch(ctx)
289
0
    fz_rethrow(ctx);
290
2.96k
  return chain;
291
2.96k
}
292
293
static fz_stream *
294
build_filter_chain(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params, int might_be_image)
295
2.96k
{
296
2.96k
  return build_filter_chain_drop(ctx, fz_keep_stream(ctx, chain), doc, fs, ps, num, gen, params, might_be_image);
297
2.96k
}
298
299
/*
300
 * Build a filter for reading raw stream data.
301
 * This is a null filter to constrain reading to the stream length (and to
302
 * allow for other people accessing the file), followed by a decryption
303
 * filter.
304
 *
305
 * orig_num and orig_gen are used purely to seed the encryption.
306
 */
307
static fz_stream *
308
pdf_open_raw_filter(fz_context *ctx, fz_stream *file_stm, pdf_document *doc, pdf_obj *stmobj, int num, int *orig_num, int *orig_gen, int64_t offset)
309
106k
{
310
106k
  pdf_xref_entry *x = NULL;
311
106k
  fz_stream *null_stm, *crypt_stm;
312
106k
  int hascrypt;
313
106k
  int64_t len;
314
315
106k
  if (num > 0 && num < pdf_xref_len(ctx, doc))
316
106k
  {
317
106k
    x = pdf_get_xref_entry(ctx, doc, num);
318
106k
  }
319
106k
  if (x == NULL)
320
633
  {
321
    /* We only end up here when called from pdf_open_stream_with_offset to parse new format XRef sections. */
322
    /* New style XRef sections must have generation number 0. */
323
633
    *orig_num = num;
324
633
    *orig_gen = 0;
325
633
  }
326
105k
  else
327
105k
  {
328
105k
    *orig_num = x->num;
329
105k
    *orig_gen = x->gen;
330
105k
    if (x->stm_buf)
331
7.25k
      return fz_open_buffer(ctx, x->stm_buf);
332
105k
  }
333
334
99.2k
  hascrypt = pdf_stream_has_crypt(ctx, stmobj);
335
99.2k
  len = pdf_dict_get_int64(ctx, stmobj, PDF_NAME(Length));
336
99.2k
  if (len < 0)
337
0
    len = 0;
338
99.2k
  null_stm = fz_open_endstream_filter(ctx, file_stm, (uint64_t)len, offset);
339
99.2k
  if (doc->crypt && !hascrypt)
340
1.99k
  {
341
3.99k
    fz_try(ctx)
342
3.99k
      crypt_stm = pdf_open_crypt(ctx, null_stm, doc->crypt, *orig_num, *orig_gen);
343
3.99k
    fz_always(ctx)
344
1.99k
      fz_drop_stream(ctx, null_stm);
345
1.99k
    fz_catch(ctx)
346
0
      fz_rethrow(ctx);
347
1.99k
    return crypt_stm;
348
1.99k
  }
349
97.2k
  return null_stm;
350
99.2k
}
351
352
/*
353
 * Construct a filter to decode a stream, constraining
354
 * to stream length and decrypting.
355
 */
356
static fz_stream *
357
pdf_open_filter(fz_context *ctx, pdf_document *doc, fz_stream *file_stm, pdf_obj *stmobj, int num, int64_t offset, fz_compression_params *imparams, int might_be_image)
358
106k
{
359
106k
  pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F));
360
106k
  pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP));
361
106k
  int orig_num, orig_gen;
362
106k
  fz_stream *rstm, *fstm;
363
364
106k
  rstm = pdf_open_raw_filter(ctx, file_stm, doc, stmobj, num, &orig_num, &orig_gen, offset);
365
213k
  fz_try(ctx)
366
213k
  {
367
106k
    if (pdf_is_name(ctx, filters))
368
77.9k
      fstm = build_filter(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams, might_be_image);
369
28.5k
    else if (pdf_array_len(ctx, filters) > 0)
370
1.73k
      fstm = build_filter_chain(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams, might_be_image);
371
26.7k
    else
372
26.7k
    {
373
26.7k
      if (imparams)
374
353
        imparams->type = FZ_IMAGE_RAW;
375
26.7k
      fstm = fz_keep_stream(ctx, rstm);
376
26.7k
    }
377
106k
  }
378
213k
  fz_always(ctx)
379
106k
    fz_drop_stream(ctx, rstm);
380
106k
  fz_catch(ctx)
381
2
    fz_rethrow(ctx);
382
383
106k
  return fstm;
384
106k
}
385
386
fz_stream *
387
pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *file_stm, fz_compression_params *imparams)
388
45.1k
{
389
45.1k
  pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F));
390
45.1k
  pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP));
391
392
45.1k
  if (pdf_is_name(ctx, filters))
393
11.0k
    return build_filter(ctx, file_stm, doc, filters, params, 0, 0, imparams, 1);
394
34.0k
  else if (pdf_array_len(ctx, filters) > 0)
395
1.23k
    return build_filter_chain(ctx, file_stm, doc, filters, params, 0, 0, imparams, 1);
396
397
32.7k
  if (imparams)
398
32.7k
    imparams->type = FZ_IMAGE_RAW;
399
32.7k
  return fz_open_null_filter(ctx, file_stm, length, fz_tell(ctx, file_stm));
400
45.1k
}
401
402
void
403
pdf_load_compressed_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int length, fz_stream *file_stm, int indexed, fz_compressed_image *image)
404
45.1k
{
405
45.1k
  fz_stream *istm = NULL, *leech = NULL, *decomp = NULL;
406
45.1k
  fz_pixmap *pixmap = NULL;
407
45.1k
  fz_compressed_buffer *bc;
408
45.1k
  int dummy_l2factor = 0;
409
410
45.1k
  fz_var(istm);
411
45.1k
  fz_var(leech);
412
45.1k
  fz_var(decomp);
413
45.1k
  fz_var(pixmap);
414
415
45.1k
  bc = fz_new_compressed_buffer(ctx);
416
90.2k
  fz_try(ctx)
417
90.2k
  {
418
45.1k
    bc->buffer = fz_new_buffer(ctx, 1024);
419
45.1k
    istm = pdf_open_inline_stream(ctx, doc, dict, length, file_stm, &bc->params);
420
45.1k
    leech = fz_open_leecher(ctx, istm, bc->buffer);
421
45.1k
    decomp = fz_open_image_decomp_stream(ctx, leech, &bc->params, &dummy_l2factor);
422
45.1k
    pixmap = fz_decomp_image_from_stream(ctx, decomp, image, NULL, indexed, 0, NULL);
423
45.1k
    fz_set_compressed_image_buffer(ctx, image, bc);
424
45.1k
  }
425
90.2k
  fz_always(ctx)
426
45.1k
  {
427
45.1k
    fz_drop_stream(ctx, istm);
428
45.1k
    fz_drop_stream(ctx, leech);
429
45.1k
    fz_drop_stream(ctx, decomp);
430
45.1k
    fz_drop_pixmap(ctx, pixmap);
431
45.1k
  }
432
45.1k
  fz_catch(ctx)
433
9
  {
434
9
    fz_drop_compressed_buffer(ctx, bc);
435
9
    fz_rethrow(ctx);
436
9
  }
437
45.1k
}
438
439
fz_stream *
440
pdf_open_raw_stream_number(fz_context *ctx, pdf_document *doc, int num)
441
0
{
442
0
  pdf_xref_entry *x;
443
0
  int orig_num, orig_gen;
444
445
0
  x = pdf_cache_object(ctx, doc, num);
446
0
  if (x->stm_ofs == 0)
447
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
448
449
0
  return pdf_open_raw_filter(ctx, doc->file, doc, x->obj, num, &orig_num, &orig_gen, x->stm_ofs);
450
0
}
451
452
static fz_stream *
453
pdf_open_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int might_be_image)
454
105k
{
455
105k
  pdf_xref_entry *x;
456
457
105k
  x = pdf_cache_object(ctx, doc, num);
458
105k
  if (x->stm_ofs == 0 && x->stm_buf == NULL)
459
88
    fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
460
461
105k
  return pdf_open_filter(ctx, doc, doc->file, x->obj, num, x->stm_ofs, params, might_be_image);
462
105k
}
463
464
fz_stream *
465
pdf_open_stream_number(fz_context *ctx, pdf_document *doc, int num)
466
40.2k
{
467
40.2k
  return pdf_open_image_stream(ctx, doc, num, NULL, 1);
468
40.2k
}
469
470
fz_stream *
471
pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, pdf_obj *dict, int64_t stm_ofs)
472
640
{
473
640
  if (stm_ofs == 0)
474
2
    fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
475
638
  return pdf_open_filter(ctx, doc, doc->file, dict, num, stm_ofs, NULL, 1);
476
640
}
477
478
fz_buffer *
479
pdf_load_raw_stream_number(fz_context *ctx, pdf_document *doc, int num)
480
0
{
481
0
  fz_stream *stm;
482
0
  pdf_obj *dict;
483
0
  int64_t len;
484
0
  fz_buffer *buf = NULL;
485
0
  pdf_xref_entry *x;
486
487
0
  if (num > 0 && num < pdf_xref_len(ctx, doc))
488
0
  {
489
0
    x = pdf_get_xref_entry_no_null(ctx, doc, num);
490
0
    if (x->stm_buf)
491
0
      return fz_keep_buffer(ctx, x->stm_buf);
492
0
  }
493
494
0
  dict = pdf_load_object(ctx, doc, num);
495
496
0
  fz_try(ctx)
497
0
    len = pdf_dict_get_int64(ctx, dict, PDF_NAME(Length));
498
0
  fz_always(ctx)
499
0
    pdf_drop_obj(ctx, dict);
500
0
  fz_catch(ctx)
501
0
    fz_rethrow(ctx);
502
503
0
  stm = pdf_open_raw_stream_number(ctx, doc, num);
504
505
0
  if (len < 0)
506
0
    len = 1024;
507
508
0
  fz_try(ctx)
509
0
    buf = fz_read_all(ctx, stm, (size_t)len);
510
0
  fz_always(ctx)
511
0
    fz_drop_stream(ctx, stm);
512
0
  fz_catch(ctx)
513
0
    fz_rethrow(ctx);
514
515
0
  return buf;
516
0
}
517
518
/*
 * Estimate the size of the output produced by applying the decode filter
 * called 'filter' to 'len' bytes of input.
 *
 * ASCIIHexDecode halves the length and ASCII85Decode scales it by 4/5;
 * FlateDecode and RunLengthDecode are guessed at 3x, LZWDecode at 2x.
 * Any other name leaves the length unchanged.
 *
 * The result is only used as a starting estimate, so when the scaled
 * value would not fit in a size_t the input length is returned instead.
 * No intermediate computation can wrap around.
 */
static size_t
pdf_guess_filter_length(size_t len, const char *filter)
{
  const size_t size_limit = (size_t)-1;
  size_t factor;

  if (!strcmp(filter, "ASCIIHexDecode"))
    return len / 2;

  /* floor(len * 4 / 5), computed without forming len * 4 (which could
   * overflow for very large len). */
  if (!strcmp(filter, "ASCII85Decode"))
    return (len / 5) * 4 + ((len % 5) * 4) / 5;

  if (!strcmp(filter, "FlateDecode") || !strcmp(filter, "RunLengthDecode"))
    factor = 3;
  else if (!strcmp(filter, "LZWDecode"))
    factor = 2;
  else
    return len;

  /* Live with a bad estimate - we'll malloc up as we go, but
   * it's probably destined to fail anyway. */
  if (len > size_limit / factor)
    return len;

  return len * factor;
}
543
544
/* Check if an entry has a cached stream and return whether it is directly
545
 * reusable. A buffer is directly reusable only if the stream is
546
 * uncompressed, or if it is compressed purely a compression method we can
547
 * return details of in fz_compression_params.
548
 *
549
 * If the stream is reusable return 1, and set params as required, otherwise
550
 * return 0. */
551
static int
552
can_reuse_buffer(fz_context *ctx, pdf_xref_entry *entry, fz_compression_params *params)
553
27.2k
{
554
27.2k
  pdf_obj *f;
555
27.2k
  pdf_obj *p;
556
557
27.2k
  if (!entry || !entry->obj || !entry->stm_buf)
558
23.8k
    return 0;
559
560
3.33k
  if (params)
561
0
    params->type = FZ_IMAGE_RAW;
562
563
3.33k
  f = pdf_dict_geta(ctx, entry->obj, PDF_NAME(Filter), PDF_NAME(F));
564
  /* If there are no filters, it's uncompressed, and we can use it */
565
3.33k
  if (!f)
566
3.33k
    return 1;
567
568
0
  p = pdf_dict_geta(ctx, entry->obj, PDF_NAME(DecodeParms), PDF_NAME(DP));
569
0
  if (pdf_is_array(ctx, f))
570
0
  {
571
0
    int len = pdf_array_len(ctx, f);
572
573
    /* Empty array of filters. Its uncompressed. We can cope. */
574
0
    if (len == 0)
575
0
      return 1;
576
    /* 1 filter is the most we can hope to cope with - if more,*/
577
0
    if (len != 1)
578
0
      return 0;
579
0
    p = pdf_array_get(ctx, p, 0);
580
0
  }
581
0
  if (pdf_is_null(ctx, f))
582
0
    return 1; /* Null filter is uncompressed */
583
0
  if (!pdf_is_name(ctx, f))
584
0
    return 0;
585
586
  /* There are filters, so unless we have the option of shortstopping,
587
   * we can't use the existing buffer. */
588
0
  if (!params)
589
0
    return 0;
590
591
0
  build_compression_params(ctx, f, p, params);
592
593
0
  return (params->type == FZ_IMAGE_RAW) ? 0 : 1;
594
0
}
595
596
static fz_buffer *
597
pdf_load_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int *truncated, size_t worst_case)
598
27.2k
{
599
27.2k
  fz_stream *stm = NULL;
600
27.2k
  pdf_obj *dict, *obj;
601
27.2k
  int i, n;
602
27.2k
  size_t len;
603
27.2k
  fz_buffer *buf;
604
605
27.2k
  fz_var(buf);
606
607
27.2k
  if (num > 0 && num < pdf_xref_len(ctx, doc))
608
27.2k
  {
609
27.2k
    pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
610
    /* Return ref to existing buffer, but only if uncompressed,
611
     * or shortstoppable */
612
27.2k
    if (can_reuse_buffer(ctx, entry, params))
613
3.33k
      return fz_keep_buffer(ctx, entry->stm_buf);
614
27.2k
  }
615
616
23.8k
  dict = pdf_load_object(ctx, doc, num);
617
47.7k
  fz_try(ctx)
618
47.7k
  {
619
23.8k
    int64_t ilen = pdf_dict_get_int64(ctx, dict, PDF_NAME(Length));
620
23.8k
    if (ilen < 0)
621
0
      ilen = 0;
622
23.8k
    len = (size_t)ilen;
623
    /* In 32 bit builds, we might find a length being too
624
     * large for a size_t. */
625
23.8k
    if ((int64_t)len != ilen)
626
0
      fz_throw(ctx, FZ_ERROR_LIMIT, "Stream too large");
627
23.8k
    obj = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
628
23.8k
    len = pdf_guess_filter_length(len, pdf_to_name(ctx, obj));
629
23.8k
    n = pdf_array_len(ctx, obj);
630
24.8k
    for (i = 0; i < n; i++)
631
986
      len = pdf_guess_filter_length(len, pdf_array_get_name(ctx, obj, i));
632
23.8k
  }
633
47.7k
  fz_always(ctx)
634
23.8k
  {
635
23.8k
    pdf_drop_obj(ctx, dict);
636
23.8k
  }
637
23.8k
  fz_catch(ctx)
638
0
  {
639
0
    fz_rethrow(ctx);
640
0
  }
641
642
23.8k
  stm = pdf_open_image_stream(ctx, doc, num, params, 1);
643
644
47.7k
  fz_try(ctx)
645
47.7k
  {
646
23.8k
    buf = fz_read_best(ctx, stm, len, truncated, worst_case);
647
23.8k
  }
648
47.7k
  fz_always(ctx)
649
23.8k
  {
650
23.8k
    fz_drop_stream(ctx, stm);
651
23.8k
  }
652
23.8k
  fz_catch(ctx)
653
3
  {
654
3
    fz_rethrow(ctx);
655
3
  }
656
657
23.8k
  return buf;
658
23.8k
}
659
660
fz_buffer *
661
pdf_load_stream_number(fz_context *ctx, pdf_document *doc, int num)
662
20.9k
{
663
20.9k
  return pdf_load_image_stream(ctx, doc, num, NULL, NULL, 0);
664
20.9k
}
665
666
fz_compressed_buffer *
667
pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num, size_t worst_case)
668
6.24k
{
669
6.24k
  fz_compressed_buffer *bc = fz_new_compressed_buffer(ctx);
670
671
12.4k
  fz_try(ctx)
672
12.4k
  {
673
6.24k
    bc->buffer = pdf_load_image_stream(ctx, doc, num, &bc->params, NULL, worst_case);
674
6.24k
  }
675
12.4k
  fz_catch(ctx)
676
6
  {
677
6
    fz_free(ctx, bc);
678
6
    fz_rethrow(ctx);
679
6
  }
680
6.23k
  return bc;
681
6.24k
}
682
683
static fz_stream *
684
pdf_open_object_array(fz_context *ctx, pdf_document *doc, pdf_obj *list)
685
677
{
686
677
  fz_stream *stm;
687
677
  int i, n;
688
689
677
  n = pdf_array_len(ctx, list);
690
677
  stm = fz_open_concat(ctx, n, 1);
691
692
3.75k
  for (i = 0; i < n; i++)
693
3.08k
  {
694
3.08k
    pdf_obj *obj = pdf_array_get(ctx, list, i);
695
6.16k
    fz_try(ctx)
696
6.16k
      fz_concat_push_drop(ctx, stm, pdf_open_stream(ctx, obj));
697
6.16k
    fz_catch(ctx)
698
658
    {
699
658
      if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM)
700
0
      {
701
0
        fz_drop_stream(ctx, stm);
702
0
        fz_rethrow(ctx);
703
0
      }
704
658
      fz_report_error(ctx);
705
658
      fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
706
658
    }
707
3.08k
  }
708
709
677
  return stm;
710
677
}
711
712
fz_stream *
713
pdf_open_contents_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
714
43.4k
{
715
43.4k
  int num;
716
717
43.4k
  if (pdf_is_array(ctx, obj))
718
677
    return pdf_open_object_array(ctx, doc, obj);
719
720
42.7k
  num = pdf_to_num(ctx, obj);
721
42.7k
  if (pdf_is_stream(ctx, obj))
722
41.8k
    return pdf_open_image_stream(ctx, doc, num, NULL, 0);
723
724
884
  fz_warn(ctx, "content stream is not a stream (%d 0 R)", num);
725
884
  return fz_open_memory(ctx, (unsigned char *)"", 0);
726
42.7k
}
727
728
/*
 * Load the raw (undecoded) data of the stream referred to by 'ref'.
 * Throws FZ_ERROR_FORMAT if 'ref' is not a stream.
 */
fz_buffer *
pdf_load_raw_stream(fz_context *ctx, pdf_obj *ref)
{
  pdf_document *owner;

  if (!pdf_is_stream(ctx, ref))
    fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");

  owner = pdf_get_indirect_document(ctx, ref);
  return pdf_load_raw_stream_number(ctx, owner, pdf_to_num(ctx, ref));
}
734
735
/*
 * Load the data of the stream referred to by 'ref' through
 * pdf_load_stream_number().
 * Throws FZ_ERROR_FORMAT if 'ref' is not a stream.
 */
fz_buffer *
pdf_load_stream(fz_context *ctx, pdf_obj *ref)
{
  pdf_document *owner;

  if (!pdf_is_stream(ctx, ref))
    fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");

  owner = pdf_get_indirect_document(ctx, ref);
  return pdf_load_stream_number(ctx, owner, pdf_to_num(ctx, ref));
}
741
742
/*
 * Open the raw (undecoded) data of the stream referred to by 'ref'.
 * Throws FZ_ERROR_FORMAT if 'ref' is not a stream.
 */
fz_stream *
pdf_open_raw_stream(fz_context *ctx, pdf_obj *ref)
{
  pdf_document *owner;

  if (!pdf_is_stream(ctx, ref))
    fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");

  owner = pdf_get_indirect_document(ctx, ref);
  return pdf_open_raw_stream_number(ctx, owner, pdf_to_num(ctx, ref));
}
748
749
/*
 * Open the stream referred to by 'ref' through pdf_open_stream_number().
 * Throws FZ_ERROR_FORMAT if 'ref' is not a stream.
 */
fz_stream *
pdf_open_stream(fz_context *ctx, pdf_obj *ref)
{
  pdf_document *owner;

  if (!pdf_is_stream(ctx, ref))
    fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");

  owner = pdf_get_indirect_document(ctx, ref);
  return pdf_open_stream_number(ctx, owner, pdf_to_num(ctx, ref));
}