Coverage Report

Created: 2024-05-20 06:23

/src/mupdf/source/pdf/pdf-xref.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2023 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "pdf-annot-imp.h"
25
26
#include <assert.h>
27
#include <limits.h>
28
#include <string.h>
29
30
#undef DEBUG_PROGESSIVE_ADVANCE
31
32
#ifdef DEBUG_PROGESSIVE_ADVANCE
33
#define DEBUGMESS(A) do { fz_warn A; } while (0)
34
#else
35
0
#define DEBUGMESS(A) do { } while (0)
36
#endif
37
38
529k
/* ASCII decimal digit test. The argument is fully parenthesized so that
 * expressions with low-precedence operators (e.g. ?:) are classified
 * correctly. NOTE: the argument is evaluated twice; do not pass
 * expressions with side effects. */
#define isdigit(c) ((c) >= '0' && (c) <= '9')
39
40
/* Return 1 if ch is one of NUL, TAB, LF, FF, CR or SPACE; 0 otherwise. */
static inline int iswhite(int ch)
{
	switch (ch)
	{
	case '\000':
	case '\011':
	case '\012':
	case '\014':
	case '\015':
	case '\040':
		return 1;
	default:
		return 0;
	}
}
46
47
/*
48
 * xref tables
49
 */
50
51
static void
52
pdf_drop_xref_subsec(fz_context *ctx, pdf_xref *xref)
53
13.7k
{
54
13.7k
  pdf_xref_subsec *sub = xref->subsec;
55
13.7k
  pdf_unsaved_sig *usig;
56
13.7k
  int e;
57
58
27.2k
  while (sub != NULL)
59
13.4k
  {
60
13.4k
    pdf_xref_subsec *next_sub = sub->next;
61
34.0M
    for (e = 0; e < sub->len; e++)
62
34.0M
    {
63
34.0M
      pdf_xref_entry *entry = &sub->table[e];
64
34.0M
      pdf_drop_obj(ctx, entry->obj);
65
34.0M
      fz_drop_buffer(ctx, entry->stm_buf);
66
34.0M
    }
67
13.4k
    fz_free(ctx, sub->table);
68
13.4k
    fz_free(ctx, sub);
69
13.4k
    sub = next_sub;
70
13.4k
  }
71
72
13.7k
  pdf_drop_obj(ctx, xref->pre_repair_trailer);
73
13.7k
  pdf_drop_obj(ctx, xref->trailer);
74
75
13.7k
  while ((usig = xref->unsaved_sigs) != NULL)
76
0
  {
77
0
    xref->unsaved_sigs = usig->next;
78
0
    pdf_drop_obj(ctx, usig->field);
79
0
    pdf_drop_signer(ctx, usig->signer);
80
0
    fz_free(ctx, usig);
81
0
  }
82
13.7k
}
83
84
/* Drop the contents of each of the num_xref_sections sections in the
 * xref_sections array, then free the array itself. */
static void pdf_drop_xref_sections_imp(fz_context *ctx, pdf_document *doc, pdf_xref *xref_sections, int num_xref_sections)
{
	int s = 0;

	while (s < num_xref_sections)
	{
		pdf_drop_xref_subsec(ctx, &xref_sections[s]);
		s++;
	}

	fz_free(ctx, xref_sections);
}
93
94
/* Drop both the saved and the current xref section arrays of the document
 * and reset the associated pointers and counters to empty. */
static void pdf_drop_xref_sections(fz_context *ctx, pdf_document *doc)
{
	pdf_xref *saved = doc->saved_xref_sections;
	int num_saved = doc->saved_num_xref_sections;
	pdf_xref *current = doc->xref_sections;
	int num_current = doc->num_xref_sections;

	pdf_drop_xref_sections_imp(ctx, doc, saved, num_saved);
	pdf_drop_xref_sections_imp(ctx, doc, current, num_current);

	doc->saved_xref_sections = NULL;
	doc->saved_num_xref_sections = 0;
	doc->xref_sections = NULL;
	doc->num_xref_sections = 0;
	doc->num_incremental_sections = 0;
}
105
106
static void
107
extend_xref_index(fz_context *ctx, pdf_document *doc, int newlen)
108
34.1k
{
109
34.1k
  int i;
110
111
34.1k
  doc->xref_index = fz_realloc_array(ctx, doc->xref_index, newlen, int);
112
33.8M
  for (i = doc->max_xref_len; i < newlen; i++)
113
33.8M
  {
114
33.8M
    doc->xref_index[i] = 0;
115
33.8M
  }
116
34.1k
  doc->max_xref_len = newlen;
117
34.1k
}
118
119
static void
120
resize_xref_sub(fz_context *ctx, pdf_xref *xref, int base, int newlen)
121
6.60k
{
122
6.60k
  pdf_xref_subsec *sub;
123
6.60k
  int i;
124
125
6.60k
  assert(xref != NULL);
126
6.60k
  sub = xref->subsec;
127
6.60k
  assert(sub->next == NULL && sub->start == base && sub->len+base == xref->num_objects);
128
6.60k
  assert(newlen+base > xref->num_objects);
129
130
6.60k
  sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
131
13.2k
  for (i = sub->len; i < newlen; i++)
132
6.60k
  {
133
6.60k
    sub->table[i].type = 0;
134
6.60k
    sub->table[i].ofs = 0;
135
6.60k
    sub->table[i].gen = 0;
136
6.60k
    sub->table[i].num = 0;
137
6.60k
    sub->table[i].stm_ofs = 0;
138
6.60k
    sub->table[i].stm_buf = NULL;
139
6.60k
    sub->table[i].obj = NULL;
140
6.60k
  }
141
6.60k
  sub->len = newlen;
142
6.60k
  if (newlen+base > xref->num_objects)
143
6.60k
    xref->num_objects = newlen+base;
144
6.60k
}
145
146
/* This is only ever called when we already have an incremental
147
 * xref. This means there will only be 1 subsec, and it will be
148
 * a complete subsec. */
149
static void pdf_resize_xref(fz_context *ctx, pdf_document *doc, int newlen)
150
0
{
151
0
  pdf_xref *xref = &doc->xref_sections[doc->xref_base];
152
153
0
  resize_xref_sub(ctx, xref, 0, newlen);
154
0
  if (doc->max_xref_len < newlen)
155
0
    extend_xref_index(ctx, doc, newlen);
156
0
}
157
158
/* Append one more xref section to the end of doc->xref_sections and
 * initialise it as empty (no subsections, no objects, no trailers, no
 * unsaved signatures). */
static void pdf_populate_next_xref_level(fz_context *ctx, pdf_document *doc)
{
	pdf_xref *fresh;

	doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, pdf_xref);
	fresh = &doc->xref_sections[doc->num_xref_sections];
	doc->num_xref_sections++;

	fresh->subsec = NULL;
	fresh->num_objects = 0;
	fresh->trailer = NULL;
	fresh->pre_repair_trailer = NULL;
	fresh->unsaved_sigs = NULL;
	fresh->unsaved_sigs_end = NULL;
}
172
173
/* Return the trailer of the xref section selected by doc->xref_base, or
 * NULL if the document has no xref sections. The returned pointer is not
 * kept by this function. */
pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc)
{
	if (doc->xref_sections == NULL)
		return NULL;

	return doc->xref_sections[doc->xref_base].trailer;
}
180
181
/* Set the trailer of the xref section currently being populated (the last
 * one in doc->xref_sections). If that section already has a trailer, the
 * existing one is moved to pre_repair_trailer (dropping whatever was there
 * before). A new reference to 'trailer' is taken. */
void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer)
{
	pdf_xref *target = &doc->xref_sections[doc->num_xref_sections - 1];
	pdf_obj *previous = target->trailer;

	if (previous != NULL)
	{
		pdf_drop_obj(ctx, target->pre_repair_trailer);
		target->pre_repair_trailer = previous;
	}

	target->trailer = pdf_keep_obj(ctx, trailer);
}
192
193
/* Return the largest num_objects over the xref sections from
 * doc->xref_base onwards, also taking the local xref into account when
 * one is active (local_xref set and local_xref_nesting > 0). */
int pdf_xref_len(fz_context *ctx, pdf_document *doc)
{
	int longest = 0;
	int s;

	if (doc->local_xref && doc->local_xref_nesting > 0)
		longest = doc->local_xref->num_objects;

	for (s = doc->xref_base; s < doc->num_xref_sections; s++)
		longest = fz_maxi(longest, doc->xref_sections[s].num_objects);

	return longest;
}
206
207
/* Ensure that the given xref has a single subsection
208
 * that covers the entire range. */
209
static void
210
ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num, int which)
211
34.0k
{
212
34.0k
  pdf_xref *xref = &doc->xref_sections[which];
213
34.0k
  pdf_xref_subsec *sub = xref->subsec;
214
34.0k
  pdf_xref_subsec *new_sub;
215
216
34.0k
  if (num < xref->num_objects)
217
696
    num = xref->num_objects;
218
219
34.0k
  if (sub != NULL && sub->next == NULL && sub->start == 0 && sub->len >= num)
220
810
    return;
221
222
33.2k
  new_sub = fz_malloc_struct(ctx, pdf_xref_subsec);
223
66.5k
  fz_try(ctx)
224
66.5k
  {
225
33.2k
    new_sub->table = fz_malloc_struct_array(ctx, num, pdf_xref_entry);
226
33.2k
    new_sub->start = 0;
227
33.2k
    new_sub->len = num;
228
33.2k
    new_sub->next = NULL;
229
33.2k
  }
230
66.5k
  fz_catch(ctx)
231
0
  {
232
0
    fz_free(ctx, new_sub);
233
0
    fz_rethrow(ctx);
234
0
  }
235
236
  /* Move objects over to the new subsection and destroy the old
237
   * ones */
238
33.2k
  sub = xref->subsec;
239
55.3k
  while (sub != NULL)
240
22.1k
  {
241
22.1k
    pdf_xref_subsec *next = sub->next;
242
22.1k
    int i;
243
244
33.5M
    for (i = 0; i < sub->len; i++)
245
33.5M
    {
246
33.5M
      new_sub->table[i+sub->start] = sub->table[i];
247
33.5M
    }
248
22.1k
    fz_free(ctx, sub->table);
249
22.1k
    fz_free(ctx, sub);
250
22.1k
    sub = next;
251
22.1k
  }
252
33.2k
  xref->num_objects = num;
253
33.2k
  xref->subsec = new_sub;
254
33.2k
  if (doc->max_xref_len < num)
255
32.3k
    extend_xref_index(ctx, doc, num);
256
33.2k
}
257
258
static pdf_xref_entry *
259
pdf_get_local_xref_entry(fz_context *ctx, pdf_document *doc, int num)
260
23.7k
{
261
23.7k
  pdf_xref *xref = doc->local_xref;
262
23.7k
  pdf_xref_subsec *sub;
263
264
23.7k
  if (xref == NULL || doc->local_xref_nesting == 0)
265
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Local xref not present!");
266
267
  /* Local xrefs only ever have 1 section, and it should be solid. */
268
23.7k
  sub = xref->subsec;
269
23.7k
  assert(sub && !sub->next);
270
23.7k
  if (num >= sub->start && num < sub->start + sub->len)
271
17.1k
    return &sub->table[num - sub->start];
272
273
  /* Expand the xref so we can return a pointer. */
274
6.60k
  resize_xref_sub(ctx, xref, 0, num+1);
275
6.60k
  sub = xref->subsec;
276
6.60k
  return &sub->table[num - sub->start];
277
23.7k
}
278
279
/* Return the entry for object 'num' within the xref that is currently
 * being populated.
 *
 * If the document has no xref sections yet, a first one is allocated. If
 * a local xref is active the request is forwarded to it. Otherwise 'num'
 * must lie in [0, PDF_MAX_OBJECT_NUMBER] (FZ_ERROR_ARGUMENT is thrown if
 * not) and the entry is looked up in the last xref section; if no
 * subsection there covers 'num', that section is solidified so that an
 * entry exists. */
pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc, int num)
{
	pdf_xref *last;
	pdf_xref_subsec *sub;

	if (doc->num_xref_sections == 0)
	{
		doc->xref_sections = fz_malloc_struct(ctx, pdf_xref);
		doc->num_xref_sections = 1;
	}

	if (doc->local_xref && doc->local_xref_nesting > 0)
		return pdf_get_local_xref_entry(ctx, doc, num);

	/* Prevent accidental heap underflow */
	if (num < 0 || num > PDF_MAX_OBJECT_NUMBER)
		fz_throw(ctx, FZ_ERROR_ARGUMENT, "object number out of range (%d)", num);

	/* Look for a subsection of the last section that covers num. */
	last = &doc->xref_sections[doc->num_xref_sections-1];
	sub = last->subsec;
	while (sub != NULL)
	{
		if (num >= sub->start && num < sub->start + sub->len)
			return &sub->table[num-sub->start];
		sub = sub->next;
	}

	/* No subsection holds this object: solidify so that one does. */
	ensure_solid_xref(ctx, doc, num+1, doc->num_xref_sections-1);
	last = &doc->xref_sections[doc->num_xref_sections-1];
	sub = last->subsec;

	return &sub->table[num-sub->start];
}
314
315
/* It is vital that pdf_get_xref_entry_aux called with !solidify_if_needed
316
 * and a value object number, does NOT try/catch or throw. */
317
static
318
pdf_xref_entry *pdf_get_xref_entry_aux(fz_context *ctx, pdf_document *doc, int i, int solidify_if_needed)
319
40.9M
{
320
40.9M
  pdf_xref *xref = NULL;
321
40.9M
  pdf_xref_subsec *sub;
322
40.9M
  int j;
323
324
40.9M
  if (i < 0)
325
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Negative object number requested");
326
327
40.9M
  if (i < doc->max_xref_len)
328
40.7M
    j = doc->xref_index[i];
329
150k
  else
330
150k
    j = 0;
331
332
  /* If we have an active local xref, check there first. */
333
40.9M
  if (doc->local_xref && doc->local_xref_nesting > 0)
334
621k
  {
335
621k
    xref = doc->local_xref;
336
337
621k
    if (i < xref->num_objects)
338
621k
    {
339
979k
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
340
621k
      {
341
621k
        pdf_xref_entry *entry;
342
343
621k
        if (i < sub->start || i >= sub->start + sub->len)
344
0
          continue;
345
346
621k
        entry = &sub->table[i - sub->start];
347
621k
        if (entry->type)
348
262k
          return entry;
349
621k
      }
350
621k
    }
351
621k
  }
352
353
  /* We may be accessing an earlier version of the document using xref_base
354
   * and j may be an index into a later xref section */
355
40.6M
  if (doc->xref_base > j)
356
5
    j = doc->xref_base;
357
40.6M
  else
358
40.6M
    j = 0;
359
360
361
  /* Find the first xref section where the entry is defined. */
362
41.1M
  for (; j < doc->num_xref_sections; j++)
363
41.1M
  {
364
41.1M
    xref = &doc->xref_sections[j];
365
366
41.1M
    if (i < xref->num_objects)
367
41.1M
    {
368
44.4M
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
369
43.9M
      {
370
43.9M
        pdf_xref_entry *entry;
371
372
43.9M
        if (i < sub->start || i >= sub->start + sub->len)
373
3.31M
          continue;
374
375
40.6M
        entry = &sub->table[i - sub->start];
376
40.6M
        if (entry->type)
377
40.6M
        {
378
          /* Don't update xref_index if xref_base may have
379
           * influenced the value of j */
380
40.6M
          if (doc->xref_base == 0)
381
40.6M
            doc->xref_index[i] = j;
382
40.6M
          return entry;
383
40.6M
        }
384
40.6M
      }
385
41.1M
    }
386
41.1M
  }
387
388
  /* Didn't find the entry in any section. Return the entry from
389
   * the local_xref (if there is one active), or the final section. */
390
316
  if (doc->local_xref && doc->local_xref_nesting > 0)
391
0
  {
392
0
    if (xref == NULL || i < xref->num_objects)
393
0
    {
394
0
      xref = doc->local_xref;
395
0
      sub = xref->subsec;
396
0
      assert(sub != NULL && sub->next == NULL);
397
0
      if (i >= sub->start && i < sub->start + sub->len)
398
0
        return &sub->table[i - sub->start];
399
0
    }
400
401
    /* Expand the xref so we can return a pointer. */
402
0
    resize_xref_sub(ctx, xref, 0, i+1);
403
0
    sub = xref->subsec;
404
0
    return &sub->table[i - sub->start];
405
0
  }
406
407
316
  doc->xref_index[i] = 0;
408
316
  if (xref == NULL || i < xref->num_objects)
409
96
  {
410
96
    xref = &doc->xref_sections[doc->xref_base];
411
104
    for (sub = xref->subsec; sub != NULL; sub = sub->next)
412
100
    {
413
100
      if (i >= sub->start && i < sub->start + sub->len)
414
92
        return &sub->table[i - sub->start];
415
100
    }
416
96
  }
417
418
  /* Some really hairy code here. When we are reading the file in
419
   * initially, we read from 'newest' to 'oldest' (i.e. from 0 to
420
   * doc->num_xref_sections-1). Each section is created initially
421
   * with num_objects == 0 in it, and remains like that while we
422
   * are parsing the stream from the file. This is the only time
423
   * we'll ever have xref_sections with 0 objects in them. */
424
224
  if (doc->xref_sections[doc->num_xref_sections-1].num_objects == 0)
425
195
  {
426
    /* The oldest xref section has 0 objects in it. So we are
427
     * parsing an xref stream while loading. We don't want to
428
     * solidify the xref we are currently parsing for (as it'll
429
     * get very confused, and end up a different 'shape' in
430
     * memory to that which is in the file, and would hence
431
     * render 'fingerprinting' for snapshotting invalid) so
432
     * just give up at this point. */
433
195
    return NULL;
434
195
  }
435
436
29
  if (!solidify_if_needed)
437
3
    return NULL;
438
439
  /* At this point, we solidify the xref. This ensures that we
440
   * can return a pointer. This is the only case where this function
441
   * might throw an exception, and it will never happen when we are
442
   * working within a 'solid' xref. */
443
26
  ensure_solid_xref(ctx, doc, i+1, 0);
444
26
  xref = &doc->xref_sections[0];
445
26
  sub = xref->subsec;
446
26
  return &sub->table[i - sub->start];
447
29
}
448
449
/* Look up the xref entry for object 'i', allowing the xref to be
 * solidified if that is what it takes to return an entry. May return
 * NULL. */
pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i)
{
	const int allow_solidify = 1;

	return pdf_get_xref_entry_aux(ctx, doc, i, allow_solidify);
}
453
454
/* Look up the xref entry for object 'i' without permitting the xref to be
 * solidified; returns NULL where solidification would have been needed. */
pdf_xref_entry *pdf_get_xref_entry_no_change(fz_context *ctx, pdf_document *doc, int i)
{
	const int allow_solidify = 0;

	return pdf_get_xref_entry_aux(ctx, doc, i, allow_solidify);
}
458
459
/* As pdf_get_xref_entry, but never returns NULL: throws
 * FZ_ERROR_ARGUMENT if no entry can be found for object 'i'. */
pdf_xref_entry *pdf_get_xref_entry_no_null(fz_context *ctx, pdf_document *doc, int i)
{
	pdf_xref_entry *found = pdf_get_xref_entry(ctx, doc, i);

	if (found == NULL)
		fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot find object in xref (%d 0 R), but not allowed to return NULL", i);

	return found;
}
466
467
void pdf_xref_entry_map(fz_context *ctx, pdf_document *doc, void (*fn)(fz_context *, pdf_xref_entry *, int, pdf_document *, void *), void *arg)
468
617
{
469
617
  int i, j;
470
617
  pdf_xref_subsec *sub;
471
617
  int xref_base = doc->xref_base;
472
473
1.23k
  fz_try(ctx)
474
1.23k
  {
475
    /* Map over any active local xref first. */
476
617
    if (doc->local_xref && doc->local_xref_nesting > 0)
477
0
    {
478
0
      pdf_xref *xref = doc->local_xref;
479
480
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
481
0
      {
482
0
        for (i = sub->start; i < sub->start + sub->len; i++)
483
0
        {
484
0
          pdf_xref_entry *entry = &sub->table[i - sub->start];
485
0
          if (entry->type)
486
0
            fn(ctx, entry, i, doc, arg);
487
0
        }
488
0
      }
489
0
    }
490
491
1.46k
    for (j = 0; j < doc->num_xref_sections; j++)
492
848
    {
493
848
      pdf_xref *xref = &doc->xref_sections[j];
494
848
      doc->xref_base = j;
495
496
1.94k
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
497
1.10k
      {
498
39.6k
        for (i = sub->start; i < sub->start + sub->len; i++)
499
38.5k
        {
500
38.5k
          pdf_xref_entry *entry = &sub->table[i - sub->start];
501
38.5k
          if (entry->type)
502
38.3k
            fn(ctx, entry, i, doc, arg);
503
38.5k
        }
504
1.10k
      }
505
848
    }
506
617
  }
507
1.23k
  fz_always(ctx)
508
617
  {
509
617
    doc->xref_base = xref_base;
510
617
  }
511
617
  fz_catch(ctx)
512
13
    fz_rethrow(ctx);
513
617
}
514
515
/*
516
  Ensure we have an incremental xref section where we can store
517
  updated versions of indirect objects. This is a new xref section
518
  consisting of a single xref subsection.
519
*/
520
static void ensure_incremental_xref(fz_context *ctx, pdf_document *doc)
521
148
{
522
  /* If there are as yet no incremental sections, or if the most recent
523
   * one has been used to sign a signature field, then we need a new one.
524
   * After a signing, any further document changes require a new increment */
525
148
  if ((doc->num_incremental_sections == 0 || doc->xref_sections[0].unsaved_sigs != NULL)
526
148
    && !doc->disallow_new_increments)
527
74
  {
528
74
    pdf_xref *xref = &doc->xref_sections[0];
529
74
    pdf_xref *pxref;
530
74
    pdf_xref_entry *new_table = fz_malloc_struct_array(ctx, xref->num_objects, pdf_xref_entry);
531
74
    pdf_xref_subsec *sub = NULL;
532
74
    pdf_obj *trailer = NULL;
533
74
    int i;
534
535
74
    fz_var(trailer);
536
74
    fz_var(sub);
537
148
    fz_try(ctx)
538
148
    {
539
74
      sub = fz_malloc_struct(ctx, pdf_xref_subsec);
540
74
      trailer = xref->trailer ? pdf_copy_dict(ctx, xref->trailer) : NULL;
541
74
      doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, pdf_xref);
542
74
      xref = &doc->xref_sections[0];
543
74
      pxref = &doc->xref_sections[1];
544
74
      memmove(pxref, xref, doc->num_xref_sections * sizeof(pdf_xref));
545
      /* xref->num_objects is already correct */
546
74
      xref->subsec = sub;
547
74
      sub = NULL;
548
74
      xref->trailer = trailer;
549
74
      xref->pre_repair_trailer = NULL;
550
74
      xref->unsaved_sigs = NULL;
551
74
      xref->unsaved_sigs_end = NULL;
552
74
      xref->subsec->next = NULL;
553
74
      xref->subsec->len = xref->num_objects;
554
74
      xref->subsec->start = 0;
555
74
      xref->subsec->table = new_table;
556
74
      doc->num_xref_sections++;
557
74
      doc->num_incremental_sections++;
558
74
    }
559
148
    fz_catch(ctx)
560
0
    {
561
0
      fz_free(ctx, sub);
562
0
      fz_free(ctx, new_table);
563
0
      pdf_drop_obj(ctx, trailer);
564
0
      fz_rethrow(ctx);
565
0
    }
566
567
    /* Update the xref_index */
568
7.65k
    for (i = 0; i < doc->max_xref_len; i++)
569
7.57k
    {
570
7.57k
      doc->xref_index[i]++;
571
7.57k
    }
572
74
  }
573
148
}
574
575
/* Used when altering a document */
576
pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i)
577
74
{
578
74
  pdf_xref *xref;
579
74
  pdf_xref_subsec *sub;
580
581
  /* Make a new final xref section if we haven't already */
582
74
  ensure_incremental_xref(ctx, doc);
583
584
74
  xref = &doc->xref_sections[doc->xref_base];
585
74
  if (i >= xref->num_objects)
586
0
    pdf_resize_xref(ctx, doc, i + 1);
587
588
74
  sub = xref->subsec;
589
74
  assert(sub != NULL && sub->next == NULL);
590
74
  assert(i >= sub->start && i < sub->start + sub->len);
591
74
  doc->xref_index[i] = 0;
592
74
  return &sub->table[i - sub->start];
593
74
}
594
595
/* Return non-zero if object 'num' has an entry with a non-zero type in the
 * xref section at doc->xref_base. The assert requires that section to be
 * a single subsection starting at 0 that covers all of its objects.
 * Negative object numbers are reported as not present rather than being
 * used as a table index. */
int pdf_xref_is_incremental(fz_context *ctx, pdf_document *doc, int num)
{
	pdf_xref *xref = &doc->xref_sections[doc->xref_base];
	pdf_xref_subsec *sub = xref->subsec;

	assert(sub != NULL && sub->next == NULL && sub->len == xref->num_objects && sub->start == 0);

	return num >= 0 && num < xref->num_objects && sub->table[num].type;
}
604
605
/* Used when clearing signatures. Removes the signature
606
from the list of unsaved signed signatures. */
607
void pdf_xref_remove_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field)
608
0
{
609
0
  int num = pdf_to_num(ctx, field);
610
0
  int idx = doc->xref_index[num];
611
0
  pdf_xref *xref = &doc->xref_sections[idx];
612
0
  pdf_unsaved_sig **usigptr = &xref->unsaved_sigs;
613
0
  pdf_unsaved_sig *usig = xref->unsaved_sigs;
614
615
0
  while (usig)
616
0
  {
617
0
    pdf_unsaved_sig **nextptr = &usig->next;
618
0
    pdf_unsaved_sig *next = usig->next;
619
620
0
    if (usig->field == field)
621
0
    {
622
0
      if (xref->unsaved_sigs_end == &usig->next)
623
0
      {
624
0
        if (usig->next)
625
0
          xref->unsaved_sigs_end = &usig->next->next;
626
0
        else
627
0
          xref->unsaved_sigs_end = NULL;
628
0
      }
629
0
      if (usigptr)
630
0
        *usigptr = usig->next;
631
632
0
      usig->next = NULL;
633
0
      pdf_drop_obj(ctx, usig->field);
634
0
      pdf_drop_signer(ctx, usig->signer);
635
0
      fz_free(ctx, usig);
636
637
0
      break;
638
0
    }
639
640
0
    usig = next;
641
0
    usigptr = nextptr;
642
0
  }
643
0
}
644
645
/* Append a record for a signed-but-unsaved signature field to the list
 * held by xref section 0, so that its contents and byte range can be
 * filled in with their correct values at saving time. New references to
 * both 'field' and 'signer' are taken. */
void pdf_xref_store_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field, pdf_pkcs7_signer *signer)
{
	pdf_xref *xref = &doc->xref_sections[0];
	pdf_unsaved_sig *node;

	node = fz_malloc_struct(ctx, pdf_unsaved_sig);
	node->field = pdf_keep_obj(ctx, field);
	node->signer = signer->keep(ctx, signer);
	node->next = NULL;

	/* An unset end pointer means the list is appended to at its head. */
	if (!xref->unsaved_sigs_end)
		xref->unsaved_sigs_end = &xref->unsaved_sigs;

	/* Link the node at the tail and advance the tail pointer. */
	*xref->unsaved_sigs_end = node;
	xref->unsaved_sigs_end = &node->next;
}
663
664
/* Return 1 if 'obj' (compared by pointer) is the field of any unsaved
 * signature recorded in one of the document's incremental xref sections;
 * 0 otherwise. */
int pdf_xref_obj_is_unsaved_signature(pdf_document *doc, pdf_obj *obj)
{
	int section = 0;

	while (section < doc->num_incremental_sections)
	{
		pdf_unsaved_sig *usig = doc->xref_sections[section].unsaved_sigs;

		while (usig)
		{
			if (usig->field == obj)
				return 1;
			usig = usig->next;
		}
		section++;
	}

	return 0;
}
681
682
/* Make xref section 0 a single solid subsection of at least 'num' entries,
 * first creating an (empty) xref section if the document has none. */
void pdf_ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num)
{
	if (!doc->num_xref_sections)
		pdf_populate_next_xref_level(ctx, doc);

	ensure_solid_xref(ctx, doc, num, 0);
}
689
690
/* Make sure object 'num' lives in the incremental xref section.
 *
 * An incremental section is created first if required. The object is then
 * searched for in the xref sections starting at doc->xref_index[num]. If
 * it is found in a section other than section 0, its entry is moved into
 * the incremental section and the old entry is left holding a deep copy
 * of the object.
 *
 * Returns 1 if the object was moved; 0 if 'num' is out of range of the
 * xref index, the object was not found, or it already lives in section 0.
 *
 * The range check on 'num' is performed before doc->xref_index is
 * indexed; sections too short to contain 'num' are skipped.
 */
int pdf_xref_ensure_incremental_object(fz_context *ctx, pdf_document *doc, int num)
{
	pdf_xref_entry *new_entry, *old_entry;
	pdf_xref_subsec *sub = NULL;
	int i;
	pdf_obj *copy;

	/* Make sure we have created an xref section for incremental updates */
	ensure_incremental_xref(ctx, doc);

	/* An object number outside the index cannot be looked up. */
	if (num < 0 || num >= doc->max_xref_len)
		return 0;

	/* Search for the section that contains this object */
	for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
	{
		pdf_xref *xref = &doc->xref_sections[i];

		/* This section is too short to hold the object. */
		if (num >= xref->num_objects)
			continue;
		for (sub = xref->subsec; sub != NULL; sub = sub->next)
		{
			if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
				break;
		}
		if (sub != NULL)
			break;
	}
	/* sub == NULL implies we did not find it */

	/* If we don't find it, or it's already in the incremental section, return */
	if (i == 0 || sub == NULL)
		return 0;

	copy = pdf_deep_copy_obj(ctx, sub->table[num - sub->start].obj);

	/* Move the object to the incremental section */
	i = doc->xref_index[num];
	doc->xref_index[num] = 0;
	old_entry = &sub->table[num - sub->start];
	fz_try(ctx)
		new_entry = pdf_get_incremental_xref_entry(ctx, doc, num);
	fz_catch(ctx)
	{
		/* Undo the index change and release the copy on failure. */
		pdf_drop_obj(ctx, copy);
		doc->xref_index[num] = i;
		fz_rethrow(ctx);
	}
	*new_entry = *old_entry;
	if (new_entry->type == 'o')
	{
		new_entry->type = 'n';
		new_entry->gen = 0;
	}
	/* Better keep a copy. We must override the old entry with
	 * the copy because the caller may be holding a reference to
	 * the original and expect it to end up in the new entry */
	old_entry->obj = copy;
	old_entry->stm_buf = NULL;

	return 1;
}
749
750
/* Make sure object 'num' has an entry in the document's local xref.
 *
 * If the local xref already holds a typed entry for 'num' nothing is
 * done. Otherwise the object is searched for in the document's xref
 * sections starting at doc->xref_index[num]; if found, its entry is
 * copied into the local xref (which receives the original object
 * pointer) and the old entry is left holding a deep copy. The local
 * entry's stream buffer is left NULL.
 *
 * Nothing is done if 'num' lies outside the xref index or the object
 * cannot be found. The range check is performed before doc->xref_index
 * is indexed; sections too short to contain 'num' are skipped.
 */
void pdf_xref_ensure_local_object(fz_context *ctx, pdf_document *doc, int num)
{
	pdf_xref_entry *new_entry, *old_entry;
	pdf_xref_subsec *sub = NULL;
	int i;
	pdf_xref *xref;
	pdf_obj *copy;

	/* Is it in the local section already? */
	xref = doc->local_xref;
	for (sub = xref->subsec; sub != NULL; sub = sub->next)
	{
		if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
			break;
	}
	/* If we found it, it's in the local section already. */
	if (sub != NULL)
		return;

	/* An object number outside the index cannot be looked up. */
	if (num < 0 || num >= doc->max_xref_len)
		return;

	/* Search for the section that contains this object */
	for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
	{
		xref = &doc->xref_sections[i];

		/* This section is too short to hold the object. */
		if (num >= xref->num_objects)
			continue;
		for (sub = xref->subsec; sub != NULL; sub = sub->next)
		{
			if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
				break;
		}
		if (sub != NULL)
			break;
	}
	/* sub == NULL implies we did not find it */
	if (sub == NULL)
		return; /* No object to find */

	copy = pdf_deep_copy_obj(ctx, sub->table[num - sub->start].obj);

	/* Copy the object to the local section */
	i = doc->xref_index[num];
	doc->xref_index[num] = 0;
	old_entry = &sub->table[num - sub->start];
	fz_try(ctx)
		new_entry = pdf_get_local_xref_entry(ctx, doc, num);
	fz_catch(ctx)
	{
		/* Undo the index change and release the copy on failure. */
		pdf_drop_obj(ctx, copy);
		doc->xref_index[num] = i;
		fz_rethrow(ctx);
	}
	*new_entry = *old_entry;
	if (new_entry->type == 'o')
	{
		new_entry->type = 'n';
		new_entry->gen = 0;
	}
	/* old entry is incremental and may have changes.
	 * Better keep a copy. We must override the old entry with
	 * the copy because the caller may be holding a reference to
	 * the original and expect it to end up in the new entry */
	new_entry->obj = old_entry->obj;
	old_entry->obj = copy;
	new_entry->stm_buf = NULL; /* FIXME */
}
818
819
/* Replace all of the document's xref sections with one new section whose
 * single subsection is the caller-supplied table 'entries' of 'n'
 * entries. The current trailer is carried over to the new section, the
 * old sections are dropped, and a fresh zeroed xref index of 'n' slots
 * replaces the old one. The 'entries' pointer is stored directly in the
 * new subsection. */
void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n)
{
	int *fresh_index = NULL;
	pdf_xref *section = NULL;
	pdf_xref_subsec *sub;

	fz_var(fresh_index);
	fz_var(section);

	/* Do all the allocation up front so failure leaves doc untouched. */
	fz_try(ctx)
	{
		fresh_index = fz_calloc(ctx, n, sizeof(int));
		section = fz_malloc_struct(ctx, pdf_xref);
		sub = fz_malloc_struct(ctx, pdf_xref_subsec);
	}
	fz_catch(ctx)
	{
		fz_free(ctx, section);
		fz_free(ctx, fresh_index);
		fz_rethrow(ctx);
	}

	sub->table = entries;
	sub->start = 0;
	sub->len = n;

	section->subsec = sub;
	section->num_objects = n;
	/* Grab the trailer before the old sections are dropped. */
	section->trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));

	/* The new table completely replaces the previous separate sections */
	pdf_drop_xref_sections(ctx, doc);

	doc->xref_sections = section;
	doc->num_xref_sections = 1;
	doc->num_incremental_sections = 0;
	doc->xref_base = 0;
	doc->disallow_new_increments = 0;
	doc->max_xref_len = n;

	fz_free(ctx, doc->xref_index);
	doc->xref_index = fresh_index;
}
862
863
/* Set the current xref sections aside as the document's saved sections
 * (dropping any previously saved ones), reset the xref-related state of
 * the document, and start a fresh xref whose first section carries the
 * old trailer. */
void pdf_forget_xref(fz_context *ctx, pdf_document *doc)
{
	/* Hold on to the trailer; it is handed to the new section below. */
	pdf_obj *old_trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));

	pdf_drop_local_xref_and_resources(ctx, doc);

	if (doc->saved_xref_sections != NULL)
		pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);

	doc->saved_xref_sections = doc->xref_sections;
	doc->saved_num_xref_sections = doc->num_xref_sections;

	doc->xref_sections = NULL;
	doc->num_xref_sections = 0;
	doc->num_incremental_sections = 0;
	doc->startxref = 0;
	doc->xref_base = 0;
	doc->disallow_new_increments = 0;

	/* Requesting entry 0 creates the first section of the new xref. */
	fz_try(ctx)
	{
		pdf_get_populating_xref_entry(ctx, doc, 0);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, old_trailer);
		fz_rethrow(ctx);
	}

	/* Set the trailer of the final xref section. */
	doc->xref_sections[0].trailer = old_trailer;
}
895
896
/*
897
 * magic version tag and startxref
898
 */
899
900
int
901
pdf_version(fz_context *ctx, pdf_document *doc)
902
0
{
903
0
  int version = doc->version;
904
0
  fz_try(ctx)
905
0
  {
906
0
    pdf_obj *obj = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Version), NULL);
907
0
    const char *str = pdf_to_name(ctx, obj);
908
0
    if (*str)
909
0
      version = 10 * (fz_atof(str) + 0.05f);
910
0
  }
911
0
  fz_catch(ctx)
912
0
  {
913
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
914
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
915
0
    fz_report_error(ctx);
916
0
    fz_warn(ctx, "Ignoring broken Root/Version number.");
917
0
  }
918
0
  return version;
919
0
}
920
921
static void
922
pdf_load_version(fz_context *ctx, pdf_document *doc)
923
11.8k
{
924
11.8k
  char buf[20];
925
926
11.8k
  fz_seek(ctx, doc->file, 0, SEEK_SET);
927
11.8k
  fz_read_line(ctx, doc->file, buf, sizeof buf);
928
11.8k
  if (strlen(buf) < 5 || (memcmp(buf, "%PDF-", 5) != 0 && memcmp(buf, "%FDF-", 5) != 0))
929
9.28k
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize version marker");
930
931
2.60k
  if (buf[1] == 'F')
932
2
    doc->is_fdf = 1;
933
934
2.60k
  doc->version = 10 * (fz_atof(buf+5) + 0.05f);
935
2.60k
  if (doc->version < 10 || doc->version > 17)
936
389
    if (doc->version != 20)
937
383
      fz_warn(ctx, "unknown PDF version: %d.%d", doc->version / 10, doc->version % 10);
938
2.60k
}
939
940
static void
941
pdf_read_start_xref(fz_context *ctx, pdf_document *doc)
942
2.60k
{
943
2.60k
  unsigned char buf[1024];
944
2.60k
  size_t i, n;
945
2.60k
  int64_t t;
946
947
2.60k
  fz_seek(ctx, doc->file, 0, SEEK_END);
948
949
2.60k
  doc->file_size = fz_tell(ctx, doc->file);
950
951
2.60k
  t = fz_maxi64(0, doc->file_size - (int64_t)sizeof buf);
952
2.60k
  fz_seek(ctx, doc->file, t, SEEK_SET);
953
954
2.60k
  n = fz_read(ctx, doc->file, buf, sizeof buf);
955
2.60k
  if (n < 9)
956
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find startxref");
957
958
2.60k
  i = n - 9;
959
2.60k
  do
960
1.08M
  {
961
1.08M
    if (memcmp(buf + i, "startxref", 9) == 0)
962
1.44k
    {
963
1.44k
      i += 9;
964
3.89k
      while (i < n && iswhite(buf[i]))
965
2.44k
        i ++;
966
1.44k
      doc->startxref = 0;
967
7.49k
      while (i < n && isdigit(buf[i]))
968
6.04k
      {
969
6.04k
        if (doc->startxref >= INT64_MAX/10)
970
2
          fz_throw(ctx, FZ_ERROR_LIMIT, "startxref too large");
971
6.04k
        doc->startxref = doc->startxref * 10 + (buf[i++] - '0');
972
6.04k
      }
973
1.44k
      if (doc->startxref != 0)
974
1.41k
        return;
975
34
      break;
976
1.44k
    }
977
1.08M
  } while (i-- > 0);
978
979
1.19k
  fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find startxref");
980
2.60k
}
981
982
/*
 * Consume bytes from the stream while the next byte has a value of 32
 * or below. Stops, without consuming, at EOF or at the first byte
 * above 32.
 */
void fz_skip_space(fz_context *ctx, fz_stream *stm)
{
	int next = fz_peek_byte(ctx, stm);

	while (next != EOF && next <= 32)
	{
		(void)fz_read_byte(ctx, stm);
		next = fz_peek_byte(ctx, stm);
	}
}
993
994
/*
 * Try to consume the literal 'str' from the stream.
 *
 * Returns 0 when every character of 'str' was matched and consumed.
 * Returns 1 at the first mismatch or EOF; characters matched before
 * that point have already been consumed, the mismatching byte has not.
 */
int fz_skip_string(fz_context *ctx, fz_stream *stm, const char *str)
{
	const char *expect;

	for (expect = str; *expect; expect++)
	{
		int got = fz_peek_byte(ctx, stm);

		if (got == EOF || got != *expect)
			return 1;
		(void)fz_read_byte(ctx, stm);
	}

	return 0;
}
1005
1006
/*
1007
 * trailer dictionary
1008
 */
1009
1010
static int
1011
pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc)
1012
629
{
1013
629
  int len;
1014
629
  char *s;
1015
629
  int64_t t;
1016
629
  pdf_token tok;
1017
629
  int c;
1018
629
  int size = 0;
1019
629
  int64_t ofs;
1020
629
  pdf_obj *trailer = NULL;
1021
629
  size_t n;
1022
629
  pdf_lexbuf *buf = &doc->lexbuf.base;
1023
629
  pdf_obj *obj = NULL;
1024
1025
629
  fz_var(trailer);
1026
1027
  /* Record the current file read offset so that we can reinstate it */
1028
629
  ofs = fz_tell(ctx, doc->file);
1029
1030
629
  fz_skip_space(ctx, doc->file);
1031
629
  if (fz_skip_string(ctx, doc->file, "xref"))
1032
3
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find xref marker");
1033
626
  fz_skip_space(ctx, doc->file);
1034
1035
2.50k
  while (1)
1036
2.50k
  {
1037
2.50k
    c = fz_peek_byte(ctx, doc->file);
1038
2.50k
    if (!isdigit(c))
1039
611
      break;
1040
1041
1.89k
    fz_read_line(ctx, doc->file, buf->scratch, buf->size);
1042
1.89k
    s = buf->scratch;
1043
1.89k
    fz_strsep(&s, " "); /* ignore start */
1044
1.89k
    if (!s)
1045
12
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref subsection length missing");
1046
1.87k
    len = fz_atoi(fz_strsep(&s, " "));
1047
1.87k
    if (len < 0)
1048
2
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref subsection length must be positive");
1049
1050
    /* broken pdfs where the section is not on a separate line */
1051
1.87k
    if (s && *s != '\0')
1052
123
      fz_seek(ctx, doc->file, -(2 + (int)strlen(s)), SEEK_CUR);
1053
1054
1.87k
    t = fz_tell(ctx, doc->file);
1055
1.87k
    if (t < 0)
1056
0
      fz_throw(ctx, FZ_ERROR_SYSTEM, "cannot tell in file");
1057
1058
    /* Spec says xref entries should be 20 bytes, but it's not infrequent
1059
     * to see 19, in particular for some PCLm drivers. Cope. */
1060
1.87k
    if (len > 0)
1061
1.59k
    {
1062
1.59k
      n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, 20);
1063
1.59k
      if (n < 19)
1064
1
        fz_throw(ctx, FZ_ERROR_FORMAT, "malformed xref table");
1065
1.59k
      if (n == 20 && buf->scratch[19] > 32)
1066
38
        n = 19;
1067
1.59k
    }
1068
283
    else
1069
283
      n = 20;
1070
1071
1.87k
    if (len > (int64_t)((INT64_MAX - t) / n))
1072
0
      fz_throw(ctx, FZ_ERROR_LIMIT, "xref has too many entries");
1073
1074
1.87k
    fz_seek(ctx, doc->file, t + n * (int64_t)len, SEEK_SET);
1075
1.87k
  }
1076
1077
1.22k
  fz_try(ctx)
1078
1.22k
  {
1079
611
    tok = pdf_lex(ctx, doc->file, buf);
1080
611
    if (tok != PDF_TOK_TRAILER)
1081
22
      fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer marker");
1082
1083
589
    tok = pdf_lex(ctx, doc->file, buf);
1084
589
    if (tok != PDF_TOK_OPEN_DICT)
1085
2
      fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer dictionary");
1086
1087
587
    trailer = pdf_parse_dict(ctx, doc, doc->file, buf);
1088
1089
587
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
1090
587
    if (pdf_is_indirect(ctx, obj))
1091
2
      fz_throw(ctx, FZ_ERROR_FORMAT, "trailer Size entry is indirect");
1092
1093
585
    size = pdf_dict_get_int(ctx, trailer, PDF_NAME(Size));
1094
585
    if (size < 0 || size > PDF_MAX_OBJECT_NUMBER + 1)
1095
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "trailer Size entry out of range");
1096
585
  }
1097
1.22k
  fz_always(ctx)
1098
611
  {
1099
611
    pdf_drop_obj(ctx, trailer);
1100
611
  }
1101
611
  fz_catch(ctx)
1102
27
  {
1103
27
    fz_rethrow(ctx);
1104
27
  }
1105
1106
558
  fz_seek(ctx, doc->file, ofs, SEEK_SET);
1107
1108
558
  return size;
1109
585
}
1110
1111
static pdf_xref_entry *
1112
pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, int start, int len)
1113
2.26k
{
1114
2.26k
  pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-1];
1115
2.26k
  pdf_xref_subsec *sub, *extend = NULL;
1116
2.26k
  int num_objects;
1117
2.26k
  int solidify = 0;
1118
1119
2.26k
  if (len == 0)
1120
24
    return NULL;
1121
1122
  /* Different cases here.
1123
   * Case 1) We might be asking for a subsection (or a subset of a
1124
   *         subsection) that we already have - Just return it.
1125
   * Case 2) We might be asking for a subsection that overlaps (or
1126
   *         extends) a subsection we already have - extend the existing one.
1127
   * Case 3) We might be asking for a subsection that overlaps multiple
1128
   *         existing subsections - solidify the whole set.
1129
   * Case 4) We might be asking for a completely new subsection - just
1130
   *         allocate it.
1131
   */
1132
1133
  /* Sanity check */
1134
8.65k
  for (sub = xref->subsec; sub != NULL; sub = sub->next)
1135
6.42k
  {
1136
6.42k
    if (start >= sub->start && start <= sub->start + sub->len)
1137
634
    {
1138
      /* 'start' is in (or immediately after) 'sub' */
1139
634
      if (start + len <= sub->start + sub->len)
1140
8
      {
1141
        /* And so is start+len-1 - just return this! Case 1. */
1142
8
        return &sub->table[start-sub->start];
1143
8
      }
1144
      /* So we overlap with sub. */
1145
626
      if (extend == NULL)
1146
626
      {
1147
        /* Maybe we can extend sub? */
1148
626
        extend = sub;
1149
626
      }
1150
0
      else
1151
0
      {
1152
        /* OK, so we've already found an overlapping one. We'll need to solidify. Case 3. */
1153
0
        solidify = 1;
1154
0
        break;
1155
0
      }
1156
626
    }
1157
5.79k
    else if (start + len > sub->start && start + len < sub->start + sub->len)
1158
1
    {
1159
      /* The end of the start+len range is in 'sub'. */
1160
      /* For now, we won't support extending sub backwards. Just take this as
1161
       * needing to solidify. Case 3. */
1162
1
      solidify = 1;
1163
1
      break;
1164
1
    }
1165
5.79k
    else if (start < sub->start && start + len >= sub->start + sub->len)
1166
0
    {
1167
      /* The end of the start+len range is beyond 'sub'. */
1168
      /* For now, we won't support extending sub backwards. Just take this as
1169
       * needing to solidify. Another variant of case 3. */
1170
0
      solidify = 1;
1171
0
      break;
1172
0
    }
1173
6.42k
  }
1174
1175
2.23k
  num_objects = xref->num_objects;
1176
2.23k
  if (num_objects < start + len)
1177
2.21k
    num_objects = start + len;
1178
1179
2.23k
  if (solidify)
1180
1
  {
1181
    /* Case 3: Solidify the xref */
1182
1
    ensure_solid_xref(ctx, doc, num_objects, doc->num_xref_sections-1);
1183
1
    xref = &doc->xref_sections[doc->num_xref_sections-1];
1184
1
    sub = xref->subsec;
1185
1
  }
1186
2.23k
  else if (extend)
1187
626
  {
1188
    /* Case 2: Extend the subsection */
1189
626
    int newlen = start + len - extend->start;
1190
626
    sub = extend;
1191
626
    sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
1192
626
    memset(&sub->table[sub->len], 0, sizeof(pdf_xref_entry) * (newlen - sub->len));
1193
626
    sub->len = newlen;
1194
626
    if (xref->num_objects < sub->start + sub->len)
1195
619
      xref->num_objects = sub->start + sub->len;
1196
626
    if (doc->max_xref_len < sub->start + sub->len)
1197
597
      extend_xref_index(ctx, doc, sub->start + sub->len);
1198
626
  }
1199
1.60k
  else
1200
1.60k
  {
1201
    /* Case 4 */
1202
1.60k
    sub = fz_malloc_struct(ctx, pdf_xref_subsec);
1203
3.21k
    fz_try(ctx)
1204
3.21k
    {
1205
1.60k
      sub->table = fz_malloc_struct_array(ctx, len, pdf_xref_entry);
1206
1.60k
      sub->start = start;
1207
1.60k
      sub->len = len;
1208
1.60k
      sub->next = xref->subsec;
1209
1.60k
      xref->subsec = sub;
1210
1.60k
    }
1211
3.21k
    fz_catch(ctx)
1212
0
    {
1213
0
      fz_free(ctx, sub);
1214
0
      fz_rethrow(ctx);
1215
0
    }
1216
1.60k
    if (xref->num_objects < num_objects)
1217
1.59k
      xref->num_objects = num_objects;
1218
1.60k
    if (doc->max_xref_len < num_objects)
1219
1.20k
      extend_xref_index(ctx, doc, num_objects);
1220
1.60k
  }
1221
2.23k
  return &sub->table[start-sub->start];
1222
2.23k
}
1223
1224
static inline void
1225
validate_object_number_range(fz_context *ctx, int first, int len, const char *what)
1226
2.26k
{
1227
2.26k
  if (first < 0 || first > PDF_MAX_OBJECT_NUMBER)
1228
1
    fz_throw(ctx, FZ_ERROR_FORMAT, "first object number in %s out of range", what);
1229
2.26k
  if (len < 0 || len > PDF_MAX_OBJECT_NUMBER)
1230
1
    fz_throw(ctx, FZ_ERROR_FORMAT, "number of objects in %s out of range", what);
1231
2.26k
  if (len > 0 && len - 1 > PDF_MAX_OBJECT_NUMBER - first)
1232
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "last object number in %s out of range", what);
1233
2.26k
}
1234
1235
static pdf_obj *
1236
pdf_read_old_xref(fz_context *ctx, pdf_document *doc)
1237
629
{
1238
629
  int start, len, c, i, xref_len, carried;
1239
629
  fz_stream *file = doc->file;
1240
629
  pdf_xref_entry *table;
1241
629
  pdf_token tok;
1242
629
  size_t n;
1243
629
  char *s, *e;
1244
629
  pdf_lexbuf *buf = &doc->lexbuf.base;
1245
1246
629
  xref_len = pdf_xref_size_from_old_trailer(ctx, doc);
1247
1248
629
  fz_skip_space(ctx, doc->file);
1249
629
  if (fz_skip_string(ctx, doc->file, "xref"))
1250
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find xref marker");
1251
629
  fz_skip_space(ctx, doc->file);
1252
1253
2.06k
  while (1)
1254
2.02k
  {
1255
2.02k
    c = fz_peek_byte(ctx, file);
1256
2.02k
    if (!isdigit(c))
1257
543
      break;
1258
1259
1.48k
    fz_read_line(ctx, file, buf->scratch, buf->size);
1260
1.48k
    s = buf->scratch;
1261
1.48k
    start = fz_atoi(fz_strsep(&s, " "));
1262
1.48k
    len = fz_atoi(fz_strsep(&s, " "));
1263
1264
    /* broken pdfs where the section is not on a separate line */
1265
1.48k
    if (s && *s != '\0')
1266
9
    {
1267
9
      fz_warn(ctx, "broken xref subsection. proceeding anyway.");
1268
9
      fz_seek(ctx, file, -(2 + (int)strlen(s)), SEEK_CUR);
1269
9
    }
1270
1271
1.48k
    validate_object_number_range(ctx, start, len, "xref subsection");
1272
1273
    /* broken pdfs where size in trailer undershoots entries in xref sections */
1274
1.48k
    if (start + len > xref_len)
1275
740
    {
1276
740
      fz_warn(ctx, "broken xref subsection, proceeding anyway.");
1277
740
    }
1278
1279
1.48k
    table = pdf_xref_find_subsection(ctx, doc, start, len);
1280
1281
    /* Xref entries SHOULD be 20 bytes long, but we see 19 byte
1282
     * ones more frequently than we'd like (e.g. PCLm drivers).
1283
     * Cope with this by 'carrying' data forward. */
1284
1.48k
    carried = 0;
1285
28.6k
    for (i = 0; i < len; i++)
1286
27.2k
    {
1287
27.2k
      pdf_xref_entry *entry = &table[i];
1288
27.2k
      n = fz_read(ctx, file, (unsigned char *) buf->scratch + carried, 20-carried);
1289
27.2k
      if (n != (size_t)(20-carried))
1290
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "unexpected EOF in xref table");
1291
27.2k
      n += carried;
1292
27.2k
      buf->scratch[n] = '\0';
1293
27.2k
      if (!entry->type)
1294
27.2k
      {
1295
27.2k
        s = buf->scratch;
1296
27.2k
        e = s + n;
1297
1298
27.2k
        entry->num = start + i;
1299
1300
        /* broken pdfs where line start with white space */
1301
27.4k
        while (s < e && iswhite(*s))
1302
262
          s++;
1303
1304
27.2k
        if (s == e || !isdigit(*s))
1305
9
          fz_throw(ctx, FZ_ERROR_FORMAT, "xref offset missing");
1306
298k
        while (s < e && isdigit(*s))
1307
271k
          entry->ofs = entry->ofs * 10 + *s++ - '0';
1308
1309
54.5k
        while (s < e && iswhite(*s))
1310
27.3k
          s++;
1311
27.1k
        if (s == e || !isdigit(*s))
1312
22
          fz_throw(ctx, FZ_ERROR_FORMAT, "xref generation number missing");
1313
162k
        while (s < e && isdigit(*s))
1314
135k
          entry->gen = entry->gen * 10 + *s++ - '0';
1315
1316
54.3k
        while (s < e && iswhite(*s))
1317
27.1k
          s++;
1318
27.1k
        if (s == e || (*s != 'f' && *s != 'n' && *s != 'o'))
1319
9
          fz_throw(ctx, FZ_ERROR_FORMAT, "unexpected xref type: 0x%x (%d %d R)", s == e ? 0 : *s, entry->num, entry->gen);
1320
27.1k
        entry->type = *s++;
1321
1322
        /* If the last byte of our buffer isn't an EOL (or space), carry one byte forward */
1323
27.1k
        carried = buf->scratch[19] > 32;
1324
27.1k
        if (carried)
1325
39
          buf->scratch[0] = buf->scratch[19];
1326
27.1k
      }
1327
27.2k
    }
1328
1.44k
    if (carried)
1329
10
      fz_unread_byte(ctx, file);
1330
1.44k
  }
1331
1332
589
  tok = pdf_lex(ctx, file, buf);
1333
589
  if (tok != PDF_TOK_TRAILER)
1334
1
    fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer marker");
1335
1336
588
  tok = pdf_lex(ctx, file, buf);
1337
588
  if (tok != PDF_TOK_OPEN_DICT)
1338
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer dictionary");
1339
1340
588
  doc->last_xref_was_old_style = 1;
1341
1342
588
  return pdf_parse_dict(ctx, doc, file, buf);
1343
588
}
1344
1345
static void
1346
pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
1347
787
{
1348
787
  pdf_xref_entry *table;
1349
787
  int i, n;
1350
1351
787
  validate_object_number_range(ctx, i0, i1, "xref subsection");
1352
1353
787
  table = pdf_xref_find_subsection(ctx, doc, i0, i1);
1354
29.6k
  for (i = i0; i < i0 + i1; i++)
1355
28.8k
  {
1356
28.8k
    pdf_xref_entry *entry = &table[i-i0];
1357
28.8k
    int a = 0;
1358
28.8k
    int64_t b = 0;
1359
28.8k
    int c = 0;
1360
1361
28.8k
    if (fz_is_eof(ctx, stm))
1362
22
      fz_throw(ctx, FZ_ERROR_FORMAT, "truncated xref stream");
1363
1364
58.1k
    for (n = 0; n < w0; n++)
1365
29.2k
      a = (a << 8) + fz_read_byte(ctx, stm);
1366
646M
    for (n = 0; n < w1; n++)
1367
646M
      b = (b << 8) + fz_read_byte(ctx, stm);
1368
2.92M
    for (n = 0; n < w2; n++)
1369
2.89M
      c = (c << 8) + fz_read_byte(ctx, stm);
1370
1371
28.8k
    if (!entry->type)
1372
28.8k
    {
1373
28.8k
      int t = w0 ? a : 1;
1374
28.8k
      entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
1375
28.8k
      entry->ofs = w1 ? b : 0;
1376
28.8k
      entry->gen = w2 ? c : 0;
1377
28.8k
      entry->num = i;
1378
28.8k
    }
1379
28.8k
  }
1380
1381
765
  doc->last_xref_was_old_style = 0;
1382
765
}
1383
1384
/* Entered with file locked, remains locked throughout. */
1385
static pdf_obj *
1386
pdf_read_new_xref(fz_context *ctx, pdf_document *doc)
1387
628
{
1388
628
  fz_stream *stm = NULL;
1389
628
  pdf_obj *trailer = NULL;
1390
628
  pdf_obj *index = NULL;
1391
628
  pdf_obj *obj = NULL;
1392
628
  int gen, num = 0;
1393
628
  int64_t ofs, stm_ofs;
1394
628
  int size, w0, w1, w2;
1395
628
  int t;
1396
1397
628
  fz_var(trailer);
1398
628
  fz_var(stm);
1399
1400
1.25k
  fz_try(ctx)
1401
1.25k
  {
1402
628
    ofs = fz_tell(ctx, doc->file);
1403
628
    trailer = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stm_ofs, NULL);
1404
628
    if (num == 0)
1405
1
      fz_throw(ctx, FZ_ERROR_FORMAT, "Trailer object number cannot be 0\n");
1406
628
  }
1407
1.25k
  fz_catch(ctx)
1408
92
  {
1409
92
    pdf_drop_obj(ctx, trailer);
1410
92
    fz_rethrow(ctx);
1411
92
  }
1412
1413
1.07k
  fz_try(ctx)
1414
1.07k
  {
1415
536
    pdf_xref_entry *entry;
1416
1417
536
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
1418
536
    if (!obj)
1419
3
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream missing Size entry (%d 0 R)", num);
1420
1421
533
    size = pdf_to_int(ctx, obj);
1422
1423
533
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(W));
1424
533
    if (!obj)
1425
2
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream missing W entry (%d  R)", num);
1426
1427
531
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 0)))
1428
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object type field width an indirect object");
1429
531
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 1)))
1430
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object field 2 width an indirect object");
1431
531
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 2)))
1432
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object field 3 width an indirect object");
1433
1434
531
    if (doc->file_reading_linearly && pdf_dict_get(ctx, trailer, PDF_NAME(Encrypt)))
1435
0
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Cannot read linearly with encryption");
1436
1437
531
    w0 = pdf_array_get_int(ctx, obj, 0);
1438
531
    w1 = pdf_array_get_int(ctx, obj, 1);
1439
531
    w2 = pdf_array_get_int(ctx, obj, 2);
1440
1441
531
    if (w0 < 0)
1442
0
      fz_warn(ctx, "xref stream objects have corrupt type");
1443
531
    if (w1 < 0)
1444
1
      fz_warn(ctx, "xref stream objects have corrupt offset");
1445
531
    if (w2 < 0)
1446
0
      fz_warn(ctx, "xref stream objects have corrupt generation");
1447
1448
531
    w0 = w0 < 0 ? 0 : w0;
1449
531
    w1 = w1 < 0 ? 0 : w1;
1450
531
    w2 = w2 < 0 ? 0 : w2;
1451
1452
531
    index = pdf_dict_get(ctx, trailer, PDF_NAME(Index));
1453
1454
531
    stm = pdf_open_stream_with_offset(ctx, doc, num, trailer, stm_ofs);
1455
1456
531
    if (!index)
1457
198
    {
1458
198
      pdf_read_new_xref_section(ctx, doc, stm, 0, size, w0, w1, w2);
1459
198
    }
1460
333
    else
1461
333
    {
1462
333
      int n = pdf_array_len(ctx, index);
1463
922
      for (t = 0; t < n; t += 2)
1464
589
      {
1465
589
        int i0 = pdf_array_get_int(ctx, index, t + 0);
1466
589
        int i1 = pdf_array_get_int(ctx, index, t + 1);
1467
589
        pdf_read_new_xref_section(ctx, doc, stm, i0, i1, w0, w1, w2);
1468
589
      }
1469
333
    }
1470
531
    entry = pdf_get_populating_xref_entry(ctx, doc, num);
1471
531
    entry->ofs = ofs;
1472
531
    entry->gen = gen;
1473
531
    entry->num = num;
1474
531
    entry->stm_ofs = stm_ofs;
1475
531
    pdf_drop_obj(ctx, entry->obj);
1476
531
    entry->obj = pdf_keep_obj(ctx, trailer);
1477
531
    entry->type = 'n';
1478
531
    pdf_set_obj_parent(ctx, trailer, num);
1479
531
  }
1480
1.07k
  fz_always(ctx)
1481
536
  {
1482
536
    fz_drop_stream(ctx, stm);
1483
536
  }
1484
536
  fz_catch(ctx)
1485
29
  {
1486
29
    pdf_drop_obj(ctx, trailer);
1487
29
    fz_rethrow(ctx);
1488
29
  }
1489
1490
501
  return trailer;
1491
530
}
1492
1493
static pdf_obj *
1494
pdf_read_xref(fz_context *ctx, pdf_document *doc, int64_t ofs)
1495
1.77k
{
1496
1.77k
  pdf_obj *trailer;
1497
1.77k
  int c;
1498
1499
1.77k
  fz_seek(ctx, doc->file, ofs, SEEK_SET);
1500
1501
5.63k
  while (iswhite(fz_peek_byte(ctx, doc->file)))
1502
3.85k
    fz_read_byte(ctx, doc->file);
1503
1504
1.77k
  c = fz_peek_byte(ctx, doc->file);
1505
1.77k
  if (c == 'x')
1506
629
    trailer = pdf_read_old_xref(ctx, doc);
1507
1.15k
  else if (isdigit(c))
1508
628
    trailer = pdf_read_new_xref(ctx, doc);
1509
522
  else
1510
522
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize xref format");
1511
1512
1.25k
  return trailer;
1513
1.77k
}
1514
1515
static int64_t
1516
read_xref_section(fz_context *ctx, pdf_document *doc, int64_t ofs)
1517
1.76k
{
1518
1.76k
  pdf_obj *trailer = NULL;
1519
1.76k
  pdf_obj *prevobj;
1520
1.76k
  int64_t xrefstmofs = 0;
1521
1.76k
  int64_t prevofs = 0;
1522
1523
1.76k
  trailer = pdf_read_xref(ctx, doc, ofs);
1524
2.07k
  fz_try(ctx)
1525
2.07k
  {
1526
1.03k
    pdf_set_populating_xref_trailer(ctx, doc, trailer);
1527
1528
    /* FIXME: do we overwrite free entries properly? */
1529
    /* FIXME: Does this work properly with progression? */
1530
1.03k
    xrefstmofs = pdf_to_int64(ctx, pdf_dict_get(ctx, trailer, PDF_NAME(XRefStm)));
1531
1.03k
    if (xrefstmofs)
1532
16
    {
1533
16
      if (xrefstmofs < 0)
1534
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "negative xref stream offset");
1535
1536
      /*
1537
        Read the XRefStm stream, but throw away the resulting trailer. We do not
1538
        follow any Prev tag therein, as specified on Page 108 of the PDF reference
1539
        1.7
1540
      */
1541
16
      pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs));
1542
16
    }
1543
1544
1.03k
    prevobj = pdf_dict_get(ctx, trailer, PDF_NAME(Prev));
1545
1.03k
    if (pdf_is_int(ctx, prevobj))
1546
351
    {
1547
351
      prevofs = pdf_to_int64(ctx, prevobj);
1548
351
      if (prevofs <= 0)
1549
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "invalid offset for previous xref section");
1550
351
    }
1551
1.03k
  }
1552
2.07k
  fz_always(ctx)
1553
1.03k
    pdf_drop_obj(ctx, trailer);
1554
1.03k
  fz_catch(ctx)
1555
3
    fz_rethrow(ctx);
1556
1557
1.76k
  return prevofs;
1558
1.76k
}
1559
1560
static void
1561
pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, int64_t ofs, int read_previous)
1562
1.41k
{
1563
1.41k
  int i, len, cap;
1564
1.41k
  int64_t *offsets;
1565
1.41k
  int populated = 0;
1566
1.41k
  int size, xref_len;
1567
1568
1.41k
  len = 0;
1569
1.41k
  cap = 10;
1570
1.41k
  offsets = fz_malloc_array(ctx, cap, int64_t);
1571
1572
1.41k
  fz_var(populated);
1573
1.41k
  fz_var(offsets);
1574
1575
2.82k
  fz_try(ctx)
1576
2.82k
  {
1577
3.17k
    while(ofs)
1578
1.76k
    {
1579
2.22k
      for (i = 0; i < len; i ++)
1580
463
      {
1581
463
        if (offsets[i] == ofs)
1582
0
          break;
1583
463
      }
1584
1.76k
      if (i < len)
1585
0
      {
1586
0
        fz_warn(ctx, "ignoring xref section recursion at offset %d", (int)ofs);
1587
0
        break;
1588
0
      }
1589
1.76k
      if (len == cap)
1590
0
      {
1591
0
        cap *= 2;
1592
0
        offsets = fz_realloc_array(ctx, offsets, cap, int64_t);
1593
0
      }
1594
1.76k
      offsets[len++] = ofs;
1595
1596
1.76k
      pdf_populate_next_xref_level(ctx, doc);
1597
1.76k
      populated = 1;
1598
1.76k
      ofs = read_xref_section(ctx, doc, ofs);
1599
1.76k
      if (!read_previous)
1600
0
        break;
1601
1.76k
    }
1602
1603
    /* For pathological files, such as chinese-example.pdf, where the original
1604
     * xref in the file is highly fragmented, we can safely solidify it here
1605
     * with no ill effects. */
1606
1.41k
    ensure_solid_xref(ctx, doc, 0, doc->num_xref_sections-1);
1607
1608
1.41k
    size = pdf_dict_get_int(ctx, pdf_trailer(ctx, doc), PDF_NAME(Size));
1609
1.41k
    xref_len = pdf_xref_len(ctx, doc);
1610
1.41k
    if (xref_len > size)
1611
64
      fz_throw(ctx, FZ_ERROR_FORMAT, "incorrect number of xref entries in trailer, repairing");
1612
1.41k
  }
1613
2.82k
  fz_always(ctx)
1614
1.41k
  {
1615
1.41k
    fz_free(ctx, offsets);
1616
1.41k
  }
1617
1.41k
  fz_catch(ctx)
1618
794
  {
1619
    /* Undo pdf_populate_next_xref_level if we've done that already. */
1620
794
    if (populated)
1621
794
    {
1622
794
      pdf_drop_xref_subsec(ctx, &doc->xref_sections[doc->num_xref_sections - 1]);
1623
794
      doc->num_xref_sections--;
1624
794
    }
1625
794
    fz_rethrow(ctx);
1626
794
  }
1627
1.34k
}
1628
1629
static void
1630
pdf_prime_xref_index(fz_context *ctx, pdf_document *doc)
1631
11.5k
{
1632
11.5k
  int i, j;
1633
11.5k
  int *idx = doc->xref_index;
1634
1635
23.4k
  for (i = doc->num_xref_sections-1; i >= 0; i--)
1636
11.8k
  {
1637
11.8k
    pdf_xref *xref = &doc->xref_sections[i];
1638
11.8k
    pdf_xref_subsec *subsec = xref->subsec;
1639
23.9k
    while (subsec != NULL)
1640
12.0k
    {
1641
12.0k
      int start = subsec->start;
1642
12.0k
      int end = subsec->start + subsec->len;
1643
33.4M
      for (j = start; j < end; j++)
1644
33.4M
      {
1645
33.4M
        char t = subsec->table[j-start].type;
1646
33.4M
        if (t != 0 && t != 'f')
1647
199k
          idx[j] = i;
1648
33.4M
      }
1649
1650
12.0k
      subsec = subsec->next;
1651
12.0k
    }
1652
11.8k
  }
1653
11.5k
}
1654
1655
static void
1656
check_xref_entry_offsets(fz_context *ctx, pdf_xref_entry *entry, int i, pdf_document *doc, void *arg)
1657
38.3k
{
1658
38.3k
  int xref_len = (int)(intptr_t)arg;
1659
1660
38.3k
  if (entry->type == 'n')
1661
22.3k
  {
1662
    /* Special case code: "0000000000 * n" means free,
1663
     * according to some producers (inc Quartz) */
1664
22.3k
    if (entry->ofs == 0)
1665
33
      entry->type = 'f';
1666
22.3k
    else if (entry->ofs <= 0 || entry->ofs >= doc->file_size)
1667
6
      fz_throw(ctx, FZ_ERROR_FORMAT, "object offset out of range: %d (%d 0 R)", (int)entry->ofs, i);
1668
22.3k
  }
1669
16.0k
  else if (entry->type == 'o')
1670
9.55k
  {
1671
    /* Read this into a local variable here, because pdf_get_xref_entry
1672
     * may solidify the xref, hence invalidating "entry", meaning we
1673
     * need a stashed value for the throw. */
1674
9.55k
    int64_t ofs = entry->ofs;
1675
9.55k
    if (ofs <= 0 || ofs >= xref_len || pdf_get_xref_entry_no_null(ctx, doc, ofs)->type != 'n')
1676
7
      fz_throw(ctx, FZ_ERROR_FORMAT, "invalid reference to an objstm that does not exist: %d (%d 0 R)", (int)ofs, i);
1677
9.55k
  }
1678
38.3k
}
1679
1680
/*
1681
 * load xref tables from pdf
1682
 *
1683
 * File locked on entry, throughout and on exit.
1684
 */
1685
1686
static void
1687
pdf_load_xref(fz_context *ctx, pdf_document *doc)
1688
2.60k
{
1689
2.60k
  int xref_len;
1690
2.60k
  pdf_xref_entry *entry;
1691
1692
2.60k
  pdf_read_start_xref(ctx, doc);
1693
1694
2.60k
  pdf_read_xref_sections(ctx, doc, doc->startxref, 1);
1695
1696
2.60k
  if (pdf_xref_len(ctx, doc) == 0)
1697
1
    fz_throw(ctx, FZ_ERROR_FORMAT, "found xref was empty");
1698
1699
2.60k
  pdf_prime_xref_index(ctx, doc);
1700
1701
2.60k
  entry = pdf_get_xref_entry_no_null(ctx, doc, 0);
1702
  /* broken pdfs where first object is missing */
1703
2.60k
  if (!entry->type)
1704
6
  {
1705
6
    entry->type = 'f';
1706
6
    entry->gen = 65535;
1707
6
    entry->num = 0;
1708
6
  }
1709
  /* broken pdfs where first object is not free */
1710
2.59k
  else if (entry->type != 'f')
1711
3
    fz_warn(ctx, "first object in xref is not free");
1712
1713
  /* broken pdfs where object offsets are out of range */
1714
2.60k
  xref_len = pdf_xref_len(ctx, doc);
1715
2.60k
  pdf_xref_entry_map(ctx, doc, check_xref_entry_offsets, (void *)(intptr_t)xref_len);
1716
2.60k
}
1717
1718
static void
1719
pdf_check_linear(fz_context *ctx, pdf_document *doc)
1720
2.60k
{
1721
2.60k
  pdf_obj *dict = NULL;
1722
2.60k
  pdf_obj *o;
1723
2.60k
  int num, gen;
1724
2.60k
  int64_t stmofs;
1725
1726
2.60k
  fz_var(dict);
1727
1728
5.20k
  fz_try(ctx)
1729
5.20k
  {
1730
2.60k
    dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL);
1731
2.60k
    if (!pdf_is_dict(ctx, dict))
1732
26
      break;
1733
2.57k
    o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
1734
2.57k
    if (o == NULL)
1735
1.65k
      break;
1736
928
    if (pdf_to_int(ctx, o) != 1)
1737
1
      break;
1738
927
    doc->has_linearization_object = 1;
1739
927
  }
1740
5.20k
  fz_always(ctx)
1741
2.60k
    pdf_drop_obj(ctx, dict);
1742
2.60k
  fz_catch(ctx)
1743
597
  {
1744
    /* Silently swallow this error. */
1745
597
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1746
597
    fz_report_error(ctx);
1747
597
  }
1748
2.60k
}
1749
1750
static void
1751
pdf_load_linear(fz_context *ctx, pdf_document *doc)
1752
0
{
1753
0
  pdf_obj *dict = NULL;
1754
0
  pdf_obj *hint = NULL;
1755
0
  pdf_obj *o;
1756
0
  int num, gen, lin, len;
1757
0
  int64_t stmofs;
1758
1759
0
  fz_var(dict);
1760
0
  fz_var(hint);
1761
1762
0
  fz_try(ctx)
1763
0
  {
1764
0
    pdf_xref_entry *entry;
1765
1766
0
    dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL);
1767
0
    if (!pdf_is_dict(ctx, dict))
1768
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to read linearized dictionary");
1769
0
    o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
1770
0
    if (o == NULL)
1771
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to read linearized dictionary");
1772
0
    lin = pdf_to_int(ctx, o);
1773
0
    if (lin != 1)
1774
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected version of Linearized tag (%d)", lin);
1775
0
    doc->has_linearization_object = 1;
1776
0
    len = pdf_dict_get_int(ctx, dict, PDF_NAME(L));
1777
0
    if (len != doc->file_length)
1778
0
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "File has been updated since linearization");
1779
1780
0
    pdf_read_xref_sections(ctx, doc, fz_tell(ctx, doc->file), 0);
1781
1782
0
    doc->linear_page_count = pdf_dict_get_int(ctx, dict, PDF_NAME(N));
1783
0
    doc->linear_page_refs = fz_realloc_array(ctx, doc->linear_page_refs, doc->linear_page_count, pdf_obj *);
1784
0
    memset(doc->linear_page_refs, 0, doc->linear_page_count * sizeof(pdf_obj*));
1785
0
    doc->linear_obj = dict;
1786
0
    doc->linear_pos = fz_tell(ctx, doc->file);
1787
0
    doc->linear_page1_obj_num = pdf_dict_get_int(ctx, dict, PDF_NAME(O));
1788
0
    doc->linear_page_refs[0] = pdf_new_indirect(ctx, doc, doc->linear_page1_obj_num, 0);
1789
0
    doc->linear_page_num = 0;
1790
0
    hint = pdf_dict_get(ctx, dict, PDF_NAME(H));
1791
0
    doc->hint_object_offset = pdf_array_get_int(ctx, hint, 0);
1792
0
    doc->hint_object_length = pdf_array_get_int(ctx, hint, 1);
1793
1794
0
    entry = pdf_get_populating_xref_entry(ctx, doc, 0);
1795
0
    entry->type = 'f';
1796
0
  }
1797
0
  fz_catch(ctx)
1798
0
  {
1799
0
    pdf_drop_obj(ctx, dict);
1800
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1801
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1802
0
    fz_report_error(ctx);
1803
    /* Drop back to non linearized reading mode */
1804
0
    doc->file_reading_linearly = 0;
1805
0
  }
1806
0
}
1807
1808
/*
1809
 * Initialize and load xref tables.
1810
 * If password is not null, try to decrypt.
1811
 */
1812
1813
/*
 * Load the cross-reference information for a newly opened document and
 * set up decryption.
 *
 * First try block: optionally switch to progressive (linear) reading,
 * read the version header, then load the xref via pdf_load_linear or
 * pdf_check_linear + pdf_load_xref. FDF files skip xref loading and go
 * straight to the repair path. Any error other than TRYLATER/SYSTEM
 * drops the xref sections and selects the repair path.
 *
 * Second try block: run the repair if selected, create doc->crypt from
 * the trailer's Encrypt dictionary, try an empty password, and (after a
 * repair) call pdf_repair_trailer. Errors from this block propagate.
 */
static void
1814
pdf_init_document(fz_context *ctx, pdf_document *doc)
1815
11.8k
{
1816
11.8k
  pdf_obj *encrypt, *id;
1817
11.8k
  int repaired = 0;
1818
1819
23.7k
  fz_try(ctx)
1820
23.7k
  {
1821
    /* Check to see if we should work in progressive mode */
1822
11.8k
    if (doc->file->progressive)
1823
0
    {
1824
0
      doc->file_reading_linearly = 1;
1825
0
      fz_seek(ctx, doc->file, 0, SEEK_END);
1826
0
      doc->file_length = fz_tell(ctx, doc->file);
1827
0
      if (doc->file_length < 0)
1828
0
        doc->file_length = 0;
1829
0
      fz_seek(ctx, doc->file, 0, SEEK_SET);
1830
0
    }
1831
1832
11.8k
    pdf_load_version(ctx, doc);
1833
1834
11.8k
    if (doc->is_fdf)
1835
2
    {
1836
2
      doc->file_reading_linearly = 0;
1837
2
      repaired = 1;
1838
2
      break; /* skip to end of try/catch */
1839
2
    }
1840
1841
    /* Try to load the linearized file if we are in progressive
1842
     * mode. */
1843
11.8k
    if (doc->file_reading_linearly)
1844
0
      pdf_load_linear(ctx, doc);
1845
11.8k
    else
1846
      /* Even if we're not in progressive mode, check to see
1847
       * if the file claims to be linearized. This is important
1848
       * for checking signatures later on. */
1849
11.8k
      pdf_check_linear(ctx, doc);
1850
1851
    /* If we aren't in progressive mode (or the linear load failed
1852
     * and has set us back to non-progressive mode), load normally.
1853
     */
1854
11.8k
    if (!doc->file_reading_linearly)
1855
2.60k
      pdf_load_xref(ctx, doc);
1856
11.8k
  }
1857
23.7k
  fz_catch(ctx)
1858
11.2k
  {
    /* TRYLATER is rethrown before file_reading_linearly is cleared;
     * SYSTEM errors are rethrown after it has been cleared. */
1859
11.2k
    pdf_drop_xref_sections(ctx, doc);
1860
11.2k
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1861
11.2k
    doc->file_reading_linearly = 0;
1862
11.2k
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1863
11.2k
    fz_report_error(ctx);
1864
11.2k
    fz_warn(ctx, "trying to repair broken xref");
1865
11.2k
    repaired = 1;
1866
11.2k
  }
1867
1868
23.7k
  fz_try(ctx)
1869
23.7k
  {
1870
11.8k
    if (repaired)
1871
11.2k
    {
1872
      /* pdf_repair_xref may access xref_index, so reset it properly */
1873
11.2k
      if (doc->xref_index)
1874
243
        memset(doc->xref_index, 0, sizeof(int) * doc->max_xref_len);
1875
11.2k
      pdf_repair_xref(ctx, doc);
1876
11.2k
      pdf_prime_xref_index(ctx, doc);
1877
11.2k
    }
1878
1879
11.8k
    encrypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
1880
11.8k
    id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
1881
11.8k
    if (pdf_is_dict(ctx, encrypt))
1882
243
      doc->crypt = pdf_new_crypt(ctx, encrypt, id);
1883
1884
    /* Allow lazy clients to read encrypted files with a blank password */
1885
11.8k
    (void)pdf_authenticate_password(ctx, doc, "");
1886
1887
11.8k
    if (repaired)
1888
10.9k
    {
1889
10.9k
      pdf_repair_trailer(ctx, doc);
1890
10.9k
    }
1891
11.8k
  }
1892
23.7k
  fz_catch(ctx)
1893
386
  {
    /* Nothing to clean up here; the error is passed on unchanged. */
1894
386
    fz_rethrow(ctx);
1895
386
  }
1896
11.8k
}
1897
1898
/*
 * Fill in missing Root and Info entries in the trailer.
 *
 * After repairing object streams, objects are scanned from the highest
 * number down to 1 until both entries are present. An object whose Type
 * is Catalog is installed as Root; an object with a Creator or Producer
 * key is installed as Info. Objects that fail to load are warned about
 * and skipped, except that TRYLATER and SYSTEM errors are rethrown.
 *
 * If the document is encrypted, the xref is then cleared and the
 * Encrypt and ID trailer entries are resolved again with doc->crypt
 * temporarily set to NULL (see the comment in the fz_always block).
 */
void pdf_repair_trailer(fz_context *ctx, pdf_document *doc)
1899
10.9k
{
1900
10.9k
  int hasroot, hasinfo;
1901
10.9k
  pdf_obj *obj, *nobj;
1902
10.9k
  pdf_obj *dict = NULL;
1903
10.9k
  int i;
1904
1905
10.9k
  int xref_len = pdf_xref_len(ctx, doc);
1906
10.9k
  pdf_repair_obj_stms(ctx, doc);
1907
1908
10.9k
  hasroot = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)) != NULL);
1909
10.9k
  hasinfo = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)) != NULL);
1910
1911
10.9k
  fz_var(dict);
1912
1913
21.9k
  fz_try(ctx)
1914
21.9k
  {
1915
    /* Scan from the end so we have a better chance of finding
1916
     * newer objects if there are multiple instances of Info and
1917
     * Root objects.
1918
     */
1919
33.2M
    for (i = xref_len - 1; i > 0 && (!hasinfo || !hasroot); --i)
1920
33.2M
    {
1921
33.2M
      pdf_xref_entry *entry = pdf_get_xref_entry_no_null(ctx, doc, i);
1922
33.2M
      if (entry->type == 0 || entry->type == 'f')
1923
33.0M
        continue;
1924
1925
257k
      fz_try(ctx)
1926
257k
      {
1927
128k
        dict = pdf_load_object(ctx, doc, i);
1928
128k
      }
1929
257k
      fz_catch(ctx)
1930
12.0k
      {
1931
12.0k
        fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1932
12.0k
        fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1933
12.0k
        fz_report_error(ctx);
1934
12.0k
        fz_warn(ctx, "ignoring broken object (%d 0 R)", i);
1935
12.0k
        continue;
1936
12.0k
      }
1937
1938
116k
      if (!hasroot)
1939
60.5k
      {
1940
60.5k
        obj = pdf_dict_get(ctx, dict, PDF_NAME(Type));
1941
60.5k
        if (obj == PDF_NAME(Catalog))
1942
8.21k
        {
1943
8.21k
          nobj = pdf_new_indirect(ctx, doc, i, 0);
1944
8.21k
          pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), nobj);
1945
8.21k
          hasroot = 1;
1946
8.21k
        }
1947
60.5k
      }
1948
1949
116k
      if (!hasinfo)
1950
112k
      {
1951
112k
        if (pdf_dict_get(ctx, dict, PDF_NAME(Creator)) || pdf_dict_get(ctx, dict, PDF_NAME(Producer)))
1952
1.46k
        {
1953
1.46k
          nobj = pdf_new_indirect(ctx, doc, i, 0);
1954
1.46k
          pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), nobj);
1955
1.46k
          hasinfo = 1;
1956
1.46k
        }
1957
112k
      }
1958
1959
      /* Reset to NULL so the outer fz_catch does not drop it again. */
116k
      pdf_drop_obj(ctx, dict);
1960
116k
      dict = NULL;
1961
116k
    }
1962
10.9k
  }
1963
21.9k
  fz_always(ctx)
1964
10.9k
  {
1965
    /* ensure that strings are not used in their repaired, non-decrypted form */
1966
10.9k
    if (doc->crypt)
1967
200
    {
1968
200
      pdf_crypt *tmp;
1969
200
      pdf_clear_xref(ctx, doc);
1970
1971
      /* ensure that Encryption dictionary and ID are cached without decryption,
1972
         otherwise a decrypted Encryption dictionary and ID may be used when saving
1973
         the PDF causing it to be inconsistent (since strings/streams are encrypted
1974
         with the actual encryption key, not the decrypted encryption key). */
1975
200
      tmp = doc->crypt;
1976
200
      doc->crypt = NULL;
1977
400
      fz_try(ctx)
1978
400
      {
1979
200
        (void) pdf_resolve_indirect(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt)));
1980
200
        (void) pdf_resolve_indirect(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID)));
1981
200
      }
1982
400
      fz_always(ctx)
1983
200
        doc->crypt = tmp;
1984
200
      fz_catch(ctx)
1985
0
      {
1986
0
        fz_rethrow(ctx);
1987
0
      }
1988
200
    }
1989
10.9k
  }
1990
10.9k
  fz_catch(ctx)
1991
0
  {
1992
0
    pdf_drop_obj(ctx, dict);
1993
0
    fz_rethrow(ctx);
1994
0
  }
1995
10.9k
}
1996
1997
void
1998
pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc)
1999
11.8k
{
2000
11.8k
  if (doc == NULL)
2001
0
    return;
2002
11.8k
  fz_drop_xml(ctx, doc->xfa);
2003
11.8k
  doc->xfa = NULL;
2004
11.8k
}
2005
2006
/*
 * Release everything owned by the document: glyph cache contents,
 * event callback, JS state, journal, resource tables, local and main
 * xrefs, the file stream, crypt state, linearization and hint data,
 * type3 fonts, OCG data, store contents, the lex buffer, the output
 * intent colorspace, orphaned objects, the page tree and the XFA tree.
 *
 * Most of the work is bracketed by fz_defer_reap_start/fz_defer_reap_end.
 * Errors raised while purging the glyph cache or decoupling type3 fonts
 * are reported and swallowed (SYSTEM errors are rethrown) so that the
 * remaining resources are still released.
 */
static void
2007
pdf_drop_document_imp(fz_context *ctx, pdf_document *doc)
2008
11.8k
{
2009
11.8k
  int i;
2010
2011
11.8k
  fz_defer_reap_start(ctx);
2012
2013
  /* Type3 glyphs in the glyph cache can contain pdf_obj pointers
2014
   * that we are about to destroy. Simplest solution is to bin the
2015
   * glyph cache at this point. */
2016
23.7k
  fz_try(ctx)
2017
23.7k
    fz_purge_glyph_cache(ctx);
2018
23.7k
  fz_catch(ctx)
2019
0
  {
2020
    /* Swallow error, but continue dropping */
2021
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2022
0
    fz_report_error(ctx);
2023
0
  }
2024
2025
11.8k
  pdf_set_doc_event_callback(ctx, doc, NULL, NULL, NULL);
2026
11.8k
  pdf_drop_js(ctx, doc->js);
2027
2028
11.8k
  pdf_drop_journal(ctx, doc->journal);
2029
2030
11.8k
  pdf_drop_resource_tables(ctx, doc);
2031
2032
11.8k
  pdf_drop_local_xref(ctx, doc->local_xref);
2033
2034
11.8k
  pdf_drop_xref_sections(ctx, doc);
2035
11.8k
  fz_free(ctx, doc->xref_index);
2036
2037
11.8k
  fz_drop_stream(ctx, doc->file);
2038
11.8k
  pdf_drop_crypt(ctx, doc->crypt);
2039
2040
11.8k
  pdf_drop_obj(ctx, doc->linear_obj);
2041
11.8k
  if (doc->linear_page_refs)
2042
0
  {
2043
0
    for (i=0; i < doc->linear_page_count; i++)
2044
0
      pdf_drop_obj(ctx, doc->linear_page_refs[i]);
2045
2046
0
    fz_free(ctx, doc->linear_page_refs);
2047
0
  }
2048
2049
11.8k
  fz_free(ctx, doc->hint_page);
2050
11.8k
  fz_free(ctx, doc->hint_shared_ref);
2051
11.8k
  fz_free(ctx, doc->hint_shared);
2052
11.8k
  fz_free(ctx, doc->hint_obj_offsets);
2053
2054
12.3k
  for (i=0; i < doc->num_type3_fonts; i++)
2055
425
  {
    /* The font is dropped in fz_always, so even if decoupling throws. */
2056
850
    fz_try(ctx)
2057
850
      fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
2058
850
    fz_always(ctx)
2059
425
      fz_drop_font(ctx, doc->type3_fonts[i]);
2060
425
    fz_catch(ctx)
2061
0
    {
2062
      /* Swallow error, but continue dropping */
2063
0
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2064
0
      fz_report_error(ctx);
2065
0
    }
2066
425
  }
2067
2068
11.8k
  fz_free(ctx, doc->type3_fonts);
2069
2070
11.8k
  pdf_drop_ocg(ctx, doc);
2071
2072
11.8k
  pdf_empty_store(ctx, doc);
2073
2074
11.8k
  pdf_lexbuf_fin(ctx, &doc->lexbuf.base);
2075
2076
11.8k
  fz_drop_colorspace(ctx, doc->oi);
2077
2078
34.5k
  for (i = 0; i < doc->orphans_count; i++)
2079
22.6k
    pdf_drop_obj(ctx, doc->orphans[i]);
2080
2081
11.8k
  fz_free(ctx, doc->orphans);
2082
2083
11.8k
  pdf_drop_page_tree_internal(ctx, doc);
2084
2085
11.8k
  fz_defer_reap_end(ctx);
2086
2087
11.8k
  pdf_invalidate_xfa(ctx, doc);
2088
11.8k
}
2089
2090
/*
 * Drop a reference to a PDF document by forwarding to fz_drop_document
 * on the embedded 'super' member.
 */
void
2091
pdf_drop_document(fz_context *ctx, pdf_document *doc)
2092
27.3k
{
2093
27.3k
  fz_drop_document(ctx, &doc->super);
2094
27.3k
}
2095
2096
/*
 * Take a reference to a PDF document by forwarding to fz_keep_document
 * on the embedded 'super' member; the result is cast back to
 * pdf_document * and returned.
 */
pdf_document *
2097
pdf_keep_document(fz_context *ctx, pdf_document *doc)
2098
27.3k
{
2099
27.3k
  return (pdf_document *)fz_keep_document(ctx, &doc->super);
2100
27.3k
}
2101
2102
/*
2103
 * compressed object streams
2104
 */
2105
2106
/*
2107
  Do not hold pdf_xref_entry's over call to this function as they
2108
  may be invalidated!
2109
*/
2110
/*
 * Load every object held in object stream 'num'.
 *
 * The stream's header is read as N pairs of (object number, offset);
 * pairs whose object number is outside 1..xref_len-1 are warned about
 * and skipped. Each remaining object is parsed and stored in its xref
 * entry, provided that entry is of type 'o' (or 'O') and points at this
 * stream; otherwise the parsed object is dropped. An entry that already
 * holds an object keeps it.
 *
 * The object stream is marked while it is being processed so that a
 * recursive lookup of the same stream is detected and rejected.
 *
 * Returns the xref entry for object 'target', or NULL if 'target' was
 * not accepted from this stream. 'buf' is the lex buffer used for
 * parsing.
 */
static pdf_xref_entry *
2111
pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, pdf_lexbuf *buf, int target)
2112
23.5k
{
2113
23.5k
  fz_stream *stm = NULL;
2114
23.5k
  pdf_obj *objstm = NULL;
2115
23.5k
  int *numbuf = NULL;
2116
23.5k
  int64_t *ofsbuf = NULL;
2117
2118
23.5k
  pdf_obj *obj;
2119
23.5k
  int64_t first;
2120
23.5k
  int count;
2121
23.5k
  int i;
2122
23.5k
  pdf_token tok;
2123
23.5k
  pdf_xref_entry *ret_entry = NULL;
2124
23.5k
  int ret_idx;
2125
23.5k
  int xref_len;
2126
23.5k
  int found;
2127
23.5k
  fz_stream *sub = NULL;
2128
2129
23.5k
  fz_var(numbuf);
2130
23.5k
  fz_var(ofsbuf);
2131
23.5k
  fz_var(objstm);
2132
23.5k
  fz_var(stm);
2133
23.5k
  fz_var(sub);
2134
2135
47.1k
  fz_try(ctx)
2136
47.1k
  {
2137
23.5k
    objstm = pdf_load_object(ctx, doc, num);
2138
2139
23.5k
    if (pdf_obj_marked(ctx, objstm))
2140
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "recursive object stream lookup");
2141
23.5k
  }
2142
47.1k
  fz_catch(ctx)
2143
61
  {
2144
61
    pdf_drop_obj(ctx, objstm);
2145
61
    fz_rethrow(ctx);
2146
61
  }
2147
2148
46.9k
  fz_try(ctx)
2149
46.9k
  {
2150
23.4k
    (void)pdf_mark_obj(ctx, objstm);
2151
2152
23.4k
    count = pdf_dict_get_int(ctx, objstm, PDF_NAME(N));
2153
23.4k
    first = pdf_dict_get_int(ctx, objstm, PDF_NAME(First));
2154
2155
23.4k
    if (count < 0 || count > PDF_MAX_OBJECT_NUMBER)
2156
9
      fz_throw(ctx, FZ_ERROR_FORMAT, "number of objects in object stream out of range");
2157
2158
23.4k
    numbuf = fz_calloc(ctx, count, sizeof(*numbuf));
2159
23.4k
    ofsbuf = fz_calloc(ctx, count, sizeof(*ofsbuf));
2160
2161
23.4k
    xref_len = pdf_xref_len(ctx, doc);
2162
2163
23.4k
    found = 0;
2164
2165
23.4k
    stm = pdf_open_stream_number(ctx, doc, num);
    /* 'found' only advances for in-range object numbers, so an
     * out-of-range pair is overwritten by the next pair read. */
2166
1.34M
    for (i = 0; i < count; i++)
2167
1.32M
    {
2168
1.32M
      tok = pdf_lex(ctx, stm, buf);
2169
1.32M
      if (tok != PDF_TOK_INT)
2170
1.21k
        fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt object stream (%d 0 R)", num);
2171
1.32M
      numbuf[found] = buf->i;
2172
2173
1.32M
      tok = pdf_lex(ctx, stm, buf);
2174
1.32M
      if (tok != PDF_TOK_INT)
2175
1.53k
        fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt object stream (%d 0 R)", num);
2176
1.32M
      ofsbuf[found] = buf->i;
2177
2178
1.32M
      if (numbuf[found] <= 0 || numbuf[found] >= xref_len)
2179
85.3k
        fz_warn(ctx, "object stream object out of range, skipping");
2180
1.23M
      else
2181
1.23M
        found++;
2182
1.32M
    }
2183
2184
20.7k
    ret_idx = -1;
2185
361k
    for (i = 0; i < found; i++)
2186
341k
    {
2187
341k
      pdf_xref_entry *entry;
2188
341k
      uint64_t length;
2189
341k
      int64_t offset;
2190
2191
      /* Each object runs from its own offset up to the next
       * accepted offset; the last one is given UINT64_MAX. */
341k
      offset = first + ofsbuf[i];
2192
341k
      if (i+1 < found)
2193
336k
        length = ofsbuf[i+1] - ofsbuf[i];
2194
4.52k
      else
2195
4.52k
        length = UINT64_MAX;
2196
2197
341k
      sub = fz_open_null_filter(ctx, stm, length, offset);
2198
2199
341k
      obj = pdf_parse_stm_obj(ctx, doc, sub, buf);
2200
341k
      fz_drop_stream(ctx, sub);
2201
341k
      sub = NULL;
2202
2203
341k
      entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[i]);
2204
2205
341k
      pdf_set_obj_parent(ctx, obj, numbuf[i]);
2206
2207
      /* We may have set entry->type to be 'O' from being 'o' to avoid nasty
2208
       * recursions in pdf_cache_object. Accept the type being 'O' here. */
2209
341k
      if ((entry->type == 'o' || entry->type == 'O') && entry->ofs == num)
2210
323k
      {
2211
        /* If we already have an entry for this object,
2212
         * we'd like to drop it and use the new one -
2213
         * but this means that anyone currently holding
2214
         * a pointer to the old one will be left with a
2215
         * stale pointer. Instead, we drop the new one
2216
         * and trust that the old one is correct. */
2217
323k
        if (entry->obj)
2218
274k
        {
2219
274k
          if (pdf_objcmp(ctx, entry->obj, obj))
2220
37
            fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]);
2221
274k
          pdf_drop_obj(ctx, obj);
2222
274k
        }
2223
49.1k
        else
2224
49.1k
        {
2225
49.1k
          entry->obj = obj;
2226
49.1k
          fz_drop_buffer(ctx, entry->stm_buf);
2227
49.1k
          entry->stm_buf = NULL;
2228
49.1k
        }
2229
323k
        if (numbuf[i] == target)
2230
3.63k
          ret_idx = i;
2231
323k
      }
2232
17.2k
      else
2233
17.2k
      {
2234
17.2k
        pdf_drop_obj(ctx, obj);
2235
17.2k
      }
2236
341k
    }
2237
    /* Parsing our way through the stream can cause the xref to be
2238
     * solidified, which will move an entry. We therefore can't
2239
     * read the entry for returning until no more parsing is to be
2240
     * done. Thus we end up reading this entry twice. */
2241
20.7k
    if (ret_idx >= 0)
2242
3.58k
      ret_entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[ret_idx]);
2243
20.7k
  }
2244
46.9k
  fz_always(ctx)
2245
23.4k
  {
2246
23.4k
    fz_drop_stream(ctx, stm);
2247
23.4k
    fz_drop_stream(ctx, sub);
2248
23.4k
    fz_free(ctx, ofsbuf);
2249
23.4k
    fz_free(ctx, numbuf);
2250
23.4k
    pdf_unmark_obj(ctx, objstm);
2251
23.4k
    pdf_drop_obj(ctx, objstm);
2252
23.4k
  }
2253
23.4k
  fz_catch(ctx)
2254
19.8k
  {
2255
19.8k
    fz_rethrow(ctx);
2256
19.8k
  }
2257
920
  return ret_entry;
2258
20.7k
}
2259
2260
/*
2261
 * object loading
2262
 */
2263
static int
2264
pdf_obj_read(fz_context *ctx, pdf_document *doc, int64_t *offset, int *nump, pdf_obj **page)
2265
0
{
2266
0
  pdf_lexbuf *buf = &doc->lexbuf.base;
2267
0
  int num, gen, tok;
2268
0
  int64_t numofs, genofs, stmofs, tmpofs, newtmpofs;
2269
0
  int xref_len;
2270
0
  pdf_xref_entry *entry;
2271
2272
0
  numofs = *offset;
2273
0
  fz_seek(ctx, doc->file, numofs, SEEK_SET);
2274
2275
  /* We expect to read 'num' here */
2276
0
  tok = pdf_lex(ctx, doc->file, buf);
2277
0
  genofs = fz_tell(ctx, doc->file);
2278
0
  if (tok != PDF_TOK_INT)
2279
0
  {
2280
    /* Failed! */
2281
0
    DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
2282
0
    *offset = genofs;
2283
0
    return tok == PDF_TOK_EOF;
2284
0
  }
2285
0
  *nump = num = buf->i;
2286
2287
  /* We expect to read 'gen' here */
2288
0
  tok = pdf_lex(ctx, doc->file, buf);
2289
0
  tmpofs = fz_tell(ctx, doc->file);
2290
0
  if (tok != PDF_TOK_INT)
2291
0
  {
2292
    /* Failed! */
2293
0
    DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
2294
0
    *offset = tmpofs;
2295
0
    return tok == PDF_TOK_EOF;
2296
0
  }
2297
0
  gen = buf->i;
2298
2299
  /* We expect to read 'obj' here */
2300
0
  do
2301
0
  {
2302
0
    tmpofs = fz_tell(ctx, doc->file);
2303
0
    tok = pdf_lex(ctx, doc->file, buf);
2304
0
    if (tok == PDF_TOK_OBJ)
2305
0
      break;
2306
0
    if (tok != PDF_TOK_INT)
2307
0
    {
2308
0
      DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
2309
0
      *offset = fz_tell(ctx, doc->file);
2310
0
      return tok == PDF_TOK_EOF;
2311
0
    }
2312
0
    DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
2313
0
    *nump = num = gen;
2314
0
    numofs = genofs;
2315
0
    gen = buf->i;
2316
0
    genofs = tmpofs;
2317
0
  }
2318
0
  while (1);
2319
2320
  /* Now we read the actual object */
2321
0
  xref_len = pdf_xref_len(ctx, doc);
2322
2323
  /* When we are reading a progressive file, we typically see:
2324
   *    File Header
2325
   *    obj m (Linearization params)
2326
   *    xref #1 (refers to objects m-n)
2327
   *    obj m+1
2328
   *    ...
2329
   *    obj n
2330
   *    obj 1
2331
   *    ...
2332
   *    obj n-1
2333
   *    xref #2
2334
   *
2335
   * The linearisation params are read elsewhere, hence
2336
   * whenever we read an object it should just go into the
2337
   * previous xref.
2338
   */
2339
0
  tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs, NULL);
2340
2341
0
  do /* So we can break out of it */
2342
0
  {
2343
0
    if (num <= 0 || num >= xref_len)
2344
0
    {
2345
0
      fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
2346
0
      break;
2347
0
    }
2348
0
    if (gen != 0)
2349
0
    {
2350
0
      fz_warn(ctx, "Unexpected non zero generation number in linearized file");
2351
0
    }
2352
0
    entry = pdf_get_populating_xref_entry(ctx, doc, num);
2353
0
    if (entry->type != 0)
2354
0
    {
2355
0
      DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
2356
0
      break;
2357
0
    }
2358
0
    if (page && *page)
2359
0
    {
2360
0
      DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
2361
0
      if (!entry->obj)
2362
0
        entry->obj = pdf_keep_obj(ctx, *page);
2363
2364
0
      if (doc->linear_page_refs[doc->linear_page_num] == NULL)
2365
0
        doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(ctx, doc, num, gen);
2366
0
    }
2367
0
    else
2368
0
    {
2369
0
      DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
2370
0
    }
2371
0
    entry->type = 'n';
2372
0
    entry->gen = gen; // XXX: was 0
2373
0
    entry->num = num;
2374
0
    entry->ofs = numofs;
2375
0
    entry->stm_ofs = stmofs;
2376
0
  }
2377
0
  while (0);
2378
0
  if (page && *page)
2379
0
    doc->linear_page_num++;
2380
2381
0
  if (tok == PDF_TOK_ENDOBJ)
2382
0
  {
2383
0
    *offset = fz_tell(ctx, doc->file);
2384
0
  }
2385
0
  else
2386
0
  {
2387
0
    *offset = newtmpofs;
2388
0
  }
2389
0
  return 0;
2390
0
}
2391
2392
static void
2393
pdf_load_hinted_page(fz_context *ctx, pdf_document *doc, int pagenum)
2394
0
{
2395
0
  pdf_obj *page = NULL;
2396
2397
0
  if (!doc->hints_loaded || !doc->linear_page_refs)
2398
0
    return;
2399
2400
0
  if (doc->linear_page_refs[pagenum])
2401
0
    return;
2402
2403
0
  fz_var(page);
2404
2405
0
  fz_try(ctx)
2406
0
  {
2407
0
    int num = doc->hint_page[pagenum].number;
2408
0
    page = pdf_load_object(ctx, doc, num);
2409
0
    if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, page, PDF_NAME(Type))))
2410
0
    {
2411
      /* We have found the page object! */
2412
0
      DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
2413
0
      doc->linear_page_refs[pagenum] = pdf_new_indirect(ctx, doc, num, 0);
2414
0
    }
2415
0
  }
2416
0
  fz_always(ctx)
2417
0
    pdf_drop_obj(ctx, page);
2418
0
  fz_catch(ctx)
2419
0
  {
2420
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2421
    /* Swallow the error and proceed as normal */
2422
0
    fz_report_error(ctx);
2423
0
  }
2424
0
}
2425
2426
static int
2427
read_hinted_object(fz_context *ctx, pdf_document *doc, int num)
2428
0
{
2429
  /* Try to find the object using our hint table. Find the closest
2430
   * object <= the one we want that has a hint and read forward from
2431
   * there. */
2432
0
  int expected = num;
2433
0
  int curr_pos;
2434
0
  int64_t start, offset;
2435
2436
0
  while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
2437
0
    expected--;
2438
0
  if (expected != num)
2439
0
    DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", expected, num));
2440
0
  if (expected == 0) /* No hints found, just bail */
2441
0
    return 0;
2442
2443
0
  curr_pos = fz_tell(ctx, doc->file);
2444
0
  offset = doc->hint_obj_offsets[expected];
2445
2446
0
  fz_var(expected);
2447
2448
0
  fz_try(ctx)
2449
0
  {
2450
0
    int found;
2451
2452
    /* Try to read forward from there */
2453
0
    do
2454
0
    {
2455
0
      start = offset;
2456
0
      DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
2457
0
      pdf_obj_read(ctx, doc, &offset, &found, 0);
2458
0
      DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
2459
0
      if (found <= expected)
2460
0
      {
2461
        /* We found the right one (or one earlier than
2462
         * we expected). Update the hints. */
2463
0
        doc->hint_obj_offsets[expected] = offset;
2464
0
        doc->hint_obj_offsets[found] = start;
2465
0
        doc->hint_obj_offsets[found+1] = offset;
2466
        /* Retry with the next one */
2467
0
        expected = found+1;
2468
0
      }
2469
0
      else
2470
0
      {
2471
        /* We found one later than we expected. */
2472
0
        doc->hint_obj_offsets[expected] = 0;
2473
0
        doc->hint_obj_offsets[found] = start;
2474
0
        doc->hint_obj_offsets[found+1] = offset;
2475
0
        while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
2476
0
          expected--;
2477
0
        if (expected == 0) /* No hints found, we give up */
2478
0
          break;
2479
0
      }
2480
0
    }
2481
0
    while (found != num);
2482
0
  }
2483
0
  fz_always(ctx)
2484
0
  {
2485
0
    fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
2486
0
  }
2487
0
  fz_catch(ctx)
2488
0
  {
2489
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2490
    /* FIXME: Currently we ignore the hint. Perhaps we should
2491
     * drop back to non-hinted operation here. */
2492
0
    doc->hint_obj_offsets[expected] = 0;
2493
0
    fz_rethrow(ctx);
2494
0
  }
2495
0
  return expected != 0;
2496
0
}
2497
2498
pdf_obj *
2499
pdf_load_unencrypted_object(fz_context *ctx, pdf_document *doc, int num)
2500
0
{
2501
0
  pdf_xref_entry *x;
2502
2503
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2504
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2505
2506
0
  x = pdf_get_xref_entry_no_null(ctx, doc, num);
2507
0
  if (x->type == 'n')
2508
0
  {
2509
0
    fz_seek(ctx, doc->file, x->ofs, SEEK_SET);
2510
0
    return pdf_parse_ind_obj(ctx, doc, doc->file, NULL, NULL, NULL, NULL);
2511
0
  }
2512
0
  return NULL;
2513
0
}
2514
2515
/*
 * Make sure object 'num' is loaded into its xref entry and return that
 * entry.
 *
 * Throws FZ_ERROR_FORMAT if 'num' is outside 1..xref_len-1 or the
 * object cannot be found or parsed. Depending on the entry type:
 *   'f'  the entry's object becomes PDF_NULL;
 *   'n'  the object is parsed from the file at the entry's offset and
 *        decrypted if the document has crypt state;
 *   'o'  the containing object stream is loaded;
 *   else the hint tables are tried, and failing that either TRYLATER
 *        (file not fully loaded yet) or FORMAT is thrown.
 *
 * A parse failure flagged as repairable, or an object found under a
 * different number than requested, triggers an xref repair followed by
 * a fresh lookup from the object_updated label.
 */
pdf_xref_entry *
2516
pdf_cache_object(fz_context *ctx, pdf_document *doc, int num)
2517
6.68M
{
2518
6.68M
  pdf_xref_entry *x;
2519
6.68M
  int rnum, rgen, try_repair;
2520
2521
6.68M
  fz_var(try_repair);
2522
2523
6.68M
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2524
70.7k
    fz_throw(ctx, FZ_ERROR_FORMAT, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2525
2526
6.61M
object_updated:
2527
6.61M
  try_repair = 0;
2528
6.61M
  rnum = num;
2529
2530
6.61M
  x = pdf_get_xref_entry(ctx, doc, num);
2531
6.61M
  if (x == NULL)
2532
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find object in xref (%d 0 R)", num);
2533
2534
  /* Already cached: nothing more to do. */
6.61M
  if (x->obj != NULL)
2535
6.05M
    return x;
2536
2537
561k
  if (x->type == 'f')
2538
151k
  {
2539
151k
    x->obj = PDF_NULL;
2540
151k
  }
2541
409k
  else if (x->type == 'n')
2542
386k
  {
2543
386k
    fz_seek(ctx, doc->file, x->ofs, SEEK_SET);
2544
2545
772k
    fz_try(ctx)
2546
772k
    {
2547
386k
      x->obj = pdf_parse_ind_obj(ctx, doc, doc->file,
2548
386k
          &rnum, &rgen, &x->stm_ofs, &try_repair);
2549
386k
    }
2550
772k
    fz_catch(ctx)
2551
225k
    {
2552
225k
      fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2553
225k
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2554
225k
      if (!try_repair)
2555
225k
        fz_rethrow(ctx);
2556
178
      else
2557
178
        fz_report_error(ctx);
2558
225k
    }
2559
2560
    /* The object at this offset carries a different number than the
     * one asked for: mark the entry free and repair, unless a repair
     * has already been attempted. */
161k
    if (!try_repair && rnum != num)
2561
15
    {
2562
15
      pdf_drop_obj(ctx, x->obj);
2563
15
      x->type = 'f';
2564
15
      x->ofs = -1;
2565
15
      x->gen = 0;
2566
15
      x->num = 0;
2567
15
      x->stm_ofs = 0;
2568
15
      x->obj = NULL;
2569
15
      try_repair = (doc->repair_attempted == 0);
2570
15
    }
2571
2572
161k
    if (try_repair)
2573
193
    {
      /* Also jumped to from the object stream branch below. */
2574
193
perform_repair:
2575
386
      fz_try(ctx)
2576
386
      {
2577
193
        pdf_repair_xref(ctx, doc);
2578
193
        pdf_prime_xref_index(ctx, doc);
2579
193
        pdf_repair_obj_stms(ctx, doc);
2580
193
        pdf_repair_trailer(ctx, doc);
2581
193
      }
2582
386
      fz_catch(ctx)
2583
137
      {
2584
137
        fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2585
137
        fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2586
137
        fz_rethrow_if(ctx, FZ_ERROR_REPAIRED);
2587
137
        fz_report_error(ctx);
2588
137
        if (rnum == num)
2589
129
          fz_throw(ctx, FZ_ERROR_FORMAT, "cannot parse object (%d 0 R)", num);
2590
8
        else
2591
8
          fz_throw(ctx, FZ_ERROR_FORMAT, "found object (%d 0 R) instead of (%d 0 R)", rnum, num);
2592
137
      }
2593
56
      goto object_updated;
2594
193
    }
2595
2596
161k
    if (doc->crypt)
2597
5.43k
      pdf_crypt_obj(ctx, doc->crypt, x->obj, x->num, x->gen);
2598
161k
  }
2599
23.6k
  else if (x->type == 'o')
2600
23.5k
  {
2601
23.5k
    if (!x->obj)
2602
23.5k
    {
2603
23.5k
      pdf_xref_entry *orig_x = x;
2604
23.5k
      pdf_xref_entry *ox = x; /* This init is unused, but it shuts warnings up. */
2605
23.5k
      orig_x->type = 'O'; /* Mark this node so we know we're recursing. */
2606
47.1k
      fz_try(ctx)
2607
47.1k
        x = pdf_load_obj_stm(ctx, doc, x->ofs, &doc->lexbuf.base, num);
2608
47.1k
      fz_always(ctx)
2609
23.5k
      {
2610
        /* Most of the time ox == orig_x, but if pdf_load_obj_stm performed a
2611
         * repair, it may not be. It is safe to call pdf_get_xref_entry_no_change
2612
         * here, as it does not try/catch. */
2613
23.5k
        ox = pdf_get_xref_entry_no_change(ctx, doc, num);
2614
        /* Bug 706762: ox can be NULL if the object went away during a repair. */
2615
23.5k
        if (ox && ox->type == 'O')
2616
23.5k
          ox->type = 'o'; /* Not recursing any more. */
2617
23.5k
      }
2618
23.5k
      fz_catch(ctx)
2619
19.8k
        fz_rethrow(ctx);
2620
3.68k
      if (x == NULL)
2621
96
        fz_throw(ctx, FZ_ERROR_FORMAT, "cannot load object stream containing object (%d 0 R)", num);
2622
      /* The stream loaded but did not yield this object: mark the
       * entry free and repair, unless already attempted. */
3.58k
      if (!x->obj)
2623
0
      {
2624
0
        x->type = 'f';
2625
0
        if (ox)
2626
0
          ox->type = 'f';
2627
0
        if (doc->repair_attempted)
2628
0
          fz_throw(ctx, FZ_ERROR_FORMAT, "object (%d 0 R) was not found in its object stream", num);
2629
0
        goto perform_repair;
2630
0
      }
2631
3.58k
    }
2632
23.5k
  }
2633
110
  else if (doc->hint_obj_offsets && read_hinted_object(ctx, doc, num))
2634
0
  {
2635
0
    goto object_updated;
2636
0
  }
2637
110
  else if (doc->file_length && doc->linear_pos < doc->file_length)
2638
0
  {
2639
0
    fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d 0 R) - not loaded yet?", num);
2640
0
  }
2641
110
  else
2642
110
  {
2643
110
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find object in xref (%d 0 R)", num);
2644
110
  }
2645
2646
316k
  pdf_set_obj_parent(ctx, x->obj, num);
2647
316k
  return x;
2648
561k
}
2649
2650
pdf_obj *
2651
pdf_load_object(fz_context *ctx, pdf_document *doc, int num)
2652
268k
{
2653
268k
  pdf_xref_entry *entry = pdf_cache_object(ctx, doc, num);
2654
268k
  return pdf_keep_obj(ctx, entry->obj);
2655
268k
}
2656
2657
pdf_obj *
2658
pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref)
2659
5.99M
{
2660
5.99M
  if (pdf_is_indirect(ctx, ref))
2661
5.99M
  {
2662
5.99M
    pdf_document *doc = pdf_get_indirect_document(ctx, ref);
2663
5.99M
    int num = pdf_to_num(ctx, ref);
2664
5.99M
    pdf_xref_entry *entry;
2665
2666
5.99M
    if (!doc)
2667
0
      return NULL;
2668
5.99M
    if (num <= 0)
2669
1.78k
    {
2670
1.78k
      fz_warn(ctx, "invalid indirect reference (%d 0 R)", num);
2671
1.78k
      return NULL;
2672
1.78k
    }
2673
2674
11.9M
    fz_try(ctx)
2675
11.9M
      entry = pdf_cache_object(ctx, doc, num);
2676
11.9M
    fz_catch(ctx)
2677
303k
    {
2678
303k
      fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2679
303k
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2680
303k
      fz_rethrow_if(ctx, FZ_ERROR_REPAIRED);
2681
303k
      fz_report_error(ctx);
2682
303k
      fz_warn(ctx, "cannot load object (%d 0 R) into cache", num);
2683
303k
      return NULL;
2684
303k
    }
2685
2686
5.69M
    ref = entry->obj;
2687
5.69M
  }
2688
5.69M
  return ref;
2689
5.99M
}
2690
2691
pdf_obj *
2692
pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref)
2693
5.98M
{
2694
5.98M
  int sanity = 10;
2695
2696
11.9M
  while (pdf_is_indirect(ctx, ref))
2697
5.99M
  {
2698
5.99M
    if (--sanity == 0)
2699
1.25k
    {
2700
1.25k
      fz_warn(ctx, "too many indirections (possible indirection cycle involving %d 0 R)", pdf_to_num(ctx, ref));
2701
1.25k
      return NULL;
2702
1.25k
    }
2703
2704
5.99M
    ref = pdf_resolve_indirect(ctx, ref);
2705
5.99M
  }
2706
2707
5.98M
  return ref;
2708
5.98M
}
2709
2710
/*
 * Return the value of pdf_xref_len for the document.
 */
int
2711
pdf_count_objects(fz_context *ctx, pdf_document *doc)
2712
0
{
2713
0
  return pdf_xref_len(ctx, doc);
2714
0
}
2715
2716
int
2717
pdf_is_local_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
2718
157
{
2719
157
  pdf_xref *xref = doc->local_xref;
2720
157
  pdf_xref_subsec *sub;
2721
157
  int num;
2722
2723
157
  if (!pdf_is_indirect(ctx, obj))
2724
3
    return 0;
2725
2726
154
  if (xref == NULL)
2727
69
    return 0; /* no local xref present */
2728
2729
85
  num = pdf_to_num(ctx, obj);
2730
2731
  /* Local xrefs only ever have 1 section, and it should be solid. */
2732
85
  sub = xref->subsec;
2733
85
  if (num >= sub->start && num < sub->start + sub->len)
2734
85
    return sub->table[num - sub->start].type != 0;
2735
2736
0
  return 0;
2737
85
}
2738
2739
static int
2740
pdf_create_local_object(fz_context *ctx, pdf_document *doc)
2741
6.60k
{
2742
  /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
2743
6.60k
  pdf_xref_entry *entry;
2744
6.60k
  int num;
2745
2746
6.60k
  num = doc->local_xref->num_objects;
2747
2748
6.60k
  entry = pdf_get_local_xref_entry(ctx, doc, num);
2749
6.60k
  entry->type = 'f';
2750
6.60k
  entry->ofs = -1;
2751
6.60k
  entry->gen = 0;
2752
6.60k
  entry->num = num;
2753
6.60k
  entry->stm_ofs = 0;
2754
6.60k
  entry->stm_buf = NULL;
2755
6.60k
  entry->obj = NULL;
2756
6.60k
  return num;
2757
6.60k
}
2758
2759
int
2760
pdf_create_object(fz_context *ctx, pdf_document *doc)
2761
6.60k
{
2762
  /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
2763
6.60k
  pdf_xref_entry *entry;
2764
6.60k
  int num;
2765
2766
6.60k
  if (doc->local_xref && doc->local_xref_nesting > 0)
2767
6.60k
    return pdf_create_local_object(ctx, doc);
2768
2769
0
  num = pdf_xref_len(ctx, doc);
2770
2771
0
  if (num > PDF_MAX_OBJECT_NUMBER)
2772
0
    fz_throw(ctx, FZ_ERROR_LIMIT, "too many objects stored in pdf");
2773
2774
0
  entry = pdf_get_incremental_xref_entry(ctx, doc, num);
2775
0
  entry->type = 'f';
2776
0
  entry->ofs = -1;
2777
0
  entry->gen = 0;
2778
0
  entry->num = num;
2779
0
  entry->stm_ofs = 0;
2780
0
  entry->stm_buf = NULL;
2781
0
  entry->obj = NULL;
2782
2783
0
  pdf_add_journal_fragment(ctx, doc, num, NULL, NULL, 1);
2784
2785
0
  return num;
2786
0
}
2787
2788
static void
2789
pdf_delete_local_object(fz_context *ctx, pdf_document *doc, int num)
2790
0
{
2791
0
  pdf_xref_entry *x;
2792
2793
0
  if (doc->local_xref == NULL || doc->local_xref_nesting == 0)
2794
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "No local xref to delete from!");
2795
2796
0
  if (num <= 0 || num >= doc->local_xref->num_objects)
2797
0
  {
2798
0
    fz_warn(ctx, "local object out of range (%d 0 R); xref size %d", num, doc->local_xref->num_objects);
2799
0
    return;
2800
0
  }
2801
2802
0
  x = pdf_get_local_xref_entry(ctx, doc, num);
2803
2804
0
  fz_drop_buffer(ctx, x->stm_buf);
2805
0
  pdf_drop_obj(ctx, x->obj);
2806
2807
0
  x->type = 'f';
2808
0
  x->ofs = 0;
2809
0
  x->gen += 1;
2810
0
  x->num = 0;
2811
0
  x->stm_ofs = 0;
2812
0
  x->stm_buf = NULL;
2813
0
  x->obj = NULL;
2814
0
}
2815
2816
void
2817
pdf_delete_object(fz_context *ctx, pdf_document *doc, int num)
2818
0
{
2819
0
  pdf_xref_entry *x;
2820
0
  pdf_xref *xref;
2821
0
  int j;
2822
2823
0
  if (doc->local_xref && doc->local_xref_nesting > 0)
2824
0
  {
2825
0
    pdf_delete_local_object(ctx, doc, num);
2826
0
    return;
2827
0
  }
2828
2829
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2830
0
  {
2831
0
    fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2832
0
    return;
2833
0
  }
2834
2835
0
  x = pdf_get_incremental_xref_entry(ctx, doc, num);
2836
2837
0
  fz_drop_buffer(ctx, x->stm_buf);
2838
0
  pdf_drop_obj(ctx, x->obj);
2839
2840
0
  x->type = 'f';
2841
0
  x->ofs = 0;
2842
0
  x->gen += 1;
2843
0
  x->num = 0;
2844
0
  x->stm_ofs = 0;
2845
0
  x->stm_buf = NULL;
2846
0
  x->obj = NULL;
2847
2848
  /* Currently we've left a 'free' object in the incremental
2849
   * section. This is enough to cause us to think that the
2850
   * document has changes. Check back in the non-incremental
2851
   * sections to see if the last instance of the object there
2852
   * was free (or if this object never appeared). If so, we
2853
   * can mark this object as non-existent in the incremental
2854
   * xref. This is important so we can 'undo' back to emptiness
2855
   * after we save/when we reload a snapshot. */
2856
0
  for (j = 1; j < doc->num_xref_sections; j++)
2857
0
  {
2858
0
    xref = &doc->xref_sections[j];
2859
2860
0
    if (num < xref->num_objects)
2861
0
    {
2862
0
      pdf_xref_subsec *sub;
2863
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
2864
0
      {
2865
0
        pdf_xref_entry *entry;
2866
2867
0
        if (num < sub->start || num >= sub->start + sub->len)
2868
0
          continue;
2869
2870
0
        entry = &sub->table[num - sub->start];
2871
0
        if (entry->type)
2872
0
        {
2873
0
          if (entry->type == 'f')
2874
0
          {
2875
            /* It was free already! */
2876
0
            x->type = 0;
2877
0
            x->gen = 0;
2878
0
          }
2879
          /* It was a real object. */
2880
0
          return;
2881
0
        }
2882
0
      }
2883
0
    }
2884
0
  }
2885
  /* It never appeared before. */
2886
0
  x->type = 0;
2887
0
  x->gen = 0;
2888
0
}
2889
2890
static void
2891
pdf_update_local_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
2892
6.60k
{
2893
6.60k
  pdf_xref_entry *x;
2894
2895
6.60k
  if (doc->local_xref == NULL || doc->local_xref_nesting == 0)
2896
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't update local object without a local xref");
2897
2898
6.60k
  if (!newobj)
2899
0
  {
2900
0
    pdf_delete_local_object(ctx, doc, num);
2901
0
    return;
2902
0
  }
2903
2904
6.60k
  x = pdf_get_local_xref_entry(ctx, doc, num);
2905
2906
6.60k
  pdf_drop_obj(ctx, x->obj);
2907
2908
6.60k
  x->type = 'n';
2909
6.60k
  x->ofs = 0;
2910
6.60k
  x->obj = pdf_keep_obj(ctx, newobj);
2911
2912
6.60k
  pdf_set_obj_parent(ctx, newobj, num);
2913
6.60k
}
2914
2915
void
2916
pdf_update_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
2917
6.60k
{
2918
6.60k
  pdf_xref_entry *x;
2919
2920
6.60k
  if (doc->local_xref && doc->local_xref_nesting > 0)
2921
6.60k
  {
2922
6.60k
    pdf_update_local_object(ctx, doc, num, newobj);
2923
6.60k
    return;
2924
6.60k
  }
2925
2926
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2927
0
  {
2928
0
    fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2929
0
    return;
2930
0
  }
2931
2932
0
  if (!newobj)
2933
0
  {
2934
0
    pdf_delete_object(ctx, doc, num);
2935
0
    return;
2936
0
  }
2937
2938
0
  x = pdf_get_incremental_xref_entry(ctx, doc, num);
2939
2940
0
  pdf_drop_obj(ctx, x->obj);
2941
2942
0
  x->type = 'n';
2943
0
  x->ofs = 0;
2944
0
  x->obj = pdf_keep_obj(ctx, newobj);
2945
2946
0
  pdf_set_obj_parent(ctx, newobj, num);
2947
0
}
2948
2949
void
2950
pdf_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj, fz_buffer *newbuf, int compressed)
2951
5.62k
{
2952
5.62k
  int num;
2953
5.62k
  pdf_xref_entry *x;
2954
2955
5.62k
  if (pdf_is_indirect(ctx, obj))
2956
5.62k
    num = pdf_to_num(ctx, obj);
2957
0
  else
2958
0
    num = pdf_obj_parent_num(ctx, obj);
2959
2960
  /* Write the Length first, as this has the effect of moving the
2961
   * old object into the journal for undo. This also moves the
2962
   * stream buffer with it, keeping it consistent. */
2963
5.62k
  pdf_dict_put_int(ctx, obj, PDF_NAME(Length), fz_buffer_storage(ctx, newbuf, NULL));
2964
2965
5.62k
  if (doc->local_xref && doc->local_xref_nesting > 0)
2966
5.62k
  {
2967
5.62k
    x = pdf_get_local_xref_entry(ctx, doc, num);
2968
5.62k
  }
2969
0
  else
2970
0
  {
2971
0
    if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2972
0
    {
2973
0
      fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2974
0
      return;
2975
0
    }
2976
2977
0
    x = pdf_get_xref_entry_no_null(ctx, doc, num);
2978
0
  }
2979
2980
5.62k
  fz_drop_buffer(ctx, x->stm_buf);
2981
5.62k
  x->stm_buf = fz_keep_buffer(ctx, newbuf);
2982
2983
5.62k
  if (!compressed)
2984
5.62k
  {
2985
5.62k
    pdf_dict_del(ctx, obj, PDF_NAME(Filter));
2986
5.62k
    pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));
2987
5.62k
  }
2988
5.62k
}
2989
2990
int
2991
pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *buf, int size)
2992
0
{
2993
0
  if (!strcmp(key, FZ_META_FORMAT))
2994
0
  {
2995
0
    int version = pdf_version(ctx, doc);
2996
0
    return 1 + (int)fz_snprintf(buf, size, "PDF %d.%d", version/10, version % 10);
2997
0
  }
2998
2999
0
  if (!strcmp(key, FZ_META_ENCRYPTION))
3000
0
  {
3001
0
    if (doc->crypt)
3002
0
    {
3003
0
      const char *stream_method = pdf_crypt_stream_method(ctx, doc->crypt);
3004
0
      const char *string_method = pdf_crypt_string_method(ctx, doc->crypt);
3005
0
      if (stream_method == string_method)
3006
0
        return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit %s",
3007
0
            pdf_crypt_version(ctx, doc->crypt),
3008
0
            pdf_crypt_revision(ctx, doc->crypt),
3009
0
            pdf_crypt_length(ctx, doc->crypt),
3010
0
            pdf_crypt_string_method(ctx, doc->crypt));
3011
0
      else
3012
0
        return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit streams: %s strings: %s",
3013
0
            pdf_crypt_version(ctx, doc->crypt),
3014
0
            pdf_crypt_revision(ctx, doc->crypt),
3015
0
            pdf_crypt_length(ctx, doc->crypt),
3016
0
            pdf_crypt_stream_method(ctx, doc->crypt),
3017
0
            pdf_crypt_string_method(ctx, doc->crypt));
3018
0
    }
3019
0
    else
3020
0
      return 1 + (int)fz_strlcpy(buf, "None", size);
3021
0
  }
3022
3023
0
  if (strstr(key, "info:") == key)
3024
0
  {
3025
0
    pdf_obj *info;
3026
0
    const char *s;
3027
0
    int n;
3028
3029
0
    info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
3030
0
    if (!info)
3031
0
      return -1;
3032
3033
0
    info = pdf_dict_gets(ctx, info, key + 5);
3034
0
    if (!info)
3035
0
      return -1;
3036
3037
0
    s = pdf_to_text_string(ctx, info);
3038
0
    if (strlen(s) <= 0)
3039
0
      return -1;
3040
3041
0
    n = 1 + (int)fz_strlcpy(buf, s, size);
3042
0
    return n;
3043
0
  }
3044
3045
0
  return -1;
3046
0
}
3047
3048
void
3049
pdf_set_metadata(fz_context *ctx, pdf_document *doc, const char *key, const char *value)
3050
0
{
3051
3052
0
  pdf_obj *info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
3053
3054
0
  pdf_begin_operation(ctx, doc, "Set Metadata");
3055
3056
0
  fz_try(ctx)
3057
0
  {
3058
    /* Ensure we have an Info dictionary. */
3059
0
    if (!pdf_is_dict(ctx, info))
3060
0
    {
3061
0
      info = pdf_add_new_dict(ctx, doc, 8);
3062
0
      pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), info);
3063
0
    }
3064
3065
0
    if (!strcmp(key, FZ_META_INFO_TITLE))
3066
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Title), value);
3067
0
    else if (!strcmp(key, FZ_META_INFO_AUTHOR))
3068
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Author), value);
3069
0
    else if (!strcmp(key, FZ_META_INFO_SUBJECT))
3070
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Subject), value);
3071
0
    else if (!strcmp(key, FZ_META_INFO_KEYWORDS))
3072
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Keywords), value);
3073
0
    else if (!strcmp(key, FZ_META_INFO_CREATOR))
3074
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Creator), value);
3075
0
    else if (!strcmp(key, FZ_META_INFO_PRODUCER))
3076
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Producer), value);
3077
0
    else if (!strcmp(key, FZ_META_INFO_CREATIONDATE))
3078
0
    {
3079
0
      int64_t time = pdf_parse_date(ctx, value);
3080
0
      if (time >= 0)
3081
0
        pdf_dict_put_date(ctx, info, PDF_NAME(CreationDate), time);
3082
0
    }
3083
0
    else if (!strcmp(key, FZ_META_INFO_MODIFICATIONDATE))
3084
0
    {
3085
0
      int64_t time = pdf_parse_date(ctx, value);
3086
0
      if (time >= 0)
3087
0
        pdf_dict_put_date(ctx, info, PDF_NAME(ModDate), time);
3088
0
    }
3089
3090
0
    if (!strncmp(key, FZ_META_INFO, strlen(FZ_META_INFO)))
3091
0
      key += strlen(FZ_META_INFO);
3092
0
    pdf_dict_put_text_string(ctx, info, pdf_new_name(ctx, key), value);
3093
0
    pdf_end_operation(ctx, doc);
3094
0
  }
3095
0
  fz_catch(ctx)
3096
0
  {
3097
0
    pdf_abandon_operation(ctx, doc);
3098
0
    fz_rethrow(ctx);
3099
0
  }
3100
0
}
3101
3102
static fz_link_dest
3103
pdf_resolve_link_imp(fz_context *ctx, fz_document *doc_, const char *uri)
3104
0
{
3105
0
  pdf_document *doc = (pdf_document*)doc_;
3106
0
  return pdf_resolve_link_dest(ctx, doc, uri);
3107
0
}
3108
3109
/*
	fz_document format_link_uri hook: builds a URI from an explicit
	destination via pdf_new_uri_from_explicit_dest. 'doc' is unused.
*/
char *pdf_format_link_uri(fz_context *ctx, fz_document *doc, fz_link_dest dest)
{
	char *uri = pdf_new_uri_from_explicit_dest(ctx, dest);

	return uri;
}
3113
3114
/*
3115
  Initializers for the fz_document interface.
3116
3117
  The functions are split across two files to allow calls to a
3118
  version of the constructor that does not link in the interpreter.
3119
  The interpreter references the built-in font and cmap resources
3120
  which are quite big. Not linking those into the mutool binary
3121
  saves roughly 6MB of space.
3122
*/
3123
3124
static pdf_document *
3125
pdf_new_document(fz_context *ctx, fz_stream *file)
3126
11.8k
{
3127
11.8k
  pdf_document *doc = fz_new_derived_document(ctx, pdf_document);
3128
3129
11.8k
#ifndef NDEBUG
3130
11.8k
  {
3131
11.8k
    void pdf_verify_name_table_sanity(void);
3132
11.8k
    pdf_verify_name_table_sanity();
3133
11.8k
  }
3134
11.8k
#endif
3135
3136
11.8k
  doc->super.drop_document = (fz_document_drop_fn*)pdf_drop_document_imp;
3137
11.8k
  doc->super.get_output_intent = (fz_document_output_intent_fn*)pdf_document_output_intent;
3138
11.8k
  doc->super.needs_password = (fz_document_needs_password_fn*)pdf_needs_password;
3139
11.8k
  doc->super.authenticate_password = (fz_document_authenticate_password_fn*)pdf_authenticate_password;
3140
11.8k
  doc->super.has_permission = (fz_document_has_permission_fn*)pdf_has_permission;
3141
11.8k
  doc->super.outline_iterator = (fz_document_outline_iterator_fn*)pdf_new_outline_iterator;
3142
11.8k
  doc->super.resolve_link_dest = pdf_resolve_link_imp;
3143
11.8k
  doc->super.format_link_uri = pdf_format_link_uri;
3144
11.8k
  doc->super.count_pages = pdf_count_pages_imp;
3145
11.8k
  doc->super.load_page = pdf_load_page_imp;
3146
11.8k
  doc->super.page_label = pdf_page_label_imp;
3147
11.8k
  doc->super.lookup_metadata = (fz_document_lookup_metadata_fn*)pdf_lookup_metadata;
3148
11.8k
  doc->super.set_metadata = (fz_document_set_metadata_fn*)pdf_set_metadata;
3149
11.8k
  doc->super.run_structure = (fz_document_run_structure_fn *)pdf_run_document_structure;
3150
3151
11.8k
  pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE);
3152
11.8k
  doc->file = fz_keep_stream(ctx, file);
3153
3154
  /* Default to PDF-1.7 if the version header is missing and for new documents */
3155
11.8k
  doc->version = 17;
3156
3157
11.8k
  return doc;
3158
11.8k
}
3159
3160
pdf_document *
3161
pdf_open_document_with_stream(fz_context *ctx, fz_stream *file)
3162
11.8k
{
3163
11.8k
  pdf_document *doc = pdf_new_document(ctx, file);
3164
23.7k
  fz_try(ctx)
3165
23.7k
  {
3166
11.8k
    pdf_init_document(ctx, doc);
3167
11.8k
  }
3168
23.7k
  fz_catch(ctx)
3169
386
  {
3170
    /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */
3171
386
    char message[256];
3172
386
    int code;
3173
386
    fz_strlcpy(message, fz_convert_error(ctx, &code), sizeof message);
3174
386
    fz_drop_document(ctx, &doc->super);
3175
386
    fz_throw(ctx, code, "%s", message);
3176
386
  }
3177
11.5k
  return doc;
3178
11.8k
}
3179
3180
/* Uncomment the following to test progressive loading. */
3181
/* #define TEST_PROGRESSIVE_HACK */
3182
3183
pdf_document *
3184
pdf_open_document(fz_context *ctx, const char *filename)
3185
0
{
3186
0
  fz_stream *file = NULL;
3187
0
  pdf_document *doc = NULL;
3188
3189
0
  fz_var(file);
3190
0
  fz_var(doc);
3191
3192
0
  fz_try(ctx)
3193
0
  {
3194
0
    file = fz_open_file(ctx, filename);
3195
#ifdef TEST_PROGRESSIVE_HACK
3196
    file->progressive = 1;
3197
#endif
3198
0
    doc = pdf_new_document(ctx, file);
3199
0
    pdf_init_document(ctx, doc);
3200
0
  }
3201
0
  fz_always(ctx)
3202
0
  {
3203
0
    fz_drop_stream(ctx, file);
3204
0
  }
3205
0
  fz_catch(ctx)
3206
0
  {
3207
    /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */
3208
0
    char message[256];
3209
0
    int code;
3210
0
    fz_strlcpy(message, fz_convert_error(ctx, &code), sizeof message);
3211
0
    fz_drop_document(ctx, &doc->super);
3212
0
    fz_throw(ctx, code, "%s", message);
3213
0
  }
3214
3215
#ifdef TEST_PROGRESSIVE_HACK
3216
  if (doc->file_reading_linearly)
3217
  {
3218
    fz_try(ctx)
3219
      pdf_progressive_advance(ctx, doc, doc->linear_page_count-1);
3220
    fz_catch(ctx)
3221
    {
3222
      doc->file_reading_linearly = 0;
3223
      /* swallow the error */
3224
    }
3225
  }
3226
#endif
3227
3228
0
  return doc;
3229
0
}
3230
3231
static void
3232
pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum)
3233
0
{
3234
0
  fz_stream *stream = NULL;
3235
0
  pdf_obj *dict;
3236
3237
0
  fz_var(stream);
3238
0
  fz_var(dict);
3239
3240
0
  fz_try(ctx)
3241
0
  {
3242
0
    int i, j, least_num_page_objs, page_obj_num_bits;
3243
0
    int least_page_len, page_len_num_bits, shared_hint_offset;
3244
    /* int least_page_offset, page_offset_num_bits; */
3245
    /* int least_content_stream_len, content_stream_len_num_bits; */
3246
0
    int num_shared_obj_num_bits, shared_obj_num_bits;
3247
    /* int numerator_bits, denominator_bits; */
3248
0
    int shared;
3249
0
    int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
3250
0
    int shared_obj_count_total;
3251
0
    int least_shared_group_len, shared_group_len_num_bits;
3252
0
    int max_object_num = pdf_xref_len(ctx, doc);
3253
3254
0
    stream = pdf_open_stream_number(ctx, doc, objnum);
3255
0
    dict = pdf_get_xref_entry_no_null(ctx, doc, objnum)->obj;
3256
0
    if (dict == NULL || !pdf_is_dict(ctx, dict))
3257
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "malformed hint object");
3258
3259
0
    shared_hint_offset = pdf_dict_get_int(ctx, dict, PDF_NAME(S));
3260
3261
    /* Malloc the structures (use realloc to cope with the fact we
3262
     * may try this several times before enough data is loaded) */
3263
0
    doc->hint_page = fz_realloc_array(ctx, doc->hint_page, doc->linear_page_count+1, pdf_hint_page);
3264
0
    memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->linear_page_count+1));
3265
0
    doc->hint_obj_offsets = fz_realloc_array(ctx, doc->hint_obj_offsets, max_object_num, int64_t);
3266
0
    memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num);
3267
0
    doc->hint_obj_offsets_max = max_object_num;
3268
3269
    /* Read the page object hints table: Header first */
3270
0
    least_num_page_objs = fz_read_bits(ctx, stream, 32);
3271
    /* The following is sometimes a lie, but we read this version,
3272
     * as other table values are built from it. In
3273
     * pdf_reference17.pdf, this points to 2 objects before the
3274
     * first pages page object. */
3275
0
    doc->hint_page[0].offset = fz_read_bits(ctx, stream, 32);
3276
0
    if (doc->hint_page[0].offset > doc->hint_object_offset)
3277
0
      doc->hint_page[0].offset += doc->hint_object_length;
3278
0
    page_obj_num_bits = fz_read_bits(ctx, stream, 16);
3279
0
    least_page_len = fz_read_bits(ctx, stream, 32);
3280
0
    page_len_num_bits = fz_read_bits(ctx, stream, 16);
3281
0
    /* least_page_offset = */ (void) fz_read_bits(ctx, stream, 32);
3282
0
    /* page_offset_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
3283
0
    /* least_content_stream_len = */ (void) fz_read_bits(ctx, stream, 32);
3284
0
    /* content_stream_len_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
3285
0
    num_shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3286
0
    shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3287
0
    /* numerator_bits = */ (void) fz_read_bits(ctx, stream, 16);
3288
0
    /* denominator_bits = */ (void) fz_read_bits(ctx, stream, 16);
3289
3290
    /* Item 1: Page object numbers */
3291
0
    doc->hint_page[0].number = doc->linear_page1_obj_num;
3292
    /* We don't care about the number of objects in the first page */
3293
0
    (void)fz_read_bits(ctx, stream, page_obj_num_bits);
3294
0
    j = 1;
3295
0
    for (i = 1; i < doc->linear_page_count; i++)
3296
0
    {
3297
0
      int delta_page_objs = fz_read_bits(ctx, stream, page_obj_num_bits);
3298
3299
0
      doc->hint_page[i].number = j;
3300
0
      j += least_num_page_objs + delta_page_objs;
3301
0
    }
3302
0
    doc->hint_page[i].number = j; /* Not a real page object */
3303
0
    fz_sync_bits(ctx, stream);
3304
    /* Item 2: Page lengths */
3305
0
    j = doc->hint_page[0].offset;
3306
0
    for (i = 0; i < doc->linear_page_count; i++)
3307
0
    {
3308
0
      int delta_page_len = fz_read_bits(ctx, stream, page_len_num_bits);
3309
0
      int old = j;
3310
3311
0
      doc->hint_page[i].offset = j;
3312
0
      j += least_page_len + delta_page_len;
3313
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3314
0
        j += doc->hint_object_length;
3315
0
    }
3316
0
    doc->hint_page[i].offset = j;
3317
0
    fz_sync_bits(ctx, stream);
3318
    /* Item 3: Shared references */
3319
0
    shared = 0;
3320
0
    for (i = 0; i < doc->linear_page_count; i++)
3321
0
    {
3322
0
      int num_shared_objs = fz_read_bits(ctx, stream, num_shared_obj_num_bits);
3323
0
      doc->hint_page[i].index = shared;
3324
0
      shared += num_shared_objs;
3325
0
    }
3326
0
    doc->hint_page[i].index = shared;
3327
0
    doc->hint_shared_ref = fz_realloc_array(ctx, doc->hint_shared_ref, shared, int);
3328
0
    memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared);
3329
0
    fz_sync_bits(ctx, stream);
3330
    /* Item 4: Shared references */
3331
0
    for (i = 0; i < shared; i++)
3332
0
    {
3333
0
      int ref = fz_read_bits(ctx, stream, shared_obj_num_bits);
3334
0
      doc->hint_shared_ref[i] = ref;
3335
0
    }
3336
    /* Skip items 5,6,7 as we don't use them */
3337
3338
0
    fz_seek(ctx, stream, shared_hint_offset, SEEK_SET);
3339
3340
    /* Read the shared object hints table: Header first */
3341
0
    shared_obj_num = fz_read_bits(ctx, stream, 32);
3342
0
    shared_obj_offset = fz_read_bits(ctx, stream, 32);
3343
0
    if (shared_obj_offset > doc->hint_object_offset)
3344
0
      shared_obj_offset += doc->hint_object_length;
3345
0
    shared_obj_count_page1 = fz_read_bits(ctx, stream, 32);
3346
0
    shared_obj_count_total = fz_read_bits(ctx, stream, 32);
3347
0
    shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3348
0
    least_shared_group_len = fz_read_bits(ctx, stream, 32);
3349
0
    shared_group_len_num_bits = fz_read_bits(ctx, stream, 16);
3350
3351
    /* Sanity check the references in Item 4 above to ensure we
3352
     * don't access out of range with malicious files. */
3353
0
    for (i = 0; i < shared; i++)
3354
0
    {
3355
0
      if (doc->hint_shared_ref[i] >= shared_obj_count_total)
3356
0
      {
3357
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "malformed hint stream (shared refs)");
3358
0
      }
3359
0
    }
3360
3361
0
    doc->hint_shared = fz_realloc_array(ctx, doc->hint_shared, shared_obj_count_total+1, pdf_hint_shared);
3362
0
    memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1));
3363
3364
    /* Item 1: Shared references */
3365
0
    j = doc->hint_page[0].offset;
3366
0
    for (i = 0; i < shared_obj_count_page1; i++)
3367
0
    {
3368
0
      int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
3369
0
      int old = j;
3370
0
      doc->hint_shared[i].offset = j;
3371
0
      j += off + least_shared_group_len;
3372
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3373
0
        j += doc->hint_object_length;
3374
0
    }
3375
    /* FIXME: We would have problems recreating the length of the
3376
     * last page 1 shared reference group. But we'll never need
3377
     * to, so ignore it. */
3378
0
    j = shared_obj_offset;
3379
0
    for (; i < shared_obj_count_total; i++)
3380
0
    {
3381
0
      int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
3382
0
      int old = j;
3383
0
      doc->hint_shared[i].offset = j;
3384
0
      j += off + least_shared_group_len;
3385
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3386
0
        j += doc->hint_object_length;
3387
0
    }
3388
0
    doc->hint_shared[i].offset = j;
3389
0
    fz_sync_bits(ctx, stream);
3390
    /* Item 2: Signature flags: read these just so we can skip */
3391
0
    for (i = 0; i < shared_obj_count_total; i++)
3392
0
    {
3393
0
      doc->hint_shared[i].number = fz_read_bits(ctx, stream, 1);
3394
0
    }
3395
0
    fz_sync_bits(ctx, stream);
3396
    /* Item 3: Signatures: just skip */
3397
0
    for (i = 0; i < shared_obj_count_total; i++)
3398
0
    {
3399
0
      if (doc->hint_shared[i].number)
3400
0
      {
3401
0
        (void) fz_read_bits(ctx, stream, 128);
3402
0
      }
3403
0
    }
3404
0
    fz_sync_bits(ctx, stream);
3405
    /* Item 4: Shared object object numbers */
3406
0
    j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */
3407
0
    for (i = 0; i < shared_obj_count_page1; i++)
3408
0
    {
3409
0
      doc->hint_shared[i].number = j;
3410
0
      j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
3411
0
    }
3412
0
    j = shared_obj_num;
3413
0
    for (; i < shared_obj_count_total; i++)
3414
0
    {
3415
0
      doc->hint_shared[i].number = j;
3416
0
      j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
3417
0
    }
3418
0
    doc->hint_shared[i].number = j;
3419
3420
    /* Now, actually use the data we have gathered. */
3421
0
    for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++)
3422
0
    {
3423
0
      if (doc->hint_shared[i].number >= 0 && doc->hint_shared[i].number < max_object_num)
3424
0
        doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset;
3425
0
    }
3426
0
    for (i = 0; i < doc->linear_page_count; i++)
3427
0
    {
3428
0
      if (doc->hint_page[i].number >= 0 && doc->hint_page[i].number < max_object_num)
3429
0
        doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset;
3430
0
    }
3431
0
  }
3432
0
  fz_always(ctx)
3433
0
  {
3434
0
    fz_drop_stream(ctx, stream);
3435
0
  }
3436
0
  fz_catch(ctx)
3437
0
  {
3438
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
3439
    /* Don't try to load hints again */
3440
0
    doc->hints_loaded = 1;
3441
    /* We won't use the linearized object anymore. */
3442
0
    doc->file_reading_linearly = 0;
3443
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
3444
    /* Any other error becomes a TRYLATER */
3445
0
    fz_report_error(ctx);
3446
0
    fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
3447
0
  }
3448
0
  doc->hints_loaded = 1;
3449
0
}
3450
3451
static void
3452
pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
3453
0
{
3454
0
  pdf_lexbuf *buf = &doc->lexbuf.base;
3455
0
  int64_t curr_pos;
3456
3457
0
  curr_pos = fz_tell(ctx, doc->file);
3458
0
  fz_seek(ctx, doc->file, doc->hint_object_offset, SEEK_SET);
3459
0
  fz_try(ctx)
3460
0
  {
3461
0
    while (1)
3462
0
    {
3463
0
      pdf_obj *page = NULL;
3464
0
      int num, tok;
3465
3466
0
      tok = pdf_lex(ctx, doc->file, buf);
3467
0
      if (tok != PDF_TOK_INT)
3468
0
        break;
3469
0
      num = buf->i;
3470
0
      tok = pdf_lex(ctx, doc->file, buf);
3471
0
      if (tok != PDF_TOK_INT)
3472
0
        break;
3473
      /* Ignore gen = buf->i */
3474
0
      tok = pdf_lex(ctx, doc->file, buf);
3475
0
      if (tok != PDF_TOK_OBJ)
3476
0
        break;
3477
0
      (void)pdf_repair_obj(ctx, doc, buf, NULL, NULL, NULL, NULL, &page, NULL, NULL);
3478
0
      pdf_load_hints(ctx, doc, num);
3479
0
    }
3480
0
  }
3481
0
  fz_always(ctx)
3482
0
  {
3483
0
    fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
3484
0
  }
3485
0
  fz_catch(ctx)
3486
0
  {
3487
0
    fz_rethrow(ctx);
3488
0
  }
3489
0
}
3490
3491
/*
	Advance progressive (linearized) loading far enough to obtain page
	'pagenum', and return doc->linear_page_refs[pagenum].

	Objects are read from doc->linear_pos until pdf_obj_read reports
	end of file; at that point the xref is loaded and the page tree is
	checked. The file position on entry is restored on every exit path.

	Throws FZ_ERROR_ARGUMENT for an out-of-range page number and
	FZ_ERROR_FORMAT if the page tree is missing. A FZ_ERROR_TRYLATER
	raised while reading is rethrown only when the requested page is
	still unavailable; otherwise it is reported and swallowed. All
	other errors are rethrown.
*/
pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum)
{
	/* 64-bit, like the fz_tell() result stored by pdf_load_hint_object;
	 * an int would truncate positions in large files. */
	int64_t curr_pos;
	pdf_obj *page = NULL;

	/* Validate the index before anything is asked to use it. */
	if (pagenum < 0 || pagenum >= doc->linear_page_count)
		fz_throw(ctx, FZ_ERROR_ARGUMENT, "page load out of range (%d of %d)", pagenum, doc->linear_page_count);

	pdf_load_hinted_page(ctx, doc, pagenum);

	if (doc->linear_pos == doc->file_length)
		return doc->linear_page_refs[pagenum];

	/* Only load hints once, and then only after we have got page 0 */
	if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset)
	{
		/* Found hint object */
		pdf_load_hint_object(ctx, doc);
	}

	DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
	curr_pos = fz_tell(ctx, doc->file);

	fz_var(page);

	fz_try(ctx)
	{
		pdf_obj *catalog;
		pdf_obj *pages;
		int eof;

		/* Consume objects until the reader reports end of file. The
		 * object handed back is not needed here, so release it at
		 * once. */
		do
		{
			int num;
			eof = pdf_obj_read(ctx, doc, &doc->linear_pos, &num, &page);
			pdf_drop_obj(ctx, page);
			page = NULL;
		}
		while (!eof);

		/* Everything has been read: switch over to the real xref. */
		doc->linear_pos = doc->file_length;
		pdf_load_xref(ctx, doc);
		catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
		pages = pdf_dict_get(ctx, catalog, PDF_NAME(Pages));

		if (!pdf_is_dict(ctx, pages))
			fz_throw(ctx, FZ_ERROR_FORMAT, "missing page tree");
	}
	fz_always(ctx)
	{
		fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, page);
		if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
		{
			if (doc->linear_page_refs[pagenum] == NULL)
			{
				/* Still not got a page */
				fz_rethrow(ctx);
			}
			// TODO: should we really swallow this error?
			fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
			fz_report_error(ctx);
		}
		else
			fz_rethrow(ctx);
	}

	return doc->linear_page_refs[pagenum];
}
3565
3566
/*
	Down-cast an fz_document to pdf_document.

	Returns NULL for a NULL pointer or for a document whose count_pages
	hook is not pdf_count_pages_imp.
*/
pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr)
{
	if (ptr == NULL)
		return NULL;
	if (ptr->count_pages != pdf_count_pages_imp)
		return NULL;
	return (pdf_document *)ptr;
}
3570
3571
/*
	Down-cast an fz_page to pdf_page.

	Returns NULL for a NULL pointer or for a page whose bound_page hook
	is not pdf_bound_page.
*/
pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *ptr)
{
	if (ptr == NULL)
		return NULL;
	if (ptr->bound_page != (fz_page_bound_page_fn*)pdf_bound_page)
		return NULL;
	return (pdf_page *)ptr;
}
3575
3576
/* Same result as pdf_document_from_fz_document for 'doc'. */
pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc)
{
	pdf_document *pdf = pdf_document_from_fz_document(ctx, doc);

	return pdf;
}
3580
3581
pdf_obj *
3582
pdf_add_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
3583
6.60k
{
3584
6.60k
  pdf_document *orig_doc;
3585
6.60k
  int num;
3586
3587
6.60k
  orig_doc = pdf_get_bound_document(ctx, obj);
3588
6.60k
  if (orig_doc && orig_doc != doc)
3589
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "tried to add an object belonging to a different document");
3590
6.60k
  if (pdf_is_indirect(ctx, obj))
3591
0
    return pdf_keep_obj(ctx, obj);
3592
6.60k
  num = pdf_create_object(ctx, doc);
3593
6.60k
  pdf_update_object(ctx, doc, num, obj);
3594
6.60k
  return pdf_new_indirect(ctx, doc, num, 0);
3595
6.60k
}
3596
3597
pdf_obj *
3598
pdf_add_object_drop(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
3599
933
{
3600
933
  pdf_obj *ind = NULL;
3601
1.86k
  fz_try(ctx)
3602
1.86k
    ind = pdf_add_object(ctx, doc, obj);
3603
1.86k
  fz_always(ctx)
3604
933
    pdf_drop_obj(ctx, obj);
3605
933
  fz_catch(ctx)
3606
0
    fz_rethrow(ctx);
3607
933
  return ind;
3608
933
}
3609
3610
pdf_obj *
3611
pdf_add_new_dict(fz_context *ctx, pdf_document *doc, int initial)
3612
888
{
3613
888
  return pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, initial));
3614
888
}
3615
3616
pdf_obj *
3617
pdf_add_new_array(fz_context *ctx, pdf_document *doc, int initial)
3618
45
{
3619
45
  return pdf_add_object_drop(ctx, doc, pdf_new_array(ctx, doc, initial));
3620
45
}
3621
3622
pdf_obj *
3623
pdf_add_stream(fz_context *ctx, pdf_document *doc, fz_buffer *buf, pdf_obj *obj, int compressed)
3624
5.58k
{
3625
5.58k
  pdf_obj *ind;
3626
5.58k
  if (!obj)
3627
45
    ind = pdf_add_new_dict(ctx, doc, 4);
3628
5.53k
  else
3629
5.53k
    ind = pdf_add_object(ctx, doc, obj);
3630
11.1k
  fz_try(ctx)
3631
11.1k
    pdf_update_stream(ctx, doc, ind, buf, compressed);
3632
11.1k
  fz_catch(ctx)
3633
0
  {
3634
0
    pdf_drop_obj(ctx, ind);
3635
0
    fz_rethrow(ctx);
3636
0
  }
3637
5.58k
  return ind;
3638
5.58k
}
3639
3640
/*
	Create an empty document with no backing file.

	The new document gets a trailer with Size 3 and a Root catalog
	whose Pages node has Count 0 and an empty Kids array. On failure
	the trailer and the document are released and the error rethrown.
*/
pdf_document *pdf_create_document(fz_context *ctx)
{
	pdf_obj *trailer = NULL;
	pdf_obj *catalog;
	pdf_obj *page_tree;
	pdf_document *doc;

	fz_var(trailer);

	doc = pdf_new_document(ctx, NULL);
	fz_try(ctx)
	{
		/* Start from an empty xref state before creating entry 0. */
		doc->file_size = 0;
		doc->startxref = 0;
		doc->xref_base = 0;
		doc->num_xref_sections = 0;
		doc->num_incremental_sections = 0;
		doc->disallow_new_increments = 0;
		pdf_get_populating_xref_entry(ctx, doc, 0);

		trailer = pdf_new_dict(ctx, doc, 2);
		pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), 3);

		catalog = pdf_add_new_dict(ctx, doc, 2);
		pdf_dict_put_drop(ctx, trailer, PDF_NAME(Root), catalog);
		pdf_dict_put(ctx, catalog, PDF_NAME(Type), PDF_NAME(Catalog));

		page_tree = pdf_add_new_dict(ctx, doc, 3);
		pdf_dict_put_drop(ctx, catalog, PDF_NAME(Pages), page_tree);
		pdf_dict_put(ctx, page_tree, PDF_NAME(Type), PDF_NAME(Pages));
		pdf_dict_put_int(ctx, page_tree, PDF_NAME(Count), 0);
		pdf_dict_put_array(ctx, page_tree, PDF_NAME(Kids), 1);

		/* Set the trailer of the final xref section. */
		doc->xref_sections[0].trailer = trailer;
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, trailer);
		fz_drop_document(ctx, &doc->super);
		fz_rethrow(ctx);
	}

	return doc;
}
3680
3681
/* NULL-terminated list of file extensions offered by the PDF handler. */
static const char *pdf_extensions[] = { "pdf", "fdf", "pclm", "ai", NULL };
3689
3690
/* NULL-terminated list of MIME types offered by the PDF handler. */
static const char *pdf_mimetypes[] = { "application/pdf", "application/PCLm", NULL };
3696
3697
static int
3698
pdf_recognize_doc_content(fz_context *ctx, fz_stream *stream, fz_archive *dir)
3699
15.6k
{
3700
15.6k
  const char *match = "%PDF-";
3701
15.6k
  const char *match2 = "%FDF-";
3702
15.6k
  int pos = 0;
3703
15.6k
  int n = 4096+5;
3704
15.6k
  int c;
3705
3706
15.6k
  if (stream == NULL)
3707
0
    return 0;
3708
3709
15.6k
  do
3710
12.5M
  {
3711
12.5M
    c = fz_read_byte(ctx, stream);
3712
12.5M
    if (c == EOF)
3713
6.28k
      return 0;
3714
12.5M
    if (c == match[pos] || c == match2[pos])
3715
68.5k
    {
3716
68.5k
      pos++;
3717
68.5k
      if (pos == 5)
3718
8.35k
        return 100;
3719
68.5k
    }
3720
12.4M
    else
3721
12.4M
    {
3722
      /* Restart matching, but recheck c against the start. */
3723
12.4M
      pos = (c == match[0]);
3724
12.4M
    }
3725
12.5M
  }
3726
12.4M
  while (--n > 0);
3727
3728
1.04k
  return 0;
3729
15.6k
}
3730
3731
static fz_document *
3732
open_document(fz_context *ctx, fz_stream *file, fz_stream *accel, fz_archive *zip)
3733
11.8k
{
3734
11.8k
  if (file == NULL)
3735
0
    return NULL;
3736
11.8k
  return (fz_document *)pdf_open_document_with_stream(ctx, file);
3737
11.8k
}
3738
3739
fz_document_handler pdf_document_handler =
3740
{
3741
  NULL,
3742
  open_document,
3743
  pdf_extensions,
3744
  pdf_mimetypes,
3745
  pdf_recognize_doc_content
3746
};
3747
3748
/*
 * Set the 'marked' flag on every xref entry, in every section and
 * subsection, that currently holds an object.
 */
void pdf_mark_xref(fz_context *ctx, pdf_document *doc)
{
  int sec;

  for (sec = 0; sec < doc->num_xref_sections; sec++)
  {
    pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

    while (sub != NULL)
    {
      int k;

      for (k = 0; k < sub->len; k++)
      {
        if (sub->table[k].obj)
          sub->table[k].marked = 1;
      }
      sub = sub->next;
    }
  }
}
3770
3771
/*
 * Drop every object held by the xref whose only reference is the xref
 * itself (reference count of exactly 1). Entries that carry an updated
 * stream buffer are left alone.
 */
void pdf_clear_xref(fz_context *ctx, pdf_document *doc)
{
  int sec;

  for (sec = 0; sec < doc->num_xref_sections; sec++)
  {
    pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

    while (sub != NULL)
    {
      int k;

      for (k = 0; k < sub->len; k++)
      {
        pdf_xref_entry *slot = &sub->table[k];

        /* We cannot drop objects if the stream buffer has
         * been updated. */
        if (slot->obj == NULL || slot->stm_buf != NULL)
          continue;
        if (pdf_obj_refs(ctx, slot->obj) != 1)
          continue;

        pdf_drop_obj(ctx, slot->obj);
        slot->obj = NULL;
      }
      sub = sub->next;
    }
  }
}
3799
3800
/*
 * Like pdf_clear_xref, but entries whose 'marked' flag is set are kept:
 * only unmarked objects with a reference count of exactly 1 and no
 * updated stream buffer are dropped.
 */
void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc)
{
  int sec;

  for (sec = 0; sec < doc->num_xref_sections; sec++)
  {
    pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

    while (sub != NULL)
    {
      int k;

      for (k = 0; k < sub->len; k++)
      {
        pdf_xref_entry *slot = &sub->table[k];

        /* We cannot drop objects if the stream buffer has
         * been updated. */
        if (slot->obj == NULL || slot->stm_buf != NULL)
          continue;
        if (slot->marked || pdf_obj_refs(ctx, slot->obj) != 1)
          continue;

        pdf_drop_obj(ctx, slot->obj);
        slot->obj = NULL;
      }
      sub = sub->next;
    }
  }
}
3829
3830
int
3831
pdf_count_versions(fz_context *ctx, pdf_document *doc)
3832
0
{
3833
0
  return doc->num_xref_sections-doc->num_incremental_sections-doc->has_linearization_object;
3834
0
}
3835
3836
int
3837
pdf_count_unsaved_versions(fz_context *ctx, pdf_document *doc)
3838
0
{
3839
0
  return doc->num_incremental_sections;
3840
0
}
3841
3842
int
3843
pdf_doc_was_linearized(fz_context *ctx, pdf_document *doc)
3844
0
{
3845
0
  return doc->has_linearization_object;
3846
0
}
3847
3848
/*
 * Return 1 if object number i has an xref entry with a non-zero type
 * in any xref section at or after doc->xref_base, 0 otherwise.
 *
 * Throws FZ_ERROR_ARGUMENT for a negative object number.
 */
static int pdf_obj_exists(fz_context *ctx, pdf_document *doc, int i)
{
  pdf_xref_subsec *sub;
  int j;

  if (i < 0)
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Negative object number requested");

  /* Use xref_index only as a starting hint, and only for indices
   * strictly below max_xref_len. The previous '<=' test allowed
   * i == max_xref_len to index the array.
   * NOTE(review): this assumes xref_index holds max_xref_len entries
   * - confirm against the code that grows it. Starting from section
   * 0 instead is always safe; it only lengthens the search. */
  if (i < doc->max_xref_len)
    j = doc->xref_index[i];
  else
    j = 0;

  /* We may be accessing an earlier version of the document using xref_base
   * and j may be an index into a later xref section */
  if (doc->xref_base > j)
    j = doc->xref_base;

  /* Find the first xref section where the entry is defined. */
  for (; j < doc->num_xref_sections; j++)
  {
    pdf_xref *xref = &doc->xref_sections[j];

    if (i < xref->num_objects)
    {
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
      {
        if (i < sub->start || i >= sub->start + sub->len)
          continue;

        if (sub->table[i - sub->start].type)
          return 1;
      }
    }
  }

  return 0;
}
3886
3887
/* Bit flags OR-ed into pdf_changes.obj_changes[] per object number. */
enum {
  FIELD_CHANGED = 1 << 0,
  FIELD_CHANGE_VALID = 1 << 1,
  FIELD_CHANGE_INVALID = 1 << 2
};
3892
3893
/*
 * Per-object change flags (FIELD_* bits), indexed by object number.
 * obj_changes is declared with one element; code in this file indexes
 * it by object number, so the allocation is presumably sized for
 * num_obj entries - TODO confirm at the allocation site.
 */
typedef struct
{
  int num_obj;
  int obj_changes[1];
} pdf_changes;
3898
3899
static int
3900
check_unchanged_between(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *nobj, pdf_obj *oobj)
3901
0
{
3902
0
  int marked = 0;
3903
0
  int changed = 0;
3904
3905
  /* Trivially identical => trivially unchanged. */
3906
0
  if (nobj == oobj)
3907
0
    return 0;
3908
3909
  /* Strictly speaking we shouldn't need to call fz_var,
3910
   * but I suspect static analysis tools are not smart
3911
   * enough to figure that out. */
3912
0
  fz_var(marked);
3913
3914
0
  if (pdf_is_indirect(ctx, nobj))
3915
0
  {
3916
0
    int o_xref_base = doc->xref_base;
3917
3918
    /* Both must be indirect if one is. */
3919
0
    if (!pdf_is_indirect(ctx, oobj))
3920
0
    {
3921
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3922
0
      return 1;
3923
0
    }
3924
3925
    /* Handle recursing back into ourselves. */
3926
0
    if (pdf_obj_marked(ctx, nobj))
3927
0
    {
3928
0
      if (pdf_obj_marked(ctx, oobj))
3929
0
        return 0;
3930
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3931
0
      return 1;
3932
0
    }
3933
0
    else if (pdf_obj_marked(ctx, oobj))
3934
0
    {
3935
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3936
0
      return 1;
3937
0
    }
3938
3939
0
    nobj = pdf_resolve_indirect_chain(ctx, nobj);
3940
0
    doc->xref_base = o_xref_base+1;
3941
0
    fz_try(ctx)
3942
0
    {
3943
0
      oobj = pdf_resolve_indirect_chain(ctx, oobj);
3944
0
      if (oobj != nobj)
3945
0
      {
3946
        /* Different objects, so lock them */
3947
0
        if (!pdf_obj_marked(ctx, nobj) && !pdf_obj_marked(ctx, oobj))
3948
0
        {
3949
0
          (void)pdf_mark_obj(ctx, nobj);
3950
0
          (void)pdf_mark_obj(ctx, oobj);
3951
0
          marked = 1;
3952
0
        }
3953
0
      }
3954
0
    }
3955
0
    fz_always(ctx)
3956
0
      doc->xref_base = o_xref_base;
3957
0
    fz_catch(ctx)
3958
0
      fz_rethrow(ctx);
3959
3960
0
    if (nobj == oobj)
3961
0
      return 0; /* Trivially identical */
3962
0
  }
3963
3964
0
  fz_var(changed);
3965
3966
0
  fz_try(ctx)
3967
0
  {
3968
0
    if (pdf_is_dict(ctx, nobj))
3969
0
    {
3970
0
      int i, n = pdf_dict_len(ctx, nobj);
3971
3972
0
      if (!pdf_is_dict(ctx, oobj) || n != pdf_dict_len(ctx, oobj))
3973
0
      {
3974
0
change_found:
3975
0
        changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3976
0
        changed = 1;
3977
0
        break;
3978
0
      }
3979
3980
0
      for (i = 0; i < n; i++)
3981
0
      {
3982
0
        pdf_obj *key = pdf_dict_get_key(ctx, nobj, i);
3983
0
        pdf_obj *nval = pdf_dict_get(ctx, nobj, key);
3984
0
        pdf_obj *oval = pdf_dict_get(ctx, oobj, key);
3985
3986
0
        changed |= check_unchanged_between(ctx, doc, changes, nval, oval);
3987
0
      }
3988
0
    }
3989
0
    else if (pdf_is_array(ctx, nobj))
3990
0
    {
3991
0
      int i, n = pdf_array_len(ctx, nobj);
3992
3993
0
      if (!pdf_is_array(ctx, oobj) || n != pdf_array_len(ctx, oobj))
3994
0
        goto change_found;
3995
3996
0
      for (i = 0; i < n; i++)
3997
0
      {
3998
0
        pdf_obj *nval = pdf_array_get(ctx, nobj, i);
3999
0
        pdf_obj *oval = pdf_array_get(ctx, oobj, i);
4000
4001
0
        changed |= check_unchanged_between(ctx, doc, changes, nval, oval);
4002
0
      }
4003
0
    }
4004
0
    else if (pdf_objcmp(ctx, nobj, oobj))
4005
0
      goto change_found;
4006
0
  }
4007
0
  fz_always(ctx)
4008
0
  {
4009
0
    if (marked)
4010
0
    {
4011
0
      pdf_unmark_obj(ctx, nobj);
4012
0
      pdf_unmark_obj(ctx, oobj);
4013
0
    }
4014
0
  }
4015
0
  fz_catch(ctx)
4016
0
    fz_rethrow(ctx);
4017
4018
0
  return changed;
4019
0
}
4020
4021
/* Growable array of heap-allocated strings. */
typedef struct
{
  int max; /* allocated slots in list */
  int len; /* slots currently in use */
  char **list; /* the strings themselves */
} char_list;
4027
4028
/* This structure is used to hold the definition of which fields
4029
 * are locked. */
4030
struct pdf_locked_fields
4031
{
4032
  int p;
4033
  int all;
4034
  char_list includes;
4035
  char_list excludes;
4036
};
4037
4038
static void
4039
free_char_list(fz_context *ctx, char_list *c)
4040
0
{
4041
0
  int i;
4042
4043
0
  if (c == NULL)
4044
0
    return;
4045
4046
0
  for (i = c->len-1; i >= 0; i--)
4047
0
    fz_free(ctx, c->list[i]);
4048
0
  fz_free(ctx, c->list);
4049
0
  c->len = 0;
4050
0
  c->max = 0;
4051
0
}
4052
4053
void
4054
pdf_drop_locked_fields(fz_context *ctx, pdf_locked_fields *fl)
4055
0
{
4056
0
  if (fl == NULL)
4057
0
    return;
4058
4059
0
  free_char_list(ctx, &fl->includes);
4060
0
  free_char_list(ctx, &fl->excludes);
4061
0
  fz_free(ctx, fl);
4062
0
}
4063
4064
static void
4065
char_list_append(fz_context *ctx, char_list *list, const char *s)
4066
0
{
4067
0
  if (list->len == list->max)
4068
0
  {
4069
0
    int n = list->max * 2;
4070
0
    if (n == 0) n = 4;
4071
4072
0
    list->list = fz_realloc_array(ctx, list->list, n, char *);
4073
0
    list->max = n;
4074
0
  }
4075
0
  list->list[list->len] = fz_strdup(ctx, s);
4076
0
  list->len++;
4077
0
}
4078
4079
int
4080
pdf_is_field_locked(fz_context *ctx, pdf_locked_fields *locked, const char *name)
4081
0
{
4082
0
  int i;
4083
4084
0
  if (locked->p == 1)
4085
0
  {
4086
    /* Permissions were set, and say that field changes are not to be allowed. */
4087
0
    return 1; /* Locked */
4088
0
  }
4089
4090
0
  if(locked->all)
4091
0
  {
4092
    /* The only way we might not be unlocked is if
4093
     * we are listed in the excludes. */
4094
0
    for (i = 0; i < locked->excludes.len; i++)
4095
0
      if (!strcmp(locked->excludes.list[i], name))
4096
0
        return 0;
4097
0
    return 1;
4098
0
  }
4099
4100
  /* The only way we can be locked is for us to be in the includes. */
4101
0
  for (i = 0; i < locked->includes.len; i++)
4102
0
    if (strcmp(locked->includes.list[i], name) == 0)
4103
0
      return 1;
4104
4105
  /* Anything else is unlocked */
4106
0
  return 0;
4107
0
}
4108
4109
/* Unfortunately, in C, there is no legal way to define a function
4110
 * type that returns itself. We therefore have to use a struct
4111
 * wrapper. */
4112
typedef struct filter_wrap
4113
{
4114
  struct filter_wrap (*func)(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
4115
} filter_wrap;
4116
4117
typedef struct filter_wrap (*filter_fn)(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
4118
4119
0
#define RETURN_FILTER(f) { filter_wrap rf; rf.func = (f); return rf; }
4120
4121
/* Leaf filter: never yields a filter for anything beneath it. */
static filter_wrap filter_simple(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_wrap next;

  next.func = NULL;
  return next;
}
4125
4126
/*
 * Filter for a TransformParams dictionary: the keys listed below are
 * followed with filter_simple, every other key is not followed.
 * (The key list previously tested V and P twice; each key is now
 * tested once, which yields the same result.)
 */
static filter_wrap filter_transformparams(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  if (pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
    pdf_name_eq(ctx, key, PDF_NAME(P)) ||
    pdf_name_eq(ctx, key, PDF_NAME(V)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Document)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Msg)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Annots)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Form)) ||
    pdf_name_eq(ctx, key, PDF_NAME(FormEx)) ||
    pdf_name_eq(ctx, key, PDF_NAME(EF)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Action)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Fields)))
    RETURN_FILTER(&filter_simple);
  RETURN_FILTER(NULL);
}
4144
4145
/*
 * Filter for the entries of a signature Reference: digest/type keys
 * are followed with filter_simple, TransformParams with
 * filter_transformparams, and nothing else is followed.
 */
static filter_wrap filter_reference(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  pdf_obj *const leaf_keys[] = {
    PDF_NAME(Type),
    PDF_NAME(TransformMethod),
    PDF_NAME(DigestMethod),
    PDF_NAME(DigestValue),
    PDF_NAME(DigestLocation)
  };
  const int leaf_count = (int)(sizeof leaf_keys / sizeof leaf_keys[0]);
  int k;

  for (k = 0; k < leaf_count; k++)
  {
    if (pdf_name_eq(ctx, key, leaf_keys[k]))
      RETURN_FILTER(&filter_simple);
  }
  if (pdf_name_eq(ctx, key, PDF_NAME(TransformParams)))
    RETURN_FILTER(&filter_transformparams);
  RETURN_FILTER(NULL);
}
4157
4158
/*
 * Filter for the sub-dictionaries reached through filter_prop_build:
 * the listed keys are followed with filter_simple, others are not
 * followed.
 */
static filter_wrap filter_prop_build_sub(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  pdf_obj *const leaf_keys[] = {
    PDF_NAME(Name),
    PDF_NAME(Date),
    PDF_NAME(R),
    PDF_NAME(PreRelease),
    PDF_NAME(OS),
    PDF_NAME(NonEFontNoWarn),
    PDF_NAME(TrustedMode),
    PDF_NAME(V),
    PDF_NAME(REx),
    PDF_NAME(Preview)
  };
  const int leaf_count = (int)(sizeof leaf_keys / sizeof leaf_keys[0]);
  int k;

  for (k = 0; k < leaf_count; k++)
  {
    if (pdf_name_eq(ctx, key, leaf_keys[k]))
      RETURN_FILTER(&filter_simple);
  }
  RETURN_FILTER(NULL);
}
4173
4174
/*
 * Filter for a Prop_Build dictionary: Filter, PubSec, App and SigQ are
 * followed with filter_prop_build_sub; other keys are not followed.
 */
static filter_wrap filter_prop_build(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  int follow =
    pdf_name_eq(ctx, key, PDF_NAME(Filter)) ||
    pdf_name_eq(ctx, key, PDF_NAME(PubSec)) ||
    pdf_name_eq(ctx, key, PDF_NAME(App)) ||
    pdf_name_eq(ctx, key, PDF_NAME(SigQ));

  if (follow)
    RETURN_FILTER(&filter_prop_build_sub);
  RETURN_FILTER(NULL);
}
4183
4184
/*
 * Filter for a field's V value.
 *
 * A stream's Length and the listed signature-dictionary keys are
 * followed with filter_simple; Reference goes to filter_reference and
 * Prop_Build to filter_prop_build. Anything else is not followed.
 */
static filter_wrap filter_v(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  pdf_obj *const sig_keys[] = {
    PDF_NAME(Type),
    PDF_NAME(Filter),
    PDF_NAME(SubFilter),
    PDF_NAME(Contents),
    PDF_NAME(Cert),
    PDF_NAME(ByteRange),
    PDF_NAME(Changes),
    PDF_NAME(Name),
    PDF_NAME(M),
    PDF_NAME(Location),
    PDF_NAME(Reason),
    PDF_NAME(ContactInfo),
    PDF_NAME(R),
    PDF_NAME(V),
    PDF_NAME(Prop_AuthTime),
    PDF_NAME(Prop_AuthType)
  };
  const int sig_count = (int)(sizeof sig_keys / sizeof sig_keys[0]);
  int k;

  /* Text can point to a stream object */
  if (pdf_name_eq(ctx, key, PDF_NAME(Length)) && pdf_is_stream(ctx, dict))
    RETURN_FILTER(&filter_simple);

  /* Sigs point to a dict. */
  for (k = 0; k < sig_count; k++)
  {
    if (pdf_name_eq(ctx, key, sig_keys[k]))
      RETURN_FILTER(&filter_simple);
  }

  if (pdf_name_eq(ctx, key, PDF_NAME(Reference)))
    RETURN_FILTER(filter_reference);
  if (pdf_name_eq(ctx, key, PDF_NAME(Prop_Build)))
    RETURN_FILTER(filter_prop_build);
  RETURN_FILTER(NULL);
}
4213
4214
static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
4215
4216
/* Every entry of an XObject resource dictionary is followed with
 * filter_appearance. */
static filter_wrap filter_xobject_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_wrap next;

  /* FIXME: Infinite recursion possible here? */
  next.func = &filter_appearance;
  return next;
}
4221
4222
/* Filter for a single font dictionary: no key is followed. */
static filter_wrap filter_font(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_wrap next;

  /* In the example I've seen the /Name field was dropped, so we'll allow
   * local changes, but none that follow an indirection. */
  next.func = NULL;
  return next;
}
4228
4229
/* FIXME: One idea here is to make filter_font_list and filter_xobject_list
4230
 * only accept NEW objects as changes. Will think about this. */
4231
static filter_wrap filter_font_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
4232
0
{
4233
0
  RETURN_FILTER(&filter_font);
4234
0
}
4235
4236
/*
 * Filter for a Resources dictionary: XObject is followed with
 * filter_xobject_list, Font with filter_font_list, nothing else.
 */
static filter_wrap filter_resources(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_wrap next;

  if (pdf_name_eq(ctx, key, PDF_NAME(XObject)))
    next.func = &filter_xobject_list;
  else if (pdf_name_eq(ctx, key, PDF_NAME(Font)))
    next.func = &filter_font_list;
  else
    next.func = NULL;

  return next;
}
4244
4245
/* Filter for an appearance stream dictionary: only Resources is
 * followed (with filter_resources). */
static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_wrap next;

  next.func = NULL;
  if (pdf_name_eq(ctx, key, PDF_NAME(Resources)))
    next.func = &filter_resources;

  return next;
}
4251
4252
/*
 * Filter for an AP dictionary: the N entry is followed with
 * filter_appearance, but only when its value is a stream.
 */
static filter_wrap filter_ap(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_wrap next;

  next.func = NULL;
  /* Just the /N entry for now. May need to add more later. */
  if (pdf_name_eq(ctx, key, PDF_NAME(N)) && pdf_is_stream(ctx, pdf_dict_get(ctx, dict, key)))
    next.func = &filter_appearance;

  return next;
}
4259
4260
/* Filter for XFA data: when dict is a stream, every key is followed
 * with filter_simple; otherwise nothing is followed. */
static filter_wrap filter_xfa(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_wrap next;

  /* Text can point to a stream object */
  if (pdf_is_stream(ctx, dict))
    next.func = &filter_simple;
  else
    next.func = NULL;

  return next;
}
4267
4268
static void
4269
filter_changes_accepted(fz_context *ctx, pdf_changes *changes, pdf_obj *obj, filter_fn filter)
4270
0
{
4271
0
  int obj_num;
4272
4273
0
  if (obj == NULL || pdf_obj_marked(ctx, obj))
4274
0
    return;
4275
4276
0
  obj_num = pdf_to_num(ctx, obj);
4277
4278
0
  fz_try(ctx)
4279
0
  {
4280
0
    if (obj_num != 0)
4281
0
    {
4282
0
      (void)pdf_mark_obj(ctx, obj);
4283
0
      changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4284
0
    }
4285
0
    if (filter == NULL)
4286
0
      break;
4287
0
    if (pdf_is_dict(ctx, obj))
4288
0
    {
4289
0
      int i, n = pdf_dict_len(ctx, obj);
4290
4291
0
      for (i = 0; i < n; i++)
4292
0
      {
4293
0
        pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
4294
0
        pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
4295
0
        filter_fn f = (filter(ctx, obj, key)).func;
4296
0
        if (f != NULL)
4297
0
          filter_changes_accepted(ctx, changes, val, f);
4298
0
      }
4299
0
    }
4300
0
    else if (pdf_is_array(ctx, obj))
4301
0
    {
4302
0
      int i, n = pdf_array_len(ctx, obj);
4303
4304
0
      for (i = 0; i < n; i++)
4305
0
      {
4306
0
        pdf_obj *val = pdf_array_get(ctx, obj, i);
4307
0
        filter_changes_accepted(ctx, changes, val, filter);
4308
0
      }
4309
0
    }
4310
0
  }
4311
0
  fz_always(ctx)
4312
0
    if (obj_num != 0)
4313
0
      pdf_unmark_obj(ctx, obj);
4314
0
  fz_catch(ctx)
4315
0
    fz_rethrow(ctx);
4316
0
}
4317
4318
static void
4319
check_field(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *obj, pdf_locked_fields *locked, const char *name_prefix, pdf_obj *new_v, pdf_obj *old_v)
4320
0
{
4321
0
  pdf_obj *old_obj, *new_obj, *n_v, *o_v;
4322
0
  int o_xref_base;
4323
0
  int obj_num;
4324
0
  char *field_name = NULL;
4325
4326
  /* All fields MUST be indirections, either in the Fields array
4327
   * or AcroForms, or in the Kids array of other Fields. */
4328
0
  if (!pdf_is_indirect(ctx, obj))
4329
0
    return;
4330
4331
0
  obj_num = pdf_to_num(ctx, obj);
4332
0
  o_xref_base = doc->xref_base;
4333
0
  new_obj = pdf_resolve_indirect_chain(ctx, obj);
4334
4335
  /* Similarly, all fields must be dicts */
4336
0
  if (!pdf_is_dict(ctx, new_obj))
4337
0
    return;
4338
4339
0
  if (pdf_obj_marked(ctx, obj))
4340
0
    return;
4341
4342
0
  fz_var(field_name);
4343
4344
0
  fz_try(ctx)
4345
0
  {
4346
0
    int i, len;
4347
0
    const char *name;
4348
0
    size_t n;
4349
0
    pdf_obj *t;
4350
0
    int is_locked;
4351
4352
0
    (void)pdf_mark_obj(ctx, obj);
4353
4354
    /* Do this within the try, so we can catch any problems */
4355
0
    doc->xref_base = o_xref_base+1;
4356
0
    old_obj = pdf_resolve_indirect_chain(ctx, obj);
4357
4358
0
    t = pdf_dict_get(ctx, old_obj, PDF_NAME(T));
4359
0
    if (t != NULL)
4360
0
    {
4361
0
      name = pdf_dict_get_text_string(ctx, old_obj, PDF_NAME(T));
4362
0
      n = strlen(name)+1;
4363
0
      if (*name_prefix)
4364
0
        n += 1 + strlen(name_prefix);
4365
0
      field_name = fz_malloc(ctx, n);
4366
0
      if (*name_prefix)
4367
0
      {
4368
0
        strcpy(field_name, name_prefix);
4369
0
        strcat(field_name, ".");
4370
0
      }
4371
0
      else
4372
0
        *field_name = 0;
4373
0
      strcat(field_name, name);
4374
0
      name_prefix = field_name;
4375
0
    }
4376
4377
0
    doc->xref_base = o_xref_base;
4378
4379
0
    if (!pdf_is_dict(ctx, old_obj))
4380
0
      break;
4381
4382
    /* Check V explicitly, allowing for it being inherited. */
4383
0
    n_v = pdf_dict_get(ctx, new_obj, PDF_NAME(V));
4384
0
    if (n_v == NULL)
4385
0
      n_v = new_v;
4386
0
    o_v = pdf_dict_get(ctx, old_obj, PDF_NAME(V));
4387
0
    if (o_v == NULL)
4388
0
      o_v = old_v;
4389
4390
0
    is_locked = pdf_is_field_locked(ctx, locked, name_prefix);
4391
0
    if (pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Type)), PDF_NAME(Annot)) &&
4392
0
      pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Subtype)), PDF_NAME(Widget)))
4393
0
    {
4394
0
      if (is_locked)
4395
0
      {
4396
        /* If locked, V must not change! */
4397
0
        if (check_unchanged_between(ctx, doc, changes, n_v, o_v))
4398
0
          changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID;
4399
0
      }
4400
0
      else
4401
0
      {
4402
        /* If not locked, V can change to be filled in! */
4403
0
        filter_changes_accepted(ctx, changes, n_v, &filter_v);
4404
0
        changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4405
0
      }
4406
0
    }
4407
4408
    /* Check all the fields in the new object are
4409
     * either the same as the old object, or are
4410
     * expected changes. */
4411
0
    len = pdf_dict_len(ctx, new_obj);
4412
0
    for (i = 0; i < len; i++)
4413
0
    {
4414
0
      pdf_obj *key = pdf_dict_get_key(ctx, new_obj, i);
4415
0
      pdf_obj *nval = pdf_dict_get(ctx, new_obj, key);
4416
0
      pdf_obj *oval = pdf_dict_get(ctx, old_obj, key);
4417
4418
      /* Kids arrays shouldn't change. */
4419
0
      if (pdf_name_eq(ctx, key, PDF_NAME(Kids)))
4420
0
      {
4421
0
        int j, m;
4422
4423
        /* Kids must be an array. If it's not, count it as a difference. */
4424
0
        if (!pdf_is_array(ctx, nval) || !pdf_is_array(ctx, oval))
4425
0
        {
4426
0
change_found:
4427
0
          changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID;
4428
0
          break;
4429
0
        }
4430
0
        m = pdf_array_len(ctx, nval);
4431
        /* Any change in length counts as a difference */
4432
0
        if (m != pdf_array_len(ctx, oval))
4433
0
          goto change_found;
4434
0
        for (j = 0; j < m; j++)
4435
0
        {
4436
0
          pdf_obj *nkid = pdf_array_get(ctx, nval, j);
4437
0
          pdf_obj *okid = pdf_array_get(ctx, oval, j);
4438
          /* Kids arrays are supposed to all be indirect. If they aren't,
4439
           * count it as a difference. */
4440
0
          if (!pdf_is_indirect(ctx, nkid) || !pdf_is_indirect(ctx, okid))
4441
0
            goto change_found;
4442
          /* For now at least, we'll count any change in number as a difference. */
4443
0
          if (pdf_to_num(ctx, nkid) != pdf_to_num(ctx, okid))
4444
0
            goto change_found;
4445
0
          check_field(ctx, doc, changes, nkid, locked, name_prefix, n_v, o_v);
4446
0
        }
4447
0
      }
4448
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(V)))
4449
0
      {
4450
        /* V is checked above */
4451
0
      }
4452
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(AP)))
4453
0
      {
4454
        /* If we're locked, then nothing can change. If not,
4455
         * we can change to be filled in. */
4456
0
        if (is_locked)
4457
0
          check_unchanged_between(ctx, doc, changes, nval, oval);
4458
0
        else
4459
0
          filter_changes_accepted(ctx, changes, nval, &filter_ap);
4460
0
      }
4461
      /* All other fields can't change */
4462
0
      else
4463
0
        check_unchanged_between(ctx, doc, changes, nval, oval);
4464
0
    }
4465
4466
    /* Now check all the fields in the old object to
4467
     * make sure none were dropped. */
4468
0
    len = pdf_dict_len(ctx, old_obj);
4469
0
    for (i = 0; i < len; i++)
4470
0
    {
4471
0
      pdf_obj *key = pdf_dict_get_key(ctx, old_obj, i);
4472
0
      pdf_obj *nval, *oval;
4473
4474
      /* V is checked above */
4475
0
      if (pdf_name_eq(ctx, key, PDF_NAME(V)))
4476
0
        continue;
4477
4478
0
      nval = pdf_dict_get(ctx, new_obj, key);
4479
0
      oval = pdf_dict_get(ctx, old_obj, key);
4480
4481
0
      if (nval == NULL && oval != NULL)
4482
0
        changes->obj_changes[pdf_to_num(ctx, nval)] |= FIELD_CHANGE_INVALID;
4483
0
    }
4484
0
    changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4485
4486
0
  }
4487
0
  fz_always(ctx)
4488
0
  {
4489
0
    pdf_unmark_obj(ctx, obj);
4490
0
    fz_free(ctx, field_name);
4491
0
    doc->xref_base = o_xref_base;
4492
0
  }
4493
0
  fz_catch(ctx)
4494
0
    fz_rethrow(ctx);
4495
0
}
4496
4497
static int
4498
pdf_obj_changed_in_version(fz_context *ctx, pdf_document *doc, int num, int version)
4499
0
{
4500
0
  if (num < 0 || num > doc->max_xref_len)
4501
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Invalid object number requested");
4502
4503
0
  return version == doc->xref_index[num];
4504
0
}
4505
4506
static void
4507
merge_lock_specification(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *lock)
4508
0
{
4509
0
  pdf_obj *action;
4510
0
  int i, r, w;
4511
4512
0
  if (lock == NULL)
4513
0
    return;
4514
4515
0
  action = pdf_dict_get(ctx, lock, PDF_NAME(Action));
4516
4517
0
  if (pdf_name_eq(ctx, action, PDF_NAME(All)))
4518
0
  {
4519
    /* All fields locked means we don't need any stored
4520
     * includes/excludes. */
4521
0
    fields->all = 1;
4522
0
    free_char_list(ctx, &fields->includes);
4523
0
    free_char_list(ctx, &fields->excludes);
4524
0
  }
4525
0
  else
4526
0
  {
4527
0
    pdf_obj *f = pdf_dict_get(ctx, lock, PDF_NAME(Fields));
4528
0
    int len = pdf_array_len(ctx, f);
4529
4530
0
    if (pdf_name_eq(ctx, action, PDF_NAME(Include)))
4531
0
    {
4532
0
      if (fields->all)
4533
0
      {
4534
        /* Current state = "All except <excludes> are locked".
4535
         * We need to remove <Fields> from <excludes>. */
4536
0
        for (i = 0; i < len; i++)
4537
0
        {
4538
0
          const char *s = pdf_array_get_text_string(ctx, f, i);
4539
0
          int r, w;
4540
4541
0
          for (r = w = 0; r < fields->excludes.len; r++)
4542
0
          {
4543
0
            if (strcmp(s, fields->excludes.list[r]))
4544
0
              fields->excludes.list[w++] = fields->excludes.list[r];
4545
0
          }
4546
0
          fields->excludes.len = w;
4547
0
        }
4548
0
      }
4549
0
      else
4550
0
      {
4551
        /* Current state = <includes> are locked.
4552
         * We need to add <Fields> to <include> (avoiding repetition). */
4553
0
        for (i = 0; i < len; i++)
4554
0
        {
4555
0
          const char *s = pdf_array_get_text_string(ctx, f, i);
4556
4557
0
          for (r = 0; r < fields->includes.len; r++)
4558
0
          {
4559
0
            if (!strcmp(s, fields->includes.list[r]))
4560
0
              break;
4561
0
          }
4562
0
          if (r == fields->includes.len)
4563
0
            char_list_append(ctx, &fields->includes, s);
4564
0
        }
4565
0
      }
4566
0
    }
4567
0
    else if (pdf_name_eq(ctx, action, PDF_NAME(Exclude)))
4568
0
    {
4569
0
      if (fields->all)
4570
0
      {
4571
        /* Current state = "All except <excludes> are locked.
4572
         * We need to remove anything from <excludes> that isn't in <Fields>. */
4573
0
        for (r = w = 0; r < fields->excludes.len; r++)
4574
0
        {
4575
0
          for (i = 0; i < len; i++)
4576
0
          {
4577
0
            const char *s = pdf_array_get_text_string(ctx, f, i);
4578
0
            if (!strcmp(s, fields->excludes.list[r]))
4579
0
              break;
4580
0
          }
4581
0
          if (i != len) /* we found a match */
4582
0
            fields->excludes.list[w++] = fields->excludes.list[r];
4583
0
        }
4584
0
        fields->excludes.len = w;
4585
0
      }
4586
0
      else
4587
0
      {
4588
        /* Current state = <includes> are locked.
4589
         * Set all. <excludes> becomes <Fields> less <includes>. Remove <includes>. */
4590
0
        fields->all = 1;
4591
0
        for (i = 0; i < len; i++)
4592
0
        {
4593
0
          const char *s = pdf_array_get_text_string(ctx, f, i);
4594
0
          for (r = 0; r < fields->includes.len; r++)
4595
0
          {
4596
0
            if (!strcmp(s, fields->includes.list[r]))
4597
0
              break;
4598
0
          }
4599
0
          if (r == fields->includes.len)
4600
0
            char_list_append(ctx, &fields->excludes, s);
4601
0
        }
4602
0
        free_char_list(ctx, &fields->includes);
4603
0
      }
4604
0
    }
4605
0
  }
4606
0
}
4607
4608
static void
4609
find_locked_fields_value(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *v)
4610
0
{
4611
0
  pdf_obj *ref = pdf_dict_get(ctx, v, PDF_NAME(Reference));
4612
0
  int i, n;
4613
4614
0
  if (!ref)
4615
0
    return;
4616
4617
0
  n = pdf_array_len(ctx, ref);
4618
0
  for (i = 0; i < n; i++)
4619
0
  {
4620
0
    pdf_obj *sr = pdf_array_get(ctx, ref, i);
4621
0
    pdf_obj *tm, *tp, *type;
4622
4623
    /* Type is optional, but if it exists, it'd better be SigRef. */
4624
0
    type = pdf_dict_get(ctx, sr, PDF_NAME(Type));
4625
0
    if (type != NULL && !pdf_name_eq(ctx, type, PDF_NAME(SigRef)))
4626
0
      continue;
4627
0
    tm = pdf_dict_get(ctx, sr, PDF_NAME(TransformMethod));
4628
0
    tp = pdf_dict_get(ctx, sr, PDF_NAME(TransformParams));
4629
0
    if (pdf_name_eq(ctx, tm, PDF_NAME(DocMDP)))
4630
0
    {
4631
0
      int p = pdf_dict_get_int(ctx, tp, PDF_NAME(P));
4632
4633
0
      if (p == 0)
4634
0
        p = 2;
4635
0
      if (fields->p == 0)
4636
0
        fields->p = p;
4637
0
      else
4638
0
        fields->p = fz_mini(fields->p, p);
4639
0
    }
4640
0
    else if (pdf_name_eq(ctx, tm, PDF_NAME(FieldMDP)))
4641
0
      merge_lock_specification(ctx, fields, tp);
4642
0
  }
4643
0
}
4644
4645
static void
4646
find_locked_fields_aux(fz_context *ctx, pdf_obj *field, pdf_locked_fields *fields, pdf_obj *inherit_v, pdf_obj *inherit_ft)
4647
0
{
4648
0
  int i, n;
4649
4650
0
  if (!pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Type)), PDF_NAME(Annot)))
4651
0
    return;
4652
4653
0
  if (pdf_obj_marked(ctx, field))
4654
0
    return;
4655
4656
0
  fz_try(ctx)
4657
0
  {
4658
0
    pdf_obj *kids, *v, *ft;
4659
4660
0
    (void)pdf_mark_obj(ctx, field);
4661
4662
0
    v = pdf_dict_get(ctx, field, PDF_NAME(V));
4663
0
    if (v == NULL)
4664
0
      v = inherit_v;
4665
0
    ft = pdf_dict_get(ctx, field, PDF_NAME(FT));
4666
0
    if (ft == NULL)
4667
0
      ft = inherit_ft;
4668
4669
    /* We are looking for Widget annotations of type Sig that are
4670
     * signed (i.e. have a 'V' field). */
4671
0
    if (pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Subtype)), PDF_NAME(Widget)) &&
4672
0
      pdf_name_eq(ctx, ft, PDF_NAME(Sig)) &&
4673
0
      pdf_name_eq(ctx, pdf_dict_get(ctx, v, PDF_NAME(Type)), PDF_NAME(Sig)))
4674
0
    {
4675
      /* Signed Sig Widgets (i.e. ones with a 'V' field) need
4676
       * to have their lock field respected. */
4677
0
      merge_lock_specification(ctx, fields, pdf_dict_get(ctx, field, PDF_NAME(Lock)));
4678
4679
      /* Look for DocMDP and FieldMDP entries to see what
4680
       * flavours of alterations are allowed. */
4681
0
      find_locked_fields_value(ctx, fields, v);
4682
0
    }
4683
4684
    /* Recurse as required */
4685
0
    kids = pdf_dict_get(ctx, field, PDF_NAME(Kids));
4686
0
    if (kids)
4687
0
    {
4688
0
      n = pdf_array_len(ctx, kids);
4689
0
      for (i = 0; i < n; i++)
4690
0
        find_locked_fields_aux(ctx, pdf_array_get(ctx, kids, i), fields, v, ft);
4691
0
    }
4692
0
  }
4693
0
  fz_always(ctx)
4694
0
    pdf_unmark_obj(ctx, field);
4695
0
  fz_catch(ctx)
4696
0
    fz_rethrow(ctx);
4697
0
}
4698
4699
pdf_locked_fields *
4700
pdf_find_locked_fields(fz_context *ctx, pdf_document *doc, int version)
4701
0
{
4702
0
  pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields);
4703
0
  int o_xref_base = doc->xref_base;
4704
0
  doc->xref_base = version;
4705
4706
0
  fz_var(fields);
4707
4708
0
  fz_try(ctx)
4709
0
  {
4710
0
    pdf_obj *fobj = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm/Fields");
4711
0
    int i, len = pdf_array_len(ctx, fobj);
4712
4713
0
    if (len == 0)
4714
0
      break;
4715
4716
0
    for (i = 0; i < len; i++)
4717
0
      find_locked_fields_aux(ctx, pdf_array_get(ctx, fobj, i), fields, NULL, NULL);
4718
4719
    /* Add in any DocMDP referenced directly from the Perms dict. */
4720
0
    find_locked_fields_value(ctx, fields, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Perms/DocMDP"));
4721
0
  }
4722
0
  fz_always(ctx)
4723
0
    doc->xref_base = o_xref_base;
4724
0
  fz_catch(ctx)
4725
0
  {
4726
0
    pdf_drop_locked_fields(ctx, fields);
4727
0
    fz_rethrow(ctx);
4728
0
  }
4729
4730
0
  return fields;
4731
0
}
4732
4733
pdf_locked_fields *
4734
pdf_find_locked_fields_for_sig(fz_context *ctx, pdf_document *doc, pdf_obj *sig)
4735
0
{
4736
0
  pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields);
4737
4738
0
  fz_var(fields);
4739
4740
0
  fz_try(ctx)
4741
0
  {
4742
0
    pdf_obj *ref;
4743
0
    int i, len;
4744
4745
    /* Ensure it really is a sig */
4746
0
    if (!pdf_name_eq(ctx, pdf_dict_get(ctx, sig, PDF_NAME(Subtype)), PDF_NAME(Widget)) ||
4747
0
      !pdf_name_eq(ctx, pdf_dict_get_inheritable(ctx, sig, PDF_NAME(FT)), PDF_NAME(Sig)))
4748
0
      break;
4749
4750
    /* Check the locking details given in the V (i.e. what the signature value
4751
     * claims to lock). */
4752
0
    ref = pdf_dict_getp(ctx, sig, "V/Reference");
4753
0
    len = pdf_array_len(ctx, ref);
4754
0
    for (i = 0; i < len; i++)
4755
0
    {
4756
0
      pdf_obj *tp = pdf_dict_get(ctx, pdf_array_get(ctx, ref, i), PDF_NAME(TransformParams));
4757
0
      merge_lock_specification(ctx, fields, tp);
4758
0
    }
4759
4760
    /* Also, check the locking details given in the Signature definition. This may
4761
     * not strictly be necessary as it's supposed to be "what the form author told
4762
     * the signature that it should lock". A well-formed signature should lock
4763
     * at least that much (possibly with extra fields locked from the XFA). If the
4764
     * signature doesn't lock as much as it was told to, we should be suspicious
4765
     * of the signing application. It is not clear that this test is actually
4766
     * necessary, or in keeping with what Acrobat does. */
4767
0
    merge_lock_specification(ctx, fields, pdf_dict_get(ctx, sig, PDF_NAME(Lock)));
4768
0
  }
4769
0
  fz_catch(ctx)
4770
0
  {
4771
0
    pdf_drop_locked_fields(ctx, fields);
4772
0
    fz_rethrow(ctx);
4773
0
  }
4774
4775
0
  return fields;
4776
0
}
4777
4778
static int
4779
validate_locked_fields(fz_context *ctx, pdf_document *doc, int version, pdf_locked_fields *locked)
4780
0
{
4781
0
  int o_xref_base = doc->xref_base;
4782
0
  pdf_changes *changes;
4783
0
  int num_objs;
4784
0
  int i, n;
4785
0
  int all_indirects = 1;
4786
4787
0
  num_objs = doc->max_xref_len;
4788
0
  changes = Memento_label(fz_calloc(ctx, 1, sizeof(*changes) + sizeof(int)*(num_objs-1)), "pdf_changes");
4789
0
  changes->num_obj = num_objs;
4790
4791
0
  fz_try(ctx)
4792
0
  {
4793
0
    pdf_obj *acroform, *new_acroform, *old_acroform;
4794
0
    int len, acroform_num;
4795
4796
0
    doc->xref_base = version;
4797
4798
    /* Detect every object that has changed */
4799
0
    for (i = 1; i < num_objs; i++)
4800
0
    {
4801
0
      if (pdf_obj_changed_in_version(ctx, doc, i, version))
4802
0
        changes->obj_changes[i] = FIELD_CHANGED;
4803
0
    }
4804
4805
    /* FIXME: Compare PageTrees and NumberTrees (just to allow for them being regenerated
4806
     * and having produced stuff that represents the same stuff). */
4807
4808
    /* The metadata of a document may be regenerated. Allow for that. */
4809
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Metadata"), &filter_simple);
4810
4811
    /* The ModDate of document info may be regenerated. Allow for that. */
4812
    /* FIXME: We accept all changes in document info, when maybe we ought to just
4813
     * accept ModDate? */
4814
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Info"), &filter_simple);
4815
4816
    /* The Encryption dict may be rewritten for the new Xref. */
4817
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Encrypt"), &filter_simple);
4818
4819
    /* We have to accept certain changes in the top level AcroForms dict,
4820
     * so get the 2 versions... */
4821
0
    acroform = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm");
4822
0
    acroform_num = pdf_to_num(ctx, acroform);
4823
0
    new_acroform = pdf_resolve_indirect_chain(ctx, acroform);
4824
0
    doc->xref_base = version+1;
4825
0
    old_acroform = pdf_resolve_indirect_chain(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm"));
4826
0
    doc->xref_base = version;
4827
0
    n = pdf_dict_len(ctx, new_acroform);
4828
0
    for (i = 0; i < n; i++)
4829
0
    {
4830
0
      pdf_obj *key = pdf_dict_get_key(ctx, new_acroform, i);
4831
0
      pdf_obj *nval = pdf_dict_get(ctx, new_acroform, key);
4832
0
      pdf_obj *oval = pdf_dict_get(ctx, old_acroform, key);
4833
4834
0
      if (pdf_name_eq(ctx, key, PDF_NAME(Fields)))
4835
0
      {
4836
0
        int j;
4837
4838
0
        len = pdf_array_len(ctx, nval);
4839
0
        for (j = 0; j < len; j++)
4840
0
        {
4841
0
          pdf_obj *field = pdf_array_get(ctx, nval, j);
4842
0
          if (!pdf_is_indirect(ctx, field))
4843
0
            all_indirects = 0;
4844
0
          check_field(ctx, doc, changes, field, locked, "", NULL, NULL);
4845
0
        }
4846
0
      }
4847
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(SigFlags)))
4848
0
      {
4849
        /* Accept this */
4850
0
        changes->obj_changes[acroform_num] |= FIELD_CHANGE_VALID;
4851
0
      }
4852
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(DR)))
4853
0
      {
4854
        /* Accept any changes from within the Document Resources */
4855
0
        filter_changes_accepted(ctx, changes, nval, &filter_resources);
4856
0
      }
4857
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(XFA)))
4858
0
      {
4859
        /* Allow any changes within the XFA streams. */
4860
0
        filter_changes_accepted(ctx, changes, nval, &filter_xfa);
4861
0
      }
4862
0
      else if (pdf_objcmp(ctx, nval, oval))
4863
0
      {
4864
0
        changes->obj_changes[acroform_num] |= FIELD_CHANGE_INVALID;
4865
0
      }
4866
0
    }
4867
4868
    /* Allow for any object streams/XRefs to be changed. */
4869
0
    doc->xref_base = version+1;
4870
0
    for (i = 1; i < num_objs; i++)
4871
0
    {
4872
0
      pdf_obj *oobj, *otype;
4873
0
      if (changes->obj_changes[i] != FIELD_CHANGED)
4874
0
        continue;
4875
0
      if (!pdf_obj_exists(ctx, doc, i))
4876
0
      {
4877
        /* Not present this version - must be newly created, can't be a change. */
4878
0
        changes->obj_changes[i] |= FIELD_CHANGE_VALID;
4879
0
        continue;
4880
0
      }
4881
0
      oobj = pdf_load_object(ctx, doc, i);
4882
0
      otype = pdf_dict_get(ctx, oobj, PDF_NAME(Type));
4883
0
      if (pdf_name_eq(ctx, otype, PDF_NAME(ObjStm)) ||
4884
0
        pdf_name_eq(ctx, otype, PDF_NAME(XRef)))
4885
0
      {
4886
0
        changes->obj_changes[i] |= FIELD_CHANGE_VALID;
4887
0
      }
4888
0
      pdf_drop_obj(ctx, oobj);
4889
0
    }
4890
0
  }
4891
0
  fz_always(ctx)
4892
0
    doc->xref_base = o_xref_base;
4893
0
  fz_catch(ctx)
4894
0
  {
4895
0
    fz_free(ctx, changes);
4896
0
    fz_rethrow(ctx);
4897
0
  }
4898
4899
0
  for (i = 1; i < num_objs; i++)
4900
0
  {
4901
0
    if (changes->obj_changes[i] == FIELD_CHANGED)
4902
      /* Change with no reason */
4903
0
      break;
4904
0
    if (changes->obj_changes[i] & FIELD_CHANGE_INVALID)
4905
      /* Illegal Change */
4906
0
      break;
4907
0
  }
4908
4909
0
  fz_free(ctx, changes);
4910
4911
0
  return (i == num_objs) && all_indirects;
4912
0
}
4913
4914
int
4915
pdf_validate_changes(fz_context *ctx, pdf_document *doc, int version)
4916
0
{
4917
0
  int unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
4918
0
  int n = pdf_count_versions(ctx, doc);
4919
0
  pdf_locked_fields *locked = NULL;
4920
0
  int result;
4921
4922
0
  if (version < 0 || version >= n)
4923
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "There aren't that many changes to find in this document!");
4924
4925
  /* We are wanting to compare version+1 with version to make sure
4926
   * that the only changes made in going to version are conformant
4927
   * with what was allowed in version+1. The production of version
4928
   * might have involved signing a signature field and locking down
4929
   * more fields - this means that taking the list of locked things
4930
   * from version rather than version+1 will give us bad results! */
4931
0
  locked = pdf_find_locked_fields(ctx, doc, unsaved_versions+version+1);
4932
4933
0
  fz_try(ctx)
4934
0
  {
4935
0
    if (!locked->all && locked->includes.len == 0 && locked->p == 0)
4936
0
    {
4937
      /* If nothing is locked at all, then all changes are permissible. */
4938
0
      result = 1;
4939
0
    }
4940
0
    else
4941
0
      result = validate_locked_fields(ctx, doc, unsaved_versions+version, locked);
4942
0
  }
4943
0
  fz_always(ctx)
4944
0
    pdf_drop_locked_fields(ctx, locked);
4945
0
  fz_catch(ctx)
4946
0
    fz_rethrow(ctx);
4947
4948
0
  return result;
4949
0
}
4950
4951
int
4952
pdf_validate_change_history(fz_context *ctx, pdf_document *doc)
4953
0
{
4954
0
  int num_versions = pdf_count_versions(ctx, doc);
4955
0
  int v;
4956
4957
0
  if (num_versions < 2)
4958
0
    return 0; /* Unless there are at least 2 versions, there have been no updates. */
4959
4960
0
  for(v = num_versions - 2; v >= 0; v--)
4961
0
  {
4962
0
    if (!pdf_validate_changes(ctx, doc, v))
4963
0
      return v+1;
4964
0
  }
4965
0
  return 0;
4966
0
}
4967
4968
/* Return the version that obj appears in, or -1 for not found. */
4969
static int
4970
pdf_find_incremental_update_num_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
4971
6
{
4972
6
  pdf_xref *xref = NULL;
4973
6
  pdf_xref_subsec *sub;
4974
6
  int i, j;
4975
4976
6
  if (obj == NULL)
4977
0
    return -1;
4978
4979
  /* obj needs to be indirect for us to get a num out of it. */
4980
6
  i = pdf_to_num(ctx, obj);
4981
6
  if (i <= 0)
4982
0
    return -1;
4983
4984
  /* obj can't be indirect below, so resolve it here. */
4985
6
  obj = pdf_resolve_indirect_chain(ctx, obj);
4986
4987
  /* Find the first xref section where the entry is defined. */
4988
9
  for (j = 0; j < doc->num_xref_sections; j++)
4989
9
  {
4990
9
    xref = &doc->xref_sections[j];
4991
4992
9
    if (i < xref->num_objects)
4993
9
    {
4994
12
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
4995
9
      {
4996
9
        pdf_xref_entry *entry;
4997
4998
9
        if (i < sub->start || i >= sub->start + sub->len)
4999
0
          continue;
5000
5001
9
        entry = &sub->table[i - sub->start];
5002
9
        if (entry->obj == obj)
5003
6
          return j;
5004
9
      }
5005
9
    }
5006
9
  }
5007
0
  return -1;
5008
6
}
5009
5010
/*
 * Return the xref section index in which 'obj' is found, limited to at
 * most (saved versions + unsaved versions), or -1 if it is not found.
 */
int pdf_find_version_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
{
  int found = pdf_find_incremental_update_num_for_obj(ctx, doc, obj);
  int limit;

  if (found == -1)
    return -1;

  limit = pdf_count_versions(ctx, doc) + pdf_count_unsaved_versions(ctx, doc);

  return (found > limit) ? limit : found;
}
5024
5025
/*
 * Check the updates made after a signature widget was signed against
 * the locks that signature declares.
 *
 * The locks are read from the widget as it appears in the version where
 * it is found; each later version (counting down to the first saved
 * one) is then validated against them, stopping at the first failure.
 *
 * Returns (i + 1 - unsaved versions), where i is the raw version index
 * at which the walk stopped. When every version validates, that is 0.
 *
 * doc->xref_base is changed while working and restored on all paths;
 * errors are rethrown after cleanup.
 */
int pdf_validate_signature(fz_context *ctx, pdf_annot *widget)
{
  pdf_document *doc = widget->page->doc;
  int unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
  int num_versions = pdf_count_versions(ctx, doc) + unsaved_versions;
  int version = pdf_find_version_for_obj(ctx, doc, widget->obj);
  pdf_locked_fields *locked = NULL;
  int saved_base;
  int v;

  /* Never look beyond the last version. */
  if (version >= num_versions)
    version = num_versions-1;

  /* View the document as it was when the widget was signed, so that
   * the lock definition is the one from that moment. */
  saved_base = doc->xref_base;
  doc->xref_base = version;

  fz_var(locked); /* Not really needed, but it stops warnings */

  fz_try(ctx)
  {
    locked = pdf_find_locked_fields_for_sig(ctx, doc, widget->obj);

    v = version-1;
    while (v >= unsaved_versions)
    {
      doc->xref_base = v;
      if (!validate_locked_fields(ctx, doc, v, locked))
        break;
      v--;
    }
  }
  fz_always(ctx)
  {
    doc->xref_base = saved_base;
    pdf_drop_locked_fields(ctx, locked);
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }

  return v+1-unsaved_versions;
}
5064
5065
/*
 * Report whether any saved version of the document looks like a pure
 * XFA form: an AcroForm with an XFA entry and an empty Fields array.
 *
 * Versions are examined from (saved + unsaved) down to the unsaved
 * count, stopping at the first match. doc->xref_base is restored on all
 * paths; errors are rethrown.
 *
 * Returns 1 if such a version exists, 0 otherwise.
 */
int pdf_was_pure_xfa(fz_context *ctx, pdf_document *doc)
{
  int first = pdf_count_unsaved_versions(ctx, doc);
  int last = pdf_count_versions(ctx, doc) + first;
  int saved_base = doc->xref_base;
  int pure_xfa = 0;
  int v;

  fz_var(pure_xfa);

  fz_try(ctx)
  {
    for (v = last; v >= first; v--)
    {
      pdf_obj *acroform;
      pdf_obj *field_list;

      doc->xref_base = v;
      acroform = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm");
      field_list = pdf_dict_get(ctx, acroform, PDF_NAME(Fields));

      /* No fields, yet an XFA entry: at this point in its history
       * the form was defined purely by XFA. */
      if (pdf_array_len(ctx, field_list) == 0 &&
        pdf_dict_get(ctx, acroform, PDF_NAME(XFA)) != NULL)
      {
        pure_xfa = 1;
        break;
      }
    }
  }
  fz_always(ctx)
  {
    doc->xref_base = saved_base;
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }

  return pure_xfa;
}
5097
5098
/*
 * Allocate a new xref consisting of a single subsection that starts at
 * object 0 and has one entry for each of the pdf_xref_len() objects
 * currently in the document.
 *
 * Returns the new xref. If an allocation fails, whatever was allocated
 * is freed and the error is rethrown.
 */
pdf_xref *pdf_new_local_xref(fz_context *ctx, pdf_document *doc)
{
  int count = pdf_xref_len(ctx, doc);
  pdf_xref *xref = fz_malloc_struct(ctx, pdf_xref);

  xref->num_objects = count;
  xref->subsec = NULL;
  xref->trailer = NULL;
  xref->pre_repair_trailer = NULL;
  xref->unsaved_sigs = NULL;
  xref->unsaved_sigs_end = NULL;

  fz_try(ctx)
  {
    /* Stored into xref->subsec straight away so that the catch
     * block can release it if the table allocation throws. */
    pdf_xref_subsec *sub = xref->subsec = fz_malloc_struct(ctx, pdf_xref_subsec);

    sub->start = 0;
    sub->len = count;
    sub->table = fz_malloc_struct_array(ctx, count, pdf_xref_entry);
    sub->next = NULL;
  }
  fz_catch(ctx)
  {
    fz_free(ctx, xref->subsec);
    fz_free(ctx, xref);
    fz_rethrow(ctx);
  }

  return xref;
}
5127
5128
/*
 * Release an xref created by pdf_new_local_xref(): first its
 * subsections, then the structure itself. Passing NULL is a no-op.
 */
void pdf_drop_local_xref(fz_context *ctx, pdf_xref *xref)
{
  if (xref != NULL)
  {
    pdf_drop_xref_subsec(ctx, xref);
    fz_free(ctx, xref);
  }
}
5137
5138
/*
 * Discard the document's local xref together with the resources tied to
 * it, and flag the document as needing resynthesis.
 */
void pdf_drop_local_xref_and_resources(fz_context *ctx, pdf_document *doc)
{
  pdf_xref *stale;

  /* Purge the local font resources and the local store entries
   * before the xref itself is dropped. */
  pdf_purge_local_font_resources(ctx, doc);
  pdf_purge_locals_from_store(ctx, doc);

  stale = doc->local_xref;
  pdf_drop_local_xref(ctx, stale);
  doc->local_xref = NULL;

  doc->resynth_required = 1;
}
5146
5147
void
5148
pdf_debug_doc_changes(fz_context *ctx, pdf_document *doc)
5149
0
{
5150
0
  int i, j;
5151
5152
0
  if (doc->num_incremental_sections == 0)
5153
0
    fz_write_printf(ctx, fz_stddbg(ctx), "No incremental xrefs");
5154
0
  else
5155
0
  {
5156
0
    for (i = 0; i < doc->num_incremental_sections; i++)
5157
0
    {
5158
0
      pdf_xref *xref = &doc->xref_sections[i];
5159
0
      pdf_xref_subsec *sub;
5160
5161
0
      fz_write_printf(ctx, fz_stddbg(ctx), "Incremental xref:\n");
5162
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
5163
0
      {
5164
0
        fz_write_printf(ctx, fz_stddbg(ctx), "  Objects %d->%d\n", sub->start, sub->start + sub->len - 1);
5165
0
        for (j = 0; j < sub->len; j++)
5166
0
        {
5167
0
          pdf_xref_entry *e = &sub->table[j];
5168
0
          if (e->type == 0)
5169
0
            continue;
5170
0
          fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type);
5171
0
          pdf_debug_obj(ctx, e->obj);
5172
0
          fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n");
5173
0
        }
5174
0
      }
5175
0
    }
5176
0
  }
5177
5178
0
  if (doc->local_xref == NULL)
5179
0
    fz_write_printf(ctx, fz_stddbg(ctx), "No local xref");
5180
0
  else
5181
0
  {
5182
0
    for (i = 0; i < doc->num_incremental_sections; i++)
5183
0
    {
5184
0
      pdf_xref *xref = doc->local_xref;
5185
0
      pdf_xref_subsec *sub;
5186
5187
0
      fz_write_printf(ctx, fz_stddbg(ctx), "Local xref (%sin force):\n", doc->local_xref_nesting == 0 ? "not " : "");
5188
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
5189
0
      {
5190
0
        fz_write_printf(ctx, fz_stddbg(ctx), "  Objects %d->%d\n", sub->start, sub->start + sub->len - 1);
5191
0
        for (j = 0; j < sub->len; j++)
5192
0
        {
5193
0
          pdf_xref_entry *e = &sub->table[j];
5194
0
          if (e->type == 0)
5195
0
            continue;
5196
0
          fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type);
5197
0
          pdf_debug_obj(ctx, e->obj);
5198
0
          fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n");
5199
0
        }
5200
0
      }
5201
0
    }
5202
0
  }
5203
5204
0
}
5205
5206
pdf_obj *
5207
pdf_metadata(fz_context *ctx, pdf_document *doc)
5208
0
{
5209
0
  int initial = doc->xref_base;
5210
0
  pdf_obj *obj = NULL;
5211
5212
0
  fz_var(obj);
5213
5214
0
  fz_try(ctx)
5215
0
  {
5216
0
    do
5217
0
    {
5218
0
      pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
5219
0
      obj = pdf_dict_get(ctx, root, PDF_NAME(Metadata));
5220
0
      if (obj)
5221
0
        break;
5222
0
      doc->xref_base++;
5223
0
    }
5224
0
    while (doc->xref_base < doc->num_xref_sections);
5225
0
  }
5226
0
  fz_always(ctx)
5227
0
    doc->xref_base = initial;
5228
0
  fz_catch(ctx)
5229
0
    fz_rethrow(ctx);
5230
5231
0
  return obj;
5232
0
}
5233
5234
/*
 * Return 1 if 'obj' is found in xref section 0 of the document it is
 * bound to, and 0 otherwise. Objects not bound to a document, or bound
 * to one with no incremental sections, always give 0.
 */
int pdf_obj_is_incremental(fz_context *ctx, pdf_obj *obj)
{
  pdf_document *doc = pdf_get_bound_document(ctx, obj);

  if (doc != NULL && doc->num_incremental_sections != 0)
    return pdf_find_incremental_update_num_for_obj(ctx, doc, obj) == 0;

  return 0;
}
5246
5247
/*
 * Pass every object cached in the non-incremental xref sections through
 * pdf_drop_singleton_obj(), storing back whatever it returns.
 * (Presumably this releases objects nobody else references - confirm
 * against pdf_drop_singleton_obj.)
 *
 * Does nothing if doc is NULL or if a repair has been attempted.
 */
void pdf_minimize_document(fz_context *ctx, pdf_document *doc)
{
  int section;

  /* Don't throw anything away if we've done a repair! */
  if (doc == NULL || doc->repair_attempted)
    return;

  /* The incremental sections are skipped: that is where all our
   * changes will be. */
  for (section = doc->num_incremental_sections; section < doc->num_xref_sections; section++)
  {
    pdf_xref_subsec *sub = doc->xref_sections[section].subsec;

    while (sub != NULL)
    {
      int count = sub->len;
      int k;

      for (k = 0; k < count; k++)
      {
        pdf_xref_entry *entry = &sub->table[k];

        if (entry->obj != NULL)
          entry->obj = pdf_drop_singleton_obj(ctx, entry->obj);
      }

      sub = sub->next;
    }
  }
}