Coverage Report

Created: 2023-06-07 06:20

/src/mupdf/source/pdf/pdf-xref.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2023 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "pdf-annot-imp.h"
25
26
#include <assert.h>
27
#include <limits.h>
28
#include <string.h>
29
30
#undef DEBUG_PROGESSIVE_ADVANCE
31
32
#ifdef DEBUG_PROGESSIVE_ADVANCE
33
#define DEBUGMESS(A) do { fz_warn A; } while (0)
34
#else
35
0
#define DEBUGMESS(A) do { } while (0)
36
#endif
37
38
541k
/* ASCII decimal digit test. The argument is parenthesised so that compound
 * expressions (e.g. a conditional) expand as intended. Note that the
 * argument is still evaluated twice, so it must not have side effects. */
#define isdigit(c) ((c) >= '0' && (c) <= '9')
39
40
/* Return 1 when ch is NUL, TAB, LF, FF, CR or SPACE, and 0 otherwise. */
static inline int iswhite(int ch)
{
  switch (ch)
  {
  case '\000':
  case '\011':
  case '\012':
  case '\014':
  case '\015':
  case '\040':
    return 1;
  default:
    return 0;
  }
}
46
47
/*
48
 * xref tables
49
 */
50
51
static void
52
pdf_drop_xref_subsec(fz_context *ctx, pdf_xref *xref)
53
17.7k
{
54
17.7k
  pdf_xref_subsec *sub = xref->subsec;
55
17.7k
  pdf_unsaved_sig *usig;
56
17.7k
  int e;
57
58
34.2k
  while (sub != NULL)
59
16.5k
  {
60
16.5k
    pdf_xref_subsec *next_sub = sub->next;
61
33.8M
    for (e = 0; e < sub->len; e++)
62
33.7M
    {
63
33.7M
      pdf_xref_entry *entry = &sub->table[e];
64
33.7M
      pdf_drop_obj(ctx, entry->obj);
65
33.7M
      fz_drop_buffer(ctx, entry->stm_buf);
66
33.7M
    }
67
16.5k
    fz_free(ctx, sub->table);
68
16.5k
    fz_free(ctx, sub);
69
16.5k
    sub = next_sub;
70
16.5k
  }
71
72
17.7k
  pdf_drop_obj(ctx, xref->pre_repair_trailer);
73
17.7k
  pdf_drop_obj(ctx, xref->trailer);
74
75
17.7k
  while ((usig = xref->unsaved_sigs) != NULL)
76
0
  {
77
0
    xref->unsaved_sigs = usig->next;
78
0
    pdf_drop_obj(ctx, usig->field);
79
0
    pdf_drop_signer(ctx, usig->signer);
80
0
    fz_free(ctx, usig);
81
0
  }
82
17.7k
}
83
84
/* Drop the contents of the first num_xref_sections entries of the
 * xref_sections array (in order), then free the array itself.
 * The doc argument is not used. */
static void pdf_drop_xref_sections_imp(fz_context *ctx, pdf_document *doc, pdf_xref *xref_sections, int num_xref_sections)
{
  pdf_xref *section = xref_sections;
  int remaining = num_xref_sections;

  while (remaining-- > 0)
    pdf_drop_xref_subsec(ctx, section++);

  fz_free(ctx, xref_sections);
}
93
94
/* Drop both the saved and the current xref section arrays of the
 * document, then reset the associated bookkeeping fields. */
static void pdf_drop_xref_sections(fz_context *ctx, pdf_document *doc)
{
  pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);
  pdf_drop_xref_sections_imp(ctx, doc, doc->xref_sections, doc->num_xref_sections);

  /* Nothing is left; clear pointers and counters together. */
  doc->xref_sections = NULL;
  doc->saved_xref_sections = NULL;
  doc->num_xref_sections = 0;
  doc->saved_num_xref_sections = 0;
  doc->num_incremental_sections = 0;
}
105
106
static void
107
extend_xref_index(fz_context *ctx, pdf_document *doc, int newlen)
108
42.7k
{
109
42.7k
  int i;
110
111
42.7k
  doc->xref_index = fz_realloc_array(ctx, doc->xref_index, newlen, int);
112
33.7M
  for (i = doc->max_xref_len; i < newlen; i++)
113
33.7M
  {
114
33.7M
    doc->xref_index[i] = 0;
115
33.7M
  }
116
42.7k
  doc->max_xref_len = newlen;
117
42.7k
}
118
119
static void
120
resize_xref_sub(fz_context *ctx, pdf_xref *xref, int base, int newlen)
121
8.31k
{
122
8.31k
  pdf_xref_subsec *sub;
123
8.31k
  int i;
124
125
8.31k
  assert(xref != NULL);
126
8.31k
  sub = xref->subsec;
127
8.31k
  assert(sub->next == NULL && sub->start == base && sub->len+base == xref->num_objects);
128
8.31k
  assert(newlen+base > xref->num_objects);
129
130
8.31k
  sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
131
16.6k
  for (i = sub->len; i < newlen; i++)
132
8.31k
  {
133
8.31k
    sub->table[i].type = 0;
134
8.31k
    sub->table[i].ofs = 0;
135
8.31k
    sub->table[i].gen = 0;
136
8.31k
    sub->table[i].num = 0;
137
8.31k
    sub->table[i].stm_ofs = 0;
138
8.31k
    sub->table[i].stm_buf = NULL;
139
8.31k
    sub->table[i].obj = NULL;
140
8.31k
  }
141
8.31k
  sub->len = newlen;
142
8.31k
  if (newlen+base > xref->num_objects)
143
8.31k
    xref->num_objects = newlen+base;
144
8.31k
}
145
146
/* This is only ever called when we already have an incremental
147
 * xref. This means there will only be 1 subsec, and it will be
148
 * a complete subsec. */
149
static void pdf_resize_xref(fz_context *ctx, pdf_document *doc, int newlen)
150
0
{
151
0
  pdf_xref *xref = &doc->xref_sections[doc->xref_base];
152
153
0
  resize_xref_sub(ctx, xref, 0, newlen);
154
0
  if (doc->max_xref_len < newlen)
155
0
    extend_xref_index(ctx, doc, newlen);
156
0
}
157
158
/* Append one empty xref section (no subsections, no trailers, no unsaved
 * signatures, zero objects) to the end of doc->xref_sections. */
static void pdf_populate_next_xref_level(fz_context *ctx, pdf_document *doc)
{
  int slot = doc->num_xref_sections;
  pdf_xref *fresh;

  doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, slot + 1, pdf_xref);
  doc->num_xref_sections = slot + 1;

  fresh = &doc->xref_sections[slot];
  fresh->num_objects = 0;
  fresh->subsec = NULL;
  fresh->trailer = NULL;
  fresh->pre_repair_trailer = NULL;
  fresh->unsaved_sigs = NULL;
  fresh->unsaved_sigs_end = NULL;
}
172
173
/* Return the trailer of the xref section selected by doc->xref_base,
 * or NULL when the document has no xref sections at all.
 * The returned pointer is not kept on behalf of the caller. */
pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc)
{
  if (!doc->xref_sections)
    return NULL;

  return doc->xref_sections[doc->xref_base].trailer;
}
180
181
/* Install trailer (kept) as the trailer of the last xref section, i.e.
 * the one being populated. If that section already had a trailer, it
 * is moved into pre_repair_trailer, replacing whatever was there. */
void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer)
{
  pdf_xref *target = doc->xref_sections + (doc->num_xref_sections - 1);

  if (target->trailer != NULL)
  {
    /* The old trailer's reference is handed over, not dropped. */
    pdf_drop_obj(ctx, target->pre_repair_trailer);
    target->pre_repair_trailer = target->trailer;
  }

  target->trailer = pdf_keep_obj(ctx, trailer);
}
192
193
/* Return the largest num_objects among the xref sections from xref_base
 * onwards, also taking the local xref into account when one is active
 * (local_xref set and local_xref_nesting > 0). */
int pdf_xref_len(fz_context *ctx, pdf_document *doc)
{
  int longest = 0;
  int s;

  if (doc->local_xref && doc->local_xref_nesting > 0)
    longest = doc->local_xref->num_objects;

  for (s = doc->xref_base; s < doc->num_xref_sections; s++)
    longest = fz_maxi(longest, doc->xref_sections[s].num_objects);

  return longest;
}
206
207
/* Ensure that the given xref has a single subsection
208
 * that covers the entire range. */
209
static void
210
ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num, int which)
211
42.3k
{
212
42.3k
  pdf_xref *xref = &doc->xref_sections[which];
213
42.3k
  pdf_xref_subsec *sub = xref->subsec;
214
42.3k
  pdf_xref_subsec *new_sub;
215
216
42.3k
  if (num < xref->num_objects)
217
776
    num = xref->num_objects;
218
219
42.3k
  if (sub != NULL && sub->next == NULL && sub->start == 0 && sub->len >= num)
220
872
    return;
221
222
41.4k
  new_sub = fz_malloc_struct(ctx, pdf_xref_subsec);
223
82.9k
  fz_try(ctx)
224
82.9k
  {
225
41.4k
    new_sub->table = fz_malloc_struct_array(ctx, num, pdf_xref_entry);
226
41.4k
    new_sub->start = 0;
227
41.4k
    new_sub->len = num;
228
41.4k
    new_sub->next = NULL;
229
41.4k
  }
230
82.9k
  fz_catch(ctx)
231
0
  {
232
0
    fz_free(ctx, new_sub);
233
0
    fz_rethrow(ctx);
234
0
  }
235
236
  /* Move objects over to the new subsection and destroy the old
237
   * ones */
238
41.4k
  sub = xref->subsec;
239
69.1k
  while (sub != NULL)
240
27.6k
  {
241
27.6k
    pdf_xref_subsec *next = sub->next;
242
27.6k
    int i;
243
244
32.8M
    for (i = 0; i < sub->len; i++)
245
32.7M
    {
246
32.7M
      new_sub->table[i+sub->start] = sub->table[i];
247
32.7M
    }
248
27.6k
    fz_free(ctx, sub->table);
249
27.6k
    fz_free(ctx, sub);
250
27.6k
    sub = next;
251
27.6k
  }
252
41.4k
  xref->num_objects = num;
253
41.4k
  xref->subsec = new_sub;
254
41.4k
  if (doc->max_xref_len < num)
255
40.3k
    extend_xref_index(ctx, doc, num);
256
41.4k
}
257
258
static pdf_xref_entry *
259
pdf_get_local_xref_entry(fz_context *ctx, pdf_document *doc, int num)
260
31.2k
{
261
31.2k
  pdf_xref *xref = doc->local_xref;
262
31.2k
  pdf_xref_subsec *sub;
263
264
31.2k
  if (xref == NULL || doc->local_xref_nesting == 0)
265
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "Local xref not present!");
266
267
  /* Local xrefs only ever have 1 section, and it should be solid. */
268
31.2k
  sub = xref->subsec;
269
31.2k
  assert(sub && !sub->next);
270
31.2k
  if (num >= sub->start && num < sub->start + sub->len)
271
22.8k
    return &sub->table[num - sub->start];
272
273
  /* Expand the xref so we can return a pointer. */
274
8.31k
  resize_xref_sub(ctx, xref, 0, num+1);
275
8.31k
  sub = xref->subsec;
276
8.31k
  return &sub->table[num - sub->start];
277
31.2k
}
278
279
/* Return the entry for object num within the xref section currently
 * being populated (the last one), creating the first section if the
 * document has none. When a local xref is active the request is served
 * from it instead. Throws if num is negative or exceeds
 * PDF_MAX_OBJECT_NUMBER. */
pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref_subsec *sub;
  int last;

  if (doc->num_xref_sections == 0)
  {
    doc->xref_sections = fz_malloc_struct(ctx, pdf_xref);
    doc->num_xref_sections = 1;
  }

  if (doc->local_xref && doc->local_xref_nesting > 0)
    return pdf_get_local_xref_entry(ctx, doc, num);

  /* Prevent accidental heap underflow */
  if (num < 0 || num > PDF_MAX_OBJECT_NUMBER)
    fz_throw(ctx, FZ_ERROR_GENERIC, "object number out of range (%d)", num);

  /* Look through the subsections of the last section first. */
  last = doc->num_xref_sections - 1;
  sub = doc->xref_sections[last].subsec;
  while (sub != NULL)
  {
    if (num >= sub->start && num < sub->start + sub->len)
      return &sub->table[num - sub->start];
    sub = sub->next;
  }

  /* Not covered by any subsection: solidify the section so that it is. */
  ensure_solid_xref(ctx, doc, num+1, last);
  sub = doc->xref_sections[last].subsec;

  return &sub->table[num - sub->start];
}
314
315
/* It is vital that pdf_get_xref_entry_aux called with !solidify_if_needed
316
 * and a value object number, does NOT try/catch or throw. */
317
static
318
pdf_xref_entry *pdf_get_xref_entry_aux(fz_context *ctx, pdf_document *doc, int i, int solidify_if_needed)
319
43.1M
{
320
43.1M
  pdf_xref *xref = NULL;
321
43.1M
  pdf_xref_subsec *sub;
322
43.1M
  int j;
323
324
43.1M
  if (i < 0)
325
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "Negative object number requested");
326
327
43.1M
  if (i < doc->max_xref_len)
328
43.0M
    j = doc->xref_index[i];
329
134k
  else
330
134k
    j = 0;
331
332
  /* If we have an active local xref, check there first. */
333
43.1M
  if (doc->local_xref && doc->local_xref_nesting > 0)
334
949k
  {
335
949k
    xref = doc->local_xref;
336
337
949k
    if (i < xref->num_objects)
338
949k
    {
339
1.60M
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
340
949k
      {
341
949k
        pdf_xref_entry *entry;
342
343
949k
        if (i < sub->start || i >= sub->start + sub->len)
344
0
          continue;
345
346
949k
        entry = &sub->table[i - sub->start];
347
949k
        if (entry->type)
348
296k
          return entry;
349
949k
      }
350
949k
    }
351
949k
  }
352
353
  /* We may be accessing an earlier version of the document using xref_base
354
   * and j may be an index into a later xref section */
355
42.8M
  if (doc->xref_base > j)
356
4
    j = doc->xref_base;
357
42.8M
  else
358
42.8M
    j = 0;
359
360
361
  /* Find the first xref section where the entry is defined. */
362
43.4M
  for (; j < doc->num_xref_sections; j++)
363
43.4M
  {
364
43.4M
    xref = &doc->xref_sections[j];
365
366
43.4M
    if (i < xref->num_objects)
367
43.4M
    {
368
47.3M
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
369
46.8M
      {
370
46.8M
        pdf_xref_entry *entry;
371
372
46.8M
        if (i < sub->start || i >= sub->start + sub->len)
373
3.95M
          continue;
374
375
42.8M
        entry = &sub->table[i - sub->start];
376
42.8M
        if (entry->type)
377
42.8M
        {
378
          /* Don't update xref_index if xref_base may have
379
           * influenced the value of j */
380
42.8M
          if (doc->xref_base == 0)
381
42.8M
            doc->xref_index[i] = j;
382
42.8M
          return entry;
383
42.8M
        }
384
42.8M
      }
385
43.4M
    }
386
43.4M
  }
387
388
  /* Didn't find the entry in any section. Return the entry from
389
   * the local_xref (if there is one active), or the final section. */
390
639
  if (doc->local_xref && doc->local_xref_nesting > 0)
391
0
  {
392
0
    if (xref == NULL || i < xref->num_objects)
393
0
    {
394
0
      xref = doc->local_xref;
395
0
      sub = xref->subsec;
396
0
      assert(sub != NULL && sub->next == NULL);
397
0
      if (i >= sub->start && i < sub->start + sub->len)
398
0
        return &sub->table[i - sub->start];
399
0
    }
400
401
    /* Expand the xref so we can return a pointer. */
402
0
    resize_xref_sub(ctx, xref, 0, i+1);
403
0
    sub = xref->subsec;
404
0
    return &sub->table[i - sub->start];
405
0
  }
406
407
639
  doc->xref_index[i] = 0;
408
639
  if (xref == NULL || i < xref->num_objects)
409
364
  {
410
364
    xref = &doc->xref_sections[doc->xref_base];
411
366
    for (sub = xref->subsec; sub != NULL; sub = sub->next)
412
364
    {
413
364
      if (i >= sub->start && i < sub->start + sub->len)
414
362
        return &sub->table[i - sub->start];
415
364
    }
416
364
  }
417
418
  /* Some really hairy code here. When we are reading the file in
419
   * initially, we read from 'newest' to 'oldest' (i.e. from 0 to
420
   * doc->num_xref_sections-1). Each section is created initially
421
   * with num_objects == 0 in it, and remains like that while we
422
   * are parsing the stream from the file. This is the only time
423
   * we'll ever have xref_sections with 0 objects in them. */
424
277
  if (doc->xref_sections[doc->num_xref_sections-1].num_objects == 0)
425
255
  {
426
    /* The oldest xref section has 0 objects in it. So we are
427
     * parsing an xref stream while loading. We don't want to
428
     * solidify the xref we are currently parsing for (as it'll
429
     * get very confused, and end up a different 'shape' in
430
     * memory to that which is in the file, and would hence
431
     * render 'fingerprinting' for snapshotting invalid) so
432
     * just give up at this point. */
433
255
    return NULL;
434
255
  }
435
436
22
  if (!solidify_if_needed)
437
0
    return NULL;
438
439
  /* At this point, we solidify the xref. This ensures that we
440
   * can return a pointer. This is the only case where this function
441
   * might throw an exception, and it will never happen when we are
442
   * working within a 'solid' xref. */
443
22
  ensure_solid_xref(ctx, doc, i+1, 0);
444
22
  xref = &doc->xref_sections[0];
445
22
  sub = xref->subsec;
446
22
  return &sub->table[i - sub->start];
447
22
}
448
449
/* Look up the xref entry for object i, allowing the xref to be
 * solidified if that is needed to produce an entry. May return NULL. */
pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i)
{
  const int allow_solidify = 1;

  return pdf_get_xref_entry_aux(ctx, doc, i, allow_solidify);
}
453
454
/* Look up the xref entry for object i without ever solidifying the
 * xref; returns NULL where solidifying would have been required. */
pdf_xref_entry *pdf_get_xref_entry_no_change(fz_context *ctx, pdf_document *doc, int i)
{
  const int allow_solidify = 0;

  return pdf_get_xref_entry_aux(ctx, doc, i, allow_solidify);
}
458
459
/* As pdf_get_xref_entry, but throws instead of returning NULL when no
 * entry can be produced for object i. */
pdf_xref_entry *pdf_get_xref_entry_no_null(fz_context *ctx, pdf_document *doc, int i)
{
  pdf_xref_entry *found = pdf_get_xref_entry(ctx, doc, i);

  if (found == NULL)
    fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find object in xref (%d 0 R), but not allowed to return NULL", i);

  return found;
}
466
467
/* Call fn(ctx, entry, object number, doc, arg) for every xref entry
 * with a non-zero type: first those of the active local xref (if any),
 * then those of each document xref section in turn. While a document
 * section is being visited doc->xref_base is set to its index; the
 * original xref_base is restored on exit, including when fn throws. */
void pdf_xref_entry_map(fz_context *ctx, pdf_document *doc, void (*fn)(fz_context *, pdf_xref_entry *, int, pdf_document *, void *), void *arg)
{
  pdf_xref_subsec *sub;
  int section, num;
  int saved_base = doc->xref_base;

  fz_try(ctx)
  {
    /* Entries of the active local xref come first. */
    if (doc->local_xref && doc->local_xref_nesting > 0)
    {
      sub = doc->local_xref->subsec;
      while (sub != NULL)
      {
        for (num = sub->start; num < sub->start + sub->len; num++)
        {
          pdf_xref_entry *entry = &sub->table[num - sub->start];
          if (entry->type)
            fn(ctx, entry, num, doc, arg);
        }
        sub = sub->next;
      }
    }

    for (section = 0; section < doc->num_xref_sections; section++)
    {
      pdf_xref *xref = &doc->xref_sections[section];

      doc->xref_base = section;

      sub = xref->subsec;
      while (sub != NULL)
      {
        for (num = sub->start; num < sub->start + sub->len; num++)
        {
          pdf_xref_entry *entry = &sub->table[num - sub->start];
          if (entry->type)
            fn(ctx, entry, num, doc, arg);
        }
        sub = sub->next;
      }
    }
  }
  fz_always(ctx)
  {
    doc->xref_base = saved_base;
  }
  fz_catch(ctx)
    fz_rethrow(ctx);
}
514
515
/*
516
  Ensure we have an incremental xref section where we can store
517
  updated versions of indirect objects. This is a new xref section
518
  consisting of a single xref subsection.
519
*/
520
static void ensure_incremental_xref(fz_context *ctx, pdf_document *doc)
521
0
{
522
  /* If there are as yet no incremental sections, or if the most recent
523
   * one has been used to sign a signature field, then we need a new one.
524
   * After a signing, any further document changes require a new increment */
525
0
  if ((doc->num_incremental_sections == 0 || doc->xref_sections[0].unsaved_sigs != NULL)
526
0
    && !doc->disallow_new_increments)
527
0
  {
528
0
    pdf_xref *xref = &doc->xref_sections[0];
529
0
    pdf_xref *pxref;
530
0
    pdf_xref_entry *new_table = fz_malloc_struct_array(ctx, xref->num_objects, pdf_xref_entry);
531
0
    pdf_xref_subsec *sub = NULL;
532
0
    pdf_obj *trailer = NULL;
533
0
    int i;
534
535
0
    fz_var(trailer);
536
0
    fz_var(sub);
537
0
    fz_try(ctx)
538
0
    {
539
0
      sub = fz_malloc_struct(ctx, pdf_xref_subsec);
540
0
      trailer = xref->trailer ? pdf_copy_dict(ctx, xref->trailer) : NULL;
541
0
      doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, pdf_xref);
542
0
      xref = &doc->xref_sections[0];
543
0
      pxref = &doc->xref_sections[1];
544
0
      memmove(pxref, xref, doc->num_xref_sections * sizeof(pdf_xref));
545
      /* xref->num_objects is already correct */
546
0
      xref->subsec = sub;
547
0
      sub = NULL;
548
0
      xref->trailer = trailer;
549
0
      xref->pre_repair_trailer = NULL;
550
0
      xref->unsaved_sigs = NULL;
551
0
      xref->unsaved_sigs_end = NULL;
552
0
      xref->subsec->next = NULL;
553
0
      xref->subsec->len = xref->num_objects;
554
0
      xref->subsec->start = 0;
555
0
      xref->subsec->table = new_table;
556
0
      doc->num_xref_sections++;
557
0
      doc->num_incremental_sections++;
558
0
    }
559
0
    fz_catch(ctx)
560
0
    {
561
0
      fz_free(ctx, sub);
562
0
      fz_free(ctx, new_table);
563
0
      pdf_drop_obj(ctx, trailer);
564
0
      fz_rethrow(ctx);
565
0
    }
566
567
    /* Update the xref_index */
568
0
    for (i = 0; i < doc->max_xref_len; i++)
569
0
    {
570
0
      doc->xref_index[i]++;
571
0
    }
572
0
  }
573
0
}
574
575
/* Used when altering a document */
576
pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i)
577
0
{
578
0
  pdf_xref *xref;
579
0
  pdf_xref_subsec *sub;
580
581
  /* Make a new final xref section if we haven't already */
582
0
  ensure_incremental_xref(ctx, doc);
583
584
0
  xref = &doc->xref_sections[doc->xref_base];
585
0
  if (i >= xref->num_objects)
586
0
    pdf_resize_xref(ctx, doc, i + 1);
587
588
0
  sub = xref->subsec;
589
0
  assert(sub != NULL && sub->next == NULL);
590
0
  assert(i >= sub->start && i < sub->start + sub->len);
591
0
  doc->xref_index[i] = 0;
592
0
  return &sub->table[i - sub->start];
593
0
}
594
595
/* Return non-zero if object num has an entry (non-zero type) in the xref
 * section at xref_base, which is expected to be a single solid
 * subsection starting at 0. Object numbers outside [0, num_objects)
 * yield 0. */
int pdf_xref_is_incremental(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref *xref = &doc->xref_sections[doc->xref_base];
  pdf_xref_subsec *sub = xref->subsec;

  assert(sub != NULL && sub->next == NULL && sub->len == xref->num_objects && sub->start == 0);

  /* Reject negative numbers too, so the table is never indexed
   * below its start. */
  return num >= 0 && num < xref->num_objects && sub->table[num].type;
}
604
605
/* Used when clearing signatures. Removes the signature
606
from the list of unsaved signed signatures. */
607
void pdf_xref_remove_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field)
608
0
{
609
0
  int num = pdf_to_num(ctx, field);
610
0
  int idx = doc->xref_index[num];
611
0
  pdf_xref *xref = &doc->xref_sections[idx];
612
0
  pdf_unsaved_sig **usigptr = &xref->unsaved_sigs;
613
0
  pdf_unsaved_sig *usig = xref->unsaved_sigs;
614
615
0
  while (usig)
616
0
  {
617
0
    pdf_unsaved_sig **nextptr = &usig->next;
618
0
    pdf_unsaved_sig *next = usig->next;
619
620
0
    if (usig->field == field)
621
0
    {
622
0
      if (xref->unsaved_sigs_end == &usig->next)
623
0
      {
624
0
        if (usig->next)
625
0
          xref->unsaved_sigs_end = &usig->next->next;
626
0
        else
627
0
          xref->unsaved_sigs_end = NULL;
628
0
      }
629
0
      if (usigptr)
630
0
        *usigptr = usig->next;
631
632
0
      usig->next = NULL;
633
0
      pdf_drop_obj(ctx, usig->field);
634
0
      pdf_drop_signer(ctx, usig->signer);
635
0
      fz_free(ctx, usig);
636
637
0
      break;
638
0
    }
639
640
0
    usig = next;
641
0
    usigptr = nextptr;
642
0
  }
643
0
}
644
645
/* Append a record for (field, signer) to the unsaved-signature list of
 * xref section 0, keeping a reference to both, so that the signature's
 * contents and byte range can be filled in with their correct values
 * at saving time. */
void pdf_xref_store_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field, pdf_pkcs7_signer *signer)
{
  pdf_xref *xref = &doc->xref_sections[0];
  pdf_unsaved_sig *node = fz_malloc_struct(ctx, pdf_unsaved_sig);

  node->field = pdf_keep_obj(ctx, field);
  node->signer = signer->keep(ctx, signer);
  node->next = NULL;

  /* A NULL end pointer means the list is empty: append at the head. */
  if (!xref->unsaved_sigs_end)
    xref->unsaved_sigs_end = &xref->unsaved_sigs;

  *xref->unsaved_sigs_end = node;
  xref->unsaved_sigs_end = &node->next;
}
663
664
/* Return 1 if obj is the field of an unsaved signature recorded in any
 * of the incremental xref sections, 0 otherwise. Fields are compared
 * by pointer. */
int pdf_xref_obj_is_unsaved_signature(pdf_document *doc, pdf_obj *obj)
{
  int s = 0;

  while (s < doc->num_incremental_sections)
  {
    pdf_unsaved_sig *sig = doc->xref_sections[s].unsaved_sigs;

    while (sig != NULL)
    {
      if (sig->field == obj)
        return 1;
      sig = sig->next;
    }
    s++;
  }

  return 0;
}
681
682
/* Make xref section 0 a single solid subsection with room for at least
 * num objects, creating an empty first section beforehand if the
 * document has no xref sections yet. */
void pdf_ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num)
{
  if (!doc->num_xref_sections)
    pdf_populate_next_xref_level(ctx, doc);

  ensure_solid_xref(ctx, doc, num, 0);
}
689
690
/* Make sure object num lives in the incremental xref section.
 *
 * An incremental section is created first if needed. The object is then
 * searched for starting from its cached section; if it is found in an
 * older section its entry is moved into the incremental section and the
 * old entry is left holding a deep copy of the object (callers may hold
 * a reference to the original and expect it to end up in the new entry).
 *
 * Returns 1 if the object was moved, 0 if it was not found, was already
 * in the incremental section, or num is negative. */
int pdf_xref_ensure_incremental_object(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref_entry *new_entry, *old_entry;
  pdf_xref_subsec *sub = NULL;
  pdf_obj *copy;
  int i;

  /* Make sure we have created an xref section for incremental updates */
  ensure_incremental_xref(ctx, doc);

  /* A negative object number can never be found, and must not be used
   * to index anything. */
  if (num < 0)
    return 0;

  /* Search for the section that contains this object. xref_index only
   * has max_xref_len slots; beyond that start from the first section. */
  i = (num < doc->max_xref_len) ? doc->xref_index[num] : 0;
  for (; i < doc->num_xref_sections; i++)
  {
    pdf_xref *xref = &doc->xref_sections[i];

    for (sub = xref->subsec; sub != NULL; sub = sub->next)
    {
      if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
        break;
    }
    if (sub != NULL)
      break;
  }
  /* sub == NULL implies we did not find it */

  /* If we don't find it, or it's already in the incremental section, return */
  if (i == 0 || sub == NULL)
    return 0;

  /* Move the object to the incremental section */
  if (num < doc->max_xref_len)
    doc->xref_index[num] = 0;
  old_entry = &sub->table[num - sub->start];
  new_entry = pdf_get_incremental_xref_entry(ctx, doc, num);

  /* Take the copy before touching either entry: if copying throws,
   * no entry has been modified and nothing is owned twice. */
  copy = pdf_deep_copy_obj(ctx, old_entry->obj);

  /* The new entry takes over the original object and stream buffer;
   * the old entry keeps the copy. */
  *new_entry = *old_entry;
  old_entry->obj = copy;
  old_entry->stm_buf = NULL;

  return 1;
}
733
734
/* Make sure object num has an entry in the local xref.
 *
 * If the local xref already holds a typed entry for num nothing happens.
 * Otherwise the object is searched for in the document's xref sections,
 * starting from its cached section; if found, its entry is duplicated
 * into the local xref. The local entry receives the original object and
 * the document entry is left with a deep copy (callers may hold a
 * reference to the original and expect it to end up in the new entry).
 * The stream buffer is not carried over to the local entry.
 *
 * Throws if there is no local xref, or if the local xref is not active
 * at the point the entry has to be created. Negative object numbers
 * and objects that cannot be found are ignored. */
void pdf_xref_ensure_local_object(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref_entry *new_entry, *old_entry;
  pdf_xref_subsec *sub = NULL;
  int i;
  pdf_xref *xref;
  pdf_obj *copy;

  xref = doc->local_xref;
  if (xref == NULL)
    fz_throw(ctx, FZ_ERROR_GENERIC, "Local xref not present!");

  /* A negative object number can never be found, and must not be used
   * to index anything. */
  if (num < 0)
    return;

  /* Is it in the local section already? */
  for (sub = xref->subsec; sub != NULL; sub = sub->next)
  {
    if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
      break;
  }
  /* If we found it, it's in the local section already. */
  if (sub != NULL)
    return;

  /* Search for the section that contains this object. xref_index only
   * has max_xref_len slots; beyond that start from the first section. */
  i = (num < doc->max_xref_len) ? doc->xref_index[num] : 0;
  for (; i < doc->num_xref_sections; i++)
  {
    xref = &doc->xref_sections[i];

    for (sub = xref->subsec; sub != NULL; sub = sub->next)
    {
      if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
        break;
    }
    if (sub != NULL)
      break;
  }
  /* sub == NULL implies we did not find it */
  if (sub == NULL)
    return; /* No object to find */

  /* Copy the object to the local section */
  if (num < doc->max_xref_len)
    doc->xref_index[num] = 0;
  old_entry = &sub->table[num - sub->start];
  new_entry = pdf_get_local_xref_entry(ctx, doc, num);

  /* old entry is incremental and may have changes.
   * Better keep a copy. Take it before writing the new entry, so that
   * a failed copy leaves the local xref untouched. */
  copy = pdf_deep_copy_obj(ctx, old_entry->obj);

  /* We must override the old entry with the copy because the caller
   * may be holding a reference to the original and expect it to end
   * up in the new entry. */
  *new_entry = *old_entry;
  new_entry->obj = old_entry->obj;
  old_entry->obj = copy;
  new_entry->stm_buf = NULL; /* FIXME */
}
788
789
/* Replace all xref sections of the document by one section made of a
 * single subsection whose table is 'entries' (n entries, starting at
 * object 0). The current trailer is kept and carried over. The xref
 * index is replaced by a fresh zeroed one of n slots, and the
 * incremental-section, xref_base and disallow_new_increments state is
 * reset. */
void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n)
{
  int *fresh_index = NULL;
  pdf_xref *section = NULL;
  pdf_xref_subsec *sub;

  fz_var(fresh_index);
  fz_var(section);

  /* Do every allocation up front so that nothing below can fail. */
  fz_try(ctx)
  {
    fresh_index = fz_calloc(ctx, n, sizeof(int));
    section = fz_malloc_struct(ctx, pdf_xref);
    sub = fz_malloc_struct(ctx, pdf_xref_subsec);
  }
  fz_catch(ctx)
  {
    fz_free(ctx, section);
    fz_free(ctx, fresh_index);
    fz_rethrow(ctx);
  }

  sub->table = entries;
  sub->start = 0;
  sub->len = n;

  section->subsec = sub;
  section->num_objects = n;
  /* Grab the trailer before the old sections go away. */
  section->trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));

  /* The new table completely replaces the previous separate sections */
  pdf_drop_xref_sections(ctx, doc);

  doc->xref_sections = section;
  doc->num_xref_sections = 1;
  doc->num_incremental_sections = 0;
  doc->xref_base = 0;
  doc->disallow_new_increments = 0;
  doc->max_xref_len = n;

  fz_free(ctx, doc->xref_index);
  doc->xref_index = fresh_index;
}
832
833
/* Set the current xref sections aside as the document's saved xref
 * sections (dropping any previously saved ones), reset the xref state,
 * and start a fresh first section that inherits the current trailer.
 * Local xref and resources are dropped as well. */
void pdf_forget_xref(fz_context *ctx, pdf_document *doc)
{
  /* Hold on to the trailer; it is re-attached at the end. */
  pdf_obj *kept_trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));

  pdf_drop_local_xref_and_resources(ctx, doc);

  if (doc->saved_xref_sections)
    pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);

  doc->saved_xref_sections = doc->xref_sections;
  doc->saved_num_xref_sections = doc->num_xref_sections;

  doc->xref_sections = NULL;
  doc->num_xref_sections = 0;
  doc->num_incremental_sections = 0;
  doc->startxref = 0;
  doc->xref_base = 0;
  doc->disallow_new_increments = 0;

  fz_try(ctx)
  {
    /* Asking for entry 0 creates the new first section. */
    pdf_get_populating_xref_entry(ctx, doc, 0);
  }
  fz_catch(ctx)
  {
    pdf_drop_obj(ctx, kept_trailer);
    fz_rethrow(ctx);
  }

  /* Set the trailer of the final xref section. */
  doc->xref_sections[0].trailer = kept_trailer;
}
865
866
/*
867
 * magic version tag and startxref
868
 */
869
870
int
871
pdf_version(fz_context *ctx, pdf_document *doc)
872
0
{
873
0
  int version = doc->version;
874
0
  fz_try(ctx)
875
0
  {
876
0
    pdf_obj *obj = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Version), NULL);
877
0
    const char *str = pdf_to_name(ctx, obj);
878
0
    if (*str)
879
0
      version = 10 * (fz_atof(str) + 0.05f);
880
0
  }
881
0
  fz_catch(ctx)
882
0
  {
883
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
884
0
    fz_warn(ctx, "Ignoring broken Root/Version number.");
885
0
  }
886
0
  return version;
887
0
}
888
889
static void
890
pdf_load_version(fz_context *ctx, pdf_document *doc)
891
14.7k
{
892
14.7k
  char buf[20];
893
894
14.7k
  fz_seek(ctx, doc->file, 0, SEEK_SET);
895
14.7k
  fz_read_line(ctx, doc->file, buf, sizeof buf);
896
14.7k
  if (strlen(buf) < 5 || memcmp(buf, "%PDF-", 5) != 0)
897
8.86k
    fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize version marker");
898
899
5.86k
  doc->version = 10 * (fz_atof(buf+5) + 0.05f);
900
5.86k
  if (doc->version < 10 || doc->version > 17)
901
559
    if (doc->version != 20)
902
556
      fz_warn(ctx, "unknown PDF version: %d.%d", doc->version / 10, doc->version % 10);
903
5.86k
}
904
905
static void
906
pdf_read_start_xref(fz_context *ctx, pdf_document *doc)
907
5.86k
{
908
5.86k
  unsigned char buf[1024];
909
5.86k
  size_t i, n;
910
5.86k
  int64_t t;
911
912
5.86k
  fz_seek(ctx, doc->file, 0, SEEK_END);
913
914
5.86k
  doc->file_size = fz_tell(ctx, doc->file);
915
916
5.86k
  t = fz_maxi64(0, doc->file_size - (int64_t)sizeof buf);
917
5.86k
  fz_seek(ctx, doc->file, t, SEEK_SET);
918
919
5.86k
  n = fz_read(ctx, doc->file, buf, sizeof buf);
920
5.86k
  if (n < 9)
921
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find startxref");
922
923
5.86k
  i = n - 9;
924
5.86k
  do
925
2.91M
  {
926
2.91M
    if (memcmp(buf + i, "startxref", 9) == 0)
927
2.72k
    {
928
2.72k
      i += 9;
929
6.45k
      while (i < n && iswhite(buf[i]))
930
3.72k
        i ++;
931
2.72k
      doc->startxref = 0;
932
14.9k
      while (i < n && isdigit(buf[i]))
933
12.1k
      {
934
12.1k
        if (doc->startxref >= INT64_MAX/10)
935
3
          fz_throw(ctx, FZ_ERROR_GENERIC, "startxref too large");
936
12.1k
        doc->startxref = doc->startxref * 10 + (buf[i++] - '0');
937
12.1k
      }
938
2.72k
      if (doc->startxref != 0)
939
2.67k
        return;
940
47
      break;
941
2.72k
    }
942
2.91M
  } while (i-- > 0);
943
944
3.18k
  fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find startxref");
945
5.86k
}
946
947
/*
 * Consume bytes from the stream for as long as the next byte has a
 * value of 32 or less. Stops (without consuming) at EOF or at the
 * first byte above 32.
 */
void fz_skip_space(fz_context *ctx, fz_stream *stm)
{
  for (;;)
  {
    int next = fz_peek_byte(ctx, stm);
    if (next == EOF || next > 32)
      break;
    (void)fz_read_byte(ctx, stm);
  }
}
958
959
/*
 * Try to consume the literal 'str' from the stream.
 *
 * Returns 0 if every character of 'str' was matched and consumed.
 * Returns 1 at the first mismatch or EOF; the mismatching byte is
 * left unread, but any bytes that matched before it stay consumed.
 *
 * The pattern character is compared as an unsigned char so that
 * pattern bytes >= 0x80 can match on platforms where plain char is
 * signed (the peeked byte is presumably delivered as a non-negative
 * int, with EOF as the only negative value - confirm against
 * fz_peek_byte).
 */
int fz_skip_string(fz_context *ctx, fz_stream *stm, const char *str)
{
  while (*str)
  {
    int c = fz_peek_byte(ctx, stm);
    if (c == EOF || c != (unsigned char)*str++)
      return 1;
    (void)fz_read_byte(ctx, stm);
  }
  return 0;
}
970
971
/*
972
 * trailer dictionary
973
 */
974
975
static int
976
pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc)
977
724
{
978
724
  int len;
979
724
  char *s;
980
724
  int64_t t;
981
724
  pdf_token tok;
982
724
  int c;
983
724
  int size = 0;
984
724
  int64_t ofs;
985
724
  pdf_obj *trailer = NULL;
986
724
  size_t n;
987
724
  pdf_lexbuf *buf = &doc->lexbuf.base;
988
724
  pdf_obj *obj = NULL;
989
990
724
  fz_var(trailer);
991
992
  /* Record the current file read offset so that we can reinstate it */
993
724
  ofs = fz_tell(ctx, doc->file);
994
995
724
  fz_skip_space(ctx, doc->file);
996
724
  if (fz_skip_string(ctx, doc->file, "xref"))
997
5
    fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker");
998
719
  fz_skip_space(ctx, doc->file);
999
1000
3.15k
  while (1)
1001
3.15k
  {
1002
3.15k
    c = fz_peek_byte(ctx, doc->file);
1003
3.15k
    if (!isdigit(c))
1004
710
      break;
1005
1006
2.44k
    fz_read_line(ctx, doc->file, buf->scratch, buf->size);
1007
2.44k
    s = buf->scratch;
1008
2.44k
    fz_strsep(&s, " "); /* ignore start */
1009
2.44k
    if (!s)
1010
5
      fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length missing");
1011
2.44k
    len = fz_atoi(fz_strsep(&s, " "));
1012
2.44k
    if (len < 0)
1013
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length must be positive");
1014
1015
    /* broken pdfs where the section is not on a separate line */
1016
2.44k
    if (s && *s != '\0')
1017
80
      fz_seek(ctx, doc->file, -(2 + (int)strlen(s)), SEEK_CUR);
1018
1019
2.44k
    t = fz_tell(ctx, doc->file);
1020
2.44k
    if (t < 0)
1021
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
1022
1023
    /* Spec says xref entries should be 20 bytes, but it's not infrequent
1024
     * to see 19, in particular for some PCLm drivers. Cope. */
1025
2.44k
    if (len > 0)
1026
2.30k
    {
1027
2.30k
      n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, 20);
1028
2.30k
      if (n < 19)
1029
4
        fz_throw(ctx, FZ_ERROR_GENERIC, "malformed xref table");
1030
2.29k
      if (n == 20 && buf->scratch[19] > 32)
1031
95
        n = 19;
1032
2.29k
    }
1033
138
    else
1034
138
      n = 20;
1035
1036
2.43k
    if (len > (int64_t)((INT64_MAX - t) / n))
1037
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "xref has too many entries");
1038
1039
2.43k
    fz_seek(ctx, doc->file, t + n * (int64_t)len, SEEK_SET);
1040
2.43k
  }
1041
1042
1.42k
  fz_try(ctx)
1043
1.42k
  {
1044
710
    tok = pdf_lex(ctx, doc->file, buf);
1045
710
    if (tok != PDF_TOK_TRAILER)
1046
30
      fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer marker");
1047
1048
680
    tok = pdf_lex(ctx, doc->file, buf);
1049
680
    if (tok != PDF_TOK_OPEN_DICT)
1050
2
      fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer dictionary");
1051
1052
678
    trailer = pdf_parse_dict(ctx, doc, doc->file, buf);
1053
1054
678
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
1055
678
    if (pdf_is_indirect(ctx, obj))
1056
3
      fz_throw(ctx, FZ_ERROR_GENERIC, "trailer Size entry is indirect");
1057
1058
675
    size = pdf_dict_get_int(ctx, trailer, PDF_NAME(Size));
1059
675
    if (size < 0 || size > PDF_MAX_OBJECT_NUMBER + 1)
1060
1
      fz_throw(ctx, FZ_ERROR_GENERIC, "trailer Size entry out of range");
1061
675
  }
1062
1.42k
  fz_always(ctx)
1063
710
  {
1064
710
    pdf_drop_obj(ctx, trailer);
1065
710
  }
1066
710
  fz_catch(ctx)
1067
38
  {
1068
38
    fz_rethrow(ctx);
1069
38
  }
1070
1071
636
  fz_seek(ctx, doc->file, ofs, SEEK_SET);
1072
1073
636
  return size;
1074
674
}
1075
1076
static pdf_xref_entry *
1077
pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, int start, int len)
1078
3.14k
{
1079
3.14k
  pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-1];
1080
3.14k
  pdf_xref_subsec *sub, *extend = NULL;
1081
3.14k
  int num_objects;
1082
3.14k
  int solidify = 0;
1083
1084
3.14k
  if (len == 0)
1085
28
    return NULL;
1086
1087
  /* Different cases here.
1088
   * Case 1) We might be asking for a subsection (or a subset of a
1089
   *         subsection) that we already have - Just return it.
1090
   * Case 2) We might be asking for a subsection that overlaps (or
1091
   *         extends) a subsection we already have - extend the existing one.
1092
   * Case 3) We might be asking for a subsection that overlaps multiple
1093
   *         existing subsections - solidify the whole set.
1094
   * Case 4) We might be asking for a completely new subsection - just
1095
   *         allocate it.
1096
   */
1097
1098
  /* Sanity check */
1099
19.3k
  for (sub = xref->subsec; sub != NULL; sub = sub->next)
1100
16.2k
  {
1101
16.2k
    if (start >= sub->start && start <= sub->start + sub->len)
1102
954
    {
1103
      /* 'start' is in (or immediately after) 'sub' */
1104
954
      if (start + len <= sub->start + sub->len)
1105
17
      {
1106
        /* And so is start+len-1 - just return this! Case 1. */
1107
17
        return &sub->table[start-sub->start];
1108
17
      }
1109
      /* So we overlap with sub. */
1110
937
      if (extend == NULL)
1111
937
      {
1112
        /* Maybe we can extend sub? */
1113
937
        extend = sub;
1114
937
      }
1115
0
      else
1116
0
      {
1117
        /* OK, so we've already found an overlapping one. We'll need to solidify. Case 3. */
1118
0
        solidify = 1;
1119
0
        break;
1120
0
      }
1121
937
    }
1122
15.3k
    else if (start + len > sub->start && start + len < sub->start + sub->len)
1123
0
    {
1124
      /* The end of the start+len range is in 'sub'. */
1125
      /* For now, we won't support extending sub backwards. Just take this as
1126
       * needing to solidify. Case 3. */
1127
0
      solidify = 1;
1128
0
      break;
1129
0
    }
1130
15.3k
    else if (start < sub->start && start + len >= sub->start + sub->len)
1131
3
    {
1132
      /* The end of the start+len range is beyond 'sub'. */
1133
      /* For now, we won't support extending sub backwards. Just take this as
1134
       * needing to solidify. Another variant of case 3. */
1135
3
      solidify = 1;
1136
3
      break;
1137
3
    }
1138
16.2k
  }
1139
1140
3.10k
  num_objects = xref->num_objects;
1141
3.10k
  if (num_objects < start + len)
1142
3.06k
    num_objects = start + len;
1143
1144
3.10k
  if (solidify)
1145
3
  {
1146
    /* Case 3: Solidify the xref */
1147
3
    ensure_solid_xref(ctx, doc, num_objects, doc->num_xref_sections-1);
1148
3
    xref = &doc->xref_sections[doc->num_xref_sections-1];
1149
3
    sub = xref->subsec;
1150
3
  }
1151
3.10k
  else if (extend)
1152
935
  {
1153
    /* Case 2: Extend the subsection */
1154
935
    int newlen = start + len - extend->start;
1155
935
    sub = extend;
1156
935
    sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
1157
935
    memset(&sub->table[sub->len], 0, sizeof(pdf_xref_entry) * (newlen - sub->len));
1158
935
    sub->len = newlen;
1159
935
    if (xref->num_objects < sub->start + sub->len)
1160
918
      xref->num_objects = sub->start + sub->len;
1161
935
    if (doc->max_xref_len < sub->start + sub->len)
1162
893
      extend_xref_index(ctx, doc, sub->start + sub->len);
1163
935
  }
1164
2.16k
  else
1165
2.16k
  {
1166
    /* Case 4 */
1167
2.16k
    sub = fz_malloc_struct(ctx, pdf_xref_subsec);
1168
4.33k
    fz_try(ctx)
1169
4.33k
    {
1170
2.16k
      sub->table = fz_malloc_struct_array(ctx, len, pdf_xref_entry);
1171
2.16k
      sub->start = start;
1172
2.16k
      sub->len = len;
1173
2.16k
      sub->next = xref->subsec;
1174
2.16k
      xref->subsec = sub;
1175
2.16k
    }
1176
4.33k
    fz_catch(ctx)
1177
0
    {
1178
0
      fz_free(ctx, sub);
1179
0
      fz_rethrow(ctx);
1180
0
    }
1181
2.16k
    if (xref->num_objects < num_objects)
1182
2.14k
      xref->num_objects = num_objects;
1183
2.16k
    if (doc->max_xref_len < num_objects)
1184
1.47k
      extend_xref_index(ctx, doc, num_objects);
1185
2.16k
  }
1186
3.10k
  return &sub->table[start-sub->start];
1187
3.10k
}
1188
1189
static inline void
1190
validate_object_number_range(fz_context *ctx, int first, int len, const char *what)
1191
35.3k
{
1192
35.3k
  if (first < 0 || first > PDF_MAX_OBJECT_NUMBER)
1193
159
    fz_throw(ctx, FZ_ERROR_GENERIC, "first object number in %s out of range", what);
1194
35.1k
  if (len < 0 || len > PDF_MAX_OBJECT_NUMBER)
1195
1
    fz_throw(ctx, FZ_ERROR_GENERIC, "number of objects in %s out of range", what);
1196
35.1k
  if (len > 0 && len - 1 > PDF_MAX_OBJECT_NUMBER - first)
1197
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "last object number in %s out of range", what);
1198
35.1k
}
1199
1200
static pdf_obj *
1201
pdf_read_old_xref(fz_context *ctx, pdf_document *doc)
1202
724
{
1203
724
  int start, len, c, i, xref_len, carried;
1204
724
  fz_stream *file = doc->file;
1205
724
  pdf_xref_entry *table;
1206
724
  pdf_token tok;
1207
724
  size_t n;
1208
724
  char *s, *e;
1209
724
  pdf_lexbuf *buf = &doc->lexbuf.base;
1210
1211
724
  xref_len = pdf_xref_size_from_old_trailer(ctx, doc);
1212
1213
724
  fz_skip_space(ctx, doc->file);
1214
724
  if (fz_skip_string(ctx, doc->file, "xref"))
1215
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker");
1216
724
  fz_skip_space(ctx, doc->file);
1217
1218
2.81k
  while (1)
1219
2.76k
  {
1220
2.76k
    c = fz_peek_byte(ctx, file);
1221
2.76k
    if (!isdigit(c))
1222
633
      break;
1223
1224
2.13k
    fz_read_line(ctx, file, buf->scratch, buf->size);
1225
2.13k
    s = buf->scratch;
1226
2.13k
    start = fz_atoi(fz_strsep(&s, " "));
1227
2.13k
    len = fz_atoi(fz_strsep(&s, " "));
1228
1229
    /* broken pdfs where the section is not on a separate line */
1230
2.13k
    if (s && *s != '\0')
1231
7
    {
1232
7
      fz_warn(ctx, "broken xref subsection. proceeding anyway.");
1233
7
      fz_seek(ctx, file, -(2 + (int)strlen(s)), SEEK_CUR);
1234
7
    }
1235
1236
2.13k
    validate_object_number_range(ctx, start, len, "xref subsection");
1237
1238
    /* broken pdfs where size in trailer undershoots entries in xref sections */
1239
2.13k
    if (start + len > xref_len)
1240
1.10k
    {
1241
1.10k
      fz_warn(ctx, "broken xref subsection, proceeding anyway.");
1242
1.10k
    }
1243
1244
2.13k
    table = pdf_xref_find_subsection(ctx, doc, start, len);
1245
1246
    /* Xref entries SHOULD be 20 bytes long, but we see 19 byte
1247
     * ones more frequently than we'd like (e.g. PCLm drivers).
1248
     * Cope with this by 'carrying' data forward. */
1249
2.13k
    carried = 0;
1250
29.4k
    for (i = 0; i < len; i++)
1251
27.3k
    {
1252
27.3k
      pdf_xref_entry *entry = &table[i];
1253
27.3k
      n = fz_read(ctx, file, (unsigned char *) buf->scratch + carried, 20-carried);
1254
27.3k
      if (n != (size_t)(20-carried))
1255
0
        fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected EOF in xref table");
1256
27.3k
      n += carried;
1257
27.3k
      buf->scratch[n] = '\0';
1258
27.3k
      if (!entry->type)
1259
27.2k
      {
1260
27.2k
        s = buf->scratch;
1261
27.2k
        e = s + n;
1262
1263
27.2k
        entry->num = start + i;
1264
1265
        /* broken pdfs where line start with white space */
1266
27.4k
        while (s < e && iswhite(*s))
1267
152
          s++;
1268
1269
27.2k
        if (s == e || !isdigit(*s))
1270
8
          fz_throw(ctx, FZ_ERROR_GENERIC, "xref offset missing");
1271
299k
        while (s < e && isdigit(*s))
1272
272k
          entry->ofs = entry->ofs * 10 + *s++ - '0';
1273
1274
54.5k
        while (s < e && iswhite(*s))
1275
27.3k
          s++;
1276
27.2k
        if (s == e || !isdigit(*s))
1277
15
          fz_throw(ctx, FZ_ERROR_GENERIC, "xref generation number missing");
1278
163k
        while (s < e && isdigit(*s))
1279
136k
          entry->gen = entry->gen * 10 + *s++ - '0';
1280
1281
54.5k
        while (s < e && iswhite(*s))
1282
27.2k
          s++;
1283
27.2k
        if (s == e || (*s != 'f' && *s != 'n' && *s != 'o'))
1284
16
          fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: 0x%x (%d %d R)", s == e ? 0 : *s, entry->num, entry->gen);
1285
27.2k
        entry->type = *s++;
1286
1287
        /* If the last byte of our buffer isn't an EOL (or space), carry one byte forward */
1288
27.2k
        carried = buf->scratch[19] > 32;
1289
27.2k
        if (carried)
1290
40
          buf->scratch[0] = buf->scratch[19];
1291
27.2k
      }
1292
27.3k
    }
1293
2.09k
    if (carried)
1294
11
      fz_unread_byte(ctx, file);
1295
2.09k
  }
1296
1297
685
  tok = pdf_lex(ctx, file, buf);
1298
685
  if (tok != PDF_TOK_TRAILER)
1299
2
    fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer marker");
1300
1301
683
  tok = pdf_lex(ctx, file, buf);
1302
683
  if (tok != PDF_TOK_OPEN_DICT)
1303
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer dictionary");
1304
1305
683
  doc->last_xref_was_old_style = 1;
1306
1307
683
  return pdf_parse_dict(ctx, doc, file, buf);
1308
683
}
1309
1310
static void
1311
pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
1312
1.02k
{
1313
1.02k
  pdf_xref_entry *table;
1314
1.02k
  int i, n;
1315
1316
1.02k
  validate_object_number_range(ctx, i0, i1, "xref subsection");
1317
1318
1.02k
  table = pdf_xref_find_subsection(ctx, doc, i0, i1);
1319
114k
  for (i = i0; i < i0 + i1; i++)
1320
113k
  {
1321
113k
    pdf_xref_entry *entry = &table[i-i0];
1322
113k
    int a = 0;
1323
113k
    int64_t b = 0;
1324
113k
    int c = 0;
1325
1326
113k
    if (fz_is_eof(ctx, stm))
1327
38
      fz_throw(ctx, FZ_ERROR_GENERIC, "truncated xref stream");
1328
1329
185k
    for (n = 0; n < w0; n++)
1330
72.0k
      a = (a << 8) + fz_read_byte(ctx, stm);
1331
275k
    for (n = 0; n < w1; n++)
1332
162k
      b = (b << 8) + fz_read_byte(ctx, stm);
1333
3.02M
    for (n = 0; n < w2; n++)
1334
2.91M
      c = (c << 8) + fz_read_byte(ctx, stm);
1335
1336
113k
    if (!entry->type)
1337
113k
    {
1338
113k
      int t = w0 ? a : 1;
1339
113k
      entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
1340
113k
      entry->ofs = w1 ? b : 0;
1341
113k
      entry->gen = w2 ? c : 0;
1342
113k
      entry->num = i;
1343
113k
    }
1344
113k
  }
1345
1346
982
  doc->last_xref_was_old_style = 0;
1347
982
}
1348
1349
/* Entered with file locked, remains locked throughout. */
1350
static pdf_obj *
1351
pdf_read_new_xref(fz_context *ctx, pdf_document *doc)
1352
900
{
1353
900
  fz_stream *stm = NULL;
1354
900
  pdf_obj *trailer = NULL;
1355
900
  pdf_obj *index = NULL;
1356
900
  pdf_obj *obj = NULL;
1357
900
  int gen, num = 0;
1358
900
  int64_t ofs, stm_ofs;
1359
900
  int size, w0, w1, w2;
1360
900
  int t;
1361
1362
900
  fz_var(trailer);
1363
900
  fz_var(stm);
1364
1365
1.80k
  fz_try(ctx)
1366
1.80k
  {
1367
900
    ofs = fz_tell(ctx, doc->file);
1368
900
    trailer = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stm_ofs, NULL);
1369
900
    if (num == 0)
1370
1
      fz_throw(ctx, FZ_ERROR_GENERIC, "Trailer object number cannot be 0\n");
1371
900
  }
1372
1.80k
  fz_catch(ctx)
1373
186
  {
1374
186
    pdf_drop_obj(ctx, trailer);
1375
186
    fz_rethrow(ctx);
1376
186
  }
1377
1378
1.42k
  fz_try(ctx)
1379
1.42k
  {
1380
714
    pdf_xref_entry *entry;
1381
1382
714
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
1383
714
    if (!obj)
1384
6
      fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream missing Size entry (%d 0 R)", num);
1385
1386
708
    size = pdf_to_int(ctx, obj);
1387
1388
708
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(W));
1389
708
    if (!obj)
1390
1
      fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream missing W entry (%d  R)", num);
1391
1392
707
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 0)))
1393
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream object type field width an indirect object");
1394
707
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 1)))
1395
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream object field 2 width an indirect object");
1396
707
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 2)))
1397
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream object field 3 width an indirect object");
1398
1399
707
    if (doc->file_reading_linearly && pdf_dict_get(ctx, trailer, PDF_NAME(Encrypt)))
1400
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "Cannot read linearly with encryption");
1401
1402
707
    w0 = pdf_array_get_int(ctx, obj, 0);
1403
707
    w1 = pdf_array_get_int(ctx, obj, 1);
1404
707
    w2 = pdf_array_get_int(ctx, obj, 2);
1405
1406
707
    if (w0 < 0)
1407
3
      fz_warn(ctx, "xref stream objects have corrupt type");
1408
707
    if (w1 < 0)
1409
0
      fz_warn(ctx, "xref stream objects have corrupt offset");
1410
707
    if (w2 < 0)
1411
0
      fz_warn(ctx, "xref stream objects have corrupt generation");
1412
1413
707
    w0 = w0 < 0 ? 0 : w0;
1414
707
    w1 = w1 < 0 ? 0 : w1;
1415
707
    w2 = w2 < 0 ? 0 : w2;
1416
1417
707
    index = pdf_dict_get(ctx, trailer, PDF_NAME(Index));
1418
1419
707
    stm = pdf_open_stream_with_offset(ctx, doc, num, trailer, stm_ofs);
1420
1421
707
    if (!index)
1422
240
    {
1423
240
      pdf_read_new_xref_section(ctx, doc, stm, 0, size, w0, w1, w2);
1424
240
    }
1425
467
    else
1426
467
    {
1427
467
      int n = pdf_array_len(ctx, index);
1428
1.24k
      for (t = 0; t < n; t += 2)
1429
780
      {
1430
780
        int i0 = pdf_array_get_int(ctx, index, t + 0);
1431
780
        int i1 = pdf_array_get_int(ctx, index, t + 1);
1432
780
        pdf_read_new_xref_section(ctx, doc, stm, i0, i1, w0, w1, w2);
1433
780
      }
1434
467
    }
1435
707
    entry = pdf_get_populating_xref_entry(ctx, doc, num);
1436
707
    entry->ofs = ofs;
1437
707
    entry->gen = gen;
1438
707
    entry->num = num;
1439
707
    entry->stm_ofs = stm_ofs;
1440
707
    pdf_drop_obj(ctx, entry->obj);
1441
707
    entry->obj = pdf_keep_obj(ctx, trailer);
1442
707
    entry->type = 'n';
1443
707
    pdf_set_obj_parent(ctx, trailer, num);
1444
707
  }
1445
1.42k
  fz_always(ctx)
1446
714
  {
1447
714
    fz_drop_stream(ctx, stm);
1448
714
  }
1449
714
  fz_catch(ctx)
1450
49
  {
1451
49
    pdf_drop_obj(ctx, trailer);
1452
49
    fz_rethrow(ctx);
1453
49
  }
1454
1455
657
  return trailer;
1456
706
}
1457
1458
static pdf_obj *
1459
pdf_read_xref(fz_context *ctx, pdf_document *doc, int64_t ofs)
1460
3.21k
{
1461
3.21k
  pdf_obj *trailer;
1462
3.21k
  int c;
1463
1464
3.21k
  fz_seek(ctx, doc->file, ofs, SEEK_SET);
1465
1466
10.7k
  while (iswhite(fz_peek_byte(ctx, doc->file)))
1467
7.53k
    fz_read_byte(ctx, doc->file);
1468
1469
3.21k
  c = fz_peek_byte(ctx, doc->file);
1470
3.21k
  if (c == 'x')
1471
724
    trailer = pdf_read_old_xref(ctx, doc);
1472
2.48k
  else if (isdigit(c))
1473
900
    trailer = pdf_read_new_xref(ctx, doc);
1474
1.58k
  else
1475
1.58k
    fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize xref format");
1476
1477
1.62k
  return trailer;
1478
3.21k
}
1479
1480
static int64_t
1481
read_xref_section(fz_context *ctx, pdf_document *doc, int64_t ofs)
1482
3.18k
{
1483
3.18k
  pdf_obj *trailer = NULL;
1484
3.18k
  pdf_obj *prevobj;
1485
3.18k
  int64_t xrefstmofs = 0;
1486
3.18k
  int64_t prevofs = 0;
1487
1488
3.18k
  trailer = pdf_read_xref(ctx, doc, ofs);
1489
3.18k
  fz_try(ctx)
1490
2.54k
  {
1491
1.27k
    pdf_set_populating_xref_trailer(ctx, doc, trailer);
1492
1493
    /* FIXME: do we overwrite free entries properly? */
1494
    /* FIXME: Does this work properly with progression? */
1495
1.27k
    xrefstmofs = pdf_to_int64(ctx, pdf_dict_get(ctx, trailer, PDF_NAME(XRefStm)));
1496
1.27k
    if (xrefstmofs)
1497
26
    {
1498
26
      if (xrefstmofs < 0)
1499
0
        fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream offset");
1500
1501
      /*
1502
        Read the XRefStm stream, but throw away the resulting trailer. We do not
1503
        follow any Prev tag therein, as specified on Page 108 of the PDF reference
1504
        1.7
1505
      */
1506
26
      pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs));
1507
26
    }
1508
1509
1.27k
    prevobj = pdf_dict_get(ctx, trailer, PDF_NAME(Prev));
1510
1.27k
    if (pdf_is_int(ctx, prevobj))
1511
509
    {
1512
509
      prevofs = pdf_to_int64(ctx, prevobj);
1513
509
      if (prevofs <= 0)
1514
1
        fz_throw(ctx, FZ_ERROR_GENERIC, "invalid offset for previous xref section");
1515
509
    }
1516
1.27k
  }
1517
2.54k
  fz_always(ctx)
1518
1.27k
    pdf_drop_obj(ctx, trailer);
1519
1.27k
  fz_catch(ctx)
1520
4
    fz_rethrow(ctx);
1521
1522
3.18k
  return prevofs;
1523
3.18k
}
1524
1525
static void
1526
pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, int64_t ofs, int read_previous)
1527
2.67k
{
1528
2.67k
  int i, len, cap;
1529
2.67k
  int64_t *offsets;
1530
2.67k
  int populated = 0;
1531
1532
2.67k
  len = 0;
1533
2.67k
  cap = 10;
1534
2.67k
  offsets = fz_malloc_array(ctx, cap, int64_t);
1535
1536
2.67k
  fz_var(populated);
1537
1538
5.35k
  fz_try(ctx)
1539
5.35k
  {
1540
5.86k
    while(ofs)
1541
3.18k
    {
1542
3.91k
      for (i = 0; i < len; i ++)
1543
732
      {
1544
732
        if (offsets[i] == ofs)
1545
0
          break;
1546
732
      }
1547
3.18k
      if (i < len)
1548
0
      {
1549
0
        fz_warn(ctx, "ignoring xref section recursion at offset %d", (int)ofs);
1550
0
        break;
1551
0
      }
1552
3.18k
      if (len == cap)
1553
0
      {
1554
0
        cap *= 2;
1555
0
        offsets = fz_realloc_array(ctx, offsets, cap, int64_t);
1556
0
      }
1557
3.18k
      offsets[len++] = ofs;
1558
1559
3.18k
      pdf_populate_next_xref_level(ctx, doc);
1560
3.18k
      populated = 1;
1561
3.18k
      ofs = read_xref_section(ctx, doc, ofs);
1562
3.18k
      if (!read_previous)
1563
0
        break;
1564
3.18k
    }
1565
1566
    /* For pathological files, such as chinese-example.pdf, where the original
1567
     * xref in the file is highly fragmented, we can safely solidify it here
1568
     * with no ill effects. */
1569
2.67k
    ensure_solid_xref(ctx, doc, 0, doc->num_xref_sections-1);
1570
2.67k
  }
1571
5.35k
  fz_always(ctx)
1572
2.67k
  {
1573
2.67k
    fz_free(ctx, offsets);
1574
2.67k
  }
1575
2.67k
  fz_catch(ctx)
1576
1.91k
  {
1577
    /* Undo pdf_populate_next_xref_level if we've done that already. */
1578
1.91k
    if (populated)
1579
1.91k
    {
1580
1.91k
      pdf_drop_xref_subsec(ctx, &doc->xref_sections[doc->num_xref_sections - 1]);
1581
1.91k
      doc->num_xref_sections--;
1582
1.91k
    }
1583
1.91k
    fz_rethrow(ctx);
1584
1.91k
  }
1585
2.67k
}
1586
1587
static void
1588
pdf_prime_xref_index(fz_context *ctx, pdf_document *doc)
1589
14.4k
{
1590
14.4k
  int i, j;
1591
14.4k
  int *idx = doc->xref_index;
1592
1593
29.2k
  for (i = doc->num_xref_sections-1; i >= 0; i--)
1594
14.7k
  {
1595
14.7k
    pdf_xref *xref = &doc->xref_sections[i];
1596
14.7k
    pdf_xref_subsec *subsec = xref->subsec;
1597
29.9k
    while (subsec != NULL)
1598
15.1k
    {
1599
15.1k
      int start = subsec->start;
1600
15.1k
      int end = subsec->start + subsec->len;
1601
32.8M
      for (j = start; j < end; j++)
1602
32.8M
      {
1603
32.8M
        char t = subsec->table[j-start].type;
1604
32.8M
        if (t != 0 && t != 'f')
1605
319k
          idx[j] = i;
1606
32.8M
      }
1607
1608
15.1k
      subsec = subsec->next;
1609
15.1k
    }
1610
14.7k
  }
1611
14.4k
}
1612
1613
static void
1614
check_xref_entry_offsets(fz_context *ctx, pdf_xref_entry *entry, int i, pdf_document *doc, void *arg)
1615
113k
{
1616
113k
  int xref_len = (int)(intptr_t)arg;
1617
1618
113k
  if (entry->type == 'n')
1619
89.4k
  {
1620
    /* Special case code: "0000000000 * n" means free,
1621
     * according to some producers (inc Quartz) */
1622
89.4k
    if (entry->ofs == 0)
1623
65.5k
      entry->type = 'f';
1624
23.8k
    else if (entry->ofs <= 0 || entry->ofs >= doc->file_size)
1625
7
      fz_throw(ctx, FZ_ERROR_GENERIC, "object offset out of range: %d (%d 0 R)", (int)entry->ofs, i);
1626
89.4k
  }
1627
23.9k
  else if (entry->type == 'o')
1628
16.8k
  {
1629
    /* Read this into a local variable here, because pdf_get_xref_entry
1630
     * may solidify the xref, hence invalidating "entry", meaning we
1631
     * need a stashed value for the throw. */
1632
16.8k
    int64_t ofs = entry->ofs;
1633
16.8k
    if (ofs <= 0 || ofs >= xref_len || pdf_get_xref_entry_no_null(ctx, doc, ofs)->type != 'n')
1634
12
      fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to an objstm that does not exist: %d (%d 0 R)", (int)ofs, i);
1635
16.8k
  }
1636
113k
}
1637
1638
/*
1639
 * load xref tables from pdf
1640
 *
1641
 * File locked on entry, throughout and on exit.
1642
 */
1643
1644
static void
1645
pdf_load_xref(fz_context *ctx, pdf_document *doc)
1646
5.86k
{
1647
5.86k
  int xref_len;
1648
5.86k
  pdf_xref_entry *entry;
1649
1650
5.86k
  pdf_read_start_xref(ctx, doc);
1651
1652
5.86k
  pdf_read_xref_sections(ctx, doc, doc->startxref, 1);
1653
1654
5.86k
  if (pdf_xref_len(ctx, doc) == 0)
1655
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "found xref was empty");
1656
1657
5.86k
  pdf_prime_xref_index(ctx, doc);
1658
1659
5.86k
  entry = pdf_get_xref_entry_no_null(ctx, doc, 0);
1660
  /* broken pdfs where first object is missing */
1661
5.86k
  if (!entry->type)
1662
17
  {
1663
17
    entry->type = 'f';
1664
17
    entry->gen = 65535;
1665
17
    entry->num = 0;
1666
17
  }
1667
  /* broken pdfs where first object is not free */
1668
5.84k
  else if (entry->type != 'f')
1669
8
    fz_warn(ctx, "first object in xref is not free");
1670
1671
  /* broken pdfs where object offsets are out of range */
1672
5.86k
  xref_len = pdf_xref_len(ctx, doc);
1673
5.86k
  pdf_xref_entry_map(ctx, doc, check_xref_entry_offsets, (void *)(intptr_t)xref_len);
1674
5.86k
}
1675
1676
static void
1677
pdf_check_linear(fz_context *ctx, pdf_document *doc)
1678
5.86k
{
1679
5.86k
  pdf_obj *dict = NULL;
1680
5.86k
  pdf_obj *o;
1681
5.86k
  int num, gen;
1682
5.86k
  int64_t stmofs;
1683
1684
5.86k
  fz_var(dict);
1685
1686
11.7k
  fz_try(ctx)
1687
11.7k
  {
1688
5.86k
    dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL);
1689
5.86k
    if (!pdf_is_dict(ctx, dict))
1690
71
      break;
1691
5.79k
    o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
1692
5.79k
    if (o == NULL)
1693
4.30k
      break;
1694
1.49k
    if (pdf_to_int(ctx, o) != 1)
1695
1
      break;
1696
1.49k
    doc->has_linearization_object = 1;
1697
1.49k
  }
1698
11.7k
  fz_always(ctx)
1699
5.86k
    pdf_drop_obj(ctx, dict);
1700
5.86k
  fz_catch(ctx)
1701
910
  {
1702
    /* Silently swallow this error. */
1703
910
  }
1704
5.86k
}
1705
1706
static void
1707
pdf_load_linear(fz_context *ctx, pdf_document *doc)
1708
0
{
1709
0
  pdf_obj *dict = NULL;
1710
0
  pdf_obj *hint = NULL;
1711
0
  pdf_obj *o;
1712
0
  int num, gen, lin, len;
1713
0
  int64_t stmofs;
1714
1715
0
  fz_var(dict);
1716
0
  fz_var(hint);
1717
1718
0
  fz_try(ctx)
1719
0
  {
1720
0
    pdf_xref_entry *entry;
1721
1722
0
    dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL);
1723
0
    if (!pdf_is_dict(ctx, dict))
1724
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
1725
0
    o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
1726
0
    if (o == NULL)
1727
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
1728
0
    lin = pdf_to_int(ctx, o);
1729
0
    if (lin != 1)
1730
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "Unexpected version of Linearized tag (%d)", lin);
1731
0
    doc->has_linearization_object = 1;
1732
0
    len = pdf_dict_get_int(ctx, dict, PDF_NAME(L));
1733
0
    if (len != doc->file_length)
1734
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "File has been updated since linearization");
1735
1736
0
    pdf_read_xref_sections(ctx, doc, fz_tell(ctx, doc->file), 0);
1737
1738
0
    doc->linear_page_count = pdf_dict_get_int(ctx, dict, PDF_NAME(N));
1739
0
    doc->linear_page_refs = fz_realloc_array(ctx, doc->linear_page_refs, doc->linear_page_count, pdf_obj *);
1740
0
    memset(doc->linear_page_refs, 0, doc->linear_page_count * sizeof(pdf_obj*));
1741
0
    doc->linear_obj = dict;
1742
0
    doc->linear_pos = fz_tell(ctx, doc->file);
1743
0
    doc->linear_page1_obj_num = pdf_dict_get_int(ctx, dict, PDF_NAME(O));
1744
0
    doc->linear_page_refs[0] = pdf_new_indirect(ctx, doc, doc->linear_page1_obj_num, 0);
1745
0
    doc->linear_page_num = 0;
1746
0
    hint = pdf_dict_get(ctx, dict, PDF_NAME(H));
1747
0
    doc->hint_object_offset = pdf_array_get_int(ctx, hint, 0);
1748
0
    doc->hint_object_length = pdf_array_get_int(ctx, hint, 1);
1749
1750
0
    entry = pdf_get_populating_xref_entry(ctx, doc, 0);
1751
0
    entry->type = 'f';
1752
0
  }
1753
0
  fz_catch(ctx)
1754
0
  {
1755
0
    pdf_drop_obj(ctx, dict);
1756
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1757
    /* Drop back to non linearized reading mode */
1758
0
    doc->file_reading_linearly = 0;
1759
0
  }
1760
0
}
1761
1762
/*
1763
 * Initialize and load xref tables.
1764
 * If password is not null, try to decrypt.
1765
 */
1766
1767
static void
1768
pdf_init_document(fz_context *ctx, pdf_document *doc)
1769
14.7k
{
1770
14.7k
  pdf_obj *encrypt, *id;
1771
14.7k
  int repaired = 0;
1772
1773
29.4k
  fz_try(ctx)
1774
29.4k
  {
1775
    /* Check to see if we should work in progressive mode */
1776
14.7k
    if (doc->file->progressive)
1777
0
    {
1778
0
      doc->file_reading_linearly = 1;
1779
0
      fz_seek(ctx, doc->file, 0, SEEK_END);
1780
0
      doc->file_length = fz_tell(ctx, doc->file);
1781
0
      if (doc->file_length < 0)
1782
0
        doc->file_length = 0;
1783
0
      fz_seek(ctx, doc->file, 0, SEEK_SET);
1784
0
    }
1785
1786
14.7k
    pdf_load_version(ctx, doc);
1787
1788
    /* Try to load the linearized file if we are in progressive
1789
     * mode. */
1790
14.7k
    if (doc->file_reading_linearly)
1791
0
      pdf_load_linear(ctx, doc);
1792
14.7k
    else
1793
      /* Even if we're not in progressive mode, check to see
1794
       * if the file claims to be linearized. This is important
1795
       * for checking signatures later on. */
1796
14.7k
      pdf_check_linear(ctx, doc);
1797
1798
    /* If we aren't in progressive mode (or the linear load failed
1799
     * and has set us back to non-progressive mode), load normally.
1800
     */
1801
14.7k
    if (!doc->file_reading_linearly)
1802
5.86k
      pdf_load_xref(ctx, doc);
1803
14.7k
  }
1804
29.4k
  fz_catch(ctx)
1805
13.9k
  {
1806
13.9k
    pdf_drop_xref_sections(ctx, doc);
1807
13.9k
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1808
13.9k
    doc->file_reading_linearly = 0;
1809
13.9k
    fz_warn(ctx, "trying to repair broken xref");
1810
13.9k
    repaired = 1;
1811
13.9k
  }
1812
1813
29.4k
  fz_try(ctx)
1814
29.4k
  {
1815
14.7k
    if (repaired)
1816
13.9k
    {
1817
      /* pdf_repair_xref may access xref_index, so reset it properly */
1818
13.9k
      if (doc->xref_index)
1819
283
        memset(doc->xref_index, 0, sizeof(int) * doc->max_xref_len);
1820
13.9k
      pdf_repair_xref(ctx, doc);
1821
13.9k
      pdf_prime_xref_index(ctx, doc);
1822
13.9k
    }
1823
1824
14.7k
    encrypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
1825
14.7k
    id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
1826
14.7k
    if (pdf_is_dict(ctx, encrypt))
1827
236
      doc->crypt = pdf_new_crypt(ctx, encrypt, id);
1828
1829
    /* Allow lazy clients to read encrypted files with a blank password */
1830
14.7k
    (void)pdf_authenticate_password(ctx, doc, "");
1831
1832
14.7k
    if (repaired)
1833
13.6k
    {
1834
13.6k
      pdf_repair_trailer(ctx, doc);
1835
13.6k
    }
1836
14.7k
  }
1837
29.4k
  fz_catch(ctx)
1838
373
  {
1839
373
    fz_rethrow(ctx);
1840
373
  }
1841
14.7k
}
1842
1843
/*
 * Fill in missing Root and Info entries in the trailer after a repair.
 *
 * Object streams are repaired first. Then, for whichever of Root/Info
 * the trailer lacks, the xref is walked from the highest object number
 * downwards: the first object whose Type is Catalog becomes Root, and
 * the first object with a Creator or Producer key becomes Info.
 * Objects that fail to load are skipped with a warning, except that
 * FZ_ERROR_TRYLATER is always passed on. If the document is encrypted,
 * the xref is cleared on the way out (on success or failure).
 */
void pdf_repair_trailer(fz_context *ctx, pdf_document *doc)
{
  pdf_obj *dict = NULL;
  pdf_obj *ref;
  int have_root, have_info;
  int num;

  int xref_len = pdf_xref_len(ctx, doc);
  pdf_repair_obj_stms(ctx, doc);

  have_root = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)) != NULL);
  have_info = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)) != NULL);

  fz_var(dict);

  fz_try(ctx)
  {
    /* Walk backwards: if several Info or Root candidates exist, the
     * ones with higher object numbers are more likely to be newer. */
    for (num = xref_len - 1; num > 0 && !(have_info && have_root); --num)
    {
      pdf_xref_entry *entry = pdf_get_xref_entry_no_null(ctx, doc, num);

      /* Unused and free slots cannot hold a candidate. */
      if (entry->type == 0 || entry->type == 'f')
        continue;

      fz_try(ctx)
      {
        dict = pdf_load_object(ctx, doc, num);
      }
      fz_catch(ctx)
      {
        fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
        fz_warn(ctx, "ignoring broken object (%d 0 R)", num);
        continue;
      }

      if (!have_root && pdf_dict_get(ctx, dict, PDF_NAME(Type)) == PDF_NAME(Catalog))
      {
        ref = pdf_new_indirect(ctx, doc, num, 0);
        pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), ref);
        have_root = 1;
      }

      if (!have_info &&
        (pdf_dict_get(ctx, dict, PDF_NAME(Creator)) || pdf_dict_get(ctx, dict, PDF_NAME(Producer))))
      {
        ref = pdf_new_indirect(ctx, doc, num, 0);
        pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), ref);
        have_info = 1;
      }

      pdf_drop_obj(ctx, dict);
      dict = NULL;
    }
  }
  fz_always(ctx)
  {
    /* ensure that strings are not used in their repaired, non-decrypted form */
    if (doc->crypt)
      pdf_clear_xref(ctx, doc);
  }
  fz_catch(ctx)
  {
    /* dict is only non-NULL here if we threw while inspecting it. */
    pdf_drop_obj(ctx, dict);
    fz_rethrow(ctx);
  }
}
1918
1919
void
1920
pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc)
1921
14.7k
{
1922
14.7k
  if (doc == NULL)
1923
0
    return;
1924
14.7k
  fz_drop_xml(ctx, doc->xfa);
1925
14.7k
  doc->xfa = NULL;
1926
14.7k
}
1927
1928
static void
1929
pdf_drop_document_imp(fz_context *ctx, pdf_document *doc)
1930
14.7k
{
1931
14.7k
  int i;
1932
1933
14.7k
  fz_defer_reap_start(ctx);
1934
1935
  /* Type3 glyphs in the glyph cache can contain pdf_obj pointers
1936
   * that we are about to destroy. Simplest solution is to bin the
1937
   * glyph cache at this point. */
1938
29.4k
  fz_try(ctx)
1939
29.4k
    fz_purge_glyph_cache(ctx);
1940
29.4k
  fz_catch(ctx)
1941
0
  {
1942
    /* Swallow error, but continue dropping */
1943
0
  }
1944
1945
14.7k
  pdf_set_doc_event_callback(ctx, doc, NULL, NULL, NULL);
1946
14.7k
  pdf_drop_js(ctx, doc->js);
1947
1948
14.7k
  pdf_drop_journal(ctx, doc->journal);
1949
1950
14.7k
  pdf_drop_resource_tables(ctx, doc);
1951
1952
14.7k
  pdf_drop_local_xref(ctx, doc->local_xref);
1953
1954
14.7k
  pdf_drop_xref_sections(ctx, doc);
1955
14.7k
  fz_free(ctx, doc->xref_index);
1956
1957
14.7k
  fz_drop_stream(ctx, doc->file);
1958
14.7k
  pdf_drop_crypt(ctx, doc->crypt);
1959
1960
14.7k
  pdf_drop_obj(ctx, doc->linear_obj);
1961
14.7k
  if (doc->linear_page_refs)
1962
0
  {
1963
0
    for (i=0; i < doc->linear_page_count; i++)
1964
0
      pdf_drop_obj(ctx, doc->linear_page_refs[i]);
1965
1966
0
    fz_free(ctx, doc->linear_page_refs);
1967
0
  }
1968
1969
14.7k
  fz_free(ctx, doc->hint_page);
1970
14.7k
  fz_free(ctx, doc->hint_shared_ref);
1971
14.7k
  fz_free(ctx, doc->hint_shared);
1972
14.7k
  fz_free(ctx, doc->hint_obj_offsets);
1973
1974
15.2k
  for (i=0; i < doc->num_type3_fonts; i++)
1975
549
  {
1976
1.09k
    fz_try(ctx)
1977
1.09k
      fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
1978
1.09k
    fz_always(ctx)
1979
549
      fz_drop_font(ctx, doc->type3_fonts[i]);
1980
549
    fz_catch(ctx)
1981
0
    {
1982
      /* Swallow error, but continue dropping */
1983
0
    }
1984
549
  }
1985
1986
14.7k
  fz_free(ctx, doc->type3_fonts);
1987
1988
14.7k
  pdf_drop_ocg(ctx, doc);
1989
1990
14.7k
  pdf_empty_store(ctx, doc);
1991
1992
14.7k
  pdf_lexbuf_fin(ctx, &doc->lexbuf.base);
1993
1994
14.7k
  fz_drop_colorspace(ctx, doc->oi);
1995
1996
49.0k
  for (i = 0; i < doc->orphans_count; i++)
1997
34.2k
    pdf_drop_obj(ctx, doc->orphans[i]);
1998
1999
14.7k
  fz_free(ctx, doc->orphans);
2000
2001
14.7k
  pdf_drop_page_tree_internal(ctx, doc);
2002
2003
14.7k
  fz_defer_reap_end(ctx);
2004
2005
14.7k
  pdf_invalidate_xfa(ctx, doc);
2006
14.7k
}
2007
2008
void
2009
pdf_drop_document(fz_context *ctx, pdf_document *doc)
2010
36.5k
{
2011
36.5k
  fz_drop_document(ctx, &doc->super);
2012
36.5k
}
2013
2014
pdf_document *
2015
pdf_keep_document(fz_context *ctx, pdf_document *doc)
2016
36.5k
{
2017
36.5k
  return (pdf_document *)fz_keep_document(ctx, &doc->super);
2018
36.5k
}
2019
2020
/*
2021
 * compressed object streams
2022
 */
2023
2024
/*
2025
  Do not hold pdf_xref_entry's over call to this function as they
2026
  may be invalidated!
2027
*/
2028
static pdf_xref_entry *
2029
pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, pdf_lexbuf *buf, int target)
2030
32.5k
{
2031
32.5k
  fz_stream *stm = NULL;
2032
32.5k
  pdf_obj *objstm = NULL;
2033
32.5k
  int *numbuf = NULL;
2034
32.5k
  int64_t *ofsbuf = NULL;
2035
2036
32.5k
  pdf_obj *obj;
2037
32.5k
  int64_t first;
2038
32.5k
  int count;
2039
32.5k
  int i;
2040
32.5k
  pdf_token tok;
2041
32.5k
  pdf_xref_entry *ret_entry = NULL;
2042
32.5k
  int ret_idx;
2043
32.5k
  int xref_len;
2044
32.5k
  int found;
2045
32.5k
  fz_stream *sub = NULL;
2046
2047
32.5k
  fz_var(numbuf);
2048
32.5k
  fz_var(ofsbuf);
2049
32.5k
  fz_var(objstm);
2050
32.5k
  fz_var(stm);
2051
32.5k
  fz_var(sub);
2052
2053
65.1k
  fz_try(ctx)
2054
65.1k
  {
2055
32.5k
    objstm = pdf_load_object(ctx, doc, num);
2056
2057
32.5k
    if (pdf_obj_marked(ctx, objstm))
2058
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "recursive object stream lookup");
2059
32.5k
  }
2060
65.1k
  fz_catch(ctx)
2061
355
  {
2062
355
    pdf_drop_obj(ctx, objstm);
2063
355
    fz_rethrow(ctx);
2064
355
  }
2065
2066
64.4k
  fz_try(ctx)
2067
64.4k
  {
2068
32.2k
    (void)pdf_mark_obj(ctx, objstm);
2069
2070
32.2k
    count = pdf_dict_get_int(ctx, objstm, PDF_NAME(N));
2071
32.2k
    first = pdf_dict_get_int(ctx, objstm, PDF_NAME(First));
2072
2073
32.2k
    validate_object_number_range(ctx, first, count, "object stream");
2074
2075
32.2k
    numbuf = fz_calloc(ctx, count, sizeof(*numbuf));
2076
32.2k
    ofsbuf = fz_calloc(ctx, count, sizeof(*ofsbuf));
2077
2078
32.2k
    xref_len = pdf_xref_len(ctx, doc);
2079
2080
32.2k
    found = 0;
2081
2082
32.2k
    stm = pdf_open_stream_number(ctx, doc, num);
2083
2.08M
    for (i = 0; i < count; i++)
2084
2.05M
    {
2085
2.05M
      tok = pdf_lex(ctx, stm, buf);
2086
2.05M
      if (tok != PDF_TOK_INT)
2087
2.85k
        fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d 0 R)", num);
2088
2.05M
      numbuf[found] = buf->i;
2089
2090
2.05M
      tok = pdf_lex(ctx, stm, buf);
2091
2.05M
      if (tok != PDF_TOK_INT)
2092
3.22k
        fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d 0 R)", num);
2093
2.05M
      ofsbuf[found] = buf->i;
2094
2095
2.05M
      if (numbuf[found] <= 0 || numbuf[found] >= xref_len)
2096
145k
        fz_warn(ctx, "object stream object out of range, skipping");
2097
1.90M
      else
2098
1.90M
        found++;
2099
2.05M
    }
2100
2101
26.1k
    ret_idx = -1;
2102
552k
    for (i = 0; i < found; i++)
2103
526k
    {
2104
526k
      pdf_xref_entry *entry;
2105
526k
      uint64_t length;
2106
526k
      int64_t offset;
2107
2108
526k
      offset = first + ofsbuf[i];
2109
526k
      if (i+1 < found)
2110
521k
        length = ofsbuf[i+1] - ofsbuf[i];
2111
5.25k
      else
2112
5.25k
        length = UINT64_MAX;
2113
2114
526k
      sub = fz_open_null_filter(ctx, stm, length, offset);
2115
2116
526k
      obj = pdf_parse_stm_obj(ctx, doc, sub, buf);
2117
526k
      fz_drop_stream(ctx, sub);
2118
526k
      sub = NULL;
2119
2120
526k
      entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[i]);
2121
2122
526k
      pdf_set_obj_parent(ctx, obj, numbuf[i]);
2123
2124
      /* We may have set entry->type to be 'O' from being 'o' to avoid nasty
2125
       * recursions in pdf_cache_object. Accept the type being 'O' here. */
2126
526k
      if ((entry->type == 'o' || entry->type == 'O') && entry->ofs == num)
2127
503k
      {
2128
        /* If we already have an entry for this object,
2129
         * we'd like to drop it and use the new one -
2130
         * but this means that anyone currently holding
2131
         * a pointer to the old one will be left with a
2132
         * stale pointer. Instead, we drop the new one
2133
         * and trust that the old one is correct. */
2134
503k
        if (entry->obj)
2135
427k
        {
2136
427k
          if (pdf_objcmp(ctx, entry->obj, obj))
2137
322
            fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]);
2138
427k
          pdf_drop_obj(ctx, obj);
2139
427k
        }
2140
75.5k
        else
2141
75.5k
        {
2142
75.5k
          entry->obj = obj;
2143
75.5k
          fz_drop_buffer(ctx, entry->stm_buf);
2144
75.5k
          entry->stm_buf = NULL;
2145
75.5k
        }
2146
503k
        if (numbuf[i] == target)
2147
4.12k
          ret_idx = i;
2148
503k
      }
2149
23.4k
      else
2150
23.4k
      {
2151
23.4k
        pdf_drop_obj(ctx, obj);
2152
23.4k
      }
2153
526k
    }
2154
    /* Parsing our way through the stream can cause the xref to be
2155
     * solidified, which will move an entry. We therefore can't
2156
     * read the entry for returning until no more parsing is to be
2157
     * done. Thus we end up reading this entry twice. */
2158
26.1k
    if (ret_idx >= 0)
2159
4.04k
      ret_entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[ret_idx]);
2160
26.1k
  }
2161
64.4k
  fz_always(ctx)
2162
32.2k
  {
2163
32.2k
    fz_drop_stream(ctx, stm);
2164
32.2k
    fz_drop_stream(ctx, sub);
2165
32.2k
    fz_free(ctx, ofsbuf);
2166
32.2k
    fz_free(ctx, numbuf);
2167
32.2k
    pdf_unmark_obj(ctx, objstm);
2168
32.2k
    pdf_drop_obj(ctx, objstm);
2169
32.2k
  }
2170
32.2k
  fz_catch(ctx)
2171
28.0k
  {
2172
28.0k
    fz_rethrow(ctx);
2173
28.0k
  }
2174
18.4E
  return ret_entry;
2175
26.1k
}
2176
2177
/*
2178
 * object loading
2179
 */
2180
static int
2181
pdf_obj_read(fz_context *ctx, pdf_document *doc, int64_t *offset, int *nump, pdf_obj **page)
2182
0
{
2183
0
  pdf_lexbuf *buf = &doc->lexbuf.base;
2184
0
  int num, gen, tok;
2185
0
  int64_t numofs, genofs, stmofs, tmpofs, newtmpofs;
2186
0
  int xref_len;
2187
0
  pdf_xref_entry *entry;
2188
2189
0
  numofs = *offset;
2190
0
  fz_seek(ctx, doc->file, numofs, SEEK_SET);
2191
2192
  /* We expect to read 'num' here */
2193
0
  tok = pdf_lex(ctx, doc->file, buf);
2194
0
  genofs = fz_tell(ctx, doc->file);
2195
0
  if (tok != PDF_TOK_INT)
2196
0
  {
2197
    /* Failed! */
2198
0
    DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
2199
0
    *offset = genofs;
2200
0
    return tok == PDF_TOK_EOF;
2201
0
  }
2202
0
  *nump = num = buf->i;
2203
2204
  /* We expect to read 'gen' here */
2205
0
  tok = pdf_lex(ctx, doc->file, buf);
2206
0
  tmpofs = fz_tell(ctx, doc->file);
2207
0
  if (tok != PDF_TOK_INT)
2208
0
  {
2209
    /* Failed! */
2210
0
    DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
2211
0
    *offset = tmpofs;
2212
0
    return tok == PDF_TOK_EOF;
2213
0
  }
2214
0
  gen = buf->i;
2215
2216
  /* We expect to read 'obj' here */
2217
0
  do
2218
0
  {
2219
0
    tmpofs = fz_tell(ctx, doc->file);
2220
0
    tok = pdf_lex(ctx, doc->file, buf);
2221
0
    if (tok == PDF_TOK_OBJ)
2222
0
      break;
2223
0
    if (tok != PDF_TOK_INT)
2224
0
    {
2225
0
      DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
2226
0
      *offset = fz_tell(ctx, doc->file);
2227
0
      return tok == PDF_TOK_EOF;
2228
0
    }
2229
0
    DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
2230
0
    *nump = num = gen;
2231
0
    numofs = genofs;
2232
0
    gen = buf->i;
2233
0
    genofs = tmpofs;
2234
0
  }
2235
0
  while (1);
2236
2237
  /* Now we read the actual object */
2238
0
  xref_len = pdf_xref_len(ctx, doc);
2239
2240
  /* When we are reading a progressive file, we typically see:
2241
   *    File Header
2242
   *    obj m (Linearization params)
2243
   *    xref #1 (refers to objects m-n)
2244
   *    obj m+1
2245
   *    ...
2246
   *    obj n
2247
   *    obj 1
2248
   *    ...
2249
   *    obj n-1
2250
   *    xref #2
2251
   *
2252
   * The linearisation params are read elsewhere, hence
2253
   * whenever we read an object it should just go into the
2254
   * previous xref.
2255
   */
2256
0
  tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs, NULL);
2257
2258
0
  do /* So we can break out of it */
2259
0
  {
2260
0
    if (num <= 0 || num >= xref_len)
2261
0
    {
2262
0
      fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
2263
0
      break;
2264
0
    }
2265
0
    if (gen != 0)
2266
0
    {
2267
0
      fz_warn(ctx, "Unexpected non zero generation number in linearized file");
2268
0
    }
2269
0
    entry = pdf_get_populating_xref_entry(ctx, doc, num);
2270
0
    if (entry->type != 0)
2271
0
    {
2272
0
      DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
2273
0
      break;
2274
0
    }
2275
0
    if (page && *page)
2276
0
    {
2277
0
      DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
2278
0
      if (!entry->obj)
2279
0
        entry->obj = pdf_keep_obj(ctx, *page);
2280
2281
0
      if (doc->linear_page_refs[doc->linear_page_num] == NULL)
2282
0
        doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(ctx, doc, num, gen);
2283
0
    }
2284
0
    else
2285
0
    {
2286
0
      DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
2287
0
    }
2288
0
    entry->type = 'n';
2289
0
    entry->gen = gen; // XXX: was 0
2290
0
    entry->num = num;
2291
0
    entry->ofs = numofs;
2292
0
    entry->stm_ofs = stmofs;
2293
0
  }
2294
0
  while (0);
2295
0
  if (page && *page)
2296
0
    doc->linear_page_num++;
2297
2298
0
  if (tok == PDF_TOK_ENDOBJ)
2299
0
  {
2300
0
    *offset = fz_tell(ctx, doc->file);
2301
0
  }
2302
0
  else
2303
0
  {
2304
0
    *offset = newtmpofs;
2305
0
  }
2306
0
  return 0;
2307
0
}
2308
2309
static void
2310
pdf_load_hinted_page(fz_context *ctx, pdf_document *doc, int pagenum)
2311
0
{
2312
0
  pdf_obj *page = NULL;
2313
2314
0
  if (!doc->hints_loaded || !doc->linear_page_refs)
2315
0
    return;
2316
2317
0
  if (doc->linear_page_refs[pagenum])
2318
0
    return;
2319
2320
0
  fz_var(page);
2321
2322
0
  fz_try(ctx)
2323
0
  {
2324
0
    int num = doc->hint_page[pagenum].number;
2325
0
    page = pdf_load_object(ctx, doc, num);
2326
0
    if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, page, PDF_NAME(Type))))
2327
0
    {
2328
      /* We have found the page object! */
2329
0
      DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
2330
0
      doc->linear_page_refs[pagenum] = pdf_new_indirect(ctx, doc, num, 0);
2331
0
    }
2332
0
  }
2333
0
  fz_always(ctx)
2334
0
    pdf_drop_obj(ctx, page);
2335
0
  fz_catch(ctx)
2336
0
  {
2337
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2338
    /* Silently swallow the error and proceed as normal */
2339
0
  }
2340
0
}
2341
2342
static int
2343
read_hinted_object(fz_context *ctx, pdf_document *doc, int num)
2344
0
{
2345
  /* Try to find the object using our hint table. Find the closest
2346
   * object <= the one we want that has a hint and read forward from
2347
   * there. */
2348
0
  int expected = num;
2349
0
  int curr_pos;
2350
0
  int64_t start, offset;
2351
2352
0
  while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
2353
0
    expected--;
2354
0
  if (expected != num)
2355
0
    DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", expected, num));
2356
0
  if (expected == 0) /* No hints found, just bail */
2357
0
    return 0;
2358
2359
0
  curr_pos = fz_tell(ctx, doc->file);
2360
0
  offset = doc->hint_obj_offsets[expected];
2361
2362
0
  fz_var(expected);
2363
2364
0
  fz_try(ctx)
2365
0
  {
2366
0
    int found;
2367
2368
    /* Try to read forward from there */
2369
0
    do
2370
0
    {
2371
0
      start = offset;
2372
0
      DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
2373
0
      pdf_obj_read(ctx, doc, &offset, &found, 0);
2374
0
      DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
2375
0
      if (found <= expected)
2376
0
      {
2377
        /* We found the right one (or one earlier than
2378
         * we expected). Update the hints. */
2379
0
        doc->hint_obj_offsets[expected] = offset;
2380
0
        doc->hint_obj_offsets[found] = start;
2381
0
        doc->hint_obj_offsets[found+1] = offset;
2382
        /* Retry with the next one */
2383
0
        expected = found+1;
2384
0
      }
2385
0
      else
2386
0
      {
2387
        /* We found one later than we expected. */
2388
0
        doc->hint_obj_offsets[expected] = 0;
2389
0
        doc->hint_obj_offsets[found] = start;
2390
0
        doc->hint_obj_offsets[found+1] = offset;
2391
0
        while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
2392
0
          expected--;
2393
0
        if (expected == 0) /* No hints found, we give up */
2394
0
          break;
2395
0
      }
2396
0
    }
2397
0
    while (found != num);
2398
0
  }
2399
0
  fz_always(ctx)
2400
0
  {
2401
0
    fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
2402
0
  }
2403
0
  fz_catch(ctx)
2404
0
  {
2405
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2406
    /* FIXME: Currently we ignore the hint. Perhaps we should
2407
     * drop back to non-hinted operation here. */
2408
0
    doc->hint_obj_offsets[expected] = 0;
2409
0
    fz_rethrow(ctx);
2410
0
  }
2411
0
  return expected != 0;
2412
0
}
2413
2414
pdf_obj *
2415
pdf_load_unencrypted_object(fz_context *ctx, pdf_document *doc, int num)
2416
0
{
2417
0
  pdf_xref_entry *x;
2418
2419
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2420
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2421
2422
0
  x = pdf_get_xref_entry_no_null(ctx, doc, num);
2423
0
  if (x->type == 'n')
2424
0
  {
2425
0
    fz_seek(ctx, doc->file, x->ofs, SEEK_SET);
2426
0
    return pdf_parse_ind_obj(ctx, doc, doc->file, NULL, NULL, NULL, NULL);
2427
0
  }
2428
0
  return NULL;
2429
0
}
2430
2431
pdf_xref_entry *
2432
pdf_cache_object(fz_context *ctx, pdf_document *doc, int num)
2433
9.24M
{
2434
9.24M
  pdf_xref_entry *x;
2435
9.24M
  int rnum, rgen, try_repair;
2436
2437
9.24M
  fz_var(try_repair);
2438
2439
9.24M
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2440
115k
    fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2441
2442
9.13M
object_updated:
2443
9.13M
  try_repair = 0;
2444
9.13M
  rnum = num;
2445
2446
9.13M
  x = pdf_get_xref_entry(ctx, doc, num);
2447
9.13M
  if (x == NULL)
2448
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find object in xref (%d 0 R)", num);
2449
2450
9.13M
  if (x->obj != NULL)
2451
8.49M
    return x;
2452
2453
635k
  if (x->type == 'f')
2454
312k
  {
2455
312k
    x->obj = PDF_NULL;
2456
312k
  }
2457
323k
  else if (x->type == 'n')
2458
290k
  {
2459
290k
    fz_seek(ctx, doc->file, x->ofs, SEEK_SET);
2460
2461
581k
    fz_try(ctx)
2462
581k
    {
2463
290k
      x->obj = pdf_parse_ind_obj(ctx, doc, doc->file,
2464
290k
          &rnum, &rgen, &x->stm_ofs, &try_repair);
2465
290k
    }
2466
581k
    fz_catch(ctx)
2467
96.3k
    {
2468
96.3k
      if (!try_repair || fz_caught(ctx) == FZ_ERROR_TRYLATER)
2469
96.0k
        fz_rethrow(ctx);
2470
96.3k
    }
2471
2472
194k
    if (!try_repair && rnum != num)
2473
13
    {
2474
13
      pdf_drop_obj(ctx, x->obj);
2475
13
      x->type = 'f';
2476
13
      x->ofs = -1;
2477
13
      x->gen = 0;
2478
13
      x->num = 0;
2479
13
      x->stm_ofs = 0;
2480
13
      x->obj = NULL;
2481
13
      try_repair = (doc->repair_attempted == 0);
2482
13
    }
2483
2484
194k
    if (try_repair)
2485
251
    {
2486
251
perform_repair:
2487
502
      fz_try(ctx)
2488
502
      {
2489
251
        pdf_repair_xref(ctx, doc);
2490
251
        pdf_prime_xref_index(ctx, doc);
2491
251
        pdf_repair_obj_stms(ctx, doc);
2492
251
        pdf_repair_trailer(ctx, doc);
2493
251
      }
2494
502
      fz_catch(ctx)
2495
179
      {
2496
179
        fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2497
179
        fz_rethrow_if(ctx, FZ_ERROR_REPAIRED);
2498
179
        if (rnum == num)
2499
166
          fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse object (%d 0 R)", num);
2500
13
        else
2501
13
          fz_throw(ctx, FZ_ERROR_GENERIC, "found object (%d 0 R) instead of (%d 0 R)", rnum, num);
2502
179
      }
2503
72
      goto object_updated;
2504
251
    }
2505
2506
194k
    if (doc->crypt)
2507
6.20k
      pdf_crypt_obj(ctx, doc->crypt, x->obj, x->num, x->gen);
2508
194k
  }
2509
32.9k
  else if (x->type == 'o')
2510
32.5k
  {
2511
32.5k
    if (!x->obj)
2512
32.5k
    {
2513
32.5k
      pdf_xref_entry *orig_x = x;
2514
32.5k
      pdf_xref_entry *ox = x; /* This init is unused, but it shuts warnings up. */
2515
32.5k
      orig_x->type = 'O'; /* Mark this node so we know we're recursing. */
2516
65.1k
      fz_try(ctx)
2517
65.1k
        x = pdf_load_obj_stm(ctx, doc, x->ofs, &doc->lexbuf.base, num);
2518
65.1k
      fz_always(ctx)
2519
32.5k
      {
2520
        /* Most of the time ox == orig_x, but if pdf_load_obj_stm performed a
2521
         * repair, it may not be. It is safe to call pdf_get_xref_entry_no_change
2522
         * here, as it does not try/catch. */
2523
32.5k
        ox = pdf_get_xref_entry_no_change(ctx, doc, num);
2524
32.5k
        ox->type = 'o'; /* Not recursing any more. */
2525
32.5k
      }
2526
32.5k
      fz_catch(ctx)
2527
28.3k
        fz_rethrow(ctx);
2528
4.16k
      if (x == NULL)
2529
120
        fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load object stream containing object (%d 0 R)", num);
2530
4.04k
      if (!x->obj)
2531
0
      {
2532
0
        x->type = 'f';
2533
0
        ox->type = 'f';
2534
0
        if (doc->repair_attempted)
2535
0
          fz_throw(ctx, FZ_ERROR_GENERIC, "object (%d 0 R) was not found in its object stream", num);
2536
0
        goto perform_repair;
2537
0
      }
2538
4.04k
    }
2539
32.5k
  }
2540
375
  else if (doc->hint_obj_offsets && read_hinted_object(ctx, doc, num))
2541
0
  {
2542
0
    goto object_updated;
2543
0
  }
2544
375
  else if (doc->file_length && doc->linear_pos < doc->file_length)
2545
0
  {
2546
0
    fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d 0 R) - not loaded yet?", num);
2547
0
  }
2548
375
  else
2549
375
  {
2550
375
    fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find object in xref (%d 0 R)", num);
2551
375
  }
2552
2553
510k
  pdf_set_obj_parent(ctx, x->obj, num);
2554
510k
  return x;
2555
635k
}
2556
2557
pdf_obj *
2558
pdf_load_object(fz_context *ctx, pdf_document *doc, int num)
2559
346k
{
2560
346k
  pdf_xref_entry *entry = pdf_cache_object(ctx, doc, num);
2561
346k
  return pdf_keep_obj(ctx, entry->obj);
2562
346k
}
2563
2564
pdf_obj *
2565
pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref)
2566
8.26M
{
2567
8.26M
  if (pdf_is_indirect(ctx, ref))
2568
8.26M
  {
2569
8.26M
    pdf_document *doc = pdf_get_indirect_document(ctx, ref);
2570
8.26M
    int num = pdf_to_num(ctx, ref);
2571
8.26M
    pdf_xref_entry *entry;
2572
2573
8.26M
    if (!doc)
2574
0
      return NULL;
2575
8.26M
    if (num <= 0)
2576
1.71k
    {
2577
1.71k
      fz_warn(ctx, "invalid indirect reference (%d 0 R)", num);
2578
1.71k
      return NULL;
2579
1.71k
    }
2580
2581
16.5M
    fz_try(ctx)
2582
16.5M
      entry = pdf_cache_object(ctx, doc, num);
2583
16.5M
    fz_catch(ctx)
2584
226k
    {
2585
226k
      fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2586
226k
      fz_rethrow_if(ctx, FZ_ERROR_REPAIRED);
2587
226k
      fz_warn(ctx, "cannot load object (%d 0 R) into cache", num);
2588
226k
      return NULL;
2589
226k
    }
2590
2591
8.03M
    ref = entry->obj;
2592
8.03M
  }
2593
8.03M
  return ref;
2594
8.26M
}
2595
2596
pdf_obj *
2597
pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref)
2598
8.26M
{
2599
8.26M
  int sanity = 10;
2600
2601
16.5M
  while (pdf_is_indirect(ctx, ref))
2602
8.26M
  {
2603
8.26M
    if (--sanity == 0)
2604
49
    {
2605
49
      fz_warn(ctx, "too many indirections (possible indirection cycle involving %d 0 R)", pdf_to_num(ctx, ref));
2606
49
      return NULL;
2607
49
    }
2608
2609
8.26M
    ref = pdf_resolve_indirect(ctx, ref);
2610
8.26M
  }
2611
2612
8.26M
  return ref;
2613
8.26M
}
2614
2615
int
2616
pdf_count_objects(fz_context *ctx, pdf_document *doc)
2617
0
{
2618
0
  return pdf_xref_len(ctx, doc);
2619
0
}
2620
2621
int
2622
pdf_is_local_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
2623
188
{
2624
188
  pdf_xref *xref = doc->local_xref;
2625
188
  pdf_xref_subsec *sub;
2626
188
  int num;
2627
2628
188
  if (!pdf_is_indirect(ctx, obj))
2629
5
    return 0;
2630
2631
183
  if (xref == NULL)
2632
79
    return 0; /* no local xref present */
2633
2634
104
  num = pdf_to_num(ctx, obj);
2635
2636
  /* Local xrefs only ever have 1 section, and it should be solid. */
2637
104
  sub = xref->subsec;
2638
104
  if (num >= sub->start && num < sub->start + sub->len)
2639
104
    return sub->table[num - sub->start].type != 0;
2640
2641
0
  return 0;
2642
104
}
2643
2644
static int
2645
pdf_create_local_object(fz_context *ctx, pdf_document *doc)
2646
8.31k
{
2647
  /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
2648
8.31k
  pdf_xref_entry *entry;
2649
8.31k
  int num;
2650
2651
8.31k
  num = doc->local_xref->num_objects;
2652
2653
8.31k
  entry = pdf_get_local_xref_entry(ctx, doc, num);
2654
8.31k
  entry->type = 'f';
2655
8.31k
  entry->ofs = -1;
2656
8.31k
  entry->gen = 0;
2657
8.31k
  entry->num = num;
2658
8.31k
  entry->stm_ofs = 0;
2659
8.31k
  entry->stm_buf = NULL;
2660
8.31k
  entry->obj = NULL;
2661
8.31k
  return num;
2662
8.31k
}
2663
2664
int
2665
pdf_create_object(fz_context *ctx, pdf_document *doc)
2666
8.31k
{
2667
  /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
2668
8.31k
  pdf_xref_entry *entry;
2669
8.31k
  int num;
2670
2671
8.31k
  if (doc->local_xref && doc->local_xref_nesting > 0)
2672
8.31k
    return pdf_create_local_object(ctx, doc);
2673
2674
0
  num = pdf_xref_len(ctx, doc);
2675
2676
0
  if (num > PDF_MAX_OBJECT_NUMBER)
2677
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "too many objects stored in pdf");
2678
2679
0
  entry = pdf_get_incremental_xref_entry(ctx, doc, num);
2680
0
  entry->type = 'f';
2681
0
  entry->ofs = -1;
2682
0
  entry->gen = 0;
2683
0
  entry->num = num;
2684
0
  entry->stm_ofs = 0;
2685
0
  entry->stm_buf = NULL;
2686
0
  entry->obj = NULL;
2687
2688
0
  pdf_add_journal_fragment(ctx, doc, num, NULL, NULL, 1);
2689
2690
0
  return num;
2691
0
}
2692
2693
static void
2694
pdf_delete_local_object(fz_context *ctx, pdf_document *doc, int num)
2695
0
{
2696
0
  pdf_xref_entry *x;
2697
2698
0
  if (doc->local_xref == NULL || doc->local_xref_nesting == 0)
2699
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "No local xref to delete from!");
2700
2701
0
  if (num <= 0 || num >= doc->local_xref->num_objects)
2702
0
  {
2703
0
    fz_warn(ctx, "local object out of range (%d 0 R); xref size %d", num, doc->local_xref->num_objects);
2704
0
    return;
2705
0
  }
2706
2707
0
  x = pdf_get_local_xref_entry(ctx, doc, num);
2708
2709
0
  fz_drop_buffer(ctx, x->stm_buf);
2710
0
  pdf_drop_obj(ctx, x->obj);
2711
2712
0
  x->type = 'f';
2713
0
  x->ofs = 0;
2714
0
  x->gen += 1;
2715
0
  x->num = 0;
2716
0
  x->stm_ofs = 0;
2717
0
  x->stm_buf = NULL;
2718
0
  x->obj = NULL;
2719
0
}
2720
2721
void
2722
pdf_delete_object(fz_context *ctx, pdf_document *doc, int num)
2723
0
{
2724
0
  pdf_xref_entry *x;
2725
0
  pdf_xref *xref;
2726
0
  int j;
2727
2728
0
  if (doc->local_xref && doc->local_xref_nesting > 0)
2729
0
  {
2730
0
    pdf_delete_local_object(ctx, doc, num);
2731
0
    return;
2732
0
  }
2733
2734
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2735
0
  {
2736
0
    fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2737
0
    return;
2738
0
  }
2739
2740
0
  x = pdf_get_incremental_xref_entry(ctx, doc, num);
2741
2742
0
  fz_drop_buffer(ctx, x->stm_buf);
2743
0
  pdf_drop_obj(ctx, x->obj);
2744
2745
0
  x->type = 'f';
2746
0
  x->ofs = 0;
2747
0
  x->gen += 1;
2748
0
  x->num = 0;
2749
0
  x->stm_ofs = 0;
2750
0
  x->stm_buf = NULL;
2751
0
  x->obj = NULL;
2752
2753
  /* Currently we've left a 'free' object in the incremental
2754
   * section. This is enough to cause us to think that the
2755
   * document has changes. Check back in the non-incremental
2756
   * sections to see if the last instance of the object there
2757
   * was free (or if this object never appeared). If so, we
2758
   * can mark this object as non-existent in the incremental
2759
   * xref. This is important so we can 'undo' back to emptiness
2760
   * after we save/when we reload a snapshot. */
2761
0
  for (j = 1; j < doc->num_xref_sections; j++)
2762
0
  {
2763
0
    xref = &doc->xref_sections[j];
2764
2765
0
    if (num < xref->num_objects)
2766
0
    {
2767
0
      pdf_xref_subsec *sub;
2768
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
2769
0
      {
2770
0
        pdf_xref_entry *entry;
2771
2772
0
        if (num < sub->start || num >= sub->start + sub->len)
2773
0
          continue;
2774
2775
0
        entry = &sub->table[num - sub->start];
2776
0
        if (entry->type)
2777
0
        {
2778
0
          if (entry->type == 'f')
2779
0
          {
2780
            /* It was free already! */
2781
0
            x->type = 0;
2782
0
            x->gen = 0;
2783
0
          }
2784
          /* It was a real object. */
2785
0
          return;
2786
0
        }
2787
0
      }
2788
0
    }
2789
0
  }
2790
  /* It never appeared before. */
2791
0
  x->type = 0;
2792
0
  x->gen = 0;
2793
0
}
2794
2795
static void
2796
pdf_update_local_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
2797
8.31k
{
2798
8.31k
  pdf_xref_entry *x;
2799
2800
8.31k
  if (doc->local_xref == NULL || doc->local_xref_nesting == 0)
2801
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "Can't update local object without a local xref");
2802
2803
8.31k
  if (!newobj)
2804
0
  {
2805
0
    pdf_delete_local_object(ctx, doc, num);
2806
0
    return;
2807
0
  }
2808
2809
8.31k
  x = pdf_get_local_xref_entry(ctx, doc, num);
2810
2811
8.31k
  pdf_drop_obj(ctx, x->obj);
2812
2813
8.31k
  x->type = 'n';
2814
8.31k
  x->ofs = 0;
2815
8.31k
  x->obj = pdf_keep_obj(ctx, newobj);
2816
2817
8.31k
  pdf_set_obj_parent(ctx, newobj, num);
2818
8.31k
}
2819
2820
void
2821
pdf_update_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
2822
8.31k
{
2823
8.31k
  pdf_xref_entry *x;
2824
2825
8.31k
  if (doc->local_xref && doc->local_xref_nesting > 0)
2826
8.31k
  {
2827
8.31k
    pdf_update_local_object(ctx, doc, num, newobj);
2828
8.31k
    return;
2829
8.31k
  }
2830
2831
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2832
0
  {
2833
0
    fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2834
0
    return;
2835
0
  }
2836
2837
0
  if (!newobj)
2838
0
  {
2839
0
    pdf_delete_object(ctx, doc, num);
2840
0
    return;
2841
0
  }
2842
2843
0
  x = pdf_get_incremental_xref_entry(ctx, doc, num);
2844
2845
0
  pdf_drop_obj(ctx, x->obj);
2846
2847
0
  x->type = 'n';
2848
0
  x->ofs = 0;
2849
0
  x->obj = pdf_keep_obj(ctx, newobj);
2850
2851
0
  pdf_set_obj_parent(ctx, newobj, num);
2852
0
}
2853
2854
void
2855
pdf_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj, fz_buffer *newbuf, int compressed)
2856
7.77k
{
2857
7.77k
  int num;
2858
7.77k
  pdf_xref_entry *x;
2859
2860
7.77k
  if (pdf_is_indirect(ctx, obj))
2861
7.77k
    num = pdf_to_num(ctx, obj);
2862
0
  else
2863
0
    num = pdf_obj_parent_num(ctx, obj);
2864
2865
  /* Write the Length first, as this has the effect of moving the
2866
   * old object into the journal for undo. This also moves the
2867
   * stream buffer with it, keeping it consistent. */
2868
7.77k
  pdf_dict_put_int(ctx, obj, PDF_NAME(Length), fz_buffer_storage(ctx, newbuf, NULL));
2869
2870
7.77k
  if (doc->local_xref && doc->local_xref_nesting > 0)
2871
7.77k
  {
2872
7.77k
    x = pdf_get_local_xref_entry(ctx, doc, num);
2873
7.77k
  }
2874
0
  else
2875
0
  {
2876
0
    if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2877
0
    {
2878
0
      fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2879
0
      return;
2880
0
    }
2881
2882
0
    x = pdf_get_xref_entry_no_null(ctx, doc, num);
2883
0
  }
2884
2885
7.77k
  fz_drop_buffer(ctx, x->stm_buf);
2886
7.77k
  x->stm_buf = fz_keep_buffer(ctx, newbuf);
2887
2888
7.77k
  if (!compressed)
2889
7.77k
  {
2890
7.77k
    pdf_dict_del(ctx, obj, PDF_NAME(Filter));
2891
7.77k
    pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));
2892
7.77k
  }
2893
7.77k
}
2894
2895
int
2896
pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *buf, int size)
2897
0
{
2898
0
  if (!strcmp(key, FZ_META_FORMAT))
2899
0
  {
2900
0
    int version = pdf_version(ctx, doc);
2901
0
    return 1 + (int)fz_snprintf(buf, size, "PDF %d.%d", version/10, version % 10);
2902
0
  }
2903
2904
0
  if (!strcmp(key, FZ_META_ENCRYPTION))
2905
0
  {
2906
0
    if (doc->crypt)
2907
0
    {
2908
0
      const char *stream_method = pdf_crypt_stream_method(ctx, doc->crypt);
2909
0
      const char *string_method = pdf_crypt_string_method(ctx, doc->crypt);
2910
0
      if (stream_method == string_method)
2911
0
        return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit %s",
2912
0
            pdf_crypt_version(ctx, doc->crypt),
2913
0
            pdf_crypt_revision(ctx, doc->crypt),
2914
0
            pdf_crypt_length(ctx, doc->crypt),
2915
0
            pdf_crypt_string_method(ctx, doc->crypt));
2916
0
      else
2917
0
        return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit streams: %s strings: %s",
2918
0
            pdf_crypt_version(ctx, doc->crypt),
2919
0
            pdf_crypt_revision(ctx, doc->crypt),
2920
0
            pdf_crypt_length(ctx, doc->crypt),
2921
0
            pdf_crypt_stream_method(ctx, doc->crypt),
2922
0
            pdf_crypt_string_method(ctx, doc->crypt));
2923
0
    }
2924
0
    else
2925
0
      return 1 + (int)fz_strlcpy(buf, "None", size);
2926
0
  }
2927
2928
0
  if (strstr(key, "info:") == key)
2929
0
  {
2930
0
    pdf_obj *info;
2931
0
    const char *s;
2932
0
    int n;
2933
2934
0
    info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
2935
0
    if (!info)
2936
0
      return -1;
2937
2938
0
    info = pdf_dict_gets(ctx, info, key + 5);
2939
0
    if (!info)
2940
0
      return -1;
2941
2942
0
    s = pdf_to_text_string(ctx, info);
2943
0
    if (strlen(s) <= 0)
2944
0
      return -1;
2945
2946
0
    n = 1 + (int)fz_strlcpy(buf, s, size);
2947
0
    return n;
2948
0
  }
2949
2950
0
  return -1;
2951
0
}
2952
2953
void
2954
pdf_set_metadata(fz_context *ctx, pdf_document *doc, const char *key, const char *value)
2955
0
{
2956
2957
0
  pdf_obj *info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
2958
2959
0
  pdf_begin_operation(ctx, doc, "Set Metadata");
2960
2961
0
  fz_try(ctx)
2962
0
  {
2963
0
    if (!strcmp(key, FZ_META_INFO_TITLE))
2964
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Title), value);
2965
0
    else if (!strcmp(key, FZ_META_INFO_AUTHOR))
2966
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Author), value);
2967
0
    else if (!strcmp(key, FZ_META_INFO_SUBJECT))
2968
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Subject), value);
2969
0
    else if (!strcmp(key, FZ_META_INFO_KEYWORDS))
2970
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Keywords), value);
2971
0
    else if (!strcmp(key, FZ_META_INFO_CREATOR))
2972
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Creator), value);
2973
0
    else if (!strcmp(key, FZ_META_INFO_PRODUCER))
2974
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Producer), value);
2975
0
    else if (!strcmp(key, FZ_META_INFO_CREATIONDATE))
2976
0
    {
2977
0
      int64_t time = pdf_parse_date(ctx, value);
2978
0
      if (time >= 0)
2979
0
        pdf_dict_put_date(ctx, info, PDF_NAME(CreationDate), time);
2980
0
    }
2981
0
    else if (!strcmp(key, FZ_META_INFO_MODIFICATIONDATE))
2982
0
    {
2983
0
      int64_t time = pdf_parse_date(ctx, value);
2984
0
      if (time >= 0)
2985
0
        pdf_dict_put_date(ctx, info, PDF_NAME(ModDate), time);
2986
0
    }
2987
2988
0
    if (!strncmp(key, FZ_META_INFO, strlen(FZ_META_INFO)))
2989
0
      key += strlen(FZ_META_INFO);
2990
0
    pdf_dict_put_text_string(ctx, info, pdf_new_name(ctx, key), value);
2991
0
    pdf_end_operation(ctx, doc);
2992
0
  }
2993
0
  fz_catch(ctx)
2994
0
  {
2995
0
    pdf_abandon_operation(ctx, doc);
2996
0
    fz_rethrow(ctx);
2997
0
  }
2998
0
}
2999
3000
static fz_link_dest
3001
pdf_resolve_link_imp(fz_context *ctx, fz_document *doc_, const char *uri)
3002
0
{
3003
0
  pdf_document *doc = (pdf_document*)doc_;
3004
0
  return pdf_resolve_link_dest(ctx, doc, uri);
3005
0
}
3006
3007
char *
3008
pdf_format_link_uri_imp(fz_context *ctx, fz_document *doc, fz_link_dest dest)
3009
0
{
3010
0
  return pdf_format_link_uri(ctx, dest);
3011
0
}
3012
3013
/*
3014
  Initializers for the fz_document interface.
3015
3016
  The functions are split across two files to allow calls to a
3017
  version of the constructor that does not link in the interpreter.
3018
  The interpreter references the built-in font and cmap resources
3019
  which are quite big. Not linking those into the mutool binary
3020
  saves roughly 6MB of space.
3021
*/
3022
3023
static pdf_document *
3024
pdf_new_document(fz_context *ctx, fz_stream *file)
3025
14.7k
{
3026
14.7k
  pdf_document *doc = fz_new_derived_document(ctx, pdf_document);
3027
3028
14.7k
#ifndef NDEBUG
3029
14.7k
  {
3030
14.7k
    void pdf_verify_name_table_sanity(void);
3031
14.7k
    pdf_verify_name_table_sanity();
3032
14.7k
  }
3033
14.7k
#endif
3034
3035
14.7k
  doc->super.drop_document = (fz_document_drop_fn*)pdf_drop_document_imp;
3036
14.7k
  doc->super.get_output_intent = (fz_document_output_intent_fn*)pdf_document_output_intent;
3037
14.7k
  doc->super.needs_password = (fz_document_needs_password_fn*)pdf_needs_password;
3038
14.7k
  doc->super.authenticate_password = (fz_document_authenticate_password_fn*)pdf_authenticate_password;
3039
14.7k
  doc->super.has_permission = (fz_document_has_permission_fn*)pdf_has_permission;
3040
14.7k
  doc->super.outline_iterator = (fz_document_outline_iterator_fn*)pdf_new_outline_iterator;
3041
14.7k
  doc->super.resolve_link_dest = pdf_resolve_link_imp;
3042
14.7k
  doc->super.format_link_uri = pdf_format_link_uri_imp;
3043
14.7k
  doc->super.count_pages = pdf_count_pages_imp;
3044
14.7k
  doc->super.load_page = pdf_load_page_imp;
3045
14.7k
  doc->super.page_label = pdf_page_label_imp;
3046
14.7k
  doc->super.lookup_metadata = (fz_document_lookup_metadata_fn*)pdf_lookup_metadata;
3047
14.7k
  doc->super.set_metadata = (fz_document_set_metadata_fn*)pdf_set_metadata;
3048
3049
14.7k
  pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE);
3050
14.7k
  doc->file = fz_keep_stream(ctx, file);
3051
3052
  /* Default to PDF-1.7 if the version header is missing and for new documents */
3053
14.7k
  doc->version = 17;
3054
3055
14.7k
  return doc;
3056
14.7k
}
3057
3058
pdf_document *
3059
pdf_open_document_with_stream(fz_context *ctx, fz_stream *file)
3060
14.7k
{
3061
14.7k
  pdf_document *doc = pdf_new_document(ctx, file);
3062
29.4k
  fz_try(ctx)
3063
29.4k
  {
3064
14.7k
    pdf_init_document(ctx, doc);
3065
14.7k
  }
3066
29.4k
  fz_catch(ctx)
3067
373
  {
3068
    /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */
3069
373
    char message[256];
3070
373
    int caught = fz_caught(ctx);
3071
373
    fz_strlcpy(message, fz_caught_message(ctx), sizeof message);
3072
373
    fz_drop_document(ctx, &doc->super);
3073
373
    fz_throw(ctx, caught, "%s", message);
3074
373
  }
3075
14.3k
  return doc;
3076
14.7k
}
3077
3078
/* Uncomment the following to test progressive loading. */
3079
/* #define TEST_PROGRESSIVE_HACK */
3080
3081
pdf_document *
3082
pdf_open_document(fz_context *ctx, const char *filename)
3083
0
{
3084
0
  fz_stream *file = NULL;
3085
0
  pdf_document *doc = NULL;
3086
3087
0
  fz_var(file);
3088
0
  fz_var(doc);
3089
3090
0
  fz_try(ctx)
3091
0
  {
3092
0
    file = fz_open_file(ctx, filename);
3093
#ifdef TEST_PROGRESSIVE_HACK
3094
    file->progressive = 1;
3095
#endif
3096
0
    doc = pdf_new_document(ctx, file);
3097
0
    pdf_init_document(ctx, doc);
3098
0
  }
3099
0
  fz_always(ctx)
3100
0
  {
3101
0
    fz_drop_stream(ctx, file);
3102
0
  }
3103
0
  fz_catch(ctx)
3104
0
  {
3105
0
    fz_drop_document(ctx, &doc->super);
3106
0
    fz_rethrow(ctx);
3107
0
  }
3108
3109
#ifdef TEST_PROGRESSIVE_HACK
3110
  if (doc->file_reading_linearly)
3111
  {
3112
    fz_try(ctx)
3113
      pdf_progressive_advance(ctx, doc, doc->linear_page_count-1);
3114
    fz_catch(ctx)
3115
    {
3116
      doc->file_reading_linearly = 0;
3117
      /* swallow the error */
3118
    }
3119
  }
3120
#endif
3121
3122
0
  return doc;
3123
0
}
3124
3125
static void
3126
pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum)
3127
0
{
3128
0
  fz_stream *stream = NULL;
3129
0
  pdf_obj *dict;
3130
3131
0
  fz_var(stream);
3132
0
  fz_var(dict);
3133
3134
0
  fz_try(ctx)
3135
0
  {
3136
0
    int i, j, least_num_page_objs, page_obj_num_bits;
3137
0
    int least_page_len, page_len_num_bits, shared_hint_offset;
3138
    /* int least_page_offset, page_offset_num_bits; */
3139
    /* int least_content_stream_len, content_stream_len_num_bits; */
3140
0
    int num_shared_obj_num_bits, shared_obj_num_bits;
3141
    /* int numerator_bits, denominator_bits; */
3142
0
    int shared;
3143
0
    int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
3144
0
    int shared_obj_count_total;
3145
0
    int least_shared_group_len, shared_group_len_num_bits;
3146
0
    int max_object_num = pdf_xref_len(ctx, doc);
3147
3148
0
    stream = pdf_open_stream_number(ctx, doc, objnum);
3149
0
    dict = pdf_get_xref_entry_no_null(ctx, doc, objnum)->obj;
3150
0
    if (dict == NULL || !pdf_is_dict(ctx, dict))
3151
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint object");
3152
3153
0
    shared_hint_offset = pdf_dict_get_int(ctx, dict, PDF_NAME(S));
3154
3155
    /* Malloc the structures (use realloc to cope with the fact we
3156
     * may try this several times before enough data is loaded) */
3157
0
    doc->hint_page = fz_realloc_array(ctx, doc->hint_page, doc->linear_page_count+1, pdf_hint_page);
3158
0
    memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->linear_page_count+1));
3159
0
    doc->hint_obj_offsets = fz_realloc_array(ctx, doc->hint_obj_offsets, max_object_num, int64_t);
3160
0
    memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num);
3161
0
    doc->hint_obj_offsets_max = max_object_num;
3162
3163
    /* Read the page object hints table: Header first */
3164
0
    least_num_page_objs = fz_read_bits(ctx, stream, 32);
3165
    /* The following is sometimes a lie, but we read this version,
3166
     * as other table values are built from it. In
3167
     * pdf_reference17.pdf, this points to 2 objects before the
3168
     * first pages page object. */
3169
0
    doc->hint_page[0].offset = fz_read_bits(ctx, stream, 32);
3170
0
    if (doc->hint_page[0].offset > doc->hint_object_offset)
3171
0
      doc->hint_page[0].offset += doc->hint_object_length;
3172
0
    page_obj_num_bits = fz_read_bits(ctx, stream, 16);
3173
0
    least_page_len = fz_read_bits(ctx, stream, 32);
3174
0
    page_len_num_bits = fz_read_bits(ctx, stream, 16);
3175
0
    /* least_page_offset = */ (void) fz_read_bits(ctx, stream, 32);
3176
0
    /* page_offset_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
3177
0
    /* least_content_stream_len = */ (void) fz_read_bits(ctx, stream, 32);
3178
0
    /* content_stream_len_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
3179
0
    num_shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3180
0
    shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3181
0
    /* numerator_bits = */ (void) fz_read_bits(ctx, stream, 16);
3182
0
    /* denominator_bits = */ (void) fz_read_bits(ctx, stream, 16);
3183
3184
    /* Item 1: Page object numbers */
3185
0
    doc->hint_page[0].number = doc->linear_page1_obj_num;
3186
    /* We don't care about the number of objects in the first page */
3187
0
    (void)fz_read_bits(ctx, stream, page_obj_num_bits);
3188
0
    j = 1;
3189
0
    for (i = 1; i < doc->linear_page_count; i++)
3190
0
    {
3191
0
      int delta_page_objs = fz_read_bits(ctx, stream, page_obj_num_bits);
3192
3193
0
      doc->hint_page[i].number = j;
3194
0
      j += least_num_page_objs + delta_page_objs;
3195
0
    }
3196
0
    doc->hint_page[i].number = j; /* Not a real page object */
3197
0
    fz_sync_bits(ctx, stream);
3198
    /* Item 2: Page lengths */
3199
0
    j = doc->hint_page[0].offset;
3200
0
    for (i = 0; i < doc->linear_page_count; i++)
3201
0
    {
3202
0
      int delta_page_len = fz_read_bits(ctx, stream, page_len_num_bits);
3203
0
      int old = j;
3204
3205
0
      doc->hint_page[i].offset = j;
3206
0
      j += least_page_len + delta_page_len;
3207
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3208
0
        j += doc->hint_object_length;
3209
0
    }
3210
0
    doc->hint_page[i].offset = j;
3211
0
    fz_sync_bits(ctx, stream);
3212
    /* Item 3: Shared references */
3213
0
    shared = 0;
3214
0
    for (i = 0; i < doc->linear_page_count; i++)
3215
0
    {
3216
0
      int num_shared_objs = fz_read_bits(ctx, stream, num_shared_obj_num_bits);
3217
0
      doc->hint_page[i].index = shared;
3218
0
      shared += num_shared_objs;
3219
0
    }
3220
0
    doc->hint_page[i].index = shared;
3221
0
    doc->hint_shared_ref = fz_realloc_array(ctx, doc->hint_shared_ref, shared, int);
3222
0
    memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared);
3223
0
    fz_sync_bits(ctx, stream);
3224
    /* Item 4: Shared references */
3225
0
    for (i = 0; i < shared; i++)
3226
0
    {
3227
0
      int ref = fz_read_bits(ctx, stream, shared_obj_num_bits);
3228
0
      doc->hint_shared_ref[i] = ref;
3229
0
    }
3230
    /* Skip items 5,6,7 as we don't use them */
3231
3232
0
    fz_seek(ctx, stream, shared_hint_offset, SEEK_SET);
3233
3234
    /* Read the shared object hints table: Header first */
3235
0
    shared_obj_num = fz_read_bits(ctx, stream, 32);
3236
0
    shared_obj_offset = fz_read_bits(ctx, stream, 32);
3237
0
    if (shared_obj_offset > doc->hint_object_offset)
3238
0
      shared_obj_offset += doc->hint_object_length;
3239
0
    shared_obj_count_page1 = fz_read_bits(ctx, stream, 32);
3240
0
    shared_obj_count_total = fz_read_bits(ctx, stream, 32);
3241
0
    shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3242
0
    least_shared_group_len = fz_read_bits(ctx, stream, 32);
3243
0
    shared_group_len_num_bits = fz_read_bits(ctx, stream, 16);
3244
3245
    /* Sanity check the references in Item 4 above to ensure we
3246
     * don't access out of range with malicious files. */
3247
0
    for (i = 0; i < shared; i++)
3248
0
    {
3249
0
      if (doc->hint_shared_ref[i] >= shared_obj_count_total)
3250
0
      {
3251
0
        fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint stream (shared refs)");
3252
0
      }
3253
0
    }
3254
3255
0
    doc->hint_shared = fz_realloc_array(ctx, doc->hint_shared, shared_obj_count_total+1, pdf_hint_shared);
3256
0
    memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1));
3257
3258
    /* Item 1: Shared references */
3259
0
    j = doc->hint_page[0].offset;
3260
0
    for (i = 0; i < shared_obj_count_page1; i++)
3261
0
    {
3262
0
      int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
3263
0
      int old = j;
3264
0
      doc->hint_shared[i].offset = j;
3265
0
      j += off + least_shared_group_len;
3266
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3267
0
        j += doc->hint_object_length;
3268
0
    }
3269
    /* FIXME: We would have problems recreating the length of the
3270
     * last page 1 shared reference group. But we'll never need
3271
     * to, so ignore it. */
3272
0
    j = shared_obj_offset;
3273
0
    for (; i < shared_obj_count_total; i++)
3274
0
    {
3275
0
      int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
3276
0
      int old = j;
3277
0
      doc->hint_shared[i].offset = j;
3278
0
      j += off + least_shared_group_len;
3279
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3280
0
        j += doc->hint_object_length;
3281
0
    }
3282
0
    doc->hint_shared[i].offset = j;
3283
0
    fz_sync_bits(ctx, stream);
3284
    /* Item 2: Signature flags: read these just so we can skip */
3285
0
    for (i = 0; i < shared_obj_count_total; i++)
3286
0
    {
3287
0
      doc->hint_shared[i].number = fz_read_bits(ctx, stream, 1);
3288
0
    }
3289
0
    fz_sync_bits(ctx, stream);
3290
    /* Item 3: Signatures: just skip */
3291
0
    for (i = 0; i < shared_obj_count_total; i++)
3292
0
    {
3293
0
      if (doc->hint_shared[i].number)
3294
0
      {
3295
0
        (void) fz_read_bits(ctx, stream, 128);
3296
0
      }
3297
0
    }
3298
0
    fz_sync_bits(ctx, stream);
3299
    /* Item 4: Shared object object numbers */
3300
0
    j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */
3301
0
    for (i = 0; i < shared_obj_count_page1; i++)
3302
0
    {
3303
0
      doc->hint_shared[i].number = j;
3304
0
      j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
3305
0
    }
3306
0
    j = shared_obj_num;
3307
0
    for (; i < shared_obj_count_total; i++)
3308
0
    {
3309
0
      doc->hint_shared[i].number = j;
3310
0
      j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
3311
0
    }
3312
0
    doc->hint_shared[i].number = j;
3313
3314
    /* Now, actually use the data we have gathered. */
3315
0
    for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++)
3316
0
    {
3317
0
      if (doc->hint_shared[i].number >= 0 && doc->hint_shared[i].number < max_object_num)
3318
0
        doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset;
3319
0
    }
3320
0
    for (i = 0; i < doc->linear_page_count; i++)
3321
0
    {
3322
0
      if (doc->hint_page[i].number >= 0 && doc->hint_page[i].number < max_object_num)
3323
0
        doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset;
3324
0
    }
3325
0
  }
3326
0
  fz_always(ctx)
3327
0
  {
3328
0
    fz_drop_stream(ctx, stream);
3329
0
  }
3330
0
  fz_catch(ctx)
3331
0
  {
3332
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
3333
    /* Don't try to load hints again */
3334
0
    doc->hints_loaded = 1;
3335
    /* We won't use the linearized object anymore. */
3336
0
    doc->file_reading_linearly = 0;
3337
    /* Any other error becomes a TRYLATER */
3338
0
    fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
3339
0
  }
3340
0
  doc->hints_loaded = 1;
3341
0
}
3342
3343
static void
3344
pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
3345
0
{
3346
0
  pdf_lexbuf *buf = &doc->lexbuf.base;
3347
0
  int64_t curr_pos;
3348
3349
0
  curr_pos = fz_tell(ctx, doc->file);
3350
0
  fz_seek(ctx, doc->file, doc->hint_object_offset, SEEK_SET);
3351
0
  fz_try(ctx)
3352
0
  {
3353
0
    while (1)
3354
0
    {
3355
0
      pdf_obj *page = NULL;
3356
0
      int num, tok;
3357
3358
0
      tok = pdf_lex(ctx, doc->file, buf);
3359
0
      if (tok != PDF_TOK_INT)
3360
0
        break;
3361
0
      num = buf->i;
3362
0
      tok = pdf_lex(ctx, doc->file, buf);
3363
0
      if (tok != PDF_TOK_INT)
3364
0
        break;
3365
      /* Ignore gen = buf->i */
3366
0
      tok = pdf_lex(ctx, doc->file, buf);
3367
0
      if (tok != PDF_TOK_OBJ)
3368
0
        break;
3369
0
      (void)pdf_repair_obj(ctx, doc, buf, NULL, NULL, NULL, NULL, &page, NULL, NULL);
3370
0
      pdf_load_hints(ctx, doc, num);
3371
0
    }
3372
0
  }
3373
0
  fz_always(ctx)
3374
0
  {
3375
0
    fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
3376
0
  }
3377
0
  fz_catch(ctx)
3378
0
  {
3379
0
    fz_rethrow(ctx);
3380
0
  }
3381
0
}
3382
3383
/*
	Advance progressive (linearized) loading far enough to obtain
	page 'pagenum'.

	Throws FZ_ERROR_GENERIC if 'pagenum' is outside
	[0, doc->linear_page_count), or if the page tree is missing once
	the whole file has been read. FZ_ERROR_TRYLATER is rethrown only if
	the requested page reference is still unknown; if the page has
	been found in the meantime the error is swallowed.

	The file position is saved before reading and restored afterwards.

	Returns doc->linear_page_refs[pagenum].
*/
pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum)
{
	/* File positions are 64 bit; an int would truncate the position
	 * we later seek back to on files larger than 2GB. */
	int64_t curr_pos;
	pdf_obj *page = NULL;

	pdf_load_hinted_page(ctx, doc, pagenum);

	if (pagenum < 0 || pagenum >= doc->linear_page_count)
		fz_throw(ctx, FZ_ERROR_GENERIC, "page load out of range (%d of %d)", pagenum, doc->linear_page_count);

	/* Everything has been read already. */
	if (doc->linear_pos == doc->file_length)
		return doc->linear_page_refs[pagenum];

	/* Only load hints once, and then only after we have got page 0 */
	if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset)
	{
		/* Found hint object */
		pdf_load_hint_object(ctx, doc);
	}

	DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
	curr_pos = fz_tell(ctx, doc->file);

	fz_var(page);

	fz_try(ctx)
	{
		int eof;
		do
		{
			int num;
			eof = pdf_obj_read(ctx, doc, &doc->linear_pos, &num, &page);
			pdf_drop_obj(ctx, page);
			page = NULL;
		}
		while (!eof);

		/* We reached the end of the file: load the real xref and
		 * check that it gives us a page tree. */
		{
			pdf_obj *catalog;
			pdf_obj *pages;
			doc->linear_pos = doc->file_length;
			pdf_load_xref(ctx, doc);
			catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
			pages = pdf_dict_get(ctx, catalog, PDF_NAME(Pages));

			if (!pdf_is_dict(ctx, pages))
				fz_throw(ctx, FZ_ERROR_GENERIC, "missing page tree");
			break;
		}
	}
	fz_always(ctx)
	{
		fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, page);
		if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
		{
			if (doc->linear_page_refs[pagenum] == NULL)
			{
				/* Still not got a page */
				fz_rethrow(ctx);
			}
		}
		else
			fz_rethrow(ctx);
	}

	return doc->linear_page_refs[pagenum];
}
3454
3455
/*
	Downcast a generic document to a pdf_document.

	A document is recognised as a PDF document by its count_pages
	callback being pdf_count_pages_imp. Returns NULL for a NULL
	pointer or for any other kind of document.
*/
pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr)
{
	if (ptr == NULL)
		return NULL;
	if (ptr->count_pages != pdf_count_pages_imp)
		return NULL;
	return (pdf_document *)ptr;
}
3459
3460
/*
	Downcast a generic page to a pdf_page.

	A page is recognised as a PDF page by its bound_page callback
	being pdf_bound_page. Returns NULL for a NULL pointer or for any
	other kind of page.
*/
pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *ptr)
{
	if (ptr == NULL)
		return NULL;
	if (ptr->bound_page != (fz_page_bound_page_fn*)pdf_bound_page)
		return NULL;
	return (pdf_page *)ptr;
}
3464
3465
/*
	Alternative name for pdf_document_from_fz_document: returns 'doc'
	as a pdf_document, or NULL if it is not one.
*/
pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc)
{
	pdf_document *pdf = pdf_document_from_fz_document(ctx, doc);
	return pdf;
}
3469
3470
pdf_obj *
3471
pdf_add_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
3472
8.31k
{
3473
8.31k
  pdf_document *orig_doc;
3474
8.31k
  int num;
3475
3476
8.31k
  orig_doc = pdf_get_bound_document(ctx, obj);
3477
8.31k
  if (orig_doc && orig_doc != doc)
3478
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "tried to add an object belonging to a different document");
3479
8.31k
  if (pdf_is_indirect(ctx, obj))
3480
0
    return pdf_keep_obj(ctx, obj);
3481
8.31k
  num = pdf_create_object(ctx, doc);
3482
8.31k
  pdf_update_object(ctx, doc, num, obj);
3483
8.31k
  return pdf_new_indirect(ctx, doc, num, 0);
3484
8.31k
}
3485
3486
pdf_obj *
3487
pdf_add_object_drop(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
3488
540
{
3489
540
  pdf_obj *ind = NULL;
3490
1.08k
  fz_try(ctx)
3491
1.08k
    ind = pdf_add_object(ctx, doc, obj);
3492
1.08k
  fz_always(ctx)
3493
540
    pdf_drop_obj(ctx, obj);
3494
540
  fz_catch(ctx)
3495
0
    fz_rethrow(ctx);
3496
540
  return ind;
3497
540
}
3498
3499
pdf_obj *
3500
pdf_add_new_dict(fz_context *ctx, pdf_document *doc, int initial)
3501
540
{
3502
540
  return pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, initial));
3503
540
}
3504
3505
pdf_obj *
3506
pdf_add_new_array(fz_context *ctx, pdf_document *doc, int initial)
3507
0
{
3508
0
  return pdf_add_object_drop(ctx, doc, pdf_new_array(ctx, doc, initial));
3509
0
}
3510
3511
pdf_obj *
3512
pdf_add_stream(fz_context *ctx, pdf_document *doc, fz_buffer *buf, pdf_obj *obj, int compressed)
3513
7.77k
{
3514
7.77k
  pdf_obj *ind;
3515
7.77k
  if (!obj)
3516
0
    ind = pdf_add_new_dict(ctx, doc, 4);
3517
7.77k
  else
3518
7.77k
    ind = pdf_add_object(ctx, doc, obj);
3519
15.5k
  fz_try(ctx)
3520
15.5k
    pdf_update_stream(ctx, doc, ind, buf, compressed);
3521
15.5k
  fz_catch(ctx)
3522
0
  {
3523
0
    pdf_drop_obj(ctx, ind);
3524
0
    fz_rethrow(ctx);
3525
0
  }
3526
7.77k
  return ind;
3527
7.77k
}
3528
3529
/*
	Create a new, empty PDF document that is not backed by a file.

	The document gets a trailer whose Root is a Catalog, which in turn
	refers to an empty Pages node (Count 0, empty Kids array).

	On failure the trailer and the document are dropped and the error
	is rethrown.

	Returns the new document.
*/
pdf_document *pdf_create_document(fz_context *ctx)
{
	pdf_document *doc;
	pdf_obj *catalog;
	pdf_obj *page_tree;
	pdf_obj *trailer = NULL;

	fz_var(trailer);

	doc = pdf_new_document(ctx, NULL);
	fz_try(ctx)
	{
		/* Start from a completely blank xref state. */
		doc->file_size = 0;
		doc->startxref = 0;
		doc->num_xref_sections = 0;
		doc->num_incremental_sections = 0;
		doc->xref_base = 0;
		doc->disallow_new_increments = 0;
		pdf_get_populating_xref_entry(ctx, doc, 0);

		trailer = pdf_new_dict(ctx, doc, 2);
		pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), 3);

		/* The catalog, owned by the trailer. */
		catalog = pdf_add_new_dict(ctx, doc, 2);
		pdf_dict_put_drop(ctx, trailer, PDF_NAME(Root), catalog);
		pdf_dict_put(ctx, catalog, PDF_NAME(Type), PDF_NAME(Catalog));

		/* The (empty) page tree, owned by the catalog. */
		page_tree = pdf_add_new_dict(ctx, doc, 3);
		pdf_dict_put_drop(ctx, catalog, PDF_NAME(Pages), page_tree);
		pdf_dict_put(ctx, page_tree, PDF_NAME(Type), PDF_NAME(Pages));
		pdf_dict_put_int(ctx, page_tree, PDF_NAME(Count), 0);
		pdf_dict_put_array(ctx, page_tree, PDF_NAME(Kids), 1);

		/* Set the trailer of the final xref section. */
		doc->xref_sections[0].trailer = trailer;
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, trailer);
		fz_drop_document(ctx, &doc->super);
		fz_rethrow(ctx);
	}

	return doc;
}
3569
3570
/* NULL-terminated list of file extensions; referenced by
 * pdf_document_handler below. */
static const char *pdf_extensions[] =
{
  "pdf",
  "pclm",
  "ai",
  NULL
};
3577
3578
/* NULL-terminated list of MIME types; referenced by
 * pdf_document_handler below. */
static const char *pdf_mimetypes[] =
{
  "application/pdf",
  "application/PCLm",
  NULL
};
3584
3585
static int
3586
pdf_recognize_doc_content(fz_context *ctx, fz_stream *stream)
3587
14.8k
{
3588
14.8k
  const char *match = "%PDF-";
3589
14.8k
  int pos = 0;
3590
14.8k
  int n = 4096+5;
3591
14.8k
  int c;
3592
3593
14.8k
  do
3594
16.8M
  {
3595
16.8M
    c = fz_read_byte(ctx, stream);
3596
16.8M
    if (c == EOF)
3597
5.52k
      return 0;
3598
16.7M
    if (c == match[pos])
3599
70.4k
    {
3600
70.4k
      pos++;
3601
70.4k
      if (pos == 5)
3602
7.69k
        return 100;
3603
70.4k
    }
3604
16.7M
    else
3605
16.7M
    {
3606
      /* Restart matching, but recheck c against the start. */
3607
16.7M
      pos = (c == match[0]);
3608
16.7M
    }
3609
16.7M
  }
3610
16.7M
  while (--n > 0);
3611
3612
1.67k
  return 0;
3613
14.8k
}
3614
3615
fz_document_handler pdf_document_handler =
3616
{
3617
  NULL,
3618
  (fz_document_open_fn*)pdf_open_document,
3619
  (fz_document_open_with_stream_fn*)pdf_open_document_with_stream,
3620
  pdf_extensions,
3621
  pdf_mimetypes,
3622
  NULL,
3623
  NULL,
3624
  pdf_recognize_doc_content
3625
};
3626
3627
/*
 * Set the 'marked' flag on every xref entry, in every xref section,
 * that currently has an object attached.
 */
void pdf_mark_xref(fz_context *ctx, pdf_document *doc)
{
  int section;

  for (section = 0; section < doc->num_xref_sections; section++)
  {
    pdf_xref_subsec *sub = doc->xref_sections[section].subsec;

    while (sub != NULL)
    {
      int idx;

      for (idx = 0; idx < sub->len; idx++)
      {
        if (sub->table[idx].obj)
          sub->table[idx].marked = 1;
      }
      sub = sub->next;
    }
  }
}
3649
3650
/*
 * Drop the objects held by xref entries when it is safe to do so.
 *
 * An entry's object is dropped (and the slot set to NULL) only if the
 * entry has no stm_buf and pdf_obj_refs reports exactly one reference
 * to the object.
 */
void pdf_clear_xref(fz_context *ctx, pdf_document *doc)
{
  int section;

  for (section = 0; section < doc->num_xref_sections; section++)
  {
    pdf_xref_subsec *sub = doc->xref_sections[section].subsec;

    for (; sub != NULL; sub = sub->next)
    {
      int idx;

      for (idx = 0; idx < sub->len; idx++)
      {
        pdf_xref_entry *entry = &sub->table[idx];

        /* We cannot drop objects if the stream buffer has been updated. */
        if (entry->obj == NULL || entry->stm_buf != NULL)
          continue;
        if (pdf_obj_refs(ctx, entry->obj) != 1)
          continue;

        pdf_drop_obj(ctx, entry->obj);
        entry->obj = NULL;
      }
    }
  }
}
3678
3679
/*
 * Like pdf_clear_xref, but entries whose 'marked' flag is set are
 * left alone.
 *
 * An entry's object is dropped (and the slot set to NULL) only if the
 * entry is unmarked, has no stm_buf, and pdf_obj_refs reports exactly
 * one reference to the object.
 */
void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc)
{
  int section;

  for (section = 0; section < doc->num_xref_sections; section++)
  {
    pdf_xref_subsec *sub = doc->xref_sections[section].subsec;

    for (; sub != NULL; sub = sub->next)
    {
      int idx;

      for (idx = 0; idx < sub->len; idx++)
      {
        pdf_xref_entry *entry = &sub->table[idx];

        /* We cannot drop objects if the stream buffer has been updated. */
        if (entry->obj == NULL || entry->stm_buf != NULL)
          continue;
        if (entry->marked)
          continue;
        if (pdf_obj_refs(ctx, entry->obj) != 1)
          continue;

        pdf_drop_obj(ctx, entry->obj);
        entry->obj = NULL;
      }
    }
  }
}
3708
3709
int
3710
pdf_count_versions(fz_context *ctx, pdf_document *doc)
3711
0
{
3712
0
  return doc->num_xref_sections-doc->num_incremental_sections-doc->has_linearization_object;
3713
0
}
3714
3715
int
3716
pdf_count_unsaved_versions(fz_context *ctx, pdf_document *doc)
3717
0
{
3718
0
  return doc->num_incremental_sections;
3719
0
}
3720
3721
int
3722
pdf_doc_was_linearized(fz_context *ctx, pdf_document *doc)
3723
0
{
3724
0
  return doc->has_linearization_object;
3725
0
}
3726
3727
/*
 * Report whether object number i has a non-zero type in any xref
 * section visible from the current doc->xref_base.
 *
 * Returns 1 if such an entry is found, 0 otherwise.
 * Throws FZ_ERROR_GENERIC for a negative object number.
 */
static int pdf_obj_exists(fz_context *ctx, pdf_document *doc, int i)
{
  pdf_xref_subsec *sub;
  int j;

  if (i < 0)
    fz_throw(ctx, FZ_ERROR_GENERIC, "Negative object number requested");

  /* Use the per-object section hint only for indices strictly below
   * max_xref_len. NOTE(review): this assumes xref_index holds
   * max_xref_len entries (valid indices 0..max_xref_len-1) - confirm
   * against the code that grows the index. Falling back to 0 merely
   * starts the search at the first section. */
  if (i < doc->max_xref_len)
    j = doc->xref_index[i];
  else
    j = 0;

  /* We may be accessing an earlier version of the document using xref_base
   * and j may be an index into a later xref section */
  if (doc->xref_base > j)
    j = doc->xref_base;

  /* Find the first xref section where the entry is defined. */
  for (; j < doc->num_xref_sections; j++)
  {
    pdf_xref *xref = &doc->xref_sections[j];

    if (i < xref->num_objects)
    {
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
      {
        /* Skip subsections that do not cover object i. */
        if (i < sub->start || i >= sub->start + sub->len)
          continue;

        if (sub->table[i - sub->start].type)
          return 1;
      }
    }
  }

  return 0;
}
3765
3766
/* Bit flags OR-ed into pdf_changes.obj_changes[] entries. */
enum {
  FIELD_CHANGED = 1,
  /* Set on objects whose change has been accepted. */
  FIELD_CHANGE_VALID = 2,
  /* Set on objects for which a disallowed change was detected. */
  FIELD_CHANGE_INVALID = 4
};
3771
3772
/*
 * Per-object change flags (FIELD_* bits), indexed by object number.
 *
 * NOTE(review): obj_changes is declared with a single element but is
 * indexed by arbitrary object numbers below; presumably the creator
 * over-allocates the struct - confirm at the allocation site.
 */
typedef struct
{
  int num_obj;
  int obj_changes[1];
} pdf_changes;
3777
3778
static int
3779
check_unchanged_between(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *nobj, pdf_obj *oobj)
3780
0
{
3781
0
  int marked = 0;
3782
0
  int changed = 0;
3783
3784
  /* Trivially identical => trivially unchanged. */
3785
0
  if (nobj == oobj)
3786
0
    return 0;
3787
3788
  /* Strictly speaking we shouldn't need to call fz_var,
3789
   * but I suspect static analysis tools are not smart
3790
   * enough to figure that out. */
3791
0
  fz_var(marked);
3792
3793
0
  if (pdf_is_indirect(ctx, nobj))
3794
0
  {
3795
0
    int o_xref_base = doc->xref_base;
3796
3797
    /* Both must be indirect if one is. */
3798
0
    if (!pdf_is_indirect(ctx, oobj))
3799
0
    {
3800
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3801
0
      return 1;
3802
0
    }
3803
3804
    /* Handle recursing back into ourselves. */
3805
0
    if (pdf_obj_marked(ctx, nobj))
3806
0
    {
3807
0
      if (pdf_obj_marked(ctx, oobj))
3808
0
        return 0;
3809
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3810
0
      return 1;
3811
0
    }
3812
0
    else if (pdf_obj_marked(ctx, oobj))
3813
0
    {
3814
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3815
0
      return 1;
3816
0
    }
3817
3818
0
    nobj = pdf_resolve_indirect_chain(ctx, nobj);
3819
0
    doc->xref_base = o_xref_base+1;
3820
0
    fz_try(ctx)
3821
0
    {
3822
0
      oobj = pdf_resolve_indirect_chain(ctx, oobj);
3823
0
      if (oobj != nobj)
3824
0
      {
3825
        /* Different objects, so lock them */
3826
0
        if (!pdf_obj_marked(ctx, nobj) && !pdf_obj_marked(ctx, oobj))
3827
0
        {
3828
0
          (void)pdf_mark_obj(ctx, nobj);
3829
0
          (void)pdf_mark_obj(ctx, oobj);
3830
0
          marked = 1;
3831
0
        }
3832
0
      }
3833
0
    }
3834
0
    fz_always(ctx)
3835
0
      doc->xref_base = o_xref_base;
3836
0
    fz_catch(ctx)
3837
0
      fz_rethrow(ctx);
3838
3839
0
    if (nobj == oobj)
3840
0
      return 0; /* Trivially identical */
3841
0
  }
3842
3843
0
  fz_var(changed);
3844
3845
0
  fz_try(ctx)
3846
0
  {
3847
0
    if (pdf_is_dict(ctx, nobj))
3848
0
    {
3849
0
      int i, n = pdf_dict_len(ctx, nobj);
3850
3851
0
      if (!pdf_is_dict(ctx, oobj) || n != pdf_dict_len(ctx, oobj))
3852
0
      {
3853
0
change_found:
3854
0
        changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3855
0
        changed = 1;
3856
0
        break;
3857
0
      }
3858
3859
0
      for (i = 0; i < n; i++)
3860
0
      {
3861
0
        pdf_obj *key = pdf_dict_get_key(ctx, nobj, i);
3862
0
        pdf_obj *nval = pdf_dict_get(ctx, nobj, key);
3863
0
        pdf_obj *oval = pdf_dict_get(ctx, oobj, key);
3864
3865
0
        changed |= check_unchanged_between(ctx, doc, changes, nval, oval);
3866
0
      }
3867
0
    }
3868
0
    else if (pdf_is_array(ctx, nobj))
3869
0
    {
3870
0
      int i, n = pdf_array_len(ctx, nobj);
3871
3872
0
      if (!pdf_is_array(ctx, oobj) || n != pdf_array_len(ctx, oobj))
3873
0
        goto change_found;
3874
3875
0
      for (i = 0; i < n; i++)
3876
0
      {
3877
0
        pdf_obj *nval = pdf_array_get(ctx, nobj, i);
3878
0
        pdf_obj *oval = pdf_array_get(ctx, oobj, i);
3879
3880
0
        changed |= check_unchanged_between(ctx, doc, changes, nval, oval);
3881
0
      }
3882
0
    }
3883
0
    else if (pdf_objcmp(ctx, nobj, oobj))
3884
0
      goto change_found;
3885
0
  }
3886
0
  fz_always(ctx)
3887
0
  {
3888
0
    if (marked)
3889
0
    {
3890
0
      pdf_unmark_obj(ctx, nobj);
3891
0
      pdf_unmark_obj(ctx, oobj);
3892
0
    }
3893
0
  }
3894
0
  fz_catch(ctx)
3895
0
    fz_rethrow(ctx);
3896
3897
0
  return changed;
3898
0
}
3899
3900
/* Growable array of heap-allocated strings. */
typedef struct
{
  int max;      /* allocated capacity of list, in entries */
  int len;      /* number of entries in use */
  char **list;  /* the entries; each one is freed by free_char_list */
} char_list;
3906
3907
/* This structure is used to hold the definition of which fields
3908
 * are locked. */
3909
struct pdf_locked_fields
3910
{
3911
  int p;
3912
  int all;
3913
  char_list includes;
3914
  char_list excludes;
3915
};
3916
3917
static void
3918
free_char_list(fz_context *ctx, char_list *c)
3919
0
{
3920
0
  int i;
3921
3922
0
  if (c == NULL)
3923
0
    return;
3924
3925
0
  for (i = c->len-1; i >= 0; i--)
3926
0
    fz_free(ctx, c->list[i]);
3927
0
  fz_free(ctx, c->list);
3928
0
  c->len = 0;
3929
0
  c->max = 0;
3930
0
}
3931
3932
void
3933
pdf_drop_locked_fields(fz_context *ctx, pdf_locked_fields *fl)
3934
0
{
3935
0
  if (fl == NULL)
3936
0
    return;
3937
3938
0
  free_char_list(ctx, &fl->includes);
3939
0
  free_char_list(ctx, &fl->excludes);
3940
0
  fz_free(ctx, fl);
3941
0
}
3942
3943
static void
3944
char_list_append(fz_context *ctx, char_list *list, const char *s)
3945
0
{
3946
0
  if (list->len == list->max)
3947
0
  {
3948
0
    int n = list->max * 2;
3949
0
    if (n == 0) n = 4;
3950
3951
0
    list->list = fz_realloc_array(ctx, list->list, n, char *);
3952
0
    list->max = n;
3953
0
  }
3954
0
  list->list[list->len] = fz_strdup(ctx, s);
3955
0
  list->len++;
3956
0
}
3957
3958
int
3959
pdf_is_field_locked(fz_context *ctx, pdf_locked_fields *locked, const char *name)
3960
0
{
3961
0
  int i;
3962
3963
0
  if (locked->p == 1)
3964
0
  {
3965
    /* Permissions were set, and say that field changes are not to be allowed. */
3966
0
    return 1; /* Locked */
3967
0
  }
3968
3969
0
  if(locked->all)
3970
0
  {
3971
    /* The only way we might not be unlocked is if
3972
     * we are listed in the excludes. */
3973
0
    for (i = 0; i < locked->excludes.len; i++)
3974
0
      if (!strcmp(locked->excludes.list[i], name))
3975
0
        return 0;
3976
0
    return 1;
3977
0
  }
3978
3979
  /* The only way we can be locked is for us to be in the includes. */
3980
0
  for (i = 0; i < locked->includes.len; i++)
3981
0
    if (strcmp(locked->includes.list[i], name) == 0)
3982
0
      return 1;
3983
3984
  /* Anything else is unlocked */
3985
0
  return 0;
3986
0
}
3987
3988
/* Unfortunately, in C, there is no legal way to define a function
3989
 * type that returns itself. We therefore have to use a struct
3990
 * wrapper. */
3991
typedef struct filter_wrap
3992
{
3993
  struct filter_wrap (*func)(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
3994
} filter_wrap;
3995
3996
typedef struct filter_wrap (*filter_fn)(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
3997
3998
0
#define RETURN_FILTER(f) { filter_wrap rf; rf.func = (f); return rf; }
3999
4000
/* Leaf filter: always returns a NULL filter, whatever the key. */
static filter_wrap filter_simple(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_fn next = NULL;

  RETURN_FILTER(next);
}
4004
4005
/*
 * Filter for the keys of a TransformParams dictionary: the keys
 * listed below map to filter_simple, any other key to no filter.
 */
static filter_wrap filter_transformparams(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  int known =
    pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
    pdf_name_eq(ctx, key, PDF_NAME(P)) ||
    pdf_name_eq(ctx, key, PDF_NAME(V)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Document)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Msg)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Annots)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Form)) ||
    pdf_name_eq(ctx, key, PDF_NAME(FormEx)) ||
    pdf_name_eq(ctx, key, PDF_NAME(EF)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Action)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Fields));

  RETURN_FILTER(known ? &filter_simple : NULL);
}
4023
4024
/*
 * Filter for the keys of a signature reference dictionary:
 * TransformParams descends via filter_transformparams, the other
 * listed keys map to filter_simple, anything else to no filter.
 */
static filter_wrap filter_reference(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_fn next = NULL;

  if (pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
    pdf_name_eq(ctx, key, PDF_NAME(TransformMethod)) ||
    pdf_name_eq(ctx, key, PDF_NAME(DigestMethod)) ||
    pdf_name_eq(ctx, key, PDF_NAME(DigestValue)) ||
    pdf_name_eq(ctx, key, PDF_NAME(DigestLocation)))
  {
    next = &filter_simple;
  }
  else if (pdf_name_eq(ctx, key, PDF_NAME(TransformParams)))
  {
    next = &filter_transformparams;
  }

  RETURN_FILTER(next);
}
4036
4037
/*
 * Filter for the keys of the sub-dictionaries reached through
 * filter_prop_build: the keys listed below map to filter_simple,
 * any other key to no filter.
 */
static filter_wrap filter_prop_build_sub(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  int known =
    pdf_name_eq(ctx, key, PDF_NAME(Name)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Date)) ||
    pdf_name_eq(ctx, key, PDF_NAME(R)) ||
    pdf_name_eq(ctx, key, PDF_NAME(PreRelease)) ||
    pdf_name_eq(ctx, key, PDF_NAME(OS)) ||
    pdf_name_eq(ctx, key, PDF_NAME(NonEFontNoWarn)) ||
    pdf_name_eq(ctx, key, PDF_NAME(TrustedMode)) ||
    pdf_name_eq(ctx, key, PDF_NAME(V)) ||
    pdf_name_eq(ctx, key, PDF_NAME(REx)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Preview));

  RETURN_FILTER(known ? &filter_simple : NULL);
}
4052
4053
/*
 * Filter for the keys of a Prop_Build dictionary: Filter, PubSec,
 * App and SigQ descend via filter_prop_build_sub; any other key
 * gets no filter.
 */
static filter_wrap filter_prop_build(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  int known =
    pdf_name_eq(ctx, key, PDF_NAME(Filter)) ||
    pdf_name_eq(ctx, key, PDF_NAME(PubSec)) ||
    pdf_name_eq(ctx, key, PDF_NAME(App)) ||
    pdf_name_eq(ctx, key, PDF_NAME(SigQ));

  RETURN_FILTER(known ? &filter_prop_build_sub : NULL);
}
4062
4063
/*
 * Filter for the keys of a field's V value.
 *
 * Length on a stream and the listed signature dictionary keys map to
 * filter_simple; Reference descends via filter_reference and
 * Prop_Build via filter_prop_build; any other key gets no filter.
 */
static filter_wrap filter_v(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_fn next = NULL;

  /* Text can point to a stream object */
  if (pdf_name_eq(ctx, key, PDF_NAME(Length)) && pdf_is_stream(ctx, dict))
  {
    next = &filter_simple;
  }
  /* Sigs point to a dict. */
  else if (pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Filter)) ||
    pdf_name_eq(ctx, key, PDF_NAME(SubFilter)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Contents)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Cert)) ||
    pdf_name_eq(ctx, key, PDF_NAME(ByteRange)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Changes)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Name)) ||
    pdf_name_eq(ctx, key, PDF_NAME(M)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Location)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Reason)) ||
    pdf_name_eq(ctx, key, PDF_NAME(ContactInfo)) ||
    pdf_name_eq(ctx, key, PDF_NAME(R)) ||
    pdf_name_eq(ctx, key, PDF_NAME(V)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Prop_AuthTime)) ||
    pdf_name_eq(ctx, key, PDF_NAME(Prop_AuthType)))
  {
    next = &filter_simple;
  }
  else if (pdf_name_eq(ctx, key, PDF_NAME(Reference)))
  {
    next = filter_reference;
  }
  else if (pdf_name_eq(ctx, key, PDF_NAME(Prop_Build)))
  {
    next = filter_prop_build;
  }

  RETURN_FILTER(next);
}
4092
4093
static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
4094
4095
/* Every entry of an XObject resource dictionary is filtered as an
 * appearance, whatever its key. */
static filter_wrap filter_xobject_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  /* FIXME: Infinite recursion possible here? */
  filter_fn next = &filter_appearance;

  RETURN_FILTER(next);
}
4100
4101
/* Filter for the keys of a font dictionary: never descends further. */
static filter_wrap filter_font(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  /* In the example I've seen the /Name field was dropped, so we'll allow
   * local changes, but none that follow an indirection. */
  filter_fn next = NULL;

  RETURN_FILTER(next);
}
4107
4108
/* FIXME: One idea here is to make filter_font_list and filter_xobject_list
4109
 * only accept NEW objects as changes. Will think about this. */
4110
static filter_wrap filter_font_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
4111
0
{
4112
0
  RETURN_FILTER(&filter_font);
4113
0
}
4114
4115
/*
 * Filter for the keys of a Resources dictionary: XObject descends
 * via filter_xobject_list, Font via filter_font_list, and any other
 * key gets no filter.
 */
static filter_wrap filter_resources(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_fn next = NULL;

  if (pdf_name_eq(ctx, key, PDF_NAME(XObject)))
    next = &filter_xobject_list;
  else if (pdf_name_eq(ctx, key, PDF_NAME(Font)))
    next = &filter_font_list;

  RETURN_FILTER(next);
}
4123
4124
/* Filter for the keys of an appearance: only Resources is followed,
 * via filter_resources. */
static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  int is_resources = pdf_name_eq(ctx, key, PDF_NAME(Resources));

  RETURN_FILTER(is_resources ? &filter_resources : NULL);
}
4130
4131
/* Filter for the keys of an AP dictionary: the N entry is followed
 * via filter_appearance, but only when its value is a stream. */
static filter_wrap filter_ap(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  filter_fn next = NULL;

  /* Just the /N entry for now. May need to add more later. */
  if (pdf_name_eq(ctx, key, PDF_NAME(N)))
  {
    if (pdf_is_stream(ctx, pdf_dict_get(ctx, dict, key)))
      next = &filter_appearance;
  }

  RETURN_FILTER(next);
}
4138
4139
/* Filter for XFA data: every key of a stream maps to filter_simple;
 * keys of anything that is not a stream get no filter. */
static filter_wrap filter_xfa(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
  /* Text can point to a stream object */
  int on_stream = pdf_is_stream(ctx, dict);

  RETURN_FILTER(on_stream ? &filter_simple : NULL);
}
4146
4147
static void
4148
filter_changes_accepted(fz_context *ctx, pdf_changes *changes, pdf_obj *obj, filter_fn filter)
4149
0
{
4150
0
  int obj_num;
4151
4152
0
  if (obj == NULL || pdf_obj_marked(ctx, obj))
4153
0
    return;
4154
4155
0
  obj_num = pdf_to_num(ctx, obj);
4156
4157
0
  fz_try(ctx)
4158
0
  {
4159
0
    if (obj_num != 0)
4160
0
    {
4161
0
      (void)pdf_mark_obj(ctx, obj);
4162
0
      changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4163
0
    }
4164
0
    if (filter == NULL)
4165
0
      break;
4166
0
    if (pdf_is_dict(ctx, obj))
4167
0
    {
4168
0
      int i, n = pdf_dict_len(ctx, obj);
4169
4170
0
      for (i = 0; i < n; i++)
4171
0
      {
4172
0
        pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
4173
0
        pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
4174
0
        filter_fn f = (filter(ctx, obj, key)).func;
4175
0
        if (f != NULL)
4176
0
          filter_changes_accepted(ctx, changes, val, f);
4177
0
      }
4178
0
    }
4179
0
    else if (pdf_is_array(ctx, obj))
4180
0
    {
4181
0
      int i, n = pdf_array_len(ctx, obj);
4182
4183
0
      for (i = 0; i < n; i++)
4184
0
      {
4185
0
        pdf_obj *val = pdf_array_get(ctx, obj, i);
4186
0
        filter_changes_accepted(ctx, changes, val, filter);
4187
0
      }
4188
0
    }
4189
0
  }
4190
0
  fz_always(ctx)
4191
0
    if (obj_num != 0)
4192
0
      pdf_unmark_obj(ctx, obj);
4193
0
  fz_catch(ctx)
4194
0
    fz_rethrow(ctx);
4195
0
}
4196
4197
static void
4198
check_field(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *obj, pdf_locked_fields *locked, const char *name_prefix, pdf_obj *new_v, pdf_obj *old_v)
4199
0
{
4200
0
  pdf_obj *old_obj, *new_obj, *n_v, *o_v;
4201
0
  int o_xref_base;
4202
0
  int obj_num;
4203
0
  char *field_name = NULL;
4204
4205
  /* All fields MUST be indirections, either in the Fields array
4206
   * or AcroForms, or in the Kids array of other Fields. */
4207
0
  if (!pdf_is_indirect(ctx, obj))
4208
0
    return;
4209
4210
0
  obj_num = pdf_to_num(ctx, obj);
4211
0
  o_xref_base = doc->xref_base;
4212
0
  new_obj = pdf_resolve_indirect_chain(ctx, obj);
4213
4214
  /* Similarly, all fields must be dicts */
4215
0
  if (!pdf_is_dict(ctx, new_obj))
4216
0
    return;
4217
4218
0
  if (pdf_obj_marked(ctx, obj))
4219
0
    return;
4220
4221
0
  fz_var(field_name);
4222
4223
0
  fz_try(ctx)
4224
0
  {
4225
0
    int i, len;
4226
0
    const char *name;
4227
0
    size_t n;
4228
0
    pdf_obj *t;
4229
0
    int is_locked;
4230
4231
0
    (void)pdf_mark_obj(ctx, obj);
4232
4233
    /* Do this within the try, so we can catch any problems */
4234
0
    doc->xref_base = o_xref_base+1;
4235
0
    old_obj = pdf_resolve_indirect_chain(ctx, obj);
4236
4237
0
    t = pdf_dict_get(ctx, old_obj, PDF_NAME(T));
4238
0
    if (t != NULL)
4239
0
    {
4240
0
      name = pdf_to_text_string(ctx, pdf_dict_get(ctx, old_obj, PDF_NAME(T)));
4241
0
      n = strlen(name)+1;
4242
0
      if (*name_prefix)
4243
0
        n += 1 + strlen(name_prefix);
4244
0
      field_name = fz_malloc(ctx, n);
4245
0
      if (*name_prefix)
4246
0
      {
4247
0
        strcpy(field_name, name_prefix);
4248
0
        strcat(field_name, ".");
4249
0
      }
4250
0
      else
4251
0
        *field_name = 0;
4252
0
      strcat(field_name, name);
4253
0
      name_prefix = field_name;
4254
0
    }
4255
4256
0
    doc->xref_base = o_xref_base;
4257
4258
0
    if (!pdf_is_dict(ctx, old_obj))
4259
0
      break;
4260
4261
    /* Check V explicitly, allowing for it being inherited. */
4262
0
    n_v = pdf_dict_get(ctx, new_obj, PDF_NAME(V));
4263
0
    if (n_v == NULL)
4264
0
      n_v = new_v;
4265
0
    o_v = pdf_dict_get(ctx, old_obj, PDF_NAME(V));
4266
0
    if (o_v == NULL)
4267
0
      o_v = old_v;
4268
4269
0
    is_locked = pdf_is_field_locked(ctx, locked, name_prefix);
4270
0
    if (pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Type)), PDF_NAME(Annot)) &&
4271
0
      pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Subtype)), PDF_NAME(Widget)))
4272
0
    {
4273
0
      if (is_locked)
4274
0
      {
4275
        /* If locked, V must not change! */
4276
0
        if (check_unchanged_between(ctx, doc, changes, n_v, o_v))
4277
0
          changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID;
4278
0
      }
4279
0
      else
4280
0
      {
4281
        /* If not locked, V can change to be filled in! */
4282
0
        filter_changes_accepted(ctx, changes, n_v, &filter_v);
4283
0
        changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4284
0
      }
4285
0
    }
4286
4287
    /* Check all the fields in the new object are
4288
     * either the same as the old object, or are
4289
     * expected changes. */
4290
0
    len = pdf_dict_len(ctx, new_obj);
4291
0
    for (i = 0; i < len; i++)
4292
0
    {
4293
0
      pdf_obj *key = pdf_dict_get_key(ctx, new_obj, i);
4294
0
      pdf_obj *nval = pdf_dict_get(ctx, new_obj, key);
4295
0
      pdf_obj *oval = pdf_dict_get(ctx, old_obj, key);
4296
4297
      /* Kids arrays shouldn't change. */
4298
0
      if (pdf_name_eq(ctx, key, PDF_NAME(Kids)))
4299
0
      {
4300
0
        int j, m;
4301
4302
        /* Kids must be an array. If it's not, count it as a difference. */
4303
0
        if (!pdf_is_array(ctx, nval) || !pdf_is_array(ctx, oval))
4304
0
        {
4305
0
change_found:
4306
0
          changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID;
4307
0
          break;
4308
0
        }
4309
0
        m = pdf_array_len(ctx, nval);
4310
        /* Any change in length counts as a difference */
4311
0
        if (m != pdf_array_len(ctx, oval))
4312
0
          goto change_found;
4313
0
        for (j = 0; j < m; j++)
4314
0
        {
4315
0
          pdf_obj *nkid = pdf_array_get(ctx, nval, j);
4316
0
          pdf_obj *okid = pdf_array_get(ctx, oval, j);
4317
          /* Kids arrays are supposed to all be indirect. If they aren't,
4318
           * count it as a difference. */
4319
0
          if (!pdf_is_indirect(ctx, nkid) || !pdf_is_indirect(ctx, okid))
4320
0
            goto change_found;
4321
          /* For now at least, we'll count any change in number as a difference. */
4322
0
          if (pdf_to_num(ctx, nkid) != pdf_to_num(ctx, okid))
4323
0
            goto change_found;
4324
0
          check_field(ctx, doc, changes, nkid, locked, name_prefix, n_v, o_v);
4325
0
        }
4326
0
      }
4327
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(V)))
4328
0
      {
4329
        /* V is checked above */
4330
0
      }
4331
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(AP)))
4332
0
      {
4333
        /* If we're locked, then nothing can change. If not,
4334
         * we can change to be filled in. */
4335
0
        if (is_locked)
4336
0
          check_unchanged_between(ctx, doc, changes, nval, oval);
4337
0
        else
4338
0
          filter_changes_accepted(ctx, changes, nval, &filter_ap);
4339
0
      }
4340
      /* All other fields can't change */
4341
0
      else
4342
0
        check_unchanged_between(ctx, doc, changes, nval, oval);
4343
0
    }
4344
4345
    /* Now check all the fields in the old object to
4346
     * make sure none were dropped. */
4347
0
    len = pdf_dict_len(ctx, old_obj);
4348
0
    for (i = 0; i < len; i++)
4349
0
    {
4350
0
      pdf_obj *key = pdf_dict_get_key(ctx, old_obj, i);
4351
0
      pdf_obj *nval, *oval;
4352
4353
      /* V is checked above */
4354
0
      if (pdf_name_eq(ctx, key, PDF_NAME(V)))
4355
0
        continue;
4356
4357
0
      nval = pdf_dict_get(ctx, new_obj, key);
4358
0
      oval = pdf_dict_get(ctx, old_obj, key);
4359
4360
0
      if (nval == NULL && oval != NULL)
4361
0
        changes->obj_changes[pdf_to_num(ctx, nval)] |= FIELD_CHANGE_INVALID;
4362
0
    }
4363
0
    changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4364
4365
0
  }
4366
0
  fz_always(ctx)
4367
0
  {
4368
0
    pdf_unmark_obj(ctx, obj);
4369
0
    fz_free(ctx, field_name);
4370
0
    doc->xref_base = o_xref_base;
4371
0
  }
4372
0
  fz_catch(ctx)
4373
0
    fz_rethrow(ctx);
4374
0
}
4375
4376
static int
4377
pdf_obj_changed_in_version(fz_context *ctx, pdf_document *doc, int num, int version)
4378
0
{
4379
0
  if (num < 0 || num > doc->max_xref_len)
4380
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "Invalid object number requested");
4381
4382
0
  return version == doc->xref_index[num];
4383
0
}
4384
4385
static void
4386
merge_lock_specification(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *lock)
4387
0
{
4388
0
  pdf_obj *action;
4389
0
  int i, r, w;
4390
4391
0
  if (lock == NULL)
4392
0
    return;
4393
4394
0
  action = pdf_dict_get(ctx, lock, PDF_NAME(Action));
4395
4396
0
  if (pdf_name_eq(ctx, action, PDF_NAME(All)))
4397
0
  {
4398
    /* All fields locked means we don't need any stored
4399
     * includes/excludes. */
4400
0
    fields->all = 1;
4401
0
    free_char_list(ctx, &fields->includes);
4402
0
    free_char_list(ctx, &fields->excludes);
4403
0
  }
4404
0
  else
4405
0
  {
4406
0
    pdf_obj *f = pdf_dict_get(ctx, lock, PDF_NAME(Fields));
4407
0
    int len = pdf_array_len(ctx, f);
4408
4409
0
    if (pdf_name_eq(ctx, action, PDF_NAME(Include)))
4410
0
    {
4411
0
      if (fields->all)
4412
0
      {
4413
        /* Current state = "All except <excludes> are locked".
4414
         * We need to remove <Fields> from <excludes>. */
4415
0
        for (i = 0; i < len; i++)
4416
0
        {
4417
0
          const char *s = pdf_to_text_string(ctx, pdf_array_get(ctx, f, i));
4418
0
          int r, w;
4419
4420
0
          for (r = w = 0; r < fields->excludes.len; r++)
4421
0
          {
4422
0
            if (strcmp(s, fields->excludes.list[r]))
4423
0
              fields->excludes.list[w++] = fields->excludes.list[r];
4424
0
          }
4425
0
          fields->excludes.len = w;
4426
0
        }
4427
0
      }
4428
0
      else
4429
0
      {
4430
        /* Current state = <includes> are locked.
4431
         * We need to add <Fields> to <include> (avoiding repetition). */
4432
0
        for (i = 0; i < len; i++)
4433
0
        {
4434
0
          const char *s = pdf_to_text_string(ctx, pdf_array_get(ctx, f, i));
4435
4436
0
          for (r = 0; r < fields->includes.len; r++)
4437
0
          {
4438
0
            if (!strcmp(s, fields->includes.list[r]))
4439
0
              break;
4440
0
          }
4441
0
          if (r == fields->includes.len)
4442
0
            char_list_append(ctx, &fields->includes, s);
4443
0
        }
4444
0
      }
4445
0
    }
4446
0
    else if (pdf_name_eq(ctx, action, PDF_NAME(Exclude)))
4447
0
    {
4448
0
      if (fields->all)
4449
0
      {
4450
        /* Current state = "All except <excludes> are locked.
4451
         * We need to remove anything from <excludes> that isn't in <Fields>. */
4452
0
        for (r = w = 0; r < fields->excludes.len; r++)
4453
0
        {
4454
0
          for (i = 0; i < len; i++)
4455
0
          {
4456
0
            const char *s = pdf_to_text_string(ctx, pdf_array_get(ctx, f, i));
4457
0
            if (!strcmp(s, fields->excludes.list[r]))
4458
0
              break;
4459
0
          }
4460
0
          if (i != len) /* we found a match */
4461
0
            fields->excludes.list[w++] = fields->excludes.list[r];
4462
0
        }
4463
0
        fields->excludes.len = w;
4464
0
      }
4465
0
      else
4466
0
      {
4467
        /* Current state = <includes> are locked.
4468
         * Set all. <excludes> becomes <Fields> less <includes>. Remove <includes>. */
4469
0
        fields->all = 1;
4470
0
        for (i = 0; i < len; i++)
4471
0
        {
4472
0
          const char *s = pdf_to_text_string(ctx, pdf_array_get(ctx, f, i));
4473
0
          for (r = 0; r < fields->includes.len; r++)
4474
0
          {
4475
0
            if (!strcmp(s, fields->includes.list[r]))
4476
0
              break;
4477
0
          }
4478
0
          if (r == fields->includes.len)
4479
0
            char_list_append(ctx, &fields->excludes, s);
4480
0
        }
4481
0
        free_char_list(ctx, &fields->includes);
4482
0
      }
4483
0
    }
4484
0
  }
4485
0
}
4486
4487
static void
4488
find_locked_fields_value(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *v)
4489
0
{
4490
0
  pdf_obj *ref = pdf_dict_get(ctx, v, PDF_NAME(Reference));
4491
0
  int i, n;
4492
4493
0
  if (!ref)
4494
0
    return;
4495
4496
0
  n = pdf_array_len(ctx, ref);
4497
0
  for (i = 0; i < n; i++)
4498
0
  {
4499
0
    pdf_obj *sr = pdf_array_get(ctx, ref, i);
4500
0
    pdf_obj *tm, *tp, *type;
4501
4502
    /* Type is optional, but if it exists, it'd better be SigRef. */
4503
0
    type = pdf_dict_get(ctx, sr, PDF_NAME(Type));
4504
0
    if (type != NULL && !pdf_name_eq(ctx, type, PDF_NAME(SigRef)))
4505
0
      continue;
4506
0
    tm = pdf_dict_get(ctx, sr, PDF_NAME(TransformMethod));
4507
0
    tp = pdf_dict_get(ctx, sr, PDF_NAME(TransformParams));
4508
0
    if (pdf_name_eq(ctx, tm, PDF_NAME(DocMDP)))
4509
0
    {
4510
0
      int p = pdf_to_int(ctx, pdf_dict_get(ctx, tp, PDF_NAME(P)));
4511
4512
0
      if (p == 0)
4513
0
        p = 2;
4514
0
      if (fields->p == 0)
4515
0
        fields->p = p;
4516
0
      else
4517
0
        fields->p = fz_mini(fields->p, p);
4518
0
    }
4519
0
    else if (pdf_name_eq(ctx, tm, PDF_NAME(FieldMDP)))
4520
0
      merge_lock_specification(ctx, fields, tp);
4521
0
  }
4522
0
}
4523
4524
static void
4525
find_locked_fields_aux(fz_context *ctx, pdf_obj *field, pdf_locked_fields *fields, pdf_obj *inherit_v, pdf_obj *inherit_ft)
4526
0
{
4527
0
  int i, n;
4528
4529
0
  if (!pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Type)), PDF_NAME(Annot)))
4530
0
    return;
4531
4532
0
  if (pdf_obj_marked(ctx, field))
4533
0
    return;
4534
4535
0
  fz_try(ctx)
4536
0
  {
4537
0
    pdf_obj *kids, *v, *ft;
4538
4539
0
    (void)pdf_mark_obj(ctx, field);
4540
4541
0
    v = pdf_dict_get(ctx, field, PDF_NAME(V));
4542
0
    if (v == NULL)
4543
0
      v = inherit_v;
4544
0
    ft = pdf_dict_get(ctx, field, PDF_NAME(FT));
4545
0
    if (ft == NULL)
4546
0
      ft = inherit_ft;
4547
4548
    /* We are looking for Widget annotations of type Sig that are
4549
     * signed (i.e. have a 'V' field). */
4550
0
    if (pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Subtype)), PDF_NAME(Widget)) &&
4551
0
      pdf_name_eq(ctx, ft, PDF_NAME(Sig)) &&
4552
0
      pdf_name_eq(ctx, pdf_dict_get(ctx, v, PDF_NAME(Type)), PDF_NAME(Sig)))
4553
0
    {
4554
      /* Signed Sig Widgets (i.e. ones with a 'V' field) need
4555
       * to have their lock field respected. */
4556
0
      merge_lock_specification(ctx, fields, pdf_dict_get(ctx, field, PDF_NAME(Lock)));
4557
4558
      /* Look for DocMDP and FieldMDP entries to see what
4559
       * flavours of alterations are allowed. */
4560
0
      find_locked_fields_value(ctx, fields, v);
4561
0
    }
4562
4563
    /* Recurse as required */
4564
0
    kids = pdf_dict_get(ctx, field, PDF_NAME(Kids));
4565
0
    if (kids)
4566
0
    {
4567
0
      n = pdf_array_len(ctx, kids);
4568
0
      for (i = 0; i < n; i++)
4569
0
        find_locked_fields_aux(ctx, pdf_array_get(ctx, kids, i), fields, v, ft);
4570
0
    }
4571
0
  }
4572
0
  fz_always(ctx)
4573
0
    pdf_unmark_obj(ctx, field);
4574
0
  fz_catch(ctx)
4575
0
    fz_rethrow(ctx);
4576
0
}
4577
4578
pdf_locked_fields *
4579
pdf_find_locked_fields(fz_context *ctx, pdf_document *doc, int version)
4580
0
{
4581
0
  pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields);
4582
0
  int o_xref_base = doc->xref_base;
4583
0
  doc->xref_base = version;
4584
4585
0
  fz_var(fields);
4586
4587
0
  fz_try(ctx)
4588
0
  {
4589
0
    pdf_obj *fobj = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm/Fields");
4590
0
    int i, len = pdf_array_len(ctx, fobj);
4591
4592
0
    if (len == 0)
4593
0
      break;
4594
4595
0
    for (i = 0; i < len; i++)
4596
0
      find_locked_fields_aux(ctx, pdf_array_get(ctx, fobj, i), fields, NULL, NULL);
4597
4598
    /* Add in any DocMDP referenced directly from the Perms dict. */
4599
0
    find_locked_fields_value(ctx, fields, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Perms/DocMDP"));
4600
0
  }
4601
0
  fz_always(ctx)
4602
0
    doc->xref_base = o_xref_base;
4603
0
  fz_catch(ctx)
4604
0
  {
4605
0
    pdf_drop_locked_fields(ctx, fields);
4606
0
    fz_rethrow(ctx);
4607
0
  }
4608
4609
0
  return fields;
4610
0
}
4611
4612
pdf_locked_fields *
4613
pdf_find_locked_fields_for_sig(fz_context *ctx, pdf_document *doc, pdf_obj *sig)
4614
0
{
4615
0
  pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields);
4616
4617
0
  fz_var(fields);
4618
4619
0
  fz_try(ctx)
4620
0
  {
4621
0
    pdf_obj *ref;
4622
0
    int i, len;
4623
4624
    /* Ensure it really is a sig */
4625
0
    if (!pdf_name_eq(ctx, pdf_dict_get(ctx, sig, PDF_NAME(Subtype)), PDF_NAME(Widget)) ||
4626
0
      !pdf_name_eq(ctx, pdf_dict_get_inheritable(ctx, sig, PDF_NAME(FT)), PDF_NAME(Sig)))
4627
0
      break;
4628
4629
    /* Check the locking details given in the V (i.e. what the signature value
4630
     * claims to lock). */
4631
0
    ref = pdf_dict_getp(ctx, sig, "V/Reference");
4632
0
    len = pdf_array_len(ctx, ref);
4633
0
    for (i = 0; i < len; i++)
4634
0
    {
4635
0
      pdf_obj *tp = pdf_dict_get(ctx, pdf_array_get(ctx, ref, i), PDF_NAME(TransformParams));
4636
0
      merge_lock_specification(ctx, fields, tp);
4637
0
    }
4638
4639
    /* Also, check the locking details given in the Signature definition. This may
4640
     * not strictly be necessary as it's supposed to be "what the form author told
4641
     * the signature that it should lock". A well-formed signature should lock
4642
     * at least that much (possibly with extra fields locked from the XFA). If the
4643
     * signature doesn't lock as much as it was told to, we should be suspicious
4644
     * of the signing application. It is not clear that this test is actually
4645
     * necessary, or in keeping with what Acrobat does. */
4646
0
    merge_lock_specification(ctx, fields, pdf_dict_get(ctx, sig, PDF_NAME(Lock)));
4647
0
  }
4648
0
  fz_catch(ctx)
4649
0
  {
4650
0
    pdf_drop_locked_fields(ctx, fields);
4651
0
    fz_rethrow(ctx);
4652
0
  }
4653
4654
0
  return fields;
4655
0
}
4656
4657
static int
4658
validate_locked_fields(fz_context *ctx, pdf_document *doc, int version, pdf_locked_fields *locked)
4659
0
{
4660
0
  int o_xref_base = doc->xref_base;
4661
0
  pdf_changes *changes;
4662
0
  int num_objs;
4663
0
  int i, n;
4664
0
  int all_indirects = 1;
4665
4666
0
  num_objs = doc->max_xref_len;
4667
0
  changes = Memento_label(fz_calloc(ctx, 1, sizeof(*changes) + sizeof(int)*(num_objs-1)), "pdf_changes");
4668
0
  changes->num_obj = num_objs;
4669
4670
0
  fz_try(ctx)
4671
0
  {
4672
0
    pdf_obj *acroform, *new_acroform, *old_acroform;
4673
0
    int len, acroform_num;
4674
4675
0
    doc->xref_base = version;
4676
4677
    /* Detect every object that has changed */
4678
0
    for (i = 1; i < num_objs; i++)
4679
0
    {
4680
0
      if (pdf_obj_changed_in_version(ctx, doc, i, version))
4681
0
        changes->obj_changes[i] = FIELD_CHANGED;
4682
0
    }
4683
4684
    /* FIXME: Compare PageTrees and NumberTrees (just to allow for them being regenerated
4685
     * and having produced stuff that represents the same stuff). */
4686
4687
    /* The metadata of a document may be regenerated. Allow for that. */
4688
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Metadata"), &filter_simple);
4689
4690
    /* The ModDate of document info may be regenerated. Allow for that. */
4691
    /* FIXME: We accept all changes in document info, when maybe we ought to just
4692
     * accept ModDate? */
4693
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Info"), &filter_simple);
4694
4695
    /* The Encryption dict may be rewritten for the new Xref. */
4696
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Encrypt"), &filter_simple);
4697
4698
    /* We have to accept certain changes in the top level AcroForms dict,
4699
     * so get the 2 versions... */
4700
0
    acroform = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm");
4701
0
    acroform_num = pdf_to_num(ctx, acroform);
4702
0
    new_acroform = pdf_resolve_indirect_chain(ctx, acroform);
4703
0
    doc->xref_base = version+1;
4704
0
    old_acroform = pdf_resolve_indirect_chain(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm"));
4705
0
    doc->xref_base = version;
4706
0
    n = pdf_dict_len(ctx, new_acroform);
4707
0
    for (i = 0; i < n; i++)
4708
0
    {
4709
0
      pdf_obj *key = pdf_dict_get_key(ctx, new_acroform, i);
4710
0
      pdf_obj *nval = pdf_dict_get(ctx, new_acroform, key);
4711
0
      pdf_obj *oval = pdf_dict_get(ctx, old_acroform, key);
4712
4713
0
      if (pdf_name_eq(ctx, key, PDF_NAME(Fields)))
4714
0
      {
4715
0
        int j;
4716
4717
0
        len = pdf_array_len(ctx, nval);
4718
0
        for (j = 0; j < len; j++)
4719
0
        {
4720
0
          pdf_obj *field = pdf_array_get(ctx, nval, j);
4721
0
          if (!pdf_is_indirect(ctx, field))
4722
0
            all_indirects = 0;
4723
0
          check_field(ctx, doc, changes, field, locked, "", NULL, NULL);
4724
0
        }
4725
0
      }
4726
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(SigFlags)))
4727
0
      {
4728
        /* Accept this */
4729
0
        changes->obj_changes[acroform_num] |= FIELD_CHANGE_VALID;
4730
0
      }
4731
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(DR)))
4732
0
      {
4733
        /* Accept any changes from within the Document Resources */
4734
0
        filter_changes_accepted(ctx, changes, nval, &filter_resources);
4735
0
      }
4736
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(XFA)))
4737
0
      {
4738
        /* Allow any changes within the XFA streams. */
4739
0
        filter_changes_accepted(ctx, changes, nval, &filter_xfa);
4740
0
      }
4741
0
      else if (pdf_objcmp(ctx, nval, oval))
4742
0
      {
4743
0
        changes->obj_changes[acroform_num] |= FIELD_CHANGE_INVALID;
4744
0
      }
4745
0
    }
4746
4747
    /* Allow for any object streams/XRefs to be changed. */
4748
0
    doc->xref_base = version+1;
4749
0
    for (i = 1; i < num_objs; i++)
4750
0
    {
4751
0
      pdf_obj *oobj, *otype;
4752
0
      if (changes->obj_changes[i] != FIELD_CHANGED)
4753
0
        continue;
4754
0
      if (!pdf_obj_exists(ctx, doc, i))
4755
0
      {
4756
        /* Not present this version - must be newly created, can't be a change. */
4757
0
        changes->obj_changes[i] |= FIELD_CHANGE_VALID;
4758
0
        continue;
4759
0
      }
4760
0
      oobj = pdf_load_object(ctx, doc, i);
4761
0
      otype = pdf_dict_get(ctx, oobj, PDF_NAME(Type));
4762
0
      if (pdf_name_eq(ctx, otype, PDF_NAME(ObjStm)) ||
4763
0
        pdf_name_eq(ctx, otype, PDF_NAME(XRef)))
4764
0
      {
4765
0
        changes->obj_changes[i] |= FIELD_CHANGE_VALID;
4766
0
      }
4767
0
      pdf_drop_obj(ctx, oobj);
4768
0
    }
4769
0
  }
4770
0
  fz_always(ctx)
4771
0
    doc->xref_base = o_xref_base;
4772
0
  fz_catch(ctx)
4773
0
    fz_rethrow(ctx);
4774
4775
0
  for (i = 1; i < num_objs; i++)
4776
0
  {
4777
0
    if (changes->obj_changes[i] == FIELD_CHANGED)
4778
      /* Change with no reason */
4779
0
      break;
4780
0
    if (changes->obj_changes[i] & FIELD_CHANGE_INVALID)
4781
      /* Illegal Change */
4782
0
      break;
4783
0
  }
4784
4785
0
  fz_free(ctx, changes);
4786
4787
0
  return (i == num_objs) && all_indirects;
4788
0
}
4789
4790
int
4791
pdf_validate_changes(fz_context *ctx, pdf_document *doc, int version)
4792
0
{
4793
0
  int unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
4794
0
  int n = pdf_count_versions(ctx, doc);
4795
0
  pdf_locked_fields *locked = NULL;
4796
0
  int result;
4797
4798
0
  if (version < 0 || version >= n)
4799
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "There aren't that many changes to find in this document!");
4800
4801
  /* We are wanting to compare version+1 with version to make sure
4802
   * that the only changes made in going to version are conformant
4803
   * with what was allowed in version+1. The production of version
4804
   * might have involved signing a signature field and locking down
4805
   * more fields - this means that taking the list of locked things
4806
   * from version rather than version+1 will give us bad results! */
4807
0
  locked = pdf_find_locked_fields(ctx, doc, unsaved_versions+version+1);
4808
4809
0
  if (!locked->all && locked->includes.len == 0 && locked->p == 0)
4810
0
  {
4811
    /* If nothing is locked at all, then all changes are permissible. */
4812
0
    result = 1;
4813
0
  }
4814
0
  else
4815
0
    result = validate_locked_fields(ctx, doc, unsaved_versions+version, locked);
4816
4817
0
  pdf_drop_locked_fields(ctx, locked);
4818
4819
0
  return result;
4820
0
}
4821
4822
int
4823
pdf_validate_change_history(fz_context *ctx, pdf_document *doc)
4824
0
{
4825
0
  int num_versions = pdf_count_versions(ctx, doc);
4826
0
  int v;
4827
4828
0
  if (num_versions < 2)
4829
0
    return 0; /* Unless there are at least 2 versions, there have been no updates. */
4830
4831
0
  for(v = num_versions - 2; v >= 0; v--)
4832
0
  {
4833
0
    if (!pdf_validate_changes(ctx, doc, v))
4834
0
      return v+1;
4835
0
  }
4836
0
  return 0;
4837
0
}
4838
4839
/* Return the version that obj appears in, or -1 for not found. */
4840
static int
4841
pdf_find_incremental_update_num_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
4842
0
{
4843
0
  pdf_xref *xref = NULL;
4844
0
  pdf_xref_subsec *sub;
4845
0
  int i, j;
4846
4847
0
  if (obj == NULL)
4848
0
    return -1;
4849
4850
  /* obj needs to be indirect for us to get a num out of it. */
4851
0
  i = pdf_to_num(ctx, obj);
4852
0
  if (i <= 0)
4853
0
    return -1;
4854
4855
  /* obj can't be indirect below, so resolve it here. */
4856
0
  obj = pdf_resolve_indirect_chain(ctx, obj);
4857
4858
  /* Find the first xref section where the entry is defined. */
4859
0
  for (j = 0; j < doc->num_xref_sections; j++)
4860
0
  {
4861
0
    xref = &doc->xref_sections[j];
4862
4863
0
    if (i < xref->num_objects)
4864
0
    {
4865
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
4866
0
      {
4867
0
        pdf_xref_entry *entry;
4868
4869
0
        if (i < sub->start || i >= sub->start + sub->len)
4870
0
          continue;
4871
4872
0
        entry = &sub->table[i - sub->start];
4873
0
        if (entry->obj == obj)
4874
0
          return j;
4875
0
      }
4876
0
    }
4877
0
  }
4878
0
  return -1;
4879
0
}
4880
4881
/*
 * Return the xref section index in which obj is found, clamped to the
 * total number of versions (saved plus unsaved), or -1 if it is not found.
 */
int pdf_find_version_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
{
  int found = pdf_find_incremental_update_num_for_obj(ctx, doc, obj);
  int limit;

  if (found == -1)
    return -1;

  limit = pdf_count_versions(ctx, doc) + pdf_count_unsaved_versions(ctx, doc);

  return (found > limit) ? limit : found;
}
4895
4896
/*
 * Validate the updates made to the document since 'widget' was signed.
 *
 * The locks claimed by the signature are read as of the version in
 * which the widget object is found, and each later version (down to the
 * first saved one) is then checked against them with
 * validate_locked_fields, stopping at the first failure.
 *
 * Returns i+1-unsaved_versions, where i is the loop index at which
 * checking stopped. doc->xref_base is restored and the lock set is
 * dropped on all paths.
 *
 * NOTE(review): pdf_find_version_for_obj can return -1; that value is
 * used as-is below - confirm callers never reach here in that state.
 */
int pdf_validate_signature(fz_context *ctx, pdf_annot *widget)
{
  pdf_document *doc = widget->page->doc;
  int unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
  int num_versions = pdf_count_versions(ctx, doc) + unsaved_versions;
  int version = pdf_find_version_for_obj(ctx, doc, widget->obj);
  pdf_locked_fields *locked = NULL;
  int saved_base;
  int i;

  /* Never look beyond the oldest version. */
  if (version > num_versions-1)
    version = num_versions-1;

  /* Read the lock definition as it stood when the signature was made. */
  saved_base = doc->xref_base;
  doc->xref_base = version;

  fz_var(locked); /* Not really needed, but it stops warnings */

  fz_try(ctx)
  {
    locked = pdf_find_locked_fields_for_sig(ctx, doc, widget->obj);

    i = version-1;
    while (i >= unsaved_versions)
    {
      doc->xref_base = i;
      if (!validate_locked_fields(ctx, doc, i, locked))
        break;
      i--;
    }
  }
  fz_always(ctx)
  {
    doc->xref_base = saved_base;
    pdf_drop_locked_fields(ctx, locked);
  }
  fz_catch(ctx)
    fz_rethrow(ctx);

  return i+1-unsaved_versions;
}
4935
4936
/*
 * Report whether any version of the document had a Root/AcroForm with
 * an XFA entry but an empty (or absent) Fields array.
 *
 * Versions are examined from (versions + unsaved versions) down to the
 * number of unsaved versions, stopping at the first match.
 * doc->xref_base is restored before returning or rethrowing.
 *
 * Returns 1 if such a version is found, 0 otherwise.
 */
int pdf_was_pure_xfa(fz_context *ctx, pdf_document *doc)
{
  int num_unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
  int num_versions = pdf_count_versions(ctx, doc);
  int saved_base = doc->xref_base;
  int pure_xfa = 0;
  int v;

  fz_var(pure_xfa);

  fz_try(ctx)
  {
    v = num_versions + num_unsaved_versions;
    while (v >= num_unsaved_versions)
    {
      pdf_obj *acroform;

      doc->xref_base = v;
      acroform = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm");

      /* No fields but an XFA entry: at this point in its history
       * the form is deduced to have been pure XFA. */
      if (pdf_array_len(ctx, pdf_dict_get(ctx, acroform, PDF_NAME(Fields))) == 0 &&
        pdf_dict_get(ctx, acroform, PDF_NAME(XFA)) != NULL)
      {
        pure_xfa = 1;
        break;
      }
      v--;
    }
  }
  fz_always(ctx)
    doc->xref_base = saved_base;
  fz_catch(ctx)
    fz_rethrow(ctx);

  return pure_xfa;
}
4968
4969
/*
 * Allocate a new xref with a single subsection that starts at object 0
 * and has room for as many entries as pdf_xref_len reports for 'doc'.
 *
 * Returns the new xref. If allocating the subsection or its table
 * fails, everything allocated so far is freed and the error rethrown.
 */
pdf_xref *pdf_new_local_xref(fz_context *ctx, pdf_document *doc)
{
  int count = pdf_xref_len(ctx, doc);
  pdf_xref *local = fz_malloc_struct(ctx, pdf_xref);

  local->num_objects = count;
  local->subsec = NULL;
  local->trailer = NULL;
  local->pre_repair_trailer = NULL;
  local->unsaved_sigs = NULL;
  local->unsaved_sigs_end = NULL;

  fz_try(ctx)
  {
    pdf_xref_subsec *sub;

    /* Stored in the xref straight away so the catch block can free it. */
    local->subsec = fz_malloc_struct(ctx, pdf_xref_subsec);
    sub = local->subsec;

    sub->start = 0;
    sub->len = count;
    sub->next = NULL;
    sub->table = fz_malloc_struct_array(ctx, count, pdf_xref_entry);
  }
  fz_catch(ctx)
  {
    fz_free(ctx, local->subsec);
    fz_free(ctx, local);
    fz_rethrow(ctx);
  }

  return local;
}
4998
4999
/*
 * Release a local xref: its subsections first, then the xref itself.
 * Passing NULL is allowed and does nothing.
 */
void pdf_drop_local_xref(fz_context *ctx, pdf_xref *xref)
{
  if (xref != NULL)
  {
    pdf_drop_xref_subsec(ctx, xref);
    fz_free(ctx, xref);
  }
}
5008
5009
/*
 * Discard the document's local xref together with the resources tied
 * to it, and flag the document as needing resynthesis.
 */
void pdf_drop_local_xref_and_resources(fz_context *ctx, pdf_document *doc)
{
  /* Purge the local font resources and the local entries in the store
   * before the xref itself is released. */
  pdf_purge_local_font_resources(ctx, doc);
  pdf_purge_locals_from_store(ctx, doc);

  /* Release the xref and clear the document's pointer to it. */
  pdf_drop_local_xref(ctx, doc->local_xref);
  doc->local_xref = NULL;

  doc->resynth_required = 1;
}
5017
5018
void
5019
pdf_debug_doc_changes(fz_context *ctx, pdf_document *doc)
5020
0
{
5021
0
  int i, j;
5022
5023
0
  if (doc->num_incremental_sections == 0)
5024
0
    fz_write_printf(ctx, fz_stddbg(ctx), "No incremental xrefs");
5025
0
  else
5026
0
  {
5027
0
    for (i = 0; i < doc->num_incremental_sections; i++)
5028
0
    {
5029
0
      pdf_xref *xref = &doc->xref_sections[i];
5030
0
      pdf_xref_subsec *sub;
5031
5032
0
      fz_write_printf(ctx, fz_stddbg(ctx), "Incremental xref:\n");
5033
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
5034
0
      {
5035
0
        fz_write_printf(ctx, fz_stddbg(ctx), "  Objects %d->%d\n", sub->start, sub->start + sub->len - 1);
5036
0
        for (j = 0; j < sub->len; j++)
5037
0
        {
5038
0
          pdf_xref_entry *e = &sub->table[j];
5039
0
          if (e->type == 0)
5040
0
            continue;
5041
0
          fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type);
5042
0
          pdf_debug_obj(ctx, e->obj);
5043
0
          fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n");
5044
0
        }
5045
0
      }
5046
0
    }
5047
0
  }
5048
5049
0
  if (doc->local_xref == NULL)
5050
0
    fz_write_printf(ctx, fz_stddbg(ctx), "No local xref");
5051
0
  else
5052
0
  {
5053
0
    for (i = 0; i < doc->num_incremental_sections; i++)
5054
0
    {
5055
0
      pdf_xref *xref = doc->local_xref;
5056
0
      pdf_xref_subsec *sub;
5057
5058
0
      fz_write_printf(ctx, fz_stddbg(ctx), "Local xref (%sin force):\n", doc->local_xref_nesting == 0 ? "not " : "");
5059
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
5060
0
      {
5061
0
        fz_write_printf(ctx, fz_stddbg(ctx), "  Objects %d->%d\n", sub->start, sub->start + sub->len - 1);
5062
0
        for (j = 0; j < sub->len; j++)
5063
0
        {
5064
0
          pdf_xref_entry *e = &sub->table[j];
5065
0
          if (e->type == 0)
5066
0
            continue;
5067
0
          fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type);
5068
0
          pdf_debug_obj(ctx, e->obj);
5069
0
          fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n");
5070
0
        }
5071
0
      }
5072
0
    }
5073
0
  }
5074
5075
0
}
5076
5077
pdf_obj *
5078
pdf_metadata(fz_context *ctx, pdf_document *doc)
5079
0
{
5080
0
  int initial = doc->xref_base;
5081
0
  pdf_obj *obj = NULL;
5082
5083
0
  fz_var(obj);
5084
5085
0
  fz_try(ctx)
5086
0
  {
5087
0
    do
5088
0
    {
5089
0
      pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
5090
0
      obj = pdf_dict_get(ctx, root, PDF_NAME(Metadata));
5091
0
      if (obj)
5092
0
        break;
5093
0
      doc->xref_base++;
5094
0
    }
5095
0
    while (doc->xref_base < doc->num_xref_sections);
5096
0
  }
5097
0
  fz_always(ctx)
5098
0
    doc->xref_base = initial;
5099
0
  fz_catch(ctx)
5100
0
    fz_rethrow(ctx);
5101
5102
0
  return obj;
5103
0
}
5104
5105
/*
 * Return 1 if obj is found in xref section 0 of its bound document,
 * 0 otherwise (including when obj is not bound to a document, or the
 * document has no incremental sections).
 */
int pdf_obj_is_incremental(fz_context *ctx, pdf_obj *obj)
{
  pdf_document *doc = pdf_get_bound_document(ctx, obj);

  if (doc != NULL && doc->num_incremental_sections != 0)
    return pdf_find_incremental_update_num_for_obj(ctx, doc, obj) == 0;

  return 0;
}