Coverage Report

Created: 2025-11-07 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/pdf/pdf-xref.c
Line
Count
Source
1
// Copyright (C) 2004-2025 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "pdf-annot-imp.h"
25
#include "pdf-imp.h"
26
27
#include <assert.h>
28
#include <limits.h>
29
#include <string.h>
30
31
#undef DEBUG_PROGESSIVE_ADVANCE
32
33
#ifdef DEBUG_PROGESSIVE_ADVANCE
34
#define DEBUGMESS(A) do { fz_warn A; } while (0)
35
#else
36
0
#define DEBUGMESS(A) do { } while (0)
37
#endif
38
39
0
/* Locale-independent ASCII digit test. The argument is parenthesized so
 * that expressions such as isdigit(x & 0x7f) parse as intended; note that
 * the argument is still evaluated twice. */
#define isdigit(c) ((c) >= '0' && (c) <= '9')
40
41
/* Return 1 if ch is one of the bytes NUL, TAB, LF, FF, CR or SPACE,
 * and 0 for any other value. */
static inline int iswhite(int ch)
{
  switch (ch)
  {
  case '\000':
  case '\011':
  case '\012':
  case '\014':
  case '\015':
  case '\040':
    return 1;
  default:
    return 0;
  }
}
47
48
/*
49
 * xref tables
50
 */
51
52
static void
53
pdf_drop_xref_subsec(fz_context *ctx, pdf_xref *xref)
54
16
{
55
16
  pdf_xref_subsec *sub = xref->subsec;
56
16
  pdf_unsaved_sig *usig;
57
16
  int e;
58
59
32
  while (sub != NULL)
60
16
  {
61
16
    pdf_xref_subsec *next_sub = sub->next;
62
10.8k
    for (e = 0; e < sub->len; e++)
63
10.8k
    {
64
10.8k
      pdf_xref_entry *entry = &sub->table[e];
65
10.8k
      pdf_drop_obj(ctx, entry->obj);
66
10.8k
      fz_drop_buffer(ctx, entry->stm_buf);
67
10.8k
    }
68
16
    fz_free(ctx, sub->table);
69
16
    fz_free(ctx, sub);
70
16
    sub = next_sub;
71
16
  }
72
73
16
  pdf_drop_obj(ctx, xref->pre_repair_trailer);
74
16
  pdf_drop_obj(ctx, xref->trailer);
75
76
16
  while ((usig = xref->unsaved_sigs) != NULL)
77
0
  {
78
0
    xref->unsaved_sigs = usig->next;
79
0
    pdf_drop_obj(ctx, usig->field);
80
0
    pdf_drop_signer(ctx, usig->signer);
81
0
    fz_free(ctx, usig);
82
0
  }
83
16
}
84
85
/* Drop the contents of each of the num_xref_sections sections in the
 * xref_sections array, then free the array itself. doc is not used. */
static void pdf_drop_xref_sections_imp(fz_context *ctx, pdf_document *doc, pdf_xref *xref_sections, int num_xref_sections)
{
  pdf_xref *section = xref_sections;
  int remaining = num_xref_sections;

  while (remaining-- > 0)
    pdf_drop_xref_subsec(ctx, section++);

  fz_free(ctx, xref_sections);
}
94
95
/* Drop both the saved and the current xref section arrays of the
 * document and reset the associated counters. */
static void pdf_drop_xref_sections(fz_context *ctx, pdf_document *doc)
{
  pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);
  doc->saved_xref_sections = NULL;
  doc->saved_num_xref_sections = 0;

  pdf_drop_xref_sections_imp(ctx, doc, doc->xref_sections, doc->num_xref_sections);
  doc->xref_sections = NULL;
  doc->num_xref_sections = 0;
  doc->num_incremental_sections = 0;
}
106
107
static void
108
extend_xref_index(fz_context *ctx, pdf_document *doc, int newlen)
109
35
{
110
35
  int i;
111
112
35
  doc->xref_index = fz_realloc_array(ctx, doc->xref_index, newlen, int);
113
10.9k
  for (i = doc->max_xref_len; i < newlen; i++)
114
10.8k
  {
115
10.8k
    doc->xref_index[i] = 0;
116
10.8k
  }
117
35
  doc->max_xref_len = newlen;
118
35
}
119
120
static void
121
resize_xref_sub(fz_context *ctx, pdf_xref *xref, int base, int newlen)
122
0
{
123
0
  pdf_xref_subsec *sub;
124
0
  int i;
125
126
0
  assert(xref != NULL);
127
0
  sub = xref->subsec;
128
0
  assert(sub->next == NULL && sub->start == base && sub->len+base == xref->num_objects);
129
0
  assert(newlen+base > xref->num_objects);
130
131
0
  sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
132
0
  for (i = sub->len; i < newlen; i++)
133
0
  {
134
0
    sub->table[i].type = 0;
135
0
    sub->table[i].ofs = 0;
136
0
    sub->table[i].gen = 0;
137
0
    sub->table[i].num = 0;
138
0
    sub->table[i].stm_ofs = 0;
139
0
    sub->table[i].stm_buf = NULL;
140
0
    sub->table[i].obj = NULL;
141
0
  }
142
0
  sub->len = newlen;
143
0
  if (newlen+base > xref->num_objects)
144
0
    xref->num_objects = newlen+base;
145
0
}
146
147
/* This is only ever called when we already have an incremental
148
 * xref. This means there will only be 1 subsec, and it will be
149
 * a complete subsec. */
150
static void pdf_resize_xref(fz_context *ctx, pdf_document *doc, int newlen)
151
0
{
152
0
  pdf_xref *xref = &doc->xref_sections[doc->xref_base];
153
154
0
  resize_xref_sub(ctx, xref, 0, newlen);
155
0
  if (doc->max_xref_len < newlen)
156
0
    extend_xref_index(ctx, doc, newlen);
157
0
}
158
159
/* Append one empty xref section (no subsections, no trailers, no
 * unsaved signatures) to the end of doc->xref_sections. */
static void pdf_populate_next_xref_level(fz_context *ctx, pdf_document *doc)
{
  int slot = doc->num_xref_sections;
  pdf_xref *fresh;

  doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, slot + 1, pdf_xref);
  doc->num_xref_sections = slot + 1;

  fresh = &doc->xref_sections[slot];
  fresh->subsec = NULL;
  fresh->num_objects = 0;
  fresh->trailer = NULL;
  fresh->pre_repair_trailer = NULL;
  fresh->unsaved_sigs = NULL;
  fresh->unsaved_sigs_end = NULL;
}
173
174
/* Return the trailer of the xref section selected by doc->xref_base,
 * or NULL if the document has no xref sections. */
pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc)
{
  if (doc->xref_sections == NULL)
    return NULL;

  return doc->xref_sections[doc->xref_base].trailer;
}
181
182
/* Install trailer (taking a new reference) on the last xref section,
 * i.e. the one being populated. If that section already had a trailer,
 * it is moved to pre_repair_trailer, whose previous value is dropped. */
void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer)
{
  pdf_xref *populating = &doc->xref_sections[doc->num_xref_sections - 1];
  pdf_obj *previous = populating->trailer;

  if (previous)
  {
    pdf_drop_obj(ctx, populating->pre_repair_trailer);
    populating->pre_repair_trailer = previous;
  }
  populating->trailer = pdf_keep_obj(ctx, trailer);
}
193
194
/* Return the largest num_objects over the xref sections from xref_base
 * onwards, also taking the local xref into account when one is active. */
int pdf_xref_len(fz_context *ctx, pdf_document *doc)
{
  int longest = 0;
  int s;

  if (doc->local_xref && doc->local_xref_nesting > 0)
    longest = doc->local_xref->num_objects;

  for (s = doc->xref_base; s < doc->num_xref_sections; s++)
    longest = fz_maxi(longest, doc->xref_sections[s].num_objects);

  return longest;
}
207
208
/* Ensure that the given xref has a single subsection
209
 * that covers the entire range. */
210
static void
211
ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num, int which)
212
36
{
213
36
  pdf_xref *xref = &doc->xref_sections[which];
214
36
  pdf_xref_subsec *sub = xref->subsec;
215
36
  pdf_xref_subsec *new_sub;
216
217
36
  if (num < xref->num_objects)
218
0
    num = xref->num_objects;
219
220
36
  if (sub != NULL && sub->next == NULL && sub->start == 0 && sub->len >= num)
221
1
    return;
222
223
35
  new_sub = fz_malloc_struct(ctx, pdf_xref_subsec);
224
70
  fz_try(ctx)
225
70
  {
226
35
    new_sub->table = fz_malloc_struct_array(ctx, num, pdf_xref_entry);
227
35
    new_sub->start = 0;
228
35
    new_sub->len = num;
229
35
    new_sub->next = NULL;
230
35
  }
231
70
  fz_catch(ctx)
232
0
  {
233
0
    fz_free(ctx, new_sub);
234
0
    fz_rethrow(ctx);
235
0
  }
236
237
  /* Move objects over to the new subsection and destroy the old
238
   * ones */
239
35
  sub = xref->subsec;
240
54
  while (sub != NULL)
241
19
  {
242
19
    pdf_xref_subsec *next = sub->next;
243
19
    int i;
244
245
10.8k
    for (i = 0; i < sub->len; i++)
246
10.8k
    {
247
10.8k
      new_sub->table[i+sub->start] = sub->table[i];
248
10.8k
    }
249
19
    fz_free(ctx, sub->table);
250
19
    fz_free(ctx, sub);
251
19
    sub = next;
252
19
  }
253
35
  xref->num_objects = num;
254
35
  xref->subsec = new_sub;
255
35
  if (doc->max_xref_len < num)
256
35
    extend_xref_index(ctx, doc, num);
257
35
}
258
259
static pdf_xref_entry *
260
pdf_get_local_xref_entry(fz_context *ctx, pdf_document *doc, int num)
261
0
{
262
0
  pdf_xref *xref = doc->local_xref;
263
0
  pdf_xref_subsec *sub;
264
265
0
  if (xref == NULL || doc->local_xref_nesting == 0)
266
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Local xref not present!");
267
268
  /* Local xrefs only ever have 1 section, and it should be solid. */
269
0
  sub = xref->subsec;
270
0
  assert(sub && !sub->next);
271
0
  if (num >= sub->start && num < sub->start + sub->len)
272
0
    return &sub->table[num - sub->start];
273
274
  /* Expand the xref so we can return a pointer. */
275
0
  resize_xref_sub(ctx, xref, 0, num+1);
276
0
  sub = xref->subsec;
277
0
  return &sub->table[num - sub->start];
278
0
}
279
280
/* Return the entry for object num within the xref currently being
 * populated: the active local xref if there is one, otherwise the last
 * xref section. A first section is created if the document has none,
 * and the last section is solidified when no subsection covers num.
 * Throws FZ_ERROR_ARGUMENT for an out-of-range object number. */
pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref_subsec *sub;
  int last;

  if (doc->num_xref_sections == 0)
  {
    doc->xref_sections = fz_malloc_struct(ctx, pdf_xref);
    doc->num_xref_sections = 1;
  }

  if (doc->local_xref && doc->local_xref_nesting > 0)
    return pdf_get_local_xref_entry(ctx, doc, num);

  /* Prevent accidental heap underflow */
  if (num < 0 || num > PDF_MAX_OBJECT_NUMBER)
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "object number out of range (%d)", num);

  /* Look for a subsection of the last section that covers num. */
  last = doc->num_xref_sections-1;
  sub = doc->xref_sections[last].subsec;
  while (sub != NULL)
  {
    if (sub->start <= num && num < sub->start + sub->len)
      return &sub->table[num-sub->start];
    sub = sub->next;
  }

  /* We've been asked for an object that's not in a subsec. */
  ensure_solid_xref(ctx, doc, num+1, last);
  sub = doc->xref_sections[doc->num_xref_sections-1].subsec;

  return &sub->table[num-sub->start];
}
315
316
/* It is vital that pdf_get_xref_entry_aux called with !solidify_if_needed
317
 * and a value object number, does NOT try/catch or throw. */
318
static
319
pdf_xref_entry *pdf_get_xref_entry_aux(fz_context *ctx, pdf_document *doc, int i, int solidify_if_needed)
320
32.3k
{
321
32.3k
  pdf_xref *xref = NULL;
322
32.3k
  pdf_xref_subsec *sub;
323
32.3k
  int j;
324
325
32.3k
  if (i < 0)
326
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Negative object number requested");
327
328
32.3k
  if (i < doc->max_xref_len)
329
32.3k
    j = doc->xref_index[i];
330
0
  else
331
0
    j = 0;
332
333
  /* If we have an active local xref, check there first. */
334
32.3k
  if (doc->local_xref && doc->local_xref_nesting > 0)
335
0
  {
336
0
    xref = doc->local_xref;
337
338
0
    if (i < xref->num_objects)
339
0
    {
340
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
341
0
      {
342
0
        pdf_xref_entry *entry;
343
344
0
        if (i < sub->start || i >= sub->start + sub->len)
345
0
          continue;
346
347
0
        entry = &sub->table[i - sub->start];
348
0
        if (entry->type)
349
0
          return entry;
350
0
      }
351
0
    }
352
0
  }
353
354
  /* We may be accessing an earlier version of the document using xref_base
355
   * and j may be an index into a later xref section */
356
32.3k
  if (doc->xref_base > j)
357
0
    j = doc->xref_base;
358
32.3k
  else
359
32.3k
    j = 0;
360
361
362
  /* Find the first xref section where the entry is defined. */
363
32.3k
  for (; j < doc->num_xref_sections; j++)
364
32.3k
  {
365
32.3k
    xref = &doc->xref_sections[j];
366
367
32.3k
    if (i < xref->num_objects)
368
32.3k
    {
369
32.3k
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
370
32.3k
      {
371
32.3k
        pdf_xref_entry *entry;
372
373
32.3k
        if (i < sub->start || i >= sub->start + sub->len)
374
0
          continue;
375
376
32.3k
        entry = &sub->table[i - sub->start];
377
32.3k
        if (entry->type)
378
32.3k
        {
379
          /* Don't update xref_index if xref_base may have
380
           * influenced the value of j */
381
32.3k
          if (doc->xref_base == 0)
382
32.3k
            doc->xref_index[i] = j;
383
32.3k
          return entry;
384
32.3k
        }
385
32.3k
      }
386
32.3k
    }
387
32.3k
  }
388
389
  /* Didn't find the entry in any section. Return the entry from
390
   * the local_xref (if there is one active), or the final section. */
391
0
  if (doc->local_xref && doc->local_xref_nesting > 0)
392
0
  {
393
0
    if (xref == NULL || i < xref->num_objects)
394
0
    {
395
0
      xref = doc->local_xref;
396
0
      sub = xref->subsec;
397
0
      assert(sub != NULL && sub->next == NULL);
398
0
      if (i >= sub->start && i < sub->start + sub->len)
399
0
        return &sub->table[i - sub->start];
400
0
    }
401
402
    /* Expand the xref so we can return a pointer. */
403
0
    resize_xref_sub(ctx, xref, 0, i+1);
404
0
    sub = xref->subsec;
405
0
    return &sub->table[i - sub->start];
406
0
  }
407
408
0
  doc->xref_index[i] = 0;
409
0
  if (xref == NULL || i < xref->num_objects)
410
0
  {
411
0
    xref = &doc->xref_sections[doc->xref_base];
412
0
    for (sub = xref->subsec; sub != NULL; sub = sub->next)
413
0
    {
414
0
      if (i >= sub->start && i < sub->start + sub->len)
415
0
        return &sub->table[i - sub->start];
416
0
    }
417
0
  }
418
419
  /* Some really hairy code here. When we are reading the file in
420
   * initially, we read from 'newest' to 'oldest' (i.e. from 0 to
421
   * doc->num_xref_sections-1). Each section is created initially
422
   * with num_objects == 0 in it, and remains like that while we
423
   * are parsing the stream from the file. This is the only time
424
   * we'll ever have xref_sections with 0 objects in them. */
425
0
  if (doc->xref_sections[doc->num_xref_sections-1].num_objects == 0)
426
0
  {
427
    /* The oldest xref section has 0 objects in it. So we are
428
     * parsing an xref stream while loading. We don't want to
429
     * solidify the xref we are currently parsing for (as it'll
430
     * get very confused, and end up a different 'shape' in
431
     * memory to that which is in the file, and would hence
432
     * render 'fingerprinting' for snapshotting invalid) so
433
     * just give up at this point. */
434
0
    return NULL;
435
0
  }
436
437
0
  if (!solidify_if_needed)
438
0
    return NULL;
439
440
  /* At this point, we solidify the xref. This ensures that we
441
   * can return a pointer. This is the only case where this function
442
   * might throw an exception, and it will never happen when we are
443
   * working within a 'solid' xref. */
444
0
  ensure_solid_xref(ctx, doc, i+1, 0);
445
0
  xref = &doc->xref_sections[0];
446
0
  sub = xref->subsec;
447
0
  return &sub->table[i - sub->start];
448
0
}
449
450
/* Look up the xref entry for object i, allowing the xref to be
 * solidified if that is needed to produce an entry. May return NULL. */
pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i)
{
  const int solidify_if_needed = 1;

  return pdf_get_xref_entry_aux(ctx, doc, i, solidify_if_needed);
}
454
455
/* Look up the xref entry for object i without permitting the xref to
 * be solidified; returns NULL instead. */
pdf_xref_entry *pdf_get_xref_entry_no_change(fz_context *ctx, pdf_document *doc, int i)
{
  const int solidify_if_needed = 0;

  return pdf_get_xref_entry_aux(ctx, doc, i, solidify_if_needed);
}
459
460
/* As pdf_get_xref_entry, but throws FZ_ERROR_ARGUMENT instead of
 * returning NULL when no entry can be produced. */
pdf_xref_entry *pdf_get_xref_entry_no_null(fz_context *ctx, pdf_document *doc, int i)
{
  pdf_xref_entry *found = pdf_get_xref_entry(ctx, doc, i);

  if (found == NULL)
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot find object in xref (%d 0 R), but not allowed to return NULL", i);

  return found;
}
467
468
void pdf_xref_entry_map(fz_context *ctx, pdf_document *doc, void (*fn)(fz_context *, pdf_xref_entry *, int, pdf_document *, void *), void *arg)
469
0
{
470
0
  int i, j;
471
0
  pdf_xref_subsec *sub;
472
0
  int xref_base = doc->xref_base;
473
474
0
  fz_try(ctx)
475
0
  {
476
    /* Map over any active local xref first. */
477
0
    if (doc->local_xref && doc->local_xref_nesting > 0)
478
0
    {
479
0
      pdf_xref *xref = doc->local_xref;
480
481
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
482
0
      {
483
0
        for (i = sub->start; i < sub->start + sub->len; i++)
484
0
        {
485
0
          pdf_xref_entry *entry = &sub->table[i - sub->start];
486
0
          if (entry->type)
487
0
            fn(ctx, entry, i, doc, arg);
488
0
        }
489
0
      }
490
0
    }
491
492
0
    for (j = 0; j < doc->num_xref_sections; j++)
493
0
    {
494
0
      pdf_xref *xref = &doc->xref_sections[j];
495
0
      doc->xref_base = j;
496
497
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
498
0
      {
499
0
        for (i = sub->start; i < sub->start + sub->len; i++)
500
0
        {
501
0
          pdf_xref_entry *entry = &sub->table[i - sub->start];
502
0
          if (entry->type)
503
0
            fn(ctx, entry, i, doc, arg);
504
0
        }
505
0
      }
506
0
    }
507
0
  }
508
0
  fz_always(ctx)
509
0
  {
510
0
    doc->xref_base = xref_base;
511
0
  }
512
0
  fz_catch(ctx)
513
0
    fz_rethrow(ctx);
514
0
}
515
516
/*
517
  Ensure we have an incremental xref section where we can store
518
  updated versions of indirect objects. This is a new xref section
519
  consisting of a single xref subsection.
520
*/
521
static void ensure_incremental_xref(fz_context *ctx, pdf_document *doc)
522
0
{
523
  /* If there are as yet no incremental sections, or if the most recent
524
   * one has been used to sign a signature field, then we need a new one.
525
   * After a signing, any further document changes require a new increment */
526
0
  if ((doc->num_incremental_sections == 0 || doc->xref_sections[0].unsaved_sigs != NULL)
527
0
    && !doc->disallow_new_increments)
528
0
  {
529
0
    pdf_xref *xref = &doc->xref_sections[0];
530
0
    pdf_xref *pxref;
531
0
    pdf_xref_entry *new_table = fz_malloc_struct_array(ctx, xref->num_objects, pdf_xref_entry);
532
0
    pdf_xref_subsec *sub = NULL;
533
0
    pdf_obj *trailer = NULL;
534
0
    int i;
535
536
0
    fz_var(trailer);
537
0
    fz_var(sub);
538
0
    fz_try(ctx)
539
0
    {
540
0
      sub = fz_malloc_struct(ctx, pdf_xref_subsec);
541
0
      trailer = xref->trailer ? pdf_copy_dict(ctx, xref->trailer) : NULL;
542
0
      doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, pdf_xref);
543
0
      xref = &doc->xref_sections[0];
544
0
      pxref = &doc->xref_sections[1];
545
0
      memmove(pxref, xref, doc->num_xref_sections * sizeof(pdf_xref));
546
      /* xref->num_objects is already correct */
547
0
      xref->subsec = sub;
548
0
      sub = NULL;
549
0
      xref->trailer = trailer;
550
0
      xref->pre_repair_trailer = NULL;
551
0
      xref->unsaved_sigs = NULL;
552
0
      xref->unsaved_sigs_end = NULL;
553
0
      xref->subsec->next = NULL;
554
0
      xref->subsec->len = xref->num_objects;
555
0
      xref->subsec->start = 0;
556
0
      xref->subsec->table = new_table;
557
0
      doc->num_xref_sections++;
558
0
      doc->num_incremental_sections++;
559
0
    }
560
0
    fz_catch(ctx)
561
0
    {
562
0
      fz_free(ctx, sub);
563
0
      fz_free(ctx, new_table);
564
0
      pdf_drop_obj(ctx, trailer);
565
0
      fz_rethrow(ctx);
566
0
    }
567
568
    /* Update the xref_index */
569
0
    for (i = 0; i < doc->max_xref_len; i++)
570
0
    {
571
0
      doc->xref_index[i]++;
572
0
    }
573
0
  }
574
0
}
575
576
/* Used when altering a document */
577
pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i)
578
0
{
579
0
  pdf_xref *xref;
580
0
  pdf_xref_subsec *sub;
581
582
  /* Make a new final xref section if we haven't already */
583
0
  ensure_incremental_xref(ctx, doc);
584
585
0
  xref = &doc->xref_sections[doc->xref_base];
586
0
  if (i >= xref->num_objects)
587
0
    pdf_resize_xref(ctx, doc, i + 1);
588
589
0
  sub = xref->subsec;
590
0
  assert(sub != NULL && sub->next == NULL);
591
0
  assert(i >= sub->start && i < sub->start + sub->len);
592
0
  doc->xref_index[i] = 0;
593
0
  return &sub->table[i - sub->start];
594
0
}
595
596
/* Return 1 if object num has a typed entry in the xref section at
 * xref_base, 0 otherwise. The assert requires that section to be a
 * single subsection starting at 0 and covering all of its objects. */
int pdf_xref_is_incremental(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref *xref = &doc->xref_sections[doc->xref_base];
  pdf_xref_subsec *sub = xref->subsec;

  assert(sub != NULL && sub->next == NULL && sub->len == xref->num_objects && sub->start == 0);

  if (num >= xref->num_objects)
    return 0;

  return sub->table[num].type != 0;
}
605
606
/* Used when clearing signatures. Removes the signature
607
from the list of unsaved signed signatures. */
608
void pdf_xref_remove_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field)
609
0
{
610
0
  int num = pdf_to_num(ctx, field);
611
0
  int idx = doc->xref_index[num];
612
0
  pdf_xref *xref = &doc->xref_sections[idx];
613
0
  pdf_unsaved_sig **usigptr = &xref->unsaved_sigs;
614
0
  pdf_unsaved_sig *usig = xref->unsaved_sigs;
615
616
0
  while (usig)
617
0
  {
618
0
    pdf_unsaved_sig **nextptr = &usig->next;
619
0
    pdf_unsaved_sig *next = usig->next;
620
621
0
    if (usig->field == field)
622
0
    {
623
0
      if (xref->unsaved_sigs_end == &usig->next)
624
0
      {
625
0
        if (usig->next)
626
0
          xref->unsaved_sigs_end = &usig->next->next;
627
0
        else
628
0
          xref->unsaved_sigs_end = NULL;
629
0
      }
630
0
      if (usigptr)
631
0
        *usigptr = usig->next;
632
633
0
      usig->next = NULL;
634
0
      pdf_drop_obj(ctx, usig->field);
635
0
      pdf_drop_signer(ctx, usig->signer);
636
0
      fz_free(ctx, usig);
637
638
0
      break;
639
0
    }
640
641
0
    usig = next;
642
0
    usigptr = nextptr;
643
0
  }
644
0
}
645
646
/* Append a record of (field, signer) to the unsaved-signature list of
 * xref section 0, taking a reference to each. */
void pdf_xref_store_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field, pdf_pkcs7_signer *signer)
{
  pdf_xref *head = &doc->xref_sections[0];
  pdf_unsaved_sig *node;

  /* Record details within the document structure so that contents
   * and byte_range can be updated with their correct values at
   * saving time */
  node = fz_malloc_struct(ctx, pdf_unsaved_sig);
  node->field = pdf_keep_obj(ctx, field);
  node->signer = signer->keep(ctx, signer);
  node->next = NULL;

  /* A NULL end pointer means the list is empty: append at the head. */
  if (!head->unsaved_sigs_end)
    head->unsaved_sigs_end = &head->unsaved_sigs;

  *head->unsaved_sigs_end = node;
  head->unsaved_sigs_end = &node->next;
}
664
665
/* Return 1 if obj is the field of an unsaved signature recorded in any
 * of the first num_incremental_sections xref sections, else 0. The
 * comparison is by pointer identity. */
int pdf_xref_obj_is_unsaved_signature(pdf_document *doc, pdf_obj *obj)
{
  int section = 0;

  while (section < doc->num_incremental_sections)
  {
    pdf_unsaved_sig *sig = doc->xref_sections[section].unsaved_sigs;

    while (sig != NULL)
    {
      if (sig->field == obj)
        return 1;
      sig = sig->next;
    }
    section++;
  }

  return 0;
}
682
683
/* Make xref section 0 a single solid subsection of at least num
 * entries, creating an empty first section if the document has none. */
void pdf_ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num)
{
  if (!doc->num_xref_sections)
    pdf_populate_next_xref_level(ctx, doc);

  ensure_solid_xref(ctx, doc, num, 0);
}
690
691
/*
 * Make sure object num lives in the incremental xref section.
 *
 * Returns 1 if the object's entry was moved into the incremental
 * section, or 0 if it was not found, was found in section 0 already,
 * or num is outside the range covered by doc->xref_index.
 *
 * When an entry is moved, the new entry takes over the original object
 * pointer and stream buffer, and the old entry is left holding a deep
 * copy of the object with no stream buffer.
 */
int pdf_xref_ensure_incremental_object(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref_entry *new_entry, *old_entry;
  pdf_xref_subsec *sub = NULL;
  int i;
  pdf_obj *copy;

  /* Make sure we have created an xref section for incremental updates */
  ensure_incremental_xref(ctx, doc);

  /* xref_index has max_xref_len slots; reject anything that would
   * index outside it. (The original per-section test used '&&' and
   * so could never trigger.) */
  if (num < 0 || num >= doc->max_xref_len)
    return 0;

  /* Search for the section that contains this object */
  for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
  {
    pdf_xref *xref = &doc->xref_sections[i];

    for (sub = xref->subsec; sub != NULL; sub = sub->next)
    {
      if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
        break;
    }
    if (sub != NULL)
      break;
  }
  /* sub == NULL implies we did not find it */

  /* If we don't find it, or it's already in the incremental section, return */
  if (i == 0 || sub == NULL)
    return 0;

  copy = pdf_deep_copy_obj(ctx, sub->table[num - sub->start].obj);

  /* Move the object to the incremental section */
  i = doc->xref_index[num];
  doc->xref_index[num] = 0;
  old_entry = &sub->table[num - sub->start];
  fz_try(ctx)
    new_entry = pdf_get_incremental_xref_entry(ctx, doc, num);
  fz_catch(ctx)
  {
    /* Undo the index change and release the copy before passing
     * the error on. */
    pdf_drop_obj(ctx, copy);
    doc->xref_index[num] = i;
    fz_rethrow(ctx);
  }
  *new_entry = *old_entry;
  if (new_entry->type == 'o')
  {
    new_entry->type = 'n';
    new_entry->gen = 0;
  }
  /* Better keep a copy. We must override the old entry with
   * the copy because the caller may be holding a reference to
   * the original and expect it to end up in the new entry */
  old_entry->obj = copy;
  old_entry->stm_buf = NULL;

  return 1;
}
750
751
/*
 * Make sure object num has an entry in the local xref.
 *
 * Does nothing if the local xref already holds a typed entry for num,
 * if no xref section holds one, or if num is outside the range covered
 * by doc->xref_index. Otherwise the entry is copied to the local xref:
 * the local entry takes over the original object pointer (with no
 * stream buffer), and the source entry is left holding a deep copy.
 *
 * doc->local_xref is dereferenced unconditionally, so a local xref
 * must exist when this is called.
 */
void pdf_xref_ensure_local_object(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref_entry *new_entry, *old_entry;
  pdf_xref_subsec *sub = NULL;
  int i;
  pdf_xref *xref;
  pdf_obj *copy;

  /* Is it in the local section already? */
  xref = doc->local_xref;
  for (sub = xref->subsec; sub != NULL; sub = sub->next)
  {
    if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
      break;
  }
  /* If we found it, it's in the local section already. */
  if (sub != NULL)
    return;

  /* xref_index has max_xref_len slots; reject anything that would
   * index outside it. (The original per-section test used '&&' and
   * so could never trigger.) */
  if (num < 0 || num >= doc->max_xref_len)
    return;

  /* Search for the section that contains this object */
  for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
  {
    xref = &doc->xref_sections[i];

    for (sub = xref->subsec; sub != NULL; sub = sub->next)
    {
      if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
        break;
    }
    if (sub != NULL)
      break;
  }
  /* sub == NULL implies we did not find it */
  if (sub == NULL)
    return; /* No object to find */

  copy = pdf_deep_copy_obj(ctx, sub->table[num - sub->start].obj);

  /* Copy the object to the local section */
  i = doc->xref_index[num];
  doc->xref_index[num] = 0;
  old_entry = &sub->table[num - sub->start];
  fz_try(ctx)
    new_entry = pdf_get_local_xref_entry(ctx, doc, num);
  fz_catch(ctx)
  {
    /* Undo the index change and release the copy before passing
     * the error on. */
    pdf_drop_obj(ctx, copy);
    doc->xref_index[num] = i;
    fz_rethrow(ctx);
  }
  /* The struct copy hands the original object pointer to the new entry. */
  *new_entry = *old_entry;
  if (new_entry->type == 'o')
  {
    new_entry->type = 'n';
    new_entry->gen = 0;
  }
  new_entry->stm_buf = NULL; /* FIXME */
  /* old entry is incremental and may have changes.
   * Better keep a copy. We must override the old entry with
   * the copy because the caller may be holding a reference to
   * the original and expect it to end up in the new entry */
  old_entry->obj = copy;
}
819
820
/* Replace all of the document's xref sections with a single section
 * whose one subsection is the caller-supplied table of n entries. The
 * new section's trailer is a new reference to the current trailer, and
 * the xref index is replaced by a zeroed array of n ints. */
void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n)
{
  int *fresh_index = NULL;
  pdf_xref *fresh = NULL;
  pdf_xref_subsec *sub;

  fz_var(fresh_index);
  fz_var(fresh);

  /* Do every allocation up front so that failure changes nothing. */
  fz_try(ctx)
  {
    fresh_index = fz_calloc(ctx, n, sizeof(int));
    fresh = fz_malloc_struct(ctx, pdf_xref);
    sub = fz_malloc_struct(ctx, pdf_xref_subsec);
  }
  fz_catch(ctx)
  {
    fz_free(ctx, fresh);
    fz_free(ctx, fresh_index);
    fz_rethrow(ctx);
  }

  sub->start = 0;
  sub->len = n;
  sub->table = entries;

  fresh->num_objects = n;
  fresh->subsec = sub;
  /* Take the trailer reference before the old sections are dropped. */
  fresh->trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));

  /* The new table completely replaces the previous separate sections */
  pdf_drop_xref_sections(ctx, doc);

  doc->xref_sections = fresh;
  doc->num_xref_sections = 1;
  doc->num_incremental_sections = 0;
  doc->xref_base = 0;
  doc->disallow_new_increments = 0;
  doc->max_xref_len = n;

  fz_free(ctx, doc->xref_index);
  doc->xref_index = fresh_index;
}
863
864
/* Set the current xref sections aside as the saved ones (dropping any
 * previously saved set), reset the xref bookkeeping, and start a new
 * first section that carries a reference to the old trailer. */
void pdf_forget_xref(fz_context *ctx, pdf_document *doc)
{
  pdf_obj *old_trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));

  pdf_drop_local_xref_and_resources(ctx, doc);

  if (doc->saved_xref_sections)
    pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);

  doc->saved_num_xref_sections = doc->num_xref_sections;
  doc->saved_xref_sections = doc->xref_sections;

  doc->xref_sections = NULL;
  doc->num_xref_sections = 0;
  doc->num_incremental_sections = 0;
  doc->startxref = 0;
  doc->xref_base = 0;
  doc->disallow_new_increments = 0;

  /* Asking for entry 0 creates the new first section. */
  fz_try(ctx)
  {
    pdf_get_populating_xref_entry(ctx, doc, 0);
  }
  fz_catch(ctx)
  {
    pdf_drop_obj(ctx, old_trailer);
    fz_rethrow(ctx);
  }

  /* Set the trailer of the final xref section. */
  doc->xref_sections[0].trailer = old_trailer;
}
896
897
/*
898
 * magic version tag and startxref
899
 */
900
901
int
902
pdf_version(fz_context *ctx, pdf_document *doc)
903
0
{
904
0
  int version = doc->version;
905
0
  fz_try(ctx)
906
0
  {
907
0
    pdf_obj *obj = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Version), NULL);
908
0
    const char *str = pdf_to_name(ctx, obj);
909
0
    if (*str)
910
0
      version = 10 * (fz_atof(str) + 0.05f);
911
0
  }
912
0
  fz_catch(ctx)
913
0
  {
914
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
915
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
916
0
    fz_report_error(ctx);
917
0
    fz_warn(ctx, "Ignoring broken Root/Version number.");
918
0
  }
919
0
  return version;
920
0
}
921
922
static void
923
pdf_load_version(fz_context *ctx, pdf_document *doc)
924
16
{
925
16
  char buf[1024];
926
16
  char *s = NULL;
927
16
  size_t i, n;
928
929
  /* look for '%PDF' version marker within first kilobyte of file */
930
16
  fz_seek(ctx, doc->file, 0, SEEK_SET);
931
16
  n = fz_read(ctx, doc->file, (unsigned char*) buf, sizeof buf);
932
16
  if (n < 5)
933
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find version marker");
934
16
  buf[n-1] = 0;
935
5.13k
  for (i = 0; i < n - 5; i++)
936
5.12k
  {
937
5.12k
    if (memcmp(&buf[i], "%PDF-", 5) == 0 || memcmp(&buf[i], "%FDF-", 5) == 0)
938
11
    {
939
11
      s = buf + i;
940
11
      break;
941
11
    }
942
5.12k
  }
943
16
  if (!s)
944
5
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find version marker");
945
946
11
  if (s[1] == 'F')
947
0
    doc->is_fdf = 1;
948
949
11
  doc->version = 10 * (fz_atof(s+5) + 0.05f);
950
11
  if ((doc->version < 10 || doc->version > 17) && doc->version != 20)
951
0
    fz_warn(ctx, "unknown PDF version: %d.%d", doc->version / 10, doc->version % 10);
952
953
11
  if (s != buf)
954
1
  {
955
1
    fz_warn(ctx, "garbage bytes before version marker");
956
1
    doc->bias = s - buf;
957
1
  }
958
959
11
  fz_seek(ctx, doc->file, doc->bias, SEEK_SET);
960
11
}
961
962
static void
963
pdf_read_start_xref(fz_context *ctx, pdf_document *doc)
964
11
{
965
11
  unsigned char buf[1024];
966
11
  size_t i, n;
967
11
  int64_t t;
968
969
11
  fz_seek(ctx, doc->file, 0, SEEK_END);
970
971
11
  doc->file_size = fz_tell(ctx, doc->file);
972
973
11
  t = fz_maxi64(0, doc->file_size - (int64_t)sizeof buf);
974
11
  fz_seek(ctx, doc->file, t, SEEK_SET);
975
976
11
  n = fz_read(ctx, doc->file, buf, sizeof buf);
977
11
  if (n < 9)
978
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find startxref");
979
980
11
  i = n - 9;
981
11
  do
982
11.1k
  {
983
11.1k
    if (memcmp(buf + i, "startxref", 9) == 0)
984
0
    {
985
0
      i += 9;
986
0
      while (i < n && iswhite(buf[i]))
987
0
        i ++;
988
0
      doc->startxref = 0;
989
0
      while (i < n && isdigit(buf[i]))
990
0
      {
991
0
        if (doc->startxref >= INT64_MAX/10)
992
0
          fz_throw(ctx, FZ_ERROR_LIMIT, "startxref too large");
993
0
        doc->startxref = doc->startxref * 10 + (buf[i++] - '0');
994
0
      }
995
0
      if (doc->startxref != 0)
996
0
        return;
997
0
      break;
998
0
    }
999
11.1k
  } while (i-- > 0);
1000
1001
11
  fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find startxref");
1002
11
}
1003
1004
/* Consume bytes from 'stm' for as long as the next byte has a value of
 * 32 or below. Stops, without consuming, at EOF or at the first byte
 * above 32. */
void fz_skip_space(fz_context *ctx, fz_stream *stm)
{
  for (;;)
  {
    int next = fz_peek_byte(ctx, stm);
    if (next == EOF || next > 32)
      break;
    (void)fz_read_byte(ctx, stm);
  }
}
1015
1016
/* Try to consume the literal 'str' from 'stm'.
 * Returns 0 when every character of 'str' was matched and consumed,
 * and 1 at the first mismatch or EOF. Bytes matched before a mismatch
 * stay consumed; the mismatching byte itself is only peeked. */
int fz_skip_string(fz_context *ctx, fz_stream *stm, const char *str)
{
  const char *want;

  for (want = str; *want != '\0'; want++)
  {
    int next = fz_peek_byte(ctx, stm);
    if (next == EOF || next != *want)
      return 1;
    (void)fz_read_byte(ctx, stm);
  }
  return 0;
}
1027
1028
/*
1029
 * trailer dictionary
1030
 */
1031
1032
static int
1033
pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc)
1034
0
{
1035
0
  int len;
1036
0
  char *s;
1037
0
  int64_t t;
1038
0
  pdf_token tok;
1039
0
  int c;
1040
0
  int size = 0;
1041
0
  int64_t ofs;
1042
0
  pdf_obj *trailer = NULL;
1043
0
  size_t n;
1044
0
  pdf_lexbuf *buf = &doc->lexbuf.base;
1045
0
  pdf_obj *obj = NULL;
1046
1047
0
  fz_var(trailer);
1048
1049
  /* Record the current file read offset so that we can reinstate it */
1050
0
  ofs = fz_tell(ctx, doc->file);
1051
1052
0
  fz_skip_space(ctx, doc->file);
1053
0
  if (fz_skip_string(ctx, doc->file, "xref"))
1054
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find xref marker");
1055
0
  fz_skip_space(ctx, doc->file);
1056
1057
0
  while (1)
1058
0
  {
1059
0
    c = fz_peek_byte(ctx, doc->file);
1060
0
    if (!isdigit(c))
1061
0
      break;
1062
1063
0
    fz_read_line(ctx, doc->file, buf->scratch, buf->size);
1064
0
    s = buf->scratch;
1065
0
    fz_strsep(&s, " "); /* ignore start */
1066
0
    if (!s)
1067
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref subsection length missing");
1068
0
    len = fz_atoi(fz_strsep(&s, " "));
1069
0
    if (len < 0)
1070
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref subsection length must be positive");
1071
1072
    /* broken pdfs where the section is not on a separate line */
1073
0
    if (s && *s != '\0')
1074
0
      fz_seek(ctx, doc->file, -(2 + (int)strlen(s)), SEEK_CUR);
1075
1076
0
    t = fz_tell(ctx, doc->file);
1077
0
    if (t < 0)
1078
0
      fz_throw(ctx, FZ_ERROR_SYSTEM, "cannot tell in file");
1079
1080
    /* Spec says xref entries should be 20 bytes, but it's not infrequent
1081
     * to see 19, in particular for some PCLm drivers. Cope. */
1082
0
    if (len > 0)
1083
0
    {
1084
0
      n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, 20);
1085
0
      if (n < 19)
1086
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "malformed xref table");
1087
0
      if (n == 20 && buf->scratch[19] > 32)
1088
0
        n = 19;
1089
0
    }
1090
0
    else
1091
0
      n = 20;
1092
1093
0
    if (len > (int64_t)((INT64_MAX - t) / n))
1094
0
      fz_throw(ctx, FZ_ERROR_LIMIT, "xref has too many entries");
1095
1096
0
    fz_seek(ctx, doc->file, t + n * (int64_t)len, SEEK_SET);
1097
0
  }
1098
1099
0
  fz_try(ctx)
1100
0
  {
1101
0
    tok = pdf_lex(ctx, doc->file, buf);
1102
0
    if (tok != PDF_TOK_TRAILER)
1103
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer marker");
1104
1105
0
    tok = pdf_lex(ctx, doc->file, buf);
1106
0
    if (tok != PDF_TOK_OPEN_DICT)
1107
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer dictionary");
1108
1109
0
    trailer = pdf_parse_dict(ctx, doc, doc->file, buf);
1110
1111
0
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
1112
0
    if (pdf_is_indirect(ctx, obj))
1113
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "trailer Size entry is indirect");
1114
1115
0
    size = pdf_dict_get_int(ctx, trailer, PDF_NAME(Size));
1116
0
    if (size < 0 || size > PDF_MAX_OBJECT_NUMBER + 1)
1117
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "trailer Size entry out of range");
1118
0
  }
1119
0
  fz_always(ctx)
1120
0
  {
1121
0
    pdf_drop_obj(ctx, trailer);
1122
0
  }
1123
0
  fz_catch(ctx)
1124
0
  {
1125
0
    fz_rethrow(ctx);
1126
0
  }
1127
1128
0
  fz_seek(ctx, doc->file, ofs, SEEK_SET);
1129
1130
0
  return size;
1131
0
}
1132
1133
static pdf_xref_entry *
1134
pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, int start, int len)
1135
0
{
1136
0
  pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-1];
1137
0
  pdf_xref_subsec *sub, *extend = NULL;
1138
0
  int num_objects;
1139
0
  int solidify = 0;
1140
1141
0
  if (len == 0)
1142
0
    return NULL;
1143
1144
  /* Different cases here.
1145
   * Case 1) We might be asking for a subsection (or a subset of a
1146
   *         subsection) that we already have - Just return it.
1147
   * Case 2) We might be asking for a subsection that overlaps (or
1148
   *         extends) a subsection we already have - extend the existing one.
1149
   * Case 3) We might be asking for a subsection that overlaps multiple
1150
   *         existing subsections - solidify the whole set.
1151
   * Case 4) We might be asking for a completely new subsection - just
1152
   *         allocate it.
1153
   */
1154
1155
  /* Sanity check */
1156
0
  for (sub = xref->subsec; sub != NULL; sub = sub->next)
1157
0
  {
1158
0
    if (start >= sub->start && start <= sub->start + sub->len)
1159
0
    {
1160
      /* 'start' is in (or immediately after) 'sub' */
1161
0
      if (start + len <= sub->start + sub->len)
1162
0
      {
1163
        /* And so is start+len-1 - just return this! Case 1. */
1164
0
        return &sub->table[start-sub->start];
1165
0
      }
1166
      /* So we overlap with sub. */
1167
0
      if (extend == NULL)
1168
0
      {
1169
        /* Maybe we can extend sub? */
1170
0
        extend = sub;
1171
0
      }
1172
0
      else
1173
0
      {
1174
        /* OK, so we've already found an overlapping one. We'll need to solidify. Case 3. */
1175
0
        solidify = 1;
1176
0
        break;
1177
0
      }
1178
0
    }
1179
0
    else if (start + len > sub->start && start + len < sub->start + sub->len)
1180
0
    {
1181
      /* The end of the start+len range is in 'sub'. */
1182
      /* For now, we won't support extending sub backwards. Just take this as
1183
       * needing to solidify. Case 3. */
1184
0
      solidify = 1;
1185
0
      break;
1186
0
    }
1187
0
    else if (start < sub->start && start + len >= sub->start + sub->len)
1188
0
    {
1189
      /* The end of the start+len range is beyond 'sub'. */
1190
      /* For now, we won't support extending sub backwards. Just take this as
1191
       * needing to solidify. Another variant of case 3. */
1192
0
      solidify = 1;
1193
0
      break;
1194
0
    }
1195
0
  }
1196
1197
0
  num_objects = xref->num_objects;
1198
0
  if (num_objects < start + len)
1199
0
    num_objects = start + len;
1200
1201
0
  if (solidify)
1202
0
  {
1203
    /* Case 3: Solidify the xref */
1204
0
    ensure_solid_xref(ctx, doc, num_objects, doc->num_xref_sections-1);
1205
0
    xref = &doc->xref_sections[doc->num_xref_sections-1];
1206
0
    sub = xref->subsec;
1207
0
  }
1208
0
  else if (extend)
1209
0
  {
1210
    /* Case 2: Extend the subsection */
1211
0
    int newlen = start + len - extend->start;
1212
0
    sub = extend;
1213
0
    sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
1214
0
    memset(&sub->table[sub->len], 0, sizeof(pdf_xref_entry) * (newlen - sub->len));
1215
0
    sub->len = newlen;
1216
0
    if (xref->num_objects < sub->start + sub->len)
1217
0
      xref->num_objects = sub->start + sub->len;
1218
0
    if (doc->max_xref_len < sub->start + sub->len)
1219
0
      extend_xref_index(ctx, doc, sub->start + sub->len);
1220
0
  }
1221
0
  else
1222
0
  {
1223
    /* Case 4 */
1224
0
    sub = fz_malloc_struct(ctx, pdf_xref_subsec);
1225
0
    fz_try(ctx)
1226
0
    {
1227
0
      sub->table = fz_malloc_struct_array(ctx, len, pdf_xref_entry);
1228
0
      sub->start = start;
1229
0
      sub->len = len;
1230
0
      sub->next = xref->subsec;
1231
0
      xref->subsec = sub;
1232
0
    }
1233
0
    fz_catch(ctx)
1234
0
    {
1235
0
      fz_free(ctx, sub);
1236
0
      fz_rethrow(ctx);
1237
0
    }
1238
0
    if (xref->num_objects < num_objects)
1239
0
      xref->num_objects = num_objects;
1240
0
    if (doc->max_xref_len < num_objects)
1241
0
      extend_xref_index(ctx, doc, num_objects);
1242
0
  }
1243
0
  return &sub->table[start-sub->start];
1244
0
}
1245
1246
static inline void
1247
validate_object_number_range(fz_context *ctx, int first, int len, const char *what)
1248
0
{
1249
0
  if (first < 0 || first > PDF_MAX_OBJECT_NUMBER)
1250
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "first object number in %s out of range", what);
1251
0
  if (len < 0 || len > PDF_MAX_OBJECT_NUMBER)
1252
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "number of objects in %s out of range", what);
1253
0
  if (len > 0 && len - 1 > PDF_MAX_OBJECT_NUMBER - first)
1254
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "last object number in %s out of range", what);
1255
0
}
1256
1257
static pdf_obj *
1258
pdf_read_old_xref(fz_context *ctx, pdf_document *doc)
1259
0
{
1260
0
  int start, len, c, i, xref_len, carried;
1261
0
  fz_stream *file = doc->file;
1262
0
  pdf_xref_entry *table;
1263
0
  pdf_token tok;
1264
0
  size_t n;
1265
0
  char *s, *e;
1266
0
  pdf_lexbuf *buf = &doc->lexbuf.base;
1267
1268
0
  xref_len = pdf_xref_size_from_old_trailer(ctx, doc);
1269
1270
0
  fz_skip_space(ctx, doc->file);
1271
0
  if (fz_skip_string(ctx, doc->file, "xref"))
1272
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find xref marker");
1273
0
  fz_skip_space(ctx, doc->file);
1274
1275
0
  while (1)
1276
0
  {
1277
0
    c = fz_peek_byte(ctx, file);
1278
0
    if (!isdigit(c))
1279
0
      break;
1280
1281
0
    fz_read_line(ctx, file, buf->scratch, buf->size);
1282
0
    s = buf->scratch;
1283
0
    start = fz_atoi(fz_strsep(&s, " "));
1284
0
    len = fz_atoi(fz_strsep(&s, " "));
1285
1286
    /* broken pdfs where the section is not on a separate line */
1287
0
    if (s && *s != '\0')
1288
0
    {
1289
0
      fz_warn(ctx, "broken xref subsection. proceeding anyway.");
1290
0
      fz_seek(ctx, file, -(2 + (int)strlen(s)), SEEK_CUR);
1291
0
    }
1292
1293
0
    validate_object_number_range(ctx, start, len, "xref subsection");
1294
1295
    /* broken pdfs where size in trailer undershoots entries in xref sections */
1296
0
    if (start + len > xref_len)
1297
0
    {
1298
0
      fz_warn(ctx, "broken xref subsection, proceeding anyway.");
1299
0
    }
1300
1301
0
    table = pdf_xref_find_subsection(ctx, doc, start, len);
1302
1303
    /* Xref entries SHOULD be 20 bytes long, but we see 19 byte
1304
     * ones more frequently than we'd like (e.g. PCLm drivers).
1305
     * Cope with this by 'carrying' data forward. */
1306
0
    carried = 0;
1307
0
    for (i = 0; i < len; i++)
1308
0
    {
1309
0
      pdf_xref_entry *entry = &table[i];
1310
0
      n = fz_read(ctx, file, (unsigned char *) buf->scratch + carried, 20-carried);
1311
0
      if (n != (size_t)(20-carried))
1312
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "unexpected EOF in xref table");
1313
0
      n += carried;
1314
0
      buf->scratch[n] = '\0';
1315
0
      if (!entry->type)
1316
0
      {
1317
0
        s = buf->scratch;
1318
0
        e = s + n;
1319
1320
0
        entry->num = start + i;
1321
1322
        /* broken pdfs where line start with white space */
1323
0
        while (s < e && iswhite(*s))
1324
0
          s++;
1325
1326
0
        if (s == e || !isdigit(*s))
1327
0
          fz_throw(ctx, FZ_ERROR_FORMAT, "xref offset missing");
1328
0
        while (s < e && isdigit(*s))
1329
0
          entry->ofs = entry->ofs * 10 + *s++ - '0';
1330
1331
0
        while (s < e && iswhite(*s))
1332
0
          s++;
1333
0
        if (s == e || !isdigit(*s))
1334
0
          fz_throw(ctx, FZ_ERROR_FORMAT, "xref generation number missing");
1335
0
        while (s < e && isdigit(*s))
1336
0
          entry->gen = entry->gen * 10 + *s++ - '0';
1337
1338
0
        while (s < e && iswhite(*s))
1339
0
          s++;
1340
0
        if (s == e || (*s != 'f' && *s != 'n' && *s != 'o'))
1341
0
          fz_throw(ctx, FZ_ERROR_FORMAT, "unexpected xref type: 0x%x (%d %d R)", s == e ? 0 : *s, entry->num, entry->gen);
1342
0
        entry->type = *s++;
1343
1344
        /* If the last byte of our buffer isn't an EOL (or space), carry one byte forward */
1345
0
        carried = buf->scratch[19] > 32;
1346
0
        if (carried)
1347
0
          buf->scratch[0] = buf->scratch[19];
1348
0
      }
1349
0
    }
1350
0
    if (carried)
1351
0
      fz_unread_byte(ctx, file);
1352
0
  }
1353
1354
0
  tok = pdf_lex(ctx, file, buf);
1355
0
  if (tok != PDF_TOK_TRAILER)
1356
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer marker");
1357
1358
0
  tok = pdf_lex(ctx, file, buf);
1359
0
  if (tok != PDF_TOK_OPEN_DICT)
1360
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer dictionary");
1361
1362
0
  doc->last_xref_was_old_style = 1;
1363
1364
0
  return pdf_parse_dict(ctx, doc, file, buf);
1365
0
}
1366
1367
static void
1368
pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
1369
0
{
1370
0
  pdf_xref_entry *table;
1371
0
  int i, n;
1372
1373
0
  validate_object_number_range(ctx, i0, i1, "xref subsection");
1374
1375
0
  table = pdf_xref_find_subsection(ctx, doc, i0, i1);
1376
0
  for (i = i0; i < i0 + i1; i++)
1377
0
  {
1378
0
    pdf_xref_entry *entry = &table[i-i0];
1379
0
    int64_t a = 0;
1380
0
    int64_t b = 0;
1381
0
    int64_t c = 0;
1382
1383
0
    if (fz_is_eof(ctx, stm))
1384
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "truncated xref stream");
1385
1386
0
    for (n = 0; n < w0; n++)
1387
0
      a = (a << 8) + fz_read_byte(ctx, stm);
1388
0
    for (n = 0; n < w1; n++)
1389
0
      b = (b << 8) + fz_read_byte(ctx, stm);
1390
0
    for (n = 0; n < w2; n++)
1391
0
      c = (c << 8) + fz_read_byte(ctx, stm);
1392
1393
0
    if (!entry->type)
1394
0
    {
1395
0
      int t = w0 ? a : 1;
1396
0
      entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
1397
0
      entry->ofs = w1 ? b : 0;
1398
0
      entry->gen = w2 ? c : 0;
1399
0
      entry->num = i;
1400
0
    }
1401
0
  }
1402
1403
0
  doc->last_xref_was_old_style = 0;
1404
0
}
1405
1406
/* Entered with file locked, remains locked throughout. */
1407
static pdf_obj *
1408
pdf_read_new_xref(fz_context *ctx, pdf_document *doc)
1409
0
{
1410
0
  fz_stream *stm = NULL;
1411
0
  pdf_obj *trailer = NULL;
1412
0
  pdf_obj *index = NULL;
1413
0
  pdf_obj *obj = NULL;
1414
0
  int gen, num = 0;
1415
0
  int64_t ofs, stm_ofs;
1416
0
  int size, w0, w1, w2;
1417
0
  int t;
1418
1419
0
  fz_var(trailer);
1420
0
  fz_var(stm);
1421
1422
0
  fz_try(ctx)
1423
0
  {
1424
0
    ofs = fz_tell(ctx, doc->file);
1425
0
    trailer = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stm_ofs, NULL);
1426
0
    if (num == 0)
1427
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "Trailer object number cannot be 0\n");
1428
0
  }
1429
0
  fz_catch(ctx)
1430
0
  {
1431
0
    pdf_drop_obj(ctx, trailer);
1432
0
    fz_rethrow(ctx);
1433
0
  }
1434
1435
0
  fz_try(ctx)
1436
0
  {
1437
0
    pdf_xref_entry *entry;
1438
1439
0
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
1440
0
    if (!obj)
1441
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream missing Size entry (%d 0 R)", num);
1442
1443
0
    size = pdf_to_int(ctx, obj);
1444
1445
    /* Bug708176: If the PDF file producer has declared Size without
1446
     * including this object, then increment it. */
1447
0
    if (size == num)
1448
0
      pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), size+1);
1449
1450
0
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(W));
1451
0
    if (!obj)
1452
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream missing W entry (%d  R)", num);
1453
1454
0
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 0)))
1455
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object type field width an indirect object");
1456
0
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 1)))
1457
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object field 2 width an indirect object");
1458
0
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 2)))
1459
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object field 3 width an indirect object");
1460
1461
0
    if (doc->file_reading_linearly && pdf_dict_get(ctx, trailer, PDF_NAME(Encrypt)))
1462
0
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Cannot read linearly with encryption");
1463
1464
0
    w0 = pdf_array_get_int(ctx, obj, 0);
1465
0
    w1 = pdf_array_get_int(ctx, obj, 1);
1466
0
    w2 = pdf_array_get_int(ctx, obj, 2);
1467
1468
0
    if (w0 < 0 || w0 > 8)
1469
0
      fz_warn(ctx, "xref stream objects have corrupt type");
1470
0
    if (w1 < 0 || w1 > 8)
1471
0
      fz_warn(ctx, "xref stream objects have corrupt offset");
1472
0
    if (w2 < 0 || w2 > 8)
1473
0
      fz_warn(ctx, "xref stream objects have corrupt generation");
1474
1475
0
    w0 = w0 < 0 ? 0 : w0 > 8 ? 8 : w0;
1476
0
    w1 = w1 < 0 ? 0 : w1 > 8 ? 8 : w1;
1477
0
    w2 = w2 < 0 ? 0 : w2 > 8 ? 8 : w2;
1478
1479
0
    index = pdf_dict_get(ctx, trailer, PDF_NAME(Index));
1480
1481
0
    stm = pdf_open_stream_with_offset(ctx, doc, num, trailer, stm_ofs);
1482
1483
0
    if (!index)
1484
0
    {
1485
0
      pdf_read_new_xref_section(ctx, doc, stm, 0, size, w0, w1, w2);
1486
0
    }
1487
0
    else
1488
0
    {
1489
0
      int n = pdf_array_len(ctx, index);
1490
0
      for (t = 0; t < n; t += 2)
1491
0
      {
1492
0
        int i0 = pdf_array_get_int(ctx, index, t + 0);
1493
0
        int i1 = pdf_array_get_int(ctx, index, t + 1);
1494
0
        pdf_read_new_xref_section(ctx, doc, stm, i0, i1, w0, w1, w2);
1495
0
      }
1496
0
    }
1497
0
    entry = pdf_get_populating_xref_entry(ctx, doc, num);
1498
0
    entry->ofs = ofs;
1499
0
    entry->gen = gen;
1500
0
    entry->num = num;
1501
0
    entry->stm_ofs = stm_ofs;
1502
0
    pdf_drop_obj(ctx, entry->obj);
1503
0
    entry->obj = pdf_keep_obj(ctx, trailer);
1504
0
    entry->type = 'n';
1505
0
    pdf_set_obj_parent(ctx, trailer, num);
1506
0
  }
1507
0
  fz_always(ctx)
1508
0
  {
1509
0
    fz_drop_stream(ctx, stm);
1510
0
  }
1511
0
  fz_catch(ctx)
1512
0
  {
1513
0
    pdf_drop_obj(ctx, trailer);
1514
0
    fz_rethrow(ctx);
1515
0
  }
1516
1517
0
  return trailer;
1518
0
}
1519
1520
static pdf_obj *
1521
pdf_read_xref(fz_context *ctx, pdf_document *doc, int64_t ofs)
1522
0
{
1523
0
  pdf_obj *trailer;
1524
0
  int c;
1525
1526
0
  fz_seek(ctx, doc->file, doc->bias + ofs, SEEK_SET);
1527
1528
0
  while (iswhite(fz_peek_byte(ctx, doc->file)))
1529
0
    fz_read_byte(ctx, doc->file);
1530
1531
0
  c = fz_peek_byte(ctx, doc->file);
1532
0
  if (c == 'x')
1533
0
    trailer = pdf_read_old_xref(ctx, doc);
1534
0
  else if (isdigit(c))
1535
0
    trailer = pdf_read_new_xref(ctx, doc);
1536
0
  else
1537
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize xref format");
1538
1539
0
  return trailer;
1540
0
}
1541
1542
static int64_t
1543
read_xref_section(fz_context *ctx, pdf_document *doc, int64_t ofs)
1544
0
{
1545
0
  pdf_obj *trailer = NULL;
1546
0
  pdf_obj *prevobj;
1547
0
  int64_t xrefstmofs = 0;
1548
0
  int64_t prevofs = 0;
1549
1550
0
  trailer = pdf_read_xref(ctx, doc, ofs);
1551
0
  fz_try(ctx)
1552
0
  {
1553
0
    pdf_set_populating_xref_trailer(ctx, doc, trailer);
1554
1555
    /* FIXME: do we overwrite free entries properly? */
1556
    /* FIXME: Does this work properly with progression? */
1557
0
    xrefstmofs = pdf_to_int64(ctx, pdf_dict_get(ctx, trailer, PDF_NAME(XRefStm)));
1558
0
    if (xrefstmofs)
1559
0
    {
1560
0
      if (xrefstmofs < 0)
1561
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "negative xref stream offset");
1562
1563
      /*
1564
        Read the XRefStm stream, but throw away the resulting trailer. We do not
1565
        follow any Prev tag therein, as specified on Page 108 of the PDF reference
1566
        1.7
1567
      */
1568
0
      pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs));
1569
0
    }
1570
1571
0
    prevobj = pdf_dict_get(ctx, trailer, PDF_NAME(Prev));
1572
0
    if (pdf_is_int(ctx, prevobj))
1573
0
    {
1574
0
      prevofs = pdf_to_int64(ctx, prevobj);
1575
0
      if (prevofs <= 0)
1576
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "invalid offset for previous xref section");
1577
0
    }
1578
0
  }
1579
0
  fz_always(ctx)
1580
0
    pdf_drop_obj(ctx, trailer);
1581
0
  fz_catch(ctx)
1582
0
    fz_rethrow(ctx);
1583
1584
0
  return prevofs;
1585
0
}
1586
1587
static void
1588
pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, int64_t ofs, int read_previous)
1589
0
{
1590
0
  int i, len, cap;
1591
0
  int64_t *offsets;
1592
0
  int populated = 0;
1593
0
  int size, xref_len;
1594
1595
0
  len = 0;
1596
0
  cap = 10;
1597
0
  offsets = fz_malloc_array(ctx, cap, int64_t);
1598
1599
0
  fz_var(populated);
1600
0
  fz_var(offsets);
1601
1602
0
  fz_try(ctx)
1603
0
  {
1604
0
    while(ofs)
1605
0
    {
1606
0
      for (i = 0; i < len; i ++)
1607
0
      {
1608
0
        if (offsets[i] == ofs)
1609
0
          break;
1610
0
      }
1611
0
      if (i < len)
1612
0
      {
1613
0
        fz_warn(ctx, "ignoring xref section recursion at offset %d", (int)ofs);
1614
0
        break;
1615
0
      }
1616
0
      if (len == cap)
1617
0
      {
1618
0
        cap *= 2;
1619
0
        offsets = fz_realloc_array(ctx, offsets, cap, int64_t);
1620
0
      }
1621
0
      offsets[len++] = ofs;
1622
1623
0
      pdf_populate_next_xref_level(ctx, doc);
1624
0
      populated = 1;
1625
0
      ofs = read_xref_section(ctx, doc, ofs);
1626
0
      if (!read_previous)
1627
0
        break;
1628
0
    }
1629
1630
    /* For pathological files, such as chinese-example.pdf, where the original
1631
     * xref in the file is highly fragmented, we can safely solidify it here
1632
     * with no ill effects. */
1633
0
    ensure_solid_xref(ctx, doc, 0, doc->num_xref_sections-1);
1634
1635
0
    size = pdf_dict_get_int(ctx, pdf_trailer(ctx, doc), PDF_NAME(Size));
1636
0
    xref_len = pdf_xref_len(ctx, doc);
1637
0
    if (xref_len > size)
1638
0
    {
1639
0
      if (xref_len == size+1)
1640
0
      {
1641
        /* Bug 708456 && Bug 708176. Allow for (sadly, quite common
1642
         * PDF generators that can't get size right). */
1643
0
        fz_warn(ctx, "Trailer Size is off-by-one. Ignoring.");
1644
0
        pdf_dict_put_int(ctx, pdf_trailer(ctx, doc), PDF_NAME(Size), size+1);
1645
0
      }
1646
0
      else
1647
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "incorrect number of xref entries in trailer, repairing");
1648
0
    }
1649
0
  }
1650
0
  fz_always(ctx)
1651
0
  {
1652
0
    fz_free(ctx, offsets);
1653
0
  }
1654
0
  fz_catch(ctx)
1655
0
  {
1656
    /* Undo pdf_populate_next_xref_level if we've done that already. */
1657
0
    if (populated)
1658
0
    {
1659
0
      pdf_drop_xref_subsec(ctx, &doc->xref_sections[doc->num_xref_sections - 1]);
1660
0
      doc->num_xref_sections--;
1661
0
    }
1662
0
    fz_rethrow(ctx);
1663
0
  }
1664
0
}
1665
1666
void
1667
pdf_prime_xref_index(fz_context *ctx, pdf_document *doc)
1668
10
{
1669
10
  int i, j;
1670
10
  int *idx = doc->xref_index;
1671
1672
20
  for (i = doc->num_xref_sections-1; i >= 0; i--)
1673
10
  {
1674
10
    pdf_xref *xref = &doc->xref_sections[i];
1675
10
    pdf_xref_subsec *subsec = xref->subsec;
1676
20
    while (subsec != NULL)
1677
10
    {
1678
10
      int start = subsec->start;
1679
10
      int end = subsec->start + subsec->len;
1680
10.8k
      for (j = start; j < end; j++)
1681
10.8k
      {
1682
10.8k
        char t = subsec->table[j-start].type;
1683
10.8k
        if (t != 0 && t != 'f')
1684
345
          idx[j] = i;
1685
10.8k
      }
1686
1687
10
      subsec = subsec->next;
1688
10
    }
1689
10
  }
1690
10
}
1691
1692
static void
1693
check_xref_entry_offsets(fz_context *ctx, pdf_xref_entry *entry, int i, pdf_document *doc, void *arg)
1694
0
{
1695
0
  int xref_len = (int)(intptr_t)arg;
1696
1697
0
  if (entry->type == 'n')
1698
0
  {
1699
    /* Special case code: "0000000000 * n" means free,
1700
     * according to some producers (inc Quartz) */
1701
0
    if (entry->ofs == 0)
1702
0
      entry->type = 'f';
1703
0
    else if (entry->ofs <= 0 || entry->ofs >= doc->file_size)
1704
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "object offset out of range: %d (%d 0 R)", (int)entry->ofs, i);
1705
0
  }
1706
0
  else if (entry->type == 'o')
1707
0
  {
1708
    /* Read this into a local variable here, because pdf_get_xref_entry
1709
     * may solidify the xref, hence invalidating "entry", meaning we
1710
     * need a stashed value for the throw. */
1711
0
    int64_t ofs = entry->ofs;
1712
0
    if (ofs <= 0 || ofs >= xref_len || pdf_get_xref_entry_no_null(ctx, doc, ofs)->type != 'n')
1713
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "invalid reference to an objstm that does not exist: %d (%d 0 R)", (int)ofs, i);
1714
0
  }
1715
0
}
1716
1717
/*
1718
 * load xref tables from pdf
1719
 *
1720
 * File locked on entry, throughout and on exit.
1721
 */
1722
1723
static void
1724
pdf_load_xref(fz_context *ctx, pdf_document *doc)
1725
11
{
1726
11
  int xref_len;
1727
11
  pdf_xref_entry *entry;
1728
1729
11
  pdf_read_start_xref(ctx, doc);
1730
1731
11
  pdf_read_xref_sections(ctx, doc, doc->startxref, 1);
1732
1733
11
  if (pdf_xref_len(ctx, doc) == 0)
1734
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "found xref was empty");
1735
1736
11
  pdf_prime_xref_index(ctx, doc);
1737
1738
11
  entry = pdf_get_xref_entry_no_null(ctx, doc, 0);
1739
  /* broken pdfs where first object is missing */
1740
11
  if (!entry->type)
1741
0
  {
1742
0
    entry->type = 'f';
1743
0
    entry->gen = 65535;
1744
0
    entry->num = 0;
1745
0
  }
1746
  /* broken pdfs where first object is not free */
1747
11
  else if (entry->type != 'f')
1748
0
    fz_warn(ctx, "first object in xref is not free");
1749
1750
  /* broken pdfs where object offsets are out of range */
1751
11
  xref_len = pdf_xref_len(ctx, doc);
1752
11
  pdf_xref_entry_map(ctx, doc, check_xref_entry_offsets, (void *)(intptr_t)xref_len);
1753
11
}
1754
1755
static void
1756
pdf_check_linear(fz_context *ctx, pdf_document *doc)
1757
11
{
1758
11
  pdf_obj *dict = NULL;
1759
11
  pdf_obj *o;
1760
11
  int num, gen;
1761
11
  int64_t stmofs;
1762
1763
11
  fz_var(dict);
1764
1765
22
  fz_try(ctx)
1766
22
  {
1767
11
    dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL);
1768
11
    if (!pdf_is_dict(ctx, dict))
1769
0
      break;
1770
11
    o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
1771
11
    if (o == NULL)
1772
7
      break;
1773
4
    if (pdf_to_int(ctx, o) != 1)
1774
0
      break;
1775
4
    doc->has_linearization_object = 1;
1776
4
  }
1777
22
  fz_always(ctx)
1778
11
    pdf_drop_obj(ctx, dict);
1779
11
  fz_catch(ctx)
1780
1
  {
1781
    /* Silently swallow this error. */
1782
1
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1783
1
    fz_report_error(ctx);
1784
1
  }
1785
11
}
1786
1787
static void
1788
pdf_load_linear(fz_context *ctx, pdf_document *doc)
1789
0
{
1790
0
  pdf_obj *dict = NULL;
1791
0
  pdf_obj *hint = NULL;
1792
0
  pdf_obj *o;
1793
0
  int num, gen, lin, len;
1794
0
  int64_t stmofs;
1795
1796
0
  fz_var(dict);
1797
0
  fz_var(hint);
1798
1799
0
  fz_try(ctx)
1800
0
  {
1801
0
    pdf_xref_entry *entry;
1802
1803
0
    dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL);
1804
0
    if (!pdf_is_dict(ctx, dict))
1805
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to read linearized dictionary");
1806
0
    o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
1807
0
    if (o == NULL)
1808
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to read linearized dictionary");
1809
0
    lin = pdf_to_int(ctx, o);
1810
0
    if (lin != 1)
1811
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected version of Linearized tag (%d)", lin);
1812
0
    doc->has_linearization_object = 1;
1813
0
    len = pdf_dict_get_int(ctx, dict, PDF_NAME(L));
1814
0
    if (len != doc->file_length)
1815
0
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "File has been updated since linearization");
1816
1817
0
    pdf_read_xref_sections(ctx, doc, fz_tell(ctx, doc->file), 0);
1818
1819
0
    doc->linear_page_count = pdf_dict_get_int(ctx, dict, PDF_NAME(N));
1820
0
    doc->linear_page_refs = fz_realloc_array(ctx, doc->linear_page_refs, doc->linear_page_count, pdf_obj *);
1821
0
    memset(doc->linear_page_refs, 0, doc->linear_page_count * sizeof(pdf_obj*));
1822
0
    doc->linear_obj = dict;
1823
0
    doc->linear_pos = fz_tell(ctx, doc->file);
1824
0
    doc->linear_page1_obj_num = pdf_dict_get_int(ctx, dict, PDF_NAME(O));
1825
0
    doc->linear_page_refs[0] = pdf_new_indirect(ctx, doc, doc->linear_page1_obj_num, 0);
1826
0
    doc->linear_page_num = 0;
1827
0
    hint = pdf_dict_get(ctx, dict, PDF_NAME(H));
1828
0
    doc->hint_object_offset = pdf_array_get_int(ctx, hint, 0);
1829
0
    doc->hint_object_length = pdf_array_get_int(ctx, hint, 1);
1830
1831
0
    entry = pdf_get_populating_xref_entry(ctx, doc, 0);
1832
0
    entry->type = 'f';
1833
0
  }
1834
0
  fz_catch(ctx)
1835
0
  {
1836
0
    pdf_drop_obj(ctx, dict);
1837
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1838
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1839
0
    fz_report_error(ctx);
1840
    /* Drop back to non linearized reading mode */
1841
0
    doc->file_reading_linearly = 0;
1842
0
  }
1843
0
}
1844
1845
static void
1846
id_and_password(fz_context *ctx, pdf_document *doc)
1847
10
{
1848
10
  pdf_obj *encrypt, *id;
1849
1850
10
  pdf_prime_xref_index(ctx, doc);
1851
1852
10
  encrypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
1853
10
  id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
1854
1855
10
  if (pdf_is_dict(ctx, encrypt))
1856
0
    doc->crypt = pdf_new_crypt(ctx, encrypt, id);
1857
1858
  /* Allow lazy clients to read encrypted files with a blank password */
1859
10
  (void)pdf_authenticate_password(ctx, doc, "");
1860
10
}
1861
1862
/*
1863
 * Initialize and load xref tables.
1864
 * No password is taken here: once the xref is available, id_and_password()
 * is either called directly or handed to pdf_repair_xref_aux(); it creates
 * doc->crypt from the trailer and tries a blank password.
 *
 * FDF files and any load failure other than FZ_ERROR_TRYLATER or
 * FZ_ERROR_SYSTEM (which are rethrown) go down the repair path.
1865
 */
1866
static void
1867
pdf_init_document(fz_context *ctx, pdf_document *doc)
1868
16
{
1869
16
  int repaired = 0;
1870
1871
32
  fz_try(ctx)
1872
32
  {
1873
    /* Check to see if we should work in progressive mode */
1874
16
    if (doc->file->progressive)
1875
0
    {
1876
0
      doc->file_reading_linearly = 1;
1877
0
      fz_seek(ctx, doc->file, 0, SEEK_END);
1878
0
      doc->file_length = fz_tell(ctx, doc->file);
1879
0
      if (doc->file_length < 0)
1880
0
        doc->file_length = 0;
1881
0
      fz_seek(ctx, doc->file, 0, SEEK_SET);
1882
0
    }
1883
1884
16
    pdf_load_version(ctx, doc);
1885
1886
    /* FDF input skips xref loading entirely and is sent straight to
     * the repair path below. */
16
    if (doc->is_fdf)
1887
0
    {
1888
0
      doc->file_reading_linearly = 0;
1889
0
      repaired = 1;
1890
0
      break; /* skip to end of try/catch */
1891
0
    }
1892
1893
    /* Try to load the linearized file if we are in progressive
1894
     * mode. */
1895
16
    if (doc->file_reading_linearly)
1896
0
      pdf_load_linear(ctx, doc);
1897
16
    else
1898
      /* Even if we're not in progressive mode, check to see
1899
       * if the file claims to be linearized. This is important
1900
       * for checking signatures later on. */
1901
16
      pdf_check_linear(ctx, doc);
1902
1903
    /* If we aren't in progressive mode (or the linear load failed
1904
     * and has set us back to non-progressive mode), load normally.
1905
     */
1906
16
    if (!doc->file_reading_linearly)
1907
11
      pdf_load_xref(ctx, doc);
1908
16
  }
1909
32
  fz_catch(ctx)
1910
16
  {
    /* Discard whatever xref sections were loaded before the failure.
     * TRYLATER is rethrown before linear reading is switched off;
     * SYSTEM is rethrown after. Anything else is reported and the
     * document is repaired instead. */
1911
16
    pdf_drop_xref_sections(ctx, doc);
1912
16
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1913
16
    doc->file_reading_linearly = 0;
1914
16
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1915
16
    fz_report_error(ctx);
1916
16
    fz_warn(ctx, "trying to repair broken xref");
1917
16
    repaired = 1;
1918
16
  }
1919
1920
16
  if (repaired)
1921
16
  {
1922
    /* pdf_repair_xref may access xref_index, so reset it properly */
1923
16
    if (doc->xref_index)
1924
0
      memset(doc->xref_index, 0, sizeof(int) * doc->max_xref_len);
1925
16
    pdf_repair_xref_aux(ctx, doc, id_and_password);
1926
16
  }
1927
0
  else
1928
0
    id_and_password(ctx, doc);
1929
16
}
1930
1931
void
1932
pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc)
1933
16
{
1934
16
  if (doc == NULL)
1935
0
    return;
1936
16
  fz_drop_xml(ctx, doc->xfa);
1937
16
  doc->xfa = NULL;
1938
16
}
1939
1940
/*
 * Release everything owned by a PDF document.
 *
 * doc_ is cast to pdf_document. Between fz_defer_reap_start() and
 * fz_defer_reap_end() this drops, in order: the glyph cache contents,
 * the doc event callback, JS state, journal, resource tables, local
 * xref, xref sections and index, the file stream, crypt state,
 * linearization data, hint tables, type3 fonts, OCG data, the store
 * contents, the lexer buffer, doc->oi, orphaned objects and the page
 * tree. The XFA data is dropped last, after reaping is re-enabled.
 *
 * Errors from purging the glyph cache or decoupling type3 fonts are
 * reported and swallowed, except FZ_ERROR_SYSTEM, which is rethrown.
 * The pdf_document structure itself is not freed in this function.
 */
static void
1941
pdf_drop_document_imp(fz_context *ctx, fz_document *doc_)
1942
16
{
1943
16
  pdf_document *doc = (pdf_document*)doc_;
1944
16
  int i;
1945
1946
16
  fz_defer_reap_start(ctx);
1947
1948
  /* Type3 glyphs in the glyph cache can contain pdf_obj pointers
1949
   * that we are about to destroy. Simplest solution is to bin the
1950
   * glyph cache at this point. */
1951
32
  fz_try(ctx)
1952
32
    fz_purge_glyph_cache(ctx);
1953
32
  fz_catch(ctx)
1954
0
  {
1955
    /* Swallow error, but continue dropping */
1956
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1957
0
    fz_report_error(ctx);
1958
0
  }
1959
1960
16
  pdf_set_doc_event_callback(ctx, doc, NULL, NULL, NULL);
1961
16
  pdf_drop_js(ctx, doc->js);
1962
1963
16
  pdf_drop_journal(ctx, doc->journal);
1964
1965
16
  pdf_drop_resource_tables(ctx, doc);
1966
1967
16
  pdf_drop_local_xref(ctx, doc->local_xref);
1968
1969
16
  pdf_drop_xref_sections(ctx, doc);
1970
16
  fz_free(ctx, doc->xref_index);
1971
1972
16
  fz_drop_stream(ctx, doc->file);
1973
16
  pdf_drop_crypt(ctx, doc->crypt);
1974
1975
16
  pdf_drop_obj(ctx, doc->linear_obj);
1976
  /* One reference per linearized page slot, then the array itself. */
16
  if (doc->linear_page_refs)
1977
0
  {
1978
0
    for (i=0; i < doc->linear_page_count; i++)
1979
0
      pdf_drop_obj(ctx, doc->linear_page_refs[i]);
1980
1981
0
    fz_free(ctx, doc->linear_page_refs);
1982
0
  }
1983
1984
16
  fz_free(ctx, doc->hint_page);
1985
16
  fz_free(ctx, doc->hint_shared_ref);
1986
16
  fz_free(ctx, doc->hint_shared);
1987
16
  fz_free(ctx, doc->hint_obj_offsets);
1988
1989
  /* Each type3 font is decoupled from this document and then dropped;
   * the drop sits in fz_always so it happens even if decoupling throws. */
16
  for (i=0; i < doc->num_type3_fonts; i++)
1990
0
  {
1991
0
    fz_try(ctx)
1992
0
      fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
1993
0
    fz_always(ctx)
1994
0
      fz_drop_font(ctx, doc->type3_fonts[i]);
1995
0
    fz_catch(ctx)
1996
0
    {
1997
      /* Swallow error, but continue dropping */
1998
0
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1999
0
      fz_report_error(ctx);
2000
0
    }
2001
0
  }
2002
2003
16
  fz_free(ctx, doc->type3_fonts);
2004
2005
16
  pdf_drop_ocg(ctx, doc);
2006
2007
16
  pdf_empty_store(ctx, doc);
2008
2009
16
  pdf_lexbuf_fin(ctx, &doc->lexbuf.base);
2010
2011
16
  fz_drop_colorspace(ctx, doc->oi);
2012
2013
46
  for (i = 0; i < doc->orphans_count; i++)
2014
30
    pdf_drop_obj(ctx, doc->orphans[i]);
2015
2016
16
  fz_free(ctx, doc->orphans);
2017
2018
16
  pdf_drop_page_tree_internal(ctx, doc);
2019
2020
16
  fz_defer_reap_end(ctx);
2021
2022
16
  pdf_invalidate_xfa(ctx, doc);
2023
16
}
2024
2025
void
2026
pdf_drop_document(fz_context *ctx, pdf_document *doc)
2027
14
{
2028
14
  fz_drop_document(ctx, &doc->super);
2029
14
}
2030
2031
pdf_document *
2032
pdf_keep_document(fz_context *ctx, pdf_document *doc)
2033
14
{
2034
14
  return (pdf_document *)fz_keep_document(ctx, &doc->super);
2035
14
}
2036
2037
/*
2038
 * compressed object streams
2039
 */
2040
2041
/*
2042
  Do not hold pdf_xref_entry's over call to this function as they
2043
  may be invalidated!
2044
*/
2045
static pdf_xref_entry *
2046
pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, pdf_lexbuf *buf, int target)
2047
0
{
2048
0
  fz_stream *stm = NULL;
2049
0
  pdf_obj *objstm = NULL;
2050
0
  int *numbuf = NULL;
2051
0
  int64_t *ofsbuf = NULL;
2052
2053
0
  pdf_obj *obj;
2054
0
  int64_t first;
2055
0
  int count;
2056
0
  int i;
2057
0
  pdf_token tok;
2058
0
  pdf_xref_entry *ret_entry = NULL;
2059
0
  int ret_idx;
2060
0
  int xref_len;
2061
0
  int found;
2062
0
  fz_stream *sub = NULL;
2063
2064
0
  fz_var(numbuf);
2065
0
  fz_var(ofsbuf);
2066
0
  fz_var(objstm);
2067
0
  fz_var(stm);
2068
0
  fz_var(sub);
2069
2070
0
  fz_try(ctx)
2071
0
  {
2072
0
    objstm = pdf_load_object(ctx, doc, num);
2073
2074
0
    if (pdf_obj_marked(ctx, objstm))
2075
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "recursive object stream lookup");
2076
0
  }
2077
0
  fz_catch(ctx)
2078
0
  {
2079
0
    pdf_drop_obj(ctx, objstm);
2080
0
    fz_rethrow(ctx);
2081
0
  }
2082
2083
0
  fz_try(ctx)
2084
0
  {
2085
0
    (void)pdf_mark_obj(ctx, objstm);
2086
2087
0
    count = pdf_dict_get_int(ctx, objstm, PDF_NAME(N));
2088
0
    first = pdf_dict_get_int(ctx, objstm, PDF_NAME(First));
2089
2090
0
    if (count < 0 || count > PDF_MAX_OBJECT_NUMBER)
2091
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "number of objects in object stream out of range");
2092
2093
0
    numbuf = fz_calloc(ctx, count, sizeof(*numbuf));
2094
0
    ofsbuf = fz_calloc(ctx, count, sizeof(*ofsbuf));
2095
2096
0
    xref_len = pdf_xref_len(ctx, doc);
2097
2098
0
    found = 0;
2099
2100
0
    stm = pdf_open_stream_number(ctx, doc, num);
2101
0
    for (i = 0; i < count; i++)
2102
0
    {
2103
0
      tok = pdf_lex(ctx, stm, buf);
2104
0
      if (tok != PDF_TOK_INT)
2105
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt object stream (%d 0 R)", num);
2106
0
      numbuf[found] = buf->i;
2107
2108
0
      tok = pdf_lex(ctx, stm, buf);
2109
0
      if (tok != PDF_TOK_INT)
2110
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt object stream (%d 0 R)", num);
2111
0
      ofsbuf[found] = buf->i;
2112
2113
0
      if (numbuf[found] <= 0 || numbuf[found] >= xref_len)
2114
0
        fz_warn(ctx, "object stream object out of range, skipping");
2115
0
      else
2116
0
        found++;
2117
0
    }
2118
2119
0
    ret_idx = -1;
2120
0
    for (i = 0; i < found; i++)
2121
0
    {
2122
0
      pdf_xref_entry *entry;
2123
0
      uint64_t length;
2124
0
      int64_t offset;
2125
2126
0
      offset = first + ofsbuf[i];
2127
0
      if (i+1 < found)
2128
0
        length = ofsbuf[i+1] - ofsbuf[i];
2129
0
      else
2130
0
        length = UINT64_MAX;
2131
2132
0
      sub = fz_open_null_filter(ctx, stm, length, offset);
2133
2134
0
      obj = pdf_parse_stm_obj(ctx, doc, sub, buf);
2135
0
      fz_drop_stream(ctx, sub);
2136
0
      sub = NULL;
2137
2138
0
      entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[i]);
2139
2140
0
      pdf_set_obj_parent(ctx, obj, numbuf[i]);
2141
2142
      /* We may have set entry->type to be 'O' from being 'o' to avoid nasty
2143
       * recursions in pdf_cache_object. Accept the type being 'O' here. */
2144
0
      if ((entry->type == 'o' || entry->type == 'O') && entry->ofs == num)
2145
0
      {
2146
        /* If we already have an entry for this object,
2147
         * we'd like to drop it and use the new one -
2148
         * but this means that anyone currently holding
2149
         * a pointer to the old one will be left with a
2150
         * stale pointer. Instead, we drop the new one
2151
         * and trust that the old one is correct. */
2152
0
        if (entry->obj)
2153
0
        {
2154
0
          if (pdf_objcmp(ctx, entry->obj, obj))
2155
0
            fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]);
2156
0
          pdf_drop_obj(ctx, obj);
2157
0
        }
2158
0
        else
2159
0
        {
2160
0
          entry->obj = obj;
2161
          /* If we've just read a 'null' object, don't leave this as a NULL 'o' object,
2162
           * as that will a) confuse the code that called us into thinking that nothing
2163
           * was loaded, and b) cause the entire objstm to be reloaded every time that
2164
           * object is accessed. Instead, just mark it as an 'f'. */
2165
0
          if (obj == NULL)
2166
0
            entry->type = 'f';
2167
0
          fz_drop_buffer(ctx, entry->stm_buf);
2168
0
          entry->stm_buf = NULL;
2169
0
        }
2170
0
        if (numbuf[i] == target)
2171
0
          ret_idx = i;
2172
0
      }
2173
0
      else
2174
0
      {
2175
0
        pdf_drop_obj(ctx, obj);
2176
0
      }
2177
0
    }
2178
    /* Parsing our way through the stream can cause the xref to be
2179
     * solidified, which will move an entry. We therefore can't
2180
     * read the entry for returning until no more parsing is to be
2181
     * done. Thus we end up reading this entry twice. */
2182
0
    if (ret_idx >= 0)
2183
0
      ret_entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[ret_idx]);
2184
0
  }
2185
0
  fz_always(ctx)
2186
0
  {
2187
0
    fz_drop_stream(ctx, stm);
2188
0
    fz_drop_stream(ctx, sub);
2189
0
    fz_free(ctx, ofsbuf);
2190
0
    fz_free(ctx, numbuf);
2191
0
    pdf_unmark_obj(ctx, objstm);
2192
0
    pdf_drop_obj(ctx, objstm);
2193
0
  }
2194
0
  fz_catch(ctx)
2195
0
  {
2196
0
    fz_rethrow(ctx);
2197
0
  }
2198
0
  return ret_entry;
2199
0
}
2200
2201
/*
2202
 * object loading
2203
 */
2204
static int
2205
pdf_obj_read(fz_context *ctx, pdf_document *doc, int64_t *offset, int *nump, pdf_obj **page)
2206
0
{
2207
0
  pdf_lexbuf *buf = &doc->lexbuf.base;
2208
0
  int num, gen, tok;
2209
0
  int64_t numofs, genofs, stmofs, tmpofs, newtmpofs;
2210
0
  int xref_len;
2211
0
  pdf_xref_entry *entry;
2212
2213
0
  numofs = *offset;
2214
0
  fz_seek(ctx, doc->file, doc->bias + numofs, SEEK_SET);
2215
2216
  /* We expect to read 'num' here */
2217
0
  tok = pdf_lex(ctx, doc->file, buf);
2218
0
  genofs = fz_tell(ctx, doc->file);
2219
0
  if (tok != PDF_TOK_INT)
2220
0
  {
2221
    /* Failed! */
2222
0
    DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
2223
0
    *offset = genofs;
2224
0
    return tok == PDF_TOK_EOF;
2225
0
  }
2226
0
  *nump = num = buf->i;
2227
2228
  /* We expect to read 'gen' here */
2229
0
  tok = pdf_lex(ctx, doc->file, buf);
2230
0
  tmpofs = fz_tell(ctx, doc->file);
2231
0
  if (tok != PDF_TOK_INT)
2232
0
  {
2233
    /* Failed! */
2234
0
    DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
2235
0
    *offset = tmpofs;
2236
0
    return tok == PDF_TOK_EOF;
2237
0
  }
2238
0
  gen = buf->i;
2239
2240
  /* We expect to read 'obj' here */
2241
0
  do
2242
0
  {
2243
0
    tmpofs = fz_tell(ctx, doc->file);
2244
0
    tok = pdf_lex(ctx, doc->file, buf);
2245
0
    if (tok == PDF_TOK_OBJ)
2246
0
      break;
2247
0
    if (tok != PDF_TOK_INT)
2248
0
    {
2249
0
      DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
2250
0
      *offset = fz_tell(ctx, doc->file);
2251
0
      return tok == PDF_TOK_EOF;
2252
0
    }
2253
0
    DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
2254
0
    *nump = num = gen;
2255
0
    numofs = genofs;
2256
0
    gen = buf->i;
2257
0
    genofs = tmpofs;
2258
0
  }
2259
0
  while (1);
2260
2261
  /* Now we read the actual object */
2262
0
  xref_len = pdf_xref_len(ctx, doc);
2263
2264
  /* When we are reading a progressive file, we typically see:
2265
   *    File Header
2266
   *    obj m (Linearization params)
2267
   *    xref #1 (refers to objects m-n)
2268
   *    obj m+1
2269
   *    ...
2270
   *    obj n
2271
   *    obj 1
2272
   *    ...
2273
   *    obj n-1
2274
   *    xref #2
2275
   *
2276
   * The linearisation params are read elsewhere, hence
2277
   * whenever we read an object it should just go into the
2278
   * previous xref.
2279
   */
2280
0
  tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs, NULL);
2281
2282
0
  do /* So we can break out of it */
2283
0
  {
2284
0
    if (num <= 0 || num >= xref_len)
2285
0
    {
2286
0
      fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
2287
0
      break;
2288
0
    }
2289
0
    if (gen != 0)
2290
0
    {
2291
0
      fz_warn(ctx, "Unexpected non zero generation number in linearized file");
2292
0
    }
2293
0
    entry = pdf_get_populating_xref_entry(ctx, doc, num);
2294
0
    if (entry->type != 0)
2295
0
    {
2296
0
      DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
2297
0
      break;
2298
0
    }
2299
0
    if (page && *page)
2300
0
    {
2301
0
      DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
2302
0
      if (!entry->obj)
2303
0
        entry->obj = pdf_keep_obj(ctx, *page);
2304
2305
0
      if (doc->linear_page_refs[doc->linear_page_num] == NULL)
2306
0
        doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(ctx, doc, num, gen);
2307
0
    }
2308
0
    else
2309
0
    {
2310
0
      DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
2311
0
    }
2312
0
    entry->type = 'n';
2313
0
    entry->gen = gen; // XXX: was 0
2314
0
    entry->num = num;
2315
0
    entry->ofs = numofs;
2316
0
    entry->stm_ofs = stmofs;
2317
0
  }
2318
0
  while (0);
2319
0
  if (page && *page)
2320
0
    doc->linear_page_num++;
2321
2322
0
  if (tok == PDF_TOK_ENDOBJ)
2323
0
  {
2324
0
    *offset = fz_tell(ctx, doc->file);
2325
0
  }
2326
0
  else
2327
0
  {
2328
0
    *offset = newtmpofs;
2329
0
  }
2330
0
  return 0;
2331
0
}
2332
2333
static void
2334
pdf_load_hinted_page(fz_context *ctx, pdf_document *doc, int pagenum)
2335
0
{
2336
0
  pdf_obj *page = NULL;
2337
2338
0
  if (!doc->hints_loaded || !doc->linear_page_refs)
2339
0
    return;
2340
2341
0
  if (doc->linear_page_refs[pagenum])
2342
0
    return;
2343
2344
0
  fz_var(page);
2345
2346
0
  fz_try(ctx)
2347
0
  {
2348
0
    int num = doc->hint_page[pagenum].number;
2349
0
    page = pdf_load_object(ctx, doc, num);
2350
0
    if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, page, PDF_NAME(Type))))
2351
0
    {
2352
      /* We have found the page object! */
2353
0
      DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
2354
0
      doc->linear_page_refs[pagenum] = pdf_new_indirect(ctx, doc, num, 0);
2355
0
    }
2356
0
  }
2357
0
  fz_always(ctx)
2358
0
    pdf_drop_obj(ctx, page);
2359
0
  fz_catch(ctx)
2360
0
  {
2361
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2362
    /* Swallow the error and proceed as normal */
2363
0
    fz_report_error(ctx);
2364
0
  }
2365
0
}
2366
2367
static int
2368
read_hinted_object(fz_context *ctx, pdf_document *doc, int num)
2369
0
{
2370
  /* Try to find the object using our hint table. Find the closest
2371
   * object <= the one we want that has a hint and read forward from
2372
   * there. */
2373
0
  int expected = num;
2374
0
  int curr_pos;
2375
0
  int64_t start, offset;
2376
2377
0
  while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
2378
0
    expected--;
2379
0
  if (expected != num)
2380
0
    DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", expected, num));
2381
0
  if (expected == 0) /* No hints found, just bail */
2382
0
    return 0;
2383
2384
0
  curr_pos = fz_tell(ctx, doc->file);
2385
0
  offset = doc->hint_obj_offsets[expected];
2386
2387
0
  fz_var(expected);
2388
2389
0
  fz_try(ctx)
2390
0
  {
2391
0
    int found;
2392
2393
    /* Try to read forward from there */
2394
0
    do
2395
0
    {
2396
0
      start = offset;
2397
0
      DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
2398
0
      pdf_obj_read(ctx, doc, &offset, &found, 0);
2399
0
      DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
2400
0
      if (found <= expected)
2401
0
      {
2402
        /* We found the right one (or one earlier than
2403
         * we expected). Update the hints. */
2404
0
        doc->hint_obj_offsets[expected] = offset;
2405
0
        doc->hint_obj_offsets[found] = start;
2406
0
        doc->hint_obj_offsets[found+1] = offset;
2407
        /* Retry with the next one */
2408
0
        expected = found+1;
2409
0
      }
2410
0
      else
2411
0
      {
2412
        /* We found one later than we expected. */
2413
0
        doc->hint_obj_offsets[expected] = 0;
2414
0
        doc->hint_obj_offsets[found] = start;
2415
0
        doc->hint_obj_offsets[found+1] = offset;
2416
0
        while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
2417
0
          expected--;
2418
0
        if (expected == 0) /* No hints found, we give up */
2419
0
          break;
2420
0
      }
2421
0
    }
2422
0
    while (found != num);
2423
0
  }
2424
0
  fz_always(ctx)
2425
0
  {
2426
0
    fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
2427
0
  }
2428
0
  fz_catch(ctx)
2429
0
  {
2430
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2431
    /* FIXME: Currently we ignore the hint. Perhaps we should
2432
     * drop back to non-hinted operation here. */
2433
0
    doc->hint_obj_offsets[expected] = 0;
2434
0
    fz_rethrow(ctx);
2435
0
  }
2436
0
  return expected != 0;
2437
0
}
2438
2439
pdf_obj *
2440
pdf_load_unencrypted_object(fz_context *ctx, pdf_document *doc, int num)
2441
0
{
2442
0
  pdf_xref_entry *x;
2443
2444
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2445
0
    return NULL;
2446
2447
0
  x = pdf_get_xref_entry_no_null(ctx, doc, num);
2448
0
  if (x->type == 'n')
2449
0
  {
2450
0
    fz_seek(ctx, doc->file, doc->bias + x->ofs, SEEK_SET);
2451
0
    return pdf_parse_ind_obj(ctx, doc, doc->file, NULL, NULL, NULL, NULL);
2452
0
  }
2453
0
  return NULL;
2454
0
}
2455
2456
int
2457
pdf_object_exists(fz_context *ctx, pdf_document *doc, int num)
2458
0
{
2459
0
  pdf_xref_entry *x;
2460
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2461
0
    return 0;
2462
0
  x = pdf_get_xref_entry(ctx, doc, num);
2463
0
  if (x && (x->type == 'n' || x->type == 'o'))
2464
0
    return 1;
2465
0
  return 0;
2466
0
}
2467
2468
/*
 * Make sure object 'num' is loaded into its xref entry and return that
 * entry.
 *
 * Returns NULL if 'num' is out of range or no xref entry can be found.
 * If the entry already holds an object it is returned as is. Otherwise
 * the entry type decides how the object is obtained:
 *   'f'  - the entry's object becomes PDF_NULL.
 *   'n'  - the object is parsed from the file at doc->bias + ofs and,
 *          if doc->crypt is set, passed through pdf_crypt_obj.
 *   'o'  - the containing object stream (number held in ofs) is loaded.
 *   else - the hint table is tried; failing that an error is thrown
 *          (FZ_ERROR_TRYLATER if the file is still loading,
 *          FZ_ERROR_FORMAT otherwise).
 * When parsing asks for it, or the wrong object is found at the offset,
 * the xref is repaired and the lookup restarts from object_updated.
 *
 * On success the loaded object's parent number is set to 'num'.
 */
pdf_xref_entry *
2469
pdf_cache_object(fz_context *ctx, pdf_document *doc, int num)
2470
21.4k
{
2471
21.4k
  pdf_xref_entry *x;
2472
21.4k
  int rnum, rgen, try_repair;
2473
2474
21.4k
  fz_var(try_repair);
2475
2476
21.4k
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2477
0
    return NULL;
2478
2479
  /* Re-entry point after a repair or a hinted read has changed the xref. */
21.4k
object_updated:
2480
21.4k
  try_repair = 0;
2481
21.4k
  rnum = num;
2482
2483
21.4k
  x = pdf_get_xref_entry(ctx, doc, num);
2484
21.4k
  if (x == NULL)
2485
0
    return NULL;
2486
2487
21.4k
  if (x->obj != NULL)
2488
20.9k
    return x;
2489
2490
523
  if (x->type == 'f')
2491
3
  {
2492
3
    x->obj = PDF_NULL;
2493
3
  }
2494
520
  else if (x->type == 'n')
2495
520
  {
2496
520
    fz_seek(ctx, doc->file, doc->bias + x->ofs, SEEK_SET);
2497
2498
1.04k
    fz_try(ctx)
2499
1.04k
    {
2500
520
      x->obj = pdf_parse_ind_obj(ctx, doc, doc->file,
2501
520
          &rnum, &rgen, &x->stm_ofs, &try_repair);
2502
520
    }
2503
1.04k
    fz_catch(ctx)
2504
181
    {
      /* A parse error is only swallowed when the parser set try_repair. */
2505
181
      fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2506
181
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2507
181
      if (!try_repair)
2508
181
        fz_rethrow(ctx);
2509
0
      else
2510
0
        fz_report_error(ctx);
2511
181
    }
2512
2513
    /* The object at this offset is not the one asked for: reset the
     * entry to a free one, and repair only if no repair was attempted yet. */
339
    if (!try_repair && rnum != num)
2514
0
    {
2515
0
      pdf_drop_obj(ctx, x->obj);
2516
0
      x->type = 'f';
2517
0
      x->ofs = -1;
2518
0
      x->gen = 0;
2519
0
      x->num = 0;
2520
0
      x->stm_ofs = 0;
2521
0
      x->obj = NULL;
2522
0
      try_repair = (doc->repair_attempted == 0);
2523
0
    }
2524
2525
339
    if (try_repair)
2526
0
    {
      /* Also jumped to from the 'o' branch below. */
2527
0
perform_repair:
2528
0
      fz_try(ctx)
2529
0
        pdf_repair_xref(ctx, doc);
2530
0
      fz_catch(ctx)
2531
0
      {
2532
0
        fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2533
0
        fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2534
0
        fz_rethrow_if(ctx, FZ_ERROR_REPAIRED);
2535
0
        fz_report_error(ctx);
2536
0
        if (rnum == num)
2537
0
          fz_throw(ctx, FZ_ERROR_FORMAT, "cannot parse object (%d 0 R)", num);
2538
0
        else
2539
0
          fz_throw(ctx, FZ_ERROR_FORMAT, "found object (%d 0 R) instead of (%d 0 R)", rnum, num);
2540
0
      }
2541
0
      goto object_updated;
2542
0
    }
2543
2544
339
    if (doc->crypt)
2545
0
      pdf_crypt_obj(ctx, doc->crypt, x->obj, x->num, x->gen);
2546
339
  }
2547
0
  else if (x->type == 'o')
2548
0
  {
2549
0
    if (!x->obj)
2550
0
    {
2551
0
      pdf_xref_entry *orig_x = x;
2552
0
      pdf_xref_entry *ox = x; /* This init is unused, but it shuts warnings up. */
2553
0
      orig_x->type = 'O'; /* Mark this node so we know we're recursing. */
2554
0
      fz_try(ctx)
2555
0
        x = pdf_load_obj_stm(ctx, doc, x->ofs, &doc->lexbuf.base, num);
2556
0
      fz_always(ctx)
2557
0
      {
2558
        /* Most of the time ox == orig_x, but if pdf_load_obj_stm performed a
2559
         * repair, it may not be. It is safe to call pdf_get_xref_entry_no_change
2560
         * here, as it does not try/catch. */
2561
0
        ox = pdf_get_xref_entry_no_change(ctx, doc, num);
2562
        /* Bug 706762: ox can be NULL if the object went away during a repair. */
2563
0
        if (ox && ox->type == 'O')
2564
0
          ox->type = 'o'; /* Not recursing any more. */
2565
0
      }
2566
0
      fz_catch(ctx)
2567
0
        fz_rethrow(ctx);
2568
0
      if (x == NULL)
2569
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "cannot load object stream containing object (%d 0 R)", num);
2570
      /* The stream loaded but did not supply this object: mark it free
       * and repair, unless a repair has already been attempted. */
0
      if (!x->obj)
2571
0
      {
2572
0
        x->type = 'f';
2573
0
        if (ox)
2574
0
          ox->type = 'f';
2575
0
        if (doc->repair_attempted)
2576
0
          fz_throw(ctx, FZ_ERROR_FORMAT, "object (%d 0 R) was not found in its object stream", num);
2577
0
        goto perform_repair;
2578
0
      }
2579
0
    }
2580
0
  }
2581
0
  else if (doc->hint_obj_offsets && read_hinted_object(ctx, doc, num))
2582
0
  {
2583
0
    goto object_updated;
2584
0
  }
2585
0
  else if (doc->file_length && doc->linear_pos < doc->file_length)
2586
0
  {
2587
0
    fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d 0 R) - not loaded yet?", num);
2588
0
  }
2589
0
  else
2590
0
  {
2591
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find object in xref (%d 0 R)", num);
2592
0
  }
2593
2594
342
  pdf_set_obj_parent(ctx, x->obj, num);
2595
342
  return x;
2596
523
}
2597
2598
pdf_obj *
2599
pdf_load_object(fz_context *ctx, pdf_document *doc, int num)
2600
551
{
2601
551
  pdf_xref_entry *entry;
2602
551
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2603
0
    return NULL;
2604
551
  entry = pdf_cache_object(ctx, doc, num);
2605
551
  if (entry)
2606
545
    return pdf_keep_obj(ctx, entry->obj);
2607
6
  return NULL;
2608
551
}
2609
2610
pdf_obj *
2611
pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref)
2612
21.2k
{
2613
21.2k
  if (pdf_is_indirect(ctx, ref))
2614
21.2k
  {
2615
21.2k
    pdf_document *doc = pdf_get_indirect_document(ctx, ref);
2616
21.2k
    int num = pdf_to_num(ctx, ref);
2617
21.2k
    pdf_xref_entry *entry;
2618
2619
21.2k
    if (!doc)
2620
0
      return NULL;
2621
2622
21.2k
    if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2623
345
      return NULL;
2624
2625
41.7k
    fz_try(ctx)
2626
41.7k
      entry = pdf_cache_object(ctx, doc, num);
2627
41.7k
    fz_catch(ctx)
2628
175
    {
2629
175
      fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2630
175
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2631
175
      fz_rethrow_if(ctx, FZ_ERROR_REPAIRED);
2632
175
      fz_report_error(ctx);
2633
175
      fz_warn(ctx, "cannot load object (%d 0 R) into cache", num);
2634
175
      return NULL;
2635
175
    }
2636
2637
20.7k
    if (entry)
2638
20.7k
      return entry->obj;
2639
2640
0
    return NULL;
2641
20.7k
  }
2642
0
  return ref;
2643
21.2k
}
2644
2645
pdf_obj *
2646
pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref)
2647
21.2k
{
2648
21.2k
  int sanity = 10;
2649
2650
42.4k
  while (pdf_is_indirect(ctx, ref))
2651
21.2k
  {
2652
21.2k
    if (--sanity == 0)
2653
0
    {
2654
0
      fz_warn(ctx, "too many indirections (possible indirection cycle involving %d 0 R)", pdf_to_num(ctx, ref));
2655
0
      return NULL;
2656
0
    }
2657
2658
21.2k
    ref = pdf_resolve_indirect(ctx, ref);
2659
21.2k
  }
2660
2661
21.2k
  return ref;
2662
21.2k
}
2663
2664
int
2665
pdf_count_objects(fz_context *ctx, pdf_document *doc)
2666
0
{
2667
0
  return pdf_xref_len(ctx, doc);
2668
0
}
2669
2670
int
2671
pdf_is_local_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
2672
0
{
2673
0
  pdf_xref *xref = doc->local_xref;
2674
0
  pdf_xref_subsec *sub;
2675
0
  int num;
2676
2677
0
  if (!pdf_is_indirect(ctx, obj))
2678
0
    return 0;
2679
2680
0
  if (xref == NULL)
2681
0
    return 0; /* no local xref present */
2682
2683
0
  num = pdf_to_num(ctx, obj);
2684
2685
  /* Local xrefs only ever have 1 section, and it should be solid. */
2686
0
  sub = xref->subsec;
2687
0
  if (num >= sub->start && num < sub->start + sub->len)
2688
0
    return sub->table[num - sub->start].type != 0;
2689
2690
0
  return 0;
2691
0
}
2692
2693
static int
2694
pdf_create_local_object(fz_context *ctx, pdf_document *doc)
2695
0
{
2696
  /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
2697
0
  pdf_xref_entry *entry;
2698
0
  int num;
2699
2700
0
  num = doc->local_xref->num_objects;
2701
2702
0
  entry = pdf_get_local_xref_entry(ctx, doc, num);
2703
0
  entry->type = 'f';
2704
0
  entry->ofs = -1;
2705
0
  entry->gen = 0;
2706
0
  entry->num = num;
2707
0
  entry->stm_ofs = 0;
2708
0
  entry->stm_buf = NULL;
2709
0
  entry->obj = NULL;
2710
0
  return num;
2711
0
}
2712
2713
int
2714
pdf_create_object(fz_context *ctx, pdf_document *doc)
2715
0
{
2716
  /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
2717
0
  pdf_xref_entry *entry;
2718
0
  int num;
2719
2720
0
  if (doc->local_xref && doc->local_xref_nesting > 0)
2721
0
    return pdf_create_local_object(ctx, doc);
2722
2723
0
  num = pdf_xref_len(ctx, doc);
2724
2725
0
  if (num > PDF_MAX_OBJECT_NUMBER)
2726
0
    fz_throw(ctx, FZ_ERROR_LIMIT, "too many objects stored in pdf");
2727
2728
0
  entry = pdf_get_incremental_xref_entry(ctx, doc, num);
2729
0
  entry->type = 'f';
2730
0
  entry->ofs = -1;
2731
0
  entry->gen = 0;
2732
0
  entry->num = num;
2733
0
  entry->stm_ofs = 0;
2734
0
  entry->stm_buf = NULL;
2735
0
  entry->obj = NULL;
2736
2737
0
  pdf_add_journal_fragment(ctx, doc, num, NULL, NULL, 1);
2738
2739
0
  return num;
2740
0
}
2741
2742
static void
2743
pdf_delete_local_object(fz_context *ctx, pdf_document *doc, int num)
2744
0
{
2745
0
  pdf_xref_entry *x;
2746
2747
0
  if (doc->local_xref == NULL || doc->local_xref_nesting == 0)
2748
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "No local xref to delete from!");
2749
2750
0
  if (num <= 0 || num >= doc->local_xref->num_objects)
2751
0
  {
2752
0
    fz_warn(ctx, "local object out of range (%d 0 R); xref size %d", num, doc->local_xref->num_objects);
2753
0
    return;
2754
0
  }
2755
2756
0
  x = pdf_get_local_xref_entry(ctx, doc, num);
2757
2758
0
  fz_drop_buffer(ctx, x->stm_buf);
2759
0
  pdf_drop_obj(ctx, x->obj);
2760
2761
0
  x->type = 'f';
2762
0
  x->ofs = 0;
2763
0
  x->gen += 1;
2764
0
  x->num = 0;
2765
0
  x->stm_ofs = 0;
2766
0
  x->stm_buf = NULL;
2767
0
  x->obj = NULL;
2768
0
}
2769
2770
void
2771
pdf_delete_object(fz_context *ctx, pdf_document *doc, int num)
2772
0
{
2773
0
  pdf_xref_entry *x;
2774
0
  pdf_xref *xref;
2775
0
  int j;
2776
2777
0
  if (doc->local_xref && doc->local_xref_nesting > 0)
2778
0
  {
2779
0
    pdf_delete_local_object(ctx, doc, num);
2780
0
    return;
2781
0
  }
2782
2783
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2784
0
  {
2785
0
    fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2786
0
    return;
2787
0
  }
2788
2789
0
  x = pdf_get_incremental_xref_entry(ctx, doc, num);
2790
2791
0
  fz_drop_buffer(ctx, x->stm_buf);
2792
0
  pdf_drop_obj(ctx, x->obj);
2793
2794
0
  x->type = 'f';
2795
0
  x->ofs = 0;
2796
0
  x->gen += 1;
2797
0
  x->num = 0;
2798
0
  x->stm_ofs = 0;
2799
0
  x->stm_buf = NULL;
2800
0
  x->obj = NULL;
2801
2802
  /* Currently we've left a 'free' object in the incremental
2803
   * section. This is enough to cause us to think that the
2804
   * document has changes. Check back in the non-incremental
2805
   * sections to see if the last instance of the object there
2806
   * was free (or if this object never appeared). If so, we
2807
   * can mark this object as non-existent in the incremental
2808
   * xref. This is important so we can 'undo' back to emptiness
2809
   * after we save/when we reload a snapshot. */
2810
0
  for (j = 1; j < doc->num_xref_sections; j++)
2811
0
  {
2812
0
    xref = &doc->xref_sections[j];
2813
2814
0
    if (num < xref->num_objects)
2815
0
    {
2816
0
      pdf_xref_subsec *sub;
2817
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
2818
0
      {
2819
0
        pdf_xref_entry *entry;
2820
2821
0
        if (num < sub->start || num >= sub->start + sub->len)
2822
0
          continue;
2823
2824
0
        entry = &sub->table[num - sub->start];
2825
0
        if (entry->type)
2826
0
        {
2827
0
          if (entry->type == 'f')
2828
0
          {
2829
            /* It was free already! */
2830
0
            x->type = 0;
2831
0
            x->gen = 0;
2832
0
          }
2833
          /* It was a real object. */
2834
0
          return;
2835
0
        }
2836
0
      }
2837
0
    }
2838
0
  }
2839
  /* It never appeared before. */
2840
0
  x->type = 0;
2841
0
  x->gen = 0;
2842
0
}
2843
2844
static void
2845
pdf_update_local_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
2846
0
{
2847
0
  pdf_xref_entry *x;
2848
2849
0
  if (doc->local_xref == NULL || doc->local_xref_nesting == 0)
2850
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't update local object without a local xref");
2851
2852
0
  if (!newobj)
2853
0
  {
2854
0
    pdf_delete_local_object(ctx, doc, num);
2855
0
    return;
2856
0
  }
2857
2858
0
  x = pdf_get_local_xref_entry(ctx, doc, num);
2859
2860
0
  pdf_drop_obj(ctx, x->obj);
2861
2862
0
  x->type = 'n';
2863
0
  x->ofs = 0;
2864
0
  x->obj = pdf_keep_obj(ctx, newobj);
2865
2866
0
  pdf_set_obj_parent(ctx, newobj, num);
2867
0
}
2868
2869
void
2870
pdf_update_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
2871
0
{
2872
0
  pdf_xref_entry *x;
2873
2874
0
  if (!doc)
2875
0
    return;
2876
2877
0
  if (doc->local_xref && doc->local_xref_nesting > 0)
2878
0
  {
2879
0
    pdf_update_local_object(ctx, doc, num, newobj);
2880
0
    return;
2881
0
  }
2882
2883
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2884
0
  {
2885
0
    fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2886
0
    return;
2887
0
  }
2888
2889
0
  if (!newobj)
2890
0
  {
2891
0
    pdf_delete_object(ctx, doc, num);
2892
0
    return;
2893
0
  }
2894
2895
0
  x = pdf_get_incremental_xref_entry(ctx, doc, num);
2896
2897
0
  pdf_drop_obj(ctx, x->obj);
2898
2899
0
  x->type = 'n';
2900
0
  x->ofs = 0;
2901
0
  x->obj = pdf_keep_obj(ctx, newobj);
2902
2903
0
  pdf_set_obj_parent(ctx, newobj, num);
2904
0
}
2905
2906
void
2907
pdf_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj, fz_buffer *newbuf, int compressed)
2908
0
{
2909
0
  int num;
2910
0
  pdf_xref_entry *x;
2911
2912
0
  if (pdf_is_indirect(ctx, obj))
2913
0
    num = pdf_to_num(ctx, obj);
2914
0
  else
2915
0
    num = pdf_obj_parent_num(ctx, obj);
2916
2917
  /* Write the Length first, as this has the effect of moving the
2918
   * old object into the journal for undo. This also moves the
2919
   * stream buffer with it, keeping it consistent. */
2920
0
  pdf_dict_put_int(ctx, obj, PDF_NAME(Length), fz_buffer_storage(ctx, newbuf, NULL));
2921
2922
0
  if (doc->local_xref && doc->local_xref_nesting > 0)
2923
0
  {
2924
0
    x = pdf_get_local_xref_entry(ctx, doc, num);
2925
0
  }
2926
0
  else
2927
0
  {
2928
0
    if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2929
0
    {
2930
0
      fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2931
0
      return;
2932
0
    }
2933
2934
0
    x = pdf_get_xref_entry_no_null(ctx, doc, num);
2935
0
  }
2936
2937
0
  fz_drop_buffer(ctx, x->stm_buf);
2938
0
  x->stm_buf = fz_keep_buffer(ctx, newbuf);
2939
2940
0
  if (!compressed)
2941
0
  {
2942
0
    pdf_dict_del(ctx, obj, PDF_NAME(Filter));
2943
0
    pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));
2944
0
  }
2945
0
}
2946
2947
int
2948
pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *buf, size_t size)
2949
0
{
2950
0
  if (!strcmp(key, FZ_META_FORMAT))
2951
0
  {
2952
0
    int version = pdf_version(ctx, doc);
2953
0
    return 1 + (int)fz_snprintf(buf, size, "PDF %d.%d", version/10, version % 10);
2954
0
  }
2955
2956
0
  if (!strcmp(key, FZ_META_ENCRYPTION))
2957
0
  {
2958
0
    if (doc->crypt)
2959
0
    {
2960
0
      const char *stream_method = pdf_crypt_stream_method(ctx, doc->crypt);
2961
0
      const char *string_method = pdf_crypt_string_method(ctx, doc->crypt);
2962
0
      if (stream_method == string_method)
2963
0
        return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit %s",
2964
0
            pdf_crypt_version(ctx, doc->crypt),
2965
0
            pdf_crypt_revision(ctx, doc->crypt),
2966
0
            pdf_crypt_length(ctx, doc->crypt),
2967
0
            pdf_crypt_string_method(ctx, doc->crypt));
2968
0
      else
2969
0
        return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit streams: %s strings: %s",
2970
0
            pdf_crypt_version(ctx, doc->crypt),
2971
0
            pdf_crypt_revision(ctx, doc->crypt),
2972
0
            pdf_crypt_length(ctx, doc->crypt),
2973
0
            pdf_crypt_stream_method(ctx, doc->crypt),
2974
0
            pdf_crypt_string_method(ctx, doc->crypt));
2975
0
    }
2976
0
    else
2977
0
      return 1 + (int)fz_strlcpy(buf, "None", size);
2978
0
  }
2979
2980
0
  if (strstr(key, "info:") == key)
2981
0
  {
2982
0
    pdf_obj *info;
2983
0
    const char *s;
2984
0
    int n;
2985
2986
0
    info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
2987
0
    if (!info)
2988
0
      return -1;
2989
2990
0
    info = pdf_dict_gets(ctx, info, key + 5);
2991
0
    if (!info)
2992
0
      return -1;
2993
2994
0
    s = pdf_to_text_string(ctx, info);
2995
0
    if (strlen(s) <= 0)
2996
0
      return -1;
2997
2998
0
    n = 1 + (int)fz_strlcpy(buf, s, size);
2999
0
    return n;
3000
0
  }
3001
3002
0
  return -1;
3003
0
}
3004
3005
void
3006
pdf_set_metadata(fz_context *ctx, pdf_document *doc, const char *key, const char *value)
3007
0
{
3008
3009
0
  pdf_obj *info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
3010
3011
0
  pdf_begin_operation(ctx, doc, "Set Metadata");
3012
3013
0
  fz_try(ctx)
3014
0
  {
3015
    /* Ensure we have an Info dictionary. */
3016
0
    if (!pdf_is_dict(ctx, info))
3017
0
    {
3018
0
      info = pdf_add_new_dict(ctx, doc, 8);
3019
0
      pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), info);
3020
0
    }
3021
3022
0
    if (!strcmp(key, FZ_META_INFO_TITLE))
3023
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Title), value);
3024
0
    else if (!strcmp(key, FZ_META_INFO_AUTHOR))
3025
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Author), value);
3026
0
    else if (!strcmp(key, FZ_META_INFO_SUBJECT))
3027
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Subject), value);
3028
0
    else if (!strcmp(key, FZ_META_INFO_KEYWORDS))
3029
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Keywords), value);
3030
0
    else if (!strcmp(key, FZ_META_INFO_CREATOR))
3031
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Creator), value);
3032
0
    else if (!strcmp(key, FZ_META_INFO_PRODUCER))
3033
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Producer), value);
3034
0
    else if (!strcmp(key, FZ_META_INFO_CREATIONDATE))
3035
0
    {
3036
0
      int64_t time = pdf_parse_date(ctx, value);
3037
0
      if (time >= 0)
3038
0
        pdf_dict_put_date(ctx, info, PDF_NAME(CreationDate), time);
3039
0
    }
3040
0
    else if (!strcmp(key, FZ_META_INFO_MODIFICATIONDATE))
3041
0
    {
3042
0
      int64_t time = pdf_parse_date(ctx, value);
3043
0
      if (time >= 0)
3044
0
        pdf_dict_put_date(ctx, info, PDF_NAME(ModDate), time);
3045
0
    }
3046
3047
0
    if (!strncmp(key, FZ_META_INFO, strlen(FZ_META_INFO)))
3048
0
      key += strlen(FZ_META_INFO);
3049
0
    pdf_dict_put_text_string(ctx, info, pdf_new_name(ctx, key), value);
3050
0
    pdf_end_operation(ctx, doc);
3051
0
  }
3052
0
  fz_catch(ctx)
3053
0
  {
3054
0
    pdf_abandon_operation(ctx, doc);
3055
0
    fz_rethrow(ctx);
3056
0
  }
3057
0
}
3058
3059
static fz_link_dest
3060
pdf_resolve_link_imp(fz_context *ctx, fz_document *doc_, const char *uri)
3061
0
{
3062
0
  pdf_document *doc = (pdf_document*)doc_;
3063
0
  return pdf_resolve_link_dest(ctx, doc, uri);
3064
0
}
3065
3066
/*
	Build a URI string for an explicit link destination.
	The doc argument is not used.
*/
char *pdf_format_link_uri(fz_context *ctx, fz_document *doc, fz_link_dest dest)
{
	char *uri = pdf_new_uri_from_explicit_dest(ctx, dest);
	return uri;
}
3070
3071
static fz_document *
3072
as_pdf(fz_context *ctx, fz_document *doc)
3073
0
{
3074
0
  return doc;
3075
0
}
3076
3077
/*
3078
  Initializers for the fz_document interface.
3079
3080
  The functions are split across two files to allow calls to a
3081
  version of the constructor that does not link in the interpreter.
3082
  The interpreter references the built-in font and cmap resources
3083
  which are quite big. Not linking those into the mutool binary
3084
  saves roughly 6MB of space.
3085
*/
3086
3087
/* fz_document hook: forward to pdf_document_output_intent. */
static fz_colorspace *pdf_document_output_intent_imp(fz_context *ctx, fz_document *doc)
{
	pdf_document *pdf = (pdf_document *)doc;
	return pdf_document_output_intent(ctx, pdf);
}
3091
3092
/* fz_document hook: forward to pdf_needs_password. */
int pdf_needs_password_imp(fz_context *ctx, fz_document *doc)
{
	pdf_document *pdf = (pdf_document *)doc;
	return pdf_needs_password(ctx, pdf);
}
3096
3097
/* fz_document hook: forward to pdf_authenticate_password. */
int pdf_authenticate_password_imp(fz_context *ctx, fz_document *doc, const char *pw)
{
	pdf_document *pdf = (pdf_document *)doc;
	return pdf_authenticate_password(ctx, pdf, pw);
}
3101
3102
/* fz_document hook: forward to pdf_has_permission. */
int pdf_has_permission_imp(fz_context *ctx, fz_document *doc, fz_permission p)
{
	pdf_document *pdf = (pdf_document *)doc;
	return pdf_has_permission(ctx, pdf, p);
}
3106
3107
/* fz_document hook: forward to pdf_new_outline_iterator. */
fz_outline_iterator *pdf_new_outline_iterator_imp(fz_context *ctx, fz_document *doc)
{
	pdf_document *pdf = (pdf_document *)doc;
	return pdf_new_outline_iterator(ctx, pdf);
}
3111
3112
/* fz_document hook: forward to pdf_lookup_metadata. */
int pdf_lookup_metadata_imp(fz_context *ctx, fz_document *doc, const char *key, char *ptr, size_t size)
{
	pdf_document *pdf = (pdf_document *)doc;
	return pdf_lookup_metadata(ctx, pdf, key, ptr, size);
}
3116
3117
/* fz_document hook: forward to pdf_set_metadata. */
void pdf_set_metadata_imp(fz_context *ctx, fz_document *doc, const char *key, const char *value)
{
	pdf_document *pdf = (pdf_document *)doc;
	pdf_set_metadata(ctx, pdf, key, value);
}
3121
3122
/* fz_document hook: forward to pdf_run_document_structure. */
void pdf_run_document_structure_imp(fz_context *ctx, fz_document *doc, fz_device *dev, fz_cookie *cookie)
{
	pdf_document *pdf = (pdf_document *)doc;
	pdf_run_document_structure(ctx, pdf, dev, cookie);
}
3126
3127
#ifndef NDEBUG
/* Debug-build-only check, invoked from pdf_new_document; defined elsewhere. */
void pdf_verify_name_table_sanity(void);
#endif
3130
3131
3132
static pdf_document *
3133
pdf_new_document(fz_context *ctx, fz_stream *file)
3134
16
{
3135
16
  pdf_document *doc = fz_new_derived_document(ctx, pdf_document);
3136
3137
16
#ifndef NDEBUG
3138
16
  pdf_verify_name_table_sanity();
3139
16
#endif
3140
3141
16
  doc->super.drop_document = pdf_drop_document_imp;
3142
16
  doc->super.get_output_intent = pdf_document_output_intent_imp;
3143
16
  doc->super.needs_password = pdf_needs_password_imp;
3144
16
  doc->super.authenticate_password = pdf_authenticate_password_imp;
3145
16
  doc->super.has_permission = pdf_has_permission_imp;
3146
16
  doc->super.outline_iterator = pdf_new_outline_iterator_imp;
3147
16
  doc->super.resolve_link_dest = pdf_resolve_link_imp;
3148
16
  doc->super.format_link_uri = pdf_format_link_uri;
3149
16
  doc->super.count_pages = pdf_count_pages_imp;
3150
16
  doc->super.load_page = pdf_load_page_imp;
3151
16
  doc->super.page_label = pdf_page_label_imp;
3152
16
  doc->super.lookup_metadata = pdf_lookup_metadata_imp;
3153
16
  doc->super.set_metadata = pdf_set_metadata_imp;
3154
16
  doc->super.run_structure = pdf_run_document_structure_imp;
3155
16
  doc->super.as_pdf = as_pdf;
3156
3157
16
  pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE);
3158
16
  doc->file = fz_keep_stream(ctx, file);
3159
3160
  /* Default to PDF-1.7 if the version header is missing and for new documents */
3161
16
  doc->version = 17;
3162
3163
16
  doc->use_page_tree_map = 1;
3164
3165
16
  return doc;
3166
16
}
3167
3168
pdf_document *
3169
pdf_open_document_with_stream(fz_context *ctx, fz_stream *file)
3170
16
{
3171
16
  pdf_document *doc = pdf_new_document(ctx, file);
3172
32
  fz_try(ctx)
3173
32
  {
3174
16
    pdf_init_document(ctx, doc);
3175
16
  }
3176
32
  fz_catch(ctx)
3177
6
  {
3178
    /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */
3179
6
    char message[256];
3180
6
    int code;
3181
6
    fz_strlcpy(message, fz_convert_error(ctx, &code), sizeof message);
3182
6
    fz_drop_document(ctx, &doc->super);
3183
6
    fz_throw(ctx, code, "%s", message);
3184
6
  }
3185
10
  return doc;
3186
16
}
3187
3188
/* Uncomment the following to test progressive loading. */
3189
/* #define TEST_PROGRESSIVE_HACK */
3190
3191
pdf_document *
3192
pdf_open_document(fz_context *ctx, const char *filename)
3193
0
{
3194
0
  fz_stream *file = NULL;
3195
0
  pdf_document *doc = NULL;
3196
3197
0
  fz_var(file);
3198
0
  fz_var(doc);
3199
3200
0
  fz_try(ctx)
3201
0
  {
3202
0
    file = fz_open_file(ctx, filename);
3203
#ifdef TEST_PROGRESSIVE_HACK
3204
    file->progressive = 1;
3205
#endif
3206
0
    doc = pdf_new_document(ctx, file);
3207
0
    pdf_init_document(ctx, doc);
3208
0
  }
3209
0
  fz_always(ctx)
3210
0
  {
3211
0
    fz_drop_stream(ctx, file);
3212
0
  }
3213
0
  fz_catch(ctx)
3214
0
  {
3215
    /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */
3216
0
    char message[256];
3217
0
    int code;
3218
0
    fz_strlcpy(message, fz_convert_error(ctx, &code), sizeof message);
3219
0
    fz_drop_document(ctx, &doc->super);
3220
0
    fz_throw(ctx, code, "%s", message);
3221
0
  }
3222
3223
#ifdef TEST_PROGRESSIVE_HACK
3224
  if (doc->file_reading_linearly)
3225
  {
3226
    fz_try(ctx)
3227
      pdf_progressive_advance(ctx, doc, doc->linear_page_count-1);
3228
    fz_catch(ctx)
3229
    {
3230
      doc->file_reading_linearly = 0;
3231
      /* swallow the error */
3232
    }
3233
  }
3234
#endif
3235
3236
0
  return doc;
3237
0
}
3238
3239
static void
3240
pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum)
3241
0
{
3242
0
  fz_stream *stream = NULL;
3243
0
  pdf_obj *dict;
3244
3245
0
  fz_var(stream);
3246
0
  fz_var(dict);
3247
3248
0
  fz_try(ctx)
3249
0
  {
3250
0
    int i, j, least_num_page_objs, page_obj_num_bits;
3251
0
    int least_page_len, page_len_num_bits, shared_hint_offset;
3252
    /* int least_page_offset, page_offset_num_bits; */
3253
    /* int least_content_stream_len, content_stream_len_num_bits; */
3254
0
    int num_shared_obj_num_bits, shared_obj_num_bits;
3255
    /* int numerator_bits, denominator_bits; */
3256
0
    int shared;
3257
0
    int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
3258
0
    int shared_obj_count_total;
3259
0
    int least_shared_group_len, shared_group_len_num_bits;
3260
0
    int max_object_num = pdf_xref_len(ctx, doc);
3261
3262
0
    stream = pdf_open_stream_number(ctx, doc, objnum);
3263
0
    dict = pdf_get_xref_entry_no_null(ctx, doc, objnum)->obj;
3264
0
    if (dict == NULL || !pdf_is_dict(ctx, dict))
3265
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "malformed hint object");
3266
3267
0
    shared_hint_offset = pdf_dict_get_int(ctx, dict, PDF_NAME(S));
3268
3269
    /* Malloc the structures (use realloc to cope with the fact we
3270
     * may try this several times before enough data is loaded) */
3271
0
    doc->hint_page = fz_realloc_array(ctx, doc->hint_page, doc->linear_page_count+1, pdf_hint_page);
3272
0
    memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->linear_page_count+1));
3273
0
    doc->hint_obj_offsets = fz_realloc_array(ctx, doc->hint_obj_offsets, max_object_num, int64_t);
3274
0
    memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num);
3275
0
    doc->hint_obj_offsets_max = max_object_num;
3276
3277
    /* Read the page object hints table: Header first */
3278
0
    least_num_page_objs = fz_read_bits(ctx, stream, 32);
3279
    /* The following is sometimes a lie, but we read this version,
3280
     * as other table values are built from it. In
3281
     * pdf_reference17.pdf, this points to 2 objects before the
3282
     * first pages page object. */
3283
0
    doc->hint_page[0].offset = fz_read_bits(ctx, stream, 32);
3284
0
    if (doc->hint_page[0].offset > doc->hint_object_offset)
3285
0
      doc->hint_page[0].offset += doc->hint_object_length;
3286
0
    page_obj_num_bits = fz_read_bits(ctx, stream, 16);
3287
0
    least_page_len = fz_read_bits(ctx, stream, 32);
3288
0
    page_len_num_bits = fz_read_bits(ctx, stream, 16);
3289
0
    /* least_page_offset = */ (void) fz_read_bits(ctx, stream, 32);
3290
0
    /* page_offset_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
3291
0
    /* least_content_stream_len = */ (void) fz_read_bits(ctx, stream, 32);
3292
0
    /* content_stream_len_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
3293
0
    num_shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3294
0
    shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3295
0
    /* numerator_bits = */ (void) fz_read_bits(ctx, stream, 16);
3296
0
    /* denominator_bits = */ (void) fz_read_bits(ctx, stream, 16);
3297
3298
    /* Item 1: Page object numbers */
3299
0
    doc->hint_page[0].number = doc->linear_page1_obj_num;
3300
    /* We don't care about the number of objects in the first page */
3301
0
    (void)fz_read_bits(ctx, stream, page_obj_num_bits);
3302
0
    j = 1;
3303
0
    for (i = 1; i < doc->linear_page_count; i++)
3304
0
    {
3305
0
      int delta_page_objs = fz_read_bits(ctx, stream, page_obj_num_bits);
3306
3307
0
      doc->hint_page[i].number = j;
3308
0
      j += least_num_page_objs + delta_page_objs;
3309
0
    }
3310
0
    doc->hint_page[i].number = j; /* Not a real page object */
3311
0
    fz_sync_bits(ctx, stream);
3312
    /* Item 2: Page lengths */
3313
0
    j = doc->hint_page[0].offset;
3314
0
    for (i = 0; i < doc->linear_page_count; i++)
3315
0
    {
3316
0
      int delta_page_len = fz_read_bits(ctx, stream, page_len_num_bits);
3317
0
      int old = j;
3318
3319
0
      doc->hint_page[i].offset = j;
3320
0
      j += least_page_len + delta_page_len;
3321
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3322
0
        j += doc->hint_object_length;
3323
0
    }
3324
0
    doc->hint_page[i].offset = j;
3325
0
    fz_sync_bits(ctx, stream);
3326
    /* Item 3: Shared references */
3327
0
    shared = 0;
3328
0
    for (i = 0; i < doc->linear_page_count; i++)
3329
0
    {
3330
0
      int num_shared_objs = fz_read_bits(ctx, stream, num_shared_obj_num_bits);
3331
0
      doc->hint_page[i].index = shared;
3332
0
      shared += num_shared_objs;
3333
0
    }
3334
0
    doc->hint_page[i].index = shared;
3335
0
    doc->hint_shared_ref = fz_realloc_array(ctx, doc->hint_shared_ref, shared, int);
3336
0
    memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared);
3337
0
    fz_sync_bits(ctx, stream);
3338
    /* Item 4: Shared references */
3339
0
    for (i = 0; i < shared; i++)
3340
0
    {
3341
0
      int ref = fz_read_bits(ctx, stream, shared_obj_num_bits);
3342
0
      doc->hint_shared_ref[i] = ref;
3343
0
    }
3344
    /* Skip items 5,6,7 as we don't use them */
3345
3346
0
    fz_seek(ctx, stream, doc->bias + shared_hint_offset, SEEK_SET);
3347
3348
    /* Read the shared object hints table: Header first */
3349
0
    shared_obj_num = fz_read_bits(ctx, stream, 32);
3350
0
    shared_obj_offset = fz_read_bits(ctx, stream, 32);
3351
0
    if (shared_obj_offset > doc->hint_object_offset)
3352
0
      shared_obj_offset += doc->hint_object_length;
3353
0
    shared_obj_count_page1 = fz_read_bits(ctx, stream, 32);
3354
0
    shared_obj_count_total = fz_read_bits(ctx, stream, 32);
3355
0
    shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3356
0
    least_shared_group_len = fz_read_bits(ctx, stream, 32);
3357
0
    shared_group_len_num_bits = fz_read_bits(ctx, stream, 16);
3358
3359
    /* Sanity check the references in Item 4 above to ensure we
3360
     * don't access out of range with malicious files. */
3361
0
    for (i = 0; i < shared; i++)
3362
0
    {
3363
0
      if (doc->hint_shared_ref[i] >= shared_obj_count_total)
3364
0
      {
3365
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "malformed hint stream (shared refs)");
3366
0
      }
3367
0
    }
3368
3369
0
    doc->hint_shared = fz_realloc_array(ctx, doc->hint_shared, shared_obj_count_total+1, pdf_hint_shared);
3370
0
    memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1));
3371
3372
    /* Item 1: Shared references */
3373
0
    j = doc->hint_page[0].offset;
3374
0
    for (i = 0; i < shared_obj_count_page1; i++)
3375
0
    {
3376
0
      int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
3377
0
      int old = j;
3378
0
      doc->hint_shared[i].offset = j;
3379
0
      j += off + least_shared_group_len;
3380
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3381
0
        j += doc->hint_object_length;
3382
0
    }
3383
    /* FIXME: We would have problems recreating the length of the
3384
     * last page 1 shared reference group. But we'll never need
3385
     * to, so ignore it. */
3386
0
    j = shared_obj_offset;
3387
0
    for (; i < shared_obj_count_total; i++)
3388
0
    {
3389
0
      int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
3390
0
      int old = j;
3391
0
      doc->hint_shared[i].offset = j;
3392
0
      j += off + least_shared_group_len;
3393
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3394
0
        j += doc->hint_object_length;
3395
0
    }
3396
0
    doc->hint_shared[i].offset = j;
3397
0
    fz_sync_bits(ctx, stream);
3398
    /* Item 2: Signature flags: read these just so we can skip */
3399
0
    for (i = 0; i < shared_obj_count_total; i++)
3400
0
    {
3401
0
      doc->hint_shared[i].number = fz_read_bits(ctx, stream, 1);
3402
0
    }
3403
0
    fz_sync_bits(ctx, stream);
3404
    /* Item 3: Signatures: just skip */
3405
0
    for (i = 0; i < shared_obj_count_total; i++)
3406
0
    {
3407
0
      if (doc->hint_shared[i].number)
3408
0
      {
3409
0
        (void) fz_read_bits(ctx, stream, 128);
3410
0
      }
3411
0
    }
3412
0
    fz_sync_bits(ctx, stream);
3413
    /* Item 4: Shared object object numbers */
3414
0
    j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */
3415
0
    for (i = 0; i < shared_obj_count_page1; i++)
3416
0
    {
3417
0
      doc->hint_shared[i].number = j;
3418
0
      j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
3419
0
    }
3420
0
    j = shared_obj_num;
3421
0
    for (; i < shared_obj_count_total; i++)
3422
0
    {
3423
0
      doc->hint_shared[i].number = j;
3424
0
      j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
3425
0
    }
3426
0
    doc->hint_shared[i].number = j;
3427
3428
    /* Now, actually use the data we have gathered. */
3429
0
    for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++)
3430
0
    {
3431
0
      if (doc->hint_shared[i].number >= 0 && doc->hint_shared[i].number < max_object_num)
3432
0
        doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset;
3433
0
    }
3434
0
    for (i = 0; i < doc->linear_page_count; i++)
3435
0
    {
3436
0
      if (doc->hint_page[i].number >= 0 && doc->hint_page[i].number < max_object_num)
3437
0
        doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset;
3438
0
    }
3439
0
  }
3440
0
  fz_always(ctx)
3441
0
  {
3442
0
    fz_drop_stream(ctx, stream);
3443
0
  }
3444
0
  fz_catch(ctx)
3445
0
  {
3446
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
3447
    /* Don't try to load hints again */
3448
0
    doc->hints_loaded = 1;
3449
    /* We won't use the linearized object anymore. */
3450
0
    doc->file_reading_linearly = 0;
3451
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
3452
    /* Any other error becomes a TRYLATER */
3453
0
    fz_report_error(ctx);
3454
0
    fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
3455
0
  }
3456
0
  doc->hints_loaded = 1;
3457
0
}
3458
3459
static void
3460
pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
3461
0
{
3462
0
  pdf_lexbuf *buf = &doc->lexbuf.base;
3463
0
  int64_t curr_pos;
3464
3465
0
  curr_pos = fz_tell(ctx, doc->file);
3466
0
  fz_seek(ctx, doc->file, doc->bias + doc->hint_object_offset, SEEK_SET);
3467
0
  fz_try(ctx)
3468
0
  {
3469
0
    while (1)
3470
0
    {
3471
0
      pdf_obj *page = NULL;
3472
0
      int num, tok;
3473
3474
0
      tok = pdf_lex(ctx, doc->file, buf);
3475
0
      if (tok != PDF_TOK_INT)
3476
0
        break;
3477
0
      num = buf->i;
3478
0
      tok = pdf_lex(ctx, doc->file, buf);
3479
0
      if (tok != PDF_TOK_INT)
3480
0
        break;
3481
      /* Ignore gen = buf->i */
3482
0
      tok = pdf_lex(ctx, doc->file, buf);
3483
0
      if (tok != PDF_TOK_OBJ)
3484
0
        break;
3485
0
      (void)pdf_repair_obj(ctx, doc, buf, NULL, NULL, NULL, NULL, &page, NULL, NULL);
3486
0
      pdf_load_hints(ctx, doc, num);
3487
0
    }
3488
0
  }
3489
0
  fz_always(ctx)
3490
0
  {
3491
0
    fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
3492
0
  }
3493
0
  fz_catch(ctx)
3494
0
  {
3495
0
    fz_rethrow(ctx);
3496
0
  }
3497
0
}
3498
3499
/*
	Try to advance progressive (linearized) loading far enough to
	provide page 'pagenum'.

	Returns doc->linear_page_refs[pagenum] (a borrowed pointer).
	Throws FZ_ERROR_ARGUMENT if pagenum is out of range. A
	FZ_ERROR_TRYLATER raised while reading is rethrown only if the
	requested page is still unavailable; otherwise it is reported
	and swallowed. All other errors are rethrown. The file position
	is restored before returning or throwing from the read loop.
*/
pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum)
{
	/* fz_tell's result is kept in an int64_t (as pdf_load_hint_object
	 * does); an int would truncate offsets in files over 2GiB. */
	int64_t curr_pos;
	pdf_obj *page = NULL;

	/* Validate pagenum before anything gets a chance to use it. */
	if (pagenum < 0 || pagenum >= doc->linear_page_count)
		fz_throw(ctx, FZ_ERROR_ARGUMENT, "page load out of range (%d of %d)", pagenum, doc->linear_page_count);

	pdf_load_hinted_page(ctx, doc, pagenum);

	/* Already read the whole file: nothing more to advance through. */
	if (doc->linear_pos == doc->file_length)
		return doc->linear_page_refs[pagenum];

	/* Only load hints once, and then only after we have got page 0 */
	if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset)
	{
		/* Found hint object */
		pdf_load_hint_object(ctx, doc);
	}

	DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
	curr_pos = fz_tell(ctx, doc->file);

	fz_var(page);

	fz_try(ctx)
	{
		pdf_obj *catalog;
		pdf_obj *pages;
		int eof;

		/* Read objects one at a time until the reader reports EOF. */
		do
		{
			int num;
			eof = pdf_obj_read(ctx, doc, &doc->linear_pos, &num, &page);
			pdf_drop_obj(ctx, page);
			page = NULL;
		}
		while (!eof);

		/* Everything has been read: load the xref and check that a
		 * page tree is present. */
		doc->linear_pos = doc->file_length;
		pdf_load_xref(ctx, doc);
		catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
		pages = pdf_dict_get(ctx, catalog, PDF_NAME(Pages));

		if (!pdf_is_dict(ctx, pages))
			fz_throw(ctx, FZ_ERROR_FORMAT, "missing page tree");
	}
	fz_always(ctx)
	{
		fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, page);
		if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
		{
			if (doc->linear_page_refs[pagenum] == NULL)
			{
				/* Still not got a page */
				fz_rethrow(ctx);
			}
			// TODO: should we really swallow this error?
			fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
			fz_report_error(ctx);
		}
		else
			fz_rethrow(ctx);
	}

	return doc->linear_page_refs[pagenum];
}
3573
3574
/*
	Ask a generic document for its PDF form through the as_pdf hook
	and return a new reference to it. Returns NULL for a NULL
	document or one without an as_pdf hook.
*/
pdf_document *fz_new_pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr)
{
	if (ptr != NULL && ptr->as_pdf != NULL)
		return (pdf_document *)fz_keep_document(ctx, ptr->as_pdf(ctx, ptr));

	return NULL;
}
3580
3581
/*
	Downcast a generic document to a pdf_document. The document is
	recognised by its count_pages hook being pdf_count_pages_imp.
	Returns NULL for NULL or for any other kind of document; no new
	reference is taken.
*/
pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr)
{
	if (ptr == NULL || ptr->count_pages != pdf_count_pages_imp)
		return NULL;

	return (pdf_document *)ptr;
}
3585
3586
/*
	Downcast a generic page to a pdf_page. Returns NULL if page is
	NULL or if its document is not a PDF document (as judged by
	pdf_document_from_fz_document). No new reference is taken.
*/
pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *page)
{
	/* Accept NULL, as the document converters do, rather than
	 * dereferencing it. */
	if (page == NULL)
		return NULL;
	if (pdf_document_from_fz_document(ctx, page->doc))
		return (pdf_page*) page;
	return NULL;
}
3592
3593
/* Same conversion as pdf_document_from_fz_document, under another name. */
pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc)
{
	pdf_document *pdf = pdf_document_from_fz_document(ctx, doc);
	return pdf;
}
3597
3598
pdf_obj *
3599
pdf_add_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
3600
0
{
3601
0
  pdf_document *orig_doc;
3602
0
  int num;
3603
3604
0
  orig_doc = pdf_get_bound_document(ctx, obj);
3605
0
  if (orig_doc && orig_doc != doc)
3606
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "tried to add an object belonging to a different document");
3607
0
  if (pdf_is_indirect(ctx, obj))
3608
0
    return pdf_keep_obj(ctx, obj);
3609
0
  num = pdf_create_object(ctx, doc);
3610
0
  pdf_update_object(ctx, doc, num, obj);
3611
0
  return pdf_new_indirect(ctx, doc, num, 0);
3612
0
}
3613
3614
pdf_obj *
3615
pdf_add_object_drop(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
3616
0
{
3617
0
  pdf_obj *ind = NULL;
3618
0
  fz_try(ctx)
3619
0
    ind = pdf_add_object(ctx, doc, obj);
3620
0
  fz_always(ctx)
3621
0
    pdf_drop_obj(ctx, obj);
3622
0
  fz_catch(ctx)
3623
0
    fz_rethrow(ctx);
3624
0
  return ind;
3625
0
}
3626
3627
pdf_obj *
3628
pdf_add_new_dict(fz_context *ctx, pdf_document *doc, int initial)
3629
0
{
3630
0
  return pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, initial));
3631
0
}
3632
3633
pdf_obj *
3634
pdf_add_new_array(fz_context *ctx, pdf_document *doc, int initial)
3635
0
{
3636
0
  return pdf_add_object_drop(ctx, doc, pdf_new_array(ctx, doc, initial));
3637
0
}
3638
3639
pdf_obj *
3640
pdf_add_stream(fz_context *ctx, pdf_document *doc, fz_buffer *buf, pdf_obj *obj, int compressed)
3641
0
{
3642
0
  pdf_obj *ind;
3643
0
  if (!obj)
3644
0
    ind = pdf_add_new_dict(ctx, doc, 4);
3645
0
  else
3646
0
    ind = pdf_add_object(ctx, doc, obj);
3647
0
  fz_try(ctx)
3648
0
    pdf_update_stream(ctx, doc, ind, buf, compressed);
3649
0
  fz_catch(ctx)
3650
0
  {
3651
0
    pdf_drop_obj(ctx, ind);
3652
0
    fz_rethrow(ctx);
3653
0
  }
3654
0
  return ind;
3655
0
}
3656
3657
/*
	Create a new, empty PDF document (no backing file).

	The document gets a catalog, an empty page tree and an Info
	dictionary naming MuPDF as the Producer. On failure everything
	built so far is released and the error rethrown.
*/
pdf_document *pdf_create_document(fz_context *ctx)
{
	pdf_document *doc;
	pdf_obj *root;
	pdf_obj *pages;
	pdf_obj *trailer = NULL;
	pdf_obj *info;

	fz_var(trailer);

	doc = pdf_new_document(ctx, NULL);
	fz_try(ctx)
	{
		doc->file_size = 0;
		doc->startxref = 0;
		doc->num_xref_sections = 0;
		doc->num_incremental_sections = 0;
		doc->xref_base = 0;
		doc->disallow_new_increments = 0;
		pdf_get_populating_xref_entry(ctx, doc, 0);

		/* Three indirect objects are created below (catalog, page
		 * tree root, Info), so with object 0 the Size is 4. */
		trailer = pdf_new_dict(ctx, doc, 3);
		pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), 4);
		pdf_dict_put_drop(ctx, trailer, PDF_NAME(Root), root = pdf_add_new_dict(ctx, doc, 2));
		pdf_dict_put(ctx, root, PDF_NAME(Type), PDF_NAME(Catalog));
		pdf_dict_put_drop(ctx, root, PDF_NAME(Pages), pages = pdf_add_new_dict(ctx, doc, 3));
		pdf_dict_put(ctx, pages, PDF_NAME(Type), PDF_NAME(Pages));
		pdf_dict_put_int(ctx, pages, PDF_NAME(Count), 0);
		pdf_dict_put_array(ctx, pages, PDF_NAME(Kids), 1);

		/* The Info dictionary hangs off the trailer (as an indirect
		 * object), which is where pdf_lookup_metadata and
		 * pdf_set_metadata look for it. It used to be put in the
		 * catalog, where nothing would ever find it. */
		pdf_dict_put_drop(ctx, trailer, PDF_NAME(Info), info = pdf_add_new_dict(ctx, doc, 1));
		pdf_dict_put_text_string(ctx, info, PDF_NAME(Producer), "MuPDF " FZ_VERSION);

		/* Set the trailer of the final xref section. */
		doc->xref_sections[0].trailer = trailer;

		doc->checked = 1;
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, trailer);
		fz_drop_document(ctx, &doc->super);
		fz_rethrow(ctx);
	}
	return doc;
}
3703
3704
/* NULL-terminated list of file name extensions, referenced from pdf_document_handler. */
static const char *pdf_extensions[] =
{
	"pdf",
	"fdf",
	"pclm",
	"ai",
	NULL
};
3712
3713
/* NULL-terminated list of MIME types, referenced from pdf_document_handler. */
static const char *pdf_mimetypes[] =
{
	"application/pdf",
	"application/PCLm",
	NULL
};
3719
3720
static int
3721
pdf_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
3722
30
{
3723
30
  const char *match = "%PDF-";
3724
30
  const char *match2 = "%FDF-";
3725
30
  int pos = 0;
3726
30
  int n = 4096+5;
3727
30
  int c;
3728
3729
30
  if (state)
3730
30
    *state = NULL;
3731
30
  if (free_state)
3732
30
    *free_state = NULL;
3733
3734
30
  if (stream == NULL)
3735
0
    return 0;
3736
3737
30
  do
3738
77.9k
  {
3739
77.9k
    c = fz_read_byte(ctx, stream);
3740
77.9k
    if (c == EOF)
3741
0
      return 0;
3742
77.9k
    if (c == match[pos] || c == match2[pos])
3743
258
    {
3744
258
      pos++;
3745
258
      if (pos == 5)
3746
11
        return 100;
3747
258
    }
3748
77.7k
    else
3749
77.7k
    {
3750
      /* Restart matching, but recheck c against the start. */
3751
77.7k
      pos = (c == match[0]);
3752
77.7k
    }
3753
77.9k
  }
3754
77.9k
  while (--n > 0);
3755
3756
19
  return 0;
3757
30
}
3758
3759
static fz_document *
3760
open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip, void *state)
3761
16
{
3762
16
  if (file == NULL)
3763
0
    return NULL;
3764
16
  return (fz_document *)pdf_open_document_with_stream(ctx, file);
3765
16
}
3766
3767
fz_document_handler pdf_document_handler =
3768
{
3769
  NULL,
3770
  open_document,
3771
  pdf_extensions,
3772
  pdf_mimetypes,
3773
  pdf_recognize_doc_content
3774
};
3775
3776
/* Set the 'marked' flag on every xref entry, in every section and
 * subsection, that currently holds an object. */
void pdf_mark_xref(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		while (sub != NULL)
		{
			int idx;

			for (idx = 0; idx < sub->len; idx++)
				if (sub->table[idx].obj)
					sub->table[idx].marked = 1;

			sub = sub->next;
		}
	}
}
3798
3799
/* Walk every xref entry and release the object it holds when
 * pdf_obj_refs reports exactly one reference to it. Entries with an
 * updated stream buffer are left alone. */
void pdf_clear_xref(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		while (sub != NULL)
		{
			int idx;

			for (idx = 0; idx < sub->len; idx++)
			{
				pdf_xref_entry *entry = &sub->table[idx];

				/* We cannot drop objects if the stream
				 * buffer has been updated. */
				if (entry->obj == NULL || entry->stm_buf != NULL)
					continue;
				if (pdf_obj_refs(ctx, entry->obj) != 1)
					continue;

				pdf_drop_obj(ctx, entry->obj);
				entry->obj = NULL;
			}

			sub = sub->next;
		}
	}
}
3827
3828
/* Like pdf_clear_xref, but entries whose 'marked' flag is set (see
 * pdf_mark_xref) keep their object. */
void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		while (sub != NULL)
		{
			int idx;

			for (idx = 0; idx < sub->len; idx++)
			{
				pdf_xref_entry *entry = &sub->table[idx];

				/* We cannot drop objects if the stream
				 * buffer has been updated. */
				if (entry->obj == NULL || entry->stm_buf != NULL)
					continue;
				if (entry->marked || pdf_obj_refs(ctx, entry->obj) != 1)
					continue;

				pdf_drop_obj(ctx, entry->obj);
				entry->obj = NULL;
			}

			sub = sub->next;
		}
	}
}
3857
3858
int
3859
pdf_count_versions(fz_context *ctx, pdf_document *doc)
3860
0
{
3861
0
  return doc->num_xref_sections-doc->num_incremental_sections-doc->has_linearization_object;
3862
0
}
3863
3864
int
3865
pdf_count_unsaved_versions(fz_context *ctx, pdf_document *doc)
3866
0
{
3867
0
  return doc->num_incremental_sections;
3868
0
}
3869
3870
int
3871
pdf_doc_was_linearized(fz_context *ctx, pdf_document *doc)
3872
0
{
3873
0
  return doc->has_linearization_object;
3874
0
}
3875
3876
/*
	Return 1 if object number i has an entry with a non-zero type in
	any xref section at or after the current xref_base, 0 otherwise.

	Throws FZ_ERROR_ARGUMENT for a negative object number.
*/
static int pdf_obj_exists(fz_context *ctx, pdf_document *doc, int i)
{
	int sec;

	if (i < 0)
		fz_throw(ctx, FZ_ERROR_ARGUMENT, "Negative object number requested");

	/* NOTE(review): the bound is inclusive; confirm xref_index really
	 * has max_xref_len+1 entries. */
	sec = (i <= doc->max_xref_len) ? doc->xref_index[i] : 0;

	/* We may be accessing an earlier version of the document using
	 * xref_base, and the index may point into a later xref section. */
	if (sec < doc->xref_base)
		sec = doc->xref_base;

	/* Look for the first xref section where the entry is defined. */
	for (; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref *xref = &doc->xref_sections[sec];
		pdf_xref_subsec *sub;

		if (i >= xref->num_objects)
			continue;

		for (sub = xref->subsec; sub != NULL; sub = sub->next)
		{
			if (i >= sub->start && i < sub->start + sub->len &&
				sub->table[i - sub->start].type)
				return 1;
		}
	}

	return 0;
}
3914
3915
/* Bit flags OR-ed into pdf_changes.obj_changes[], one slot per object
 * number. */
enum {
	FIELD_CHANGED = 1 << 0,
	FIELD_CHANGE_VALID = 1 << 1,
	FIELD_CHANGE_INVALID = 1 << 2
};
3920
3921
typedef struct
3922
{
3923
  int num_obj;
3924
  int obj_changes[FZ_FLEXIBLE_ARRAY];
3925
} pdf_changes;
3926
3927
static int
3928
check_unchanged_between(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *nobj, pdf_obj *oobj)
3929
0
{
3930
0
  int marked = 0;
3931
0
  int changed = 0;
3932
3933
  /* Trivially identical => trivially unchanged. */
3934
0
  if (nobj == oobj)
3935
0
    return 0;
3936
3937
  /* Strictly speaking we shouldn't need to call fz_var,
3938
   * but I suspect static analysis tools are not smart
3939
   * enough to figure that out. */
3940
0
  fz_var(marked);
3941
3942
0
  if (pdf_is_indirect(ctx, nobj))
3943
0
  {
3944
0
    int o_xref_base = doc->xref_base;
3945
3946
    /* Both must be indirect if one is. */
3947
0
    if (!pdf_is_indirect(ctx, oobj))
3948
0
    {
3949
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3950
0
      return 1;
3951
0
    }
3952
3953
    /* Handle recursing back into ourselves. */
3954
0
    if (pdf_obj_marked(ctx, nobj))
3955
0
    {
3956
0
      if (pdf_obj_marked(ctx, oobj))
3957
0
        return 0;
3958
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3959
0
      return 1;
3960
0
    }
3961
0
    else if (pdf_obj_marked(ctx, oobj))
3962
0
    {
3963
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3964
0
      return 1;
3965
0
    }
3966
3967
0
    nobj = pdf_resolve_indirect_chain(ctx, nobj);
3968
0
    doc->xref_base = o_xref_base+1;
3969
0
    fz_try(ctx)
3970
0
    {
3971
0
      oobj = pdf_resolve_indirect_chain(ctx, oobj);
3972
0
      if (oobj != nobj)
3973
0
      {
3974
        /* Different objects, so lock them */
3975
0
        if (!pdf_obj_marked(ctx, nobj) && !pdf_obj_marked(ctx, oobj))
3976
0
        {
3977
0
          (void)pdf_mark_obj(ctx, nobj);
3978
0
          (void)pdf_mark_obj(ctx, oobj);
3979
0
          marked = 1;
3980
0
        }
3981
0
      }
3982
0
    }
3983
0
    fz_always(ctx)
3984
0
      doc->xref_base = o_xref_base;
3985
0
    fz_catch(ctx)
3986
0
      fz_rethrow(ctx);
3987
3988
0
    if (nobj == oobj)
3989
0
      return 0; /* Trivially identical */
3990
0
  }
3991
3992
0
  fz_var(changed);
3993
3994
0
  fz_try(ctx)
3995
0
  {
3996
0
    if (pdf_is_dict(ctx, nobj))
3997
0
    {
3998
0
      int i, n = pdf_dict_len(ctx, nobj);
3999
4000
0
      if (!pdf_is_dict(ctx, oobj) || n != pdf_dict_len(ctx, oobj))
4001
0
      {
4002
0
change_found:
4003
0
        changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
4004
0
        changed = 1;
4005
0
        break;
4006
0
      }
4007
4008
0
      for (i = 0; i < n; i++)
4009
0
      {
4010
0
        pdf_obj *key = pdf_dict_get_key(ctx, nobj, i);
4011
0
        pdf_obj *nval = pdf_dict_get(ctx, nobj, key);
4012
0
        pdf_obj *oval = pdf_dict_get(ctx, oobj, key);
4013
4014
0
        changed |= check_unchanged_between(ctx, doc, changes, nval, oval);
4015
0
      }
4016
0
    }
4017
0
    else if (pdf_is_array(ctx, nobj))
4018
0
    {
4019
0
      int i, n = pdf_array_len(ctx, nobj);
4020
4021
0
      if (!pdf_is_array(ctx, oobj) || n != pdf_array_len(ctx, oobj))
4022
0
        goto change_found;
4023
4024
0
      for (i = 0; i < n; i++)
4025
0
      {
4026
0
        pdf_obj *nval = pdf_array_get(ctx, nobj, i);
4027
0
        pdf_obj *oval = pdf_array_get(ctx, oobj, i);
4028
4029
0
        changed |= check_unchanged_between(ctx, doc, changes, nval, oval);
4030
0
      }
4031
0
    }
4032
0
    else if (pdf_objcmp(ctx, nobj, oobj))
4033
0
      goto change_found;
4034
0
  }
4035
0
  fz_always(ctx)
4036
0
  {
4037
0
    if (marked)
4038
0
    {
4039
0
      pdf_unmark_obj(ctx, nobj);
4040
0
      pdf_unmark_obj(ctx, oobj);
4041
0
    }
4042
0
  }
4043
0
  fz_catch(ctx)
4044
0
    fz_rethrow(ctx);
4045
4046
0
  return changed;
4047
0
}
4048
4049
/* Growable array of heap-allocated strings. */
typedef struct
{
	int max; /* slots allocated in list */
	int len; /* slots in use */
	char **list;
} char_list;
4055
4056
/* This structure is used to hold the definition of which fields
4057
 * are locked. */
4058
struct pdf_locked_fields
4059
{
4060
  int p;
4061
  int all;
4062
  char_list includes;
4063
  char_list excludes;
4064
};
4065
4066
static void
4067
free_char_list(fz_context *ctx, char_list *c)
4068
0
{
4069
0
  int i;
4070
4071
0
  if (c == NULL)
4072
0
    return;
4073
4074
0
  for (i = c->len-1; i >= 0; i--)
4075
0
    fz_free(ctx, c->list[i]);
4076
0
  fz_free(ctx, c->list);
4077
0
  c->len = 0;
4078
0
  c->max = 0;
4079
0
}
4080
4081
void
4082
pdf_drop_locked_fields(fz_context *ctx, pdf_locked_fields *fl)
4083
0
{
4084
0
  if (fl == NULL)
4085
0
    return;
4086
4087
0
  free_char_list(ctx, &fl->includes);
4088
0
  free_char_list(ctx, &fl->excludes);
4089
0
  fz_free(ctx, fl);
4090
0
}
4091
4092
static void
4093
char_list_append(fz_context *ctx, char_list *list, const char *s)
4094
0
{
4095
0
  if (list->len == list->max)
4096
0
  {
4097
0
    int n = list->max * 2;
4098
0
    if (n == 0) n = 4;
4099
4100
0
    list->list = fz_realloc_array(ctx, list->list, n, char *);
4101
0
    list->max = n;
4102
0
  }
4103
0
  list->list[list->len] = fz_strdup(ctx, s);
4104
0
  list->len++;
4105
0
}
4106
4107
int
4108
pdf_is_field_locked(fz_context *ctx, pdf_locked_fields *locked, const char *name)
4109
0
{
4110
0
  int i;
4111
4112
0
  if (locked->p == 1)
4113
0
  {
4114
    /* Permissions were set, and say that field changes are not to be allowed. */
4115
0
    return 1; /* Locked */
4116
0
  }
4117
4118
0
  if(locked->all)
4119
0
  {
4120
    /* The only way we might not be unlocked is if
4121
     * we are listed in the excludes. */
4122
0
    for (i = 0; i < locked->excludes.len; i++)
4123
0
      if (!strcmp(locked->excludes.list[i], name))
4124
0
        return 0;
4125
0
    return 1;
4126
0
  }
4127
4128
  /* The only way we can be locked is for us to be in the includes. */
4129
0
  for (i = 0; i < locked->includes.len; i++)
4130
0
    if (strcmp(locked->includes.list[i], name) == 0)
4131
0
      return 1;
4132
4133
  /* Anything else is unlocked */
4134
0
  return 0;
4135
0
}
4136
4137
/* Unfortunately, in C, there is no legal way to define a function
4138
 * type that returns itself. We therefore have to use a struct
4139
 * wrapper. */
4140
typedef struct filter_wrap
4141
{
4142
  struct filter_wrap (*func)(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
4143
} filter_wrap;
4144
4145
typedef struct filter_wrap (*filter_fn)(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
4146
4147
0
#define RETURN_FILTER(f) { filter_wrap rf; rf.func = (f); return rf; }
4148
4149
/* Leaf filter: no key leads to a further filter. */
static filter_wrap filter_simple(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	filter_fn none = NULL;

	RETURN_FILTER(none);
}
4153
4154
/* Filter for the dictionary reached through /TransformParams: the keys
 * listed here continue with filter_simple, every other key ends the
 * walk. (V and P each appear twice in the list; the repeats are kept
 * as found.) */
static filter_wrap filter_transformparams(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	int known =
		pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
		pdf_name_eq(ctx, key, PDF_NAME(P)) ||
		pdf_name_eq(ctx, key, PDF_NAME(V)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Document)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Msg)) ||
		pdf_name_eq(ctx, key, PDF_NAME(V)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Annots)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Form)) ||
		pdf_name_eq(ctx, key, PDF_NAME(FormEx)) ||
		pdf_name_eq(ctx, key, PDF_NAME(EF)) ||
		pdf_name_eq(ctx, key, PDF_NAME(P)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Action)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Fields));

	RETURN_FILTER(known ? &filter_simple : NULL);
}
4172
4173
/* Filter for the entries reached through /Reference: /TransformParams
 * continues with filter_transformparams, a handful of other keys
 * continue with filter_simple, and anything else ends the walk. */
static filter_wrap filter_reference(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	int plain =
		pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
		pdf_name_eq(ctx, key, PDF_NAME(TransformMethod)) ||
		pdf_name_eq(ctx, key, PDF_NAME(DigestMethod)) ||
		pdf_name_eq(ctx, key, PDF_NAME(DigestValue)) ||
		pdf_name_eq(ctx, key, PDF_NAME(DigestLocation));

	if (plain)
		RETURN_FILTER(&filter_simple);
	RETURN_FILTER(pdf_name_eq(ctx, key, PDF_NAME(TransformParams)) ? &filter_transformparams : NULL);
}
4185
4186
/* Filter for the sub-dictionaries selected by filter_prop_build: the
 * keys listed here continue with filter_simple, any other key ends
 * the walk. */
static filter_wrap filter_prop_build_sub(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	int known =
		pdf_name_eq(ctx, key, PDF_NAME(Name)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Date)) ||
		pdf_name_eq(ctx, key, PDF_NAME(R)) ||
		pdf_name_eq(ctx, key, PDF_NAME(PreRelease)) ||
		pdf_name_eq(ctx, key, PDF_NAME(OS)) ||
		pdf_name_eq(ctx, key, PDF_NAME(NonEFontNoWarn)) ||
		pdf_name_eq(ctx, key, PDF_NAME(TrustedMode)) ||
		pdf_name_eq(ctx, key, PDF_NAME(V)) ||
		pdf_name_eq(ctx, key, PDF_NAME(REx)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Preview));

	RETURN_FILTER(known ? &filter_simple : NULL);
}
4201
4202
/* Filter for the dictionary reached through /Prop_Build: Filter,
 * PubSec, App and SigQ continue with filter_prop_build_sub, any other
 * key ends the walk. */
static filter_wrap filter_prop_build(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	int known =
		pdf_name_eq(ctx, key, PDF_NAME(Filter)) ||
		pdf_name_eq(ctx, key, PDF_NAME(PubSec)) ||
		pdf_name_eq(ctx, key, PDF_NAME(App)) ||
		pdf_name_eq(ctx, key, PDF_NAME(SigQ));

	RETURN_FILTER(known ? &filter_prop_build_sub : NULL);
}
4211
4212
/* Filter applied to a field's /V value. A stream value may carry
 * /Length; a signature dictionary may carry the keys listed below,
 * with /Reference and /Prop_Build leading to their own filters. Any
 * other key ends the walk. */
static filter_wrap filter_v(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	int plain;

	/* Text can point to a stream object */
	if (pdf_name_eq(ctx, key, PDF_NAME(Length)) && pdf_is_stream(ctx, dict))
		RETURN_FILTER(&filter_simple);

	/* Sigs point to a dict. */
	plain =
		pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Filter)) ||
		pdf_name_eq(ctx, key, PDF_NAME(SubFilter)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Contents)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Cert)) ||
		pdf_name_eq(ctx, key, PDF_NAME(ByteRange)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Changes)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Name)) ||
		pdf_name_eq(ctx, key, PDF_NAME(M)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Location)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Reason)) ||
		pdf_name_eq(ctx, key, PDF_NAME(ContactInfo)) ||
		pdf_name_eq(ctx, key, PDF_NAME(R)) ||
		pdf_name_eq(ctx, key, PDF_NAME(V)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Prop_AuthTime)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Prop_AuthType));
	if (plain)
		RETURN_FILTER(&filter_simple);

	if (pdf_name_eq(ctx, key, PDF_NAME(Reference)))
		RETURN_FILTER(&filter_reference);

	RETURN_FILTER(pdf_name_eq(ctx, key, PDF_NAME(Prop_Build)) ? &filter_prop_build : NULL);
}
4241
4242
static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
4243
4244
/* Filter for an /XObject resource dictionary: every entry, whatever
 * its key, continues with filter_appearance. */
static filter_wrap filter_xobject_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	/* FIXME: Infinite recursion possible here? */
	filter_fn next = &filter_appearance;

	RETURN_FILTER(next);
}
4249
4250
/* Filter for a single font dictionary: no key leads any further. */
static filter_wrap filter_font(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	/* In the example I've seen the /Name field was dropped, so we'll
	 * allow local changes, but none that follow an indirection. */
	filter_fn none = NULL;

	RETURN_FILTER(none);
}
4256
4257
/* FIXME: One idea here is to make filter_font_list and filter_xobject_list
4258
 * only accept NEW objects as changes. Will think about this. */
4259
static filter_wrap filter_font_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
4260
0
{
4261
0
  RETURN_FILTER(&filter_font);
4262
0
}
4263
4264
/* Filter for a /Resources dictionary: /XObject and /Font lead to their
 * list filters, any other key ends the walk. */
static filter_wrap filter_resources(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	if (pdf_name_eq(ctx, key, PDF_NAME(XObject)))
		RETURN_FILTER(&filter_xobject_list);
	RETURN_FILTER(pdf_name_eq(ctx, key, PDF_NAME(Font)) ? &filter_font_list : NULL);
}
4272
4273
/* Filter for an appearance object: only /Resources is followed (with
 * filter_resources). */
static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	RETURN_FILTER(pdf_name_eq(ctx, key, PDF_NAME(Resources)) ? &filter_resources : NULL);
}
4279
4280
/* Filter for an /AP dictionary: the /N entry is followed with
 * filter_appearance, but only when its value is a stream. */
static filter_wrap filter_ap(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	/* Just the /N entry for now. May need to add more later. */
	if (!pdf_name_eq(ctx, key, PDF_NAME(N)))
		RETURN_FILTER(NULL);
	RETURN_FILTER(pdf_is_stream(ctx, pdf_dict_get(ctx, dict, key)) ? &filter_appearance : NULL);
}
4287
4288
/* Filter for XFA data: when the containing object is a stream, every
 * key continues with filter_simple; otherwise nothing is followed. */
static filter_wrap filter_xfa(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	/* Text can point to a stream object */
	RETURN_FILTER(pdf_is_stream(ctx, dict) ? &filter_simple : NULL);
}
4295
4296
static void
4297
filter_changes_accepted(fz_context *ctx, pdf_changes *changes, pdf_obj *obj, filter_fn filter)
4298
0
{
4299
0
  int obj_num;
4300
4301
0
  if (obj == NULL || pdf_obj_marked(ctx, obj))
4302
0
    return;
4303
4304
0
  obj_num = pdf_to_num(ctx, obj);
4305
4306
0
  fz_try(ctx)
4307
0
  {
4308
0
    if (obj_num != 0)
4309
0
    {
4310
0
      (void)pdf_mark_obj(ctx, obj);
4311
0
      changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4312
0
    }
4313
0
    if (filter == NULL)
4314
0
      break;
4315
0
    if (pdf_is_dict(ctx, obj))
4316
0
    {
4317
0
      int i, n = pdf_dict_len(ctx, obj);
4318
4319
0
      for (i = 0; i < n; i++)
4320
0
      {
4321
0
        pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
4322
0
        pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
4323
0
        filter_fn f = (filter(ctx, obj, key)).func;
4324
0
        if (f != NULL)
4325
0
          filter_changes_accepted(ctx, changes, val, f);
4326
0
      }
4327
0
    }
4328
0
    else if (pdf_is_array(ctx, obj))
4329
0
    {
4330
0
      int i, n = pdf_array_len(ctx, obj);
4331
4332
0
      for (i = 0; i < n; i++)
4333
0
      {
4334
0
        pdf_obj *val = pdf_array_get(ctx, obj, i);
4335
0
        filter_changes_accepted(ctx, changes, val, filter);
4336
0
      }
4337
0
    }
4338
0
  }
4339
0
  fz_always(ctx)
4340
0
    if (obj_num != 0)
4341
0
      pdf_unmark_obj(ctx, obj);
4342
0
  fz_catch(ctx)
4343
0
    fz_rethrow(ctx);
4344
0
}
4345
4346
static void
4347
check_field(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *obj, pdf_locked_fields *locked, const char *name_prefix, pdf_obj *new_v, pdf_obj *old_v)
4348
0
{
4349
0
  pdf_obj *old_obj, *new_obj, *n_v, *o_v;
4350
0
  int o_xref_base;
4351
0
  int obj_num;
4352
0
  char *field_name = NULL;
4353
4354
  /* All fields MUST be indirections, either in the Fields array
4355
   * or AcroForms, or in the Kids array of other Fields. */
4356
0
  if (!pdf_is_indirect(ctx, obj))
4357
0
    return;
4358
4359
0
  obj_num = pdf_to_num(ctx, obj);
4360
0
  o_xref_base = doc->xref_base;
4361
0
  new_obj = pdf_resolve_indirect_chain(ctx, obj);
4362
4363
  /* Similarly, all fields must be dicts */
4364
0
  if (!pdf_is_dict(ctx, new_obj))
4365
0
    return;
4366
4367
0
  if (pdf_obj_marked(ctx, obj))
4368
0
    return;
4369
4370
0
  fz_var(field_name);
4371
4372
0
  fz_try(ctx)
4373
0
  {
4374
0
    int i, len;
4375
0
    const char *name;
4376
0
    size_t n;
4377
0
    pdf_obj *t;
4378
0
    int is_locked;
4379
4380
0
    (void)pdf_mark_obj(ctx, obj);
4381
4382
    /* Do this within the try, so we can catch any problems */
4383
0
    doc->xref_base = o_xref_base+1;
4384
0
    old_obj = pdf_resolve_indirect_chain(ctx, obj);
4385
4386
0
    t = pdf_dict_get(ctx, old_obj, PDF_NAME(T));
4387
0
    if (t != NULL)
4388
0
    {
4389
0
      name = pdf_dict_get_text_string(ctx, old_obj, PDF_NAME(T));
4390
0
      n = strlen(name)+1;
4391
0
      if (*name_prefix)
4392
0
        n += 1 + strlen(name_prefix);
4393
0
      field_name = fz_malloc(ctx, n);
4394
0
      if (*name_prefix)
4395
0
      {
4396
0
        strcpy(field_name, name_prefix);
4397
0
        strcat(field_name, ".");
4398
0
      }
4399
0
      else
4400
0
        *field_name = 0;
4401
0
      strcat(field_name, name);
4402
0
      name_prefix = field_name;
4403
0
    }
4404
4405
0
    doc->xref_base = o_xref_base;
4406
4407
0
    if (!pdf_is_dict(ctx, old_obj))
4408
0
      break;
4409
4410
    /* Check V explicitly, allowing for it being inherited. */
4411
0
    n_v = pdf_dict_get(ctx, new_obj, PDF_NAME(V));
4412
0
    if (n_v == NULL)
4413
0
      n_v = new_v;
4414
0
    o_v = pdf_dict_get(ctx, old_obj, PDF_NAME(V));
4415
0
    if (o_v == NULL)
4416
0
      o_v = old_v;
4417
4418
0
    is_locked = pdf_is_field_locked(ctx, locked, name_prefix);
4419
0
    if (pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Type)), PDF_NAME(Annot)) &&
4420
0
      pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Subtype)), PDF_NAME(Widget)))
4421
0
    {
4422
0
      if (is_locked)
4423
0
      {
4424
        /* If locked, V must not change! */
4425
0
        if (check_unchanged_between(ctx, doc, changes, n_v, o_v))
4426
0
          changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID;
4427
0
      }
4428
0
      else
4429
0
      {
4430
        /* If not locked, V can change to be filled in! */
4431
0
        filter_changes_accepted(ctx, changes, n_v, &filter_v);
4432
0
        changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4433
0
      }
4434
0
    }
4435
4436
    /* Check all the fields in the new object are
4437
     * either the same as the old object, or are
4438
     * expected changes. */
4439
0
    len = pdf_dict_len(ctx, new_obj);
4440
0
    for (i = 0; i < len; i++)
4441
0
    {
4442
0
      pdf_obj *key = pdf_dict_get_key(ctx, new_obj, i);
4443
0
      pdf_obj *nval = pdf_dict_get(ctx, new_obj, key);
4444
0
      pdf_obj *oval = pdf_dict_get(ctx, old_obj, key);
4445
4446
      /* Kids arrays shouldn't change. */
4447
0
      if (pdf_name_eq(ctx, key, PDF_NAME(Kids)))
4448
0
      {
4449
0
        int j, m;
4450
4451
        /* Kids must be an array. If it's not, count it as a difference. */
4452
0
        if (!pdf_is_array(ctx, nval) || !pdf_is_array(ctx, oval))
4453
0
        {
4454
0
change_found:
4455
0
          changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID;
4456
0
          break;
4457
0
        }
4458
0
        m = pdf_array_len(ctx, nval);
4459
        /* Any change in length counts as a difference */
4460
0
        if (m != pdf_array_len(ctx, oval))
4461
0
          goto change_found;
4462
0
        for (j = 0; j < m; j++)
4463
0
        {
4464
0
          pdf_obj *nkid = pdf_array_get(ctx, nval, j);
4465
0
          pdf_obj *okid = pdf_array_get(ctx, oval, j);
4466
          /* Kids arrays are supposed to all be indirect. If they aren't,
4467
           * count it as a difference. */
4468
0
          if (!pdf_is_indirect(ctx, nkid) || !pdf_is_indirect(ctx, okid))
4469
0
            goto change_found;
4470
          /* For now at least, we'll count any change in number as a difference. */
4471
0
          if (pdf_to_num(ctx, nkid) != pdf_to_num(ctx, okid))
4472
0
            goto change_found;
4473
0
          check_field(ctx, doc, changes, nkid, locked, name_prefix, n_v, o_v);
4474
0
        }
4475
0
      }
4476
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(V)))
4477
0
      {
4478
        /* V is checked above */
4479
0
      }
4480
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(AP)))
4481
0
      {
4482
        /* If we're locked, then nothing can change. If not,
4483
         * we can change to be filled in. */
4484
0
        if (is_locked)
4485
0
          check_unchanged_between(ctx, doc, changes, nval, oval);
4486
0
        else
4487
0
          filter_changes_accepted(ctx, changes, nval, &filter_ap);
4488
0
      }
4489
      /* All other fields can't change */
4490
0
      else
4491
0
        check_unchanged_between(ctx, doc, changes, nval, oval);
4492
0
    }
4493
4494
    /* Now check all the fields in the old object to
4495
     * make sure none were dropped. */
4496
0
    len = pdf_dict_len(ctx, old_obj);
4497
0
    for (i = 0; i < len; i++)
4498
0
    {
4499
0
      pdf_obj *key = pdf_dict_get_key(ctx, old_obj, i);
4500
0
      pdf_obj *nval, *oval;
4501
4502
      /* V is checked above */
4503
0
      if (pdf_name_eq(ctx, key, PDF_NAME(V)))
4504
0
        continue;
4505
4506
0
      nval = pdf_dict_get(ctx, new_obj, key);
4507
0
      oval = pdf_dict_get(ctx, old_obj, key);
4508
4509
0
      if (nval == NULL && oval != NULL)
4510
0
        changes->obj_changes[pdf_to_num(ctx, nval)] |= FIELD_CHANGE_INVALID;
4511
0
    }
4512
0
    changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4513
4514
0
  }
4515
0
  fz_always(ctx)
4516
0
  {
4517
0
    pdf_unmark_obj(ctx, obj);
4518
0
    fz_free(ctx, field_name);
4519
0
    doc->xref_base = o_xref_base;
4520
0
  }
4521
0
  fz_catch(ctx)
4522
0
    fz_rethrow(ctx);
4523
0
}
4524
4525
static int
4526
pdf_obj_changed_in_version(fz_context *ctx, pdf_document *doc, int num, int version)
4527
0
{
4528
0
  if (num < 0 || num > doc->max_xref_len)
4529
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Invalid object number requested");
4530
4531
0
  return version == doc->xref_index[num];
4532
0
}
4533
4534
static void
4535
merge_lock_specification(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *lock)
4536
0
{
4537
0
  pdf_obj *action;
4538
0
  int i, r, w;
4539
4540
0
  if (lock == NULL)
4541
0
    return;
4542
4543
0
  action = pdf_dict_get(ctx, lock, PDF_NAME(Action));
4544
4545
0
  if (pdf_name_eq(ctx, action, PDF_NAME(All)))
4546
0
  {
4547
    /* All fields locked means we don't need any stored
4548
     * includes/excludes. */
4549
0
    fields->all = 1;
4550
0
    free_char_list(ctx, &fields->includes);
4551
0
    free_char_list(ctx, &fields->excludes);
4552
0
  }
4553
0
  else
4554
0
  {
4555
0
    pdf_obj *f = pdf_dict_get(ctx, lock, PDF_NAME(Fields));
4556
0
    int len = pdf_array_len(ctx, f);
4557
4558
0
    if (pdf_name_eq(ctx, action, PDF_NAME(Include)))
4559
0
    {
4560
0
      if (fields->all)
4561
0
      {
4562
        /* Current state = "All except <excludes> are locked".
4563
         * We need to remove <Fields> from <excludes>. */
4564
0
        for (i = 0; i < len; i++)
4565
0
        {
4566
0
          const char *s = pdf_array_get_text_string(ctx, f, i);
4567
4568
0
          for (r = w = 0; r < fields->excludes.len; r++)
4569
0
          {
4570
0
            if (strcmp(s, fields->excludes.list[r]))
4571
0
              fields->excludes.list[w++] = fields->excludes.list[r];
4572
0
          }
4573
0
          fields->excludes.len = w;
4574
0
        }
4575
0
      }
4576
0
      else
4577
0
      {
4578
        /* Current state = <includes> are locked.
4579
         * We need to add <Fields> to <include> (avoiding repetition). */
4580
0
        for (i = 0; i < len; i++)
4581
0
        {
4582
0
          const char *s = pdf_array_get_text_string(ctx, f, i);
4583
4584
0
          for (r = 0; r < fields->includes.len; r++)
4585
0
          {
4586
0
            if (!strcmp(s, fields->includes.list[r]))
4587
0
              break;
4588
0
          }
4589
0
          if (r == fields->includes.len)
4590
0
            char_list_append(ctx, &fields->includes, s);
4591
0
        }
4592
0
      }
4593
0
    }
4594
0
    else if (pdf_name_eq(ctx, action, PDF_NAME(Exclude)))
4595
0
    {
4596
0
      if (fields->all)
4597
0
      {
4598
        /* Current state = "All except <excludes> are locked.
4599
         * We need to remove anything from <excludes> that isn't in <Fields>. */
4600
0
        for (r = w = 0; r < fields->excludes.len; r++)
4601
0
        {
4602
0
          for (i = 0; i < len; i++)
4603
0
          {
4604
0
            const char *s = pdf_array_get_text_string(ctx, f, i);
4605
0
            if (!strcmp(s, fields->excludes.list[r]))
4606
0
              break;
4607
0
          }
4608
0
          if (i != len) /* we found a match */
4609
0
            fields->excludes.list[w++] = fields->excludes.list[r];
4610
0
        }
4611
0
        fields->excludes.len = w;
4612
0
      }
4613
0
      else
4614
0
      {
4615
        /* Current state = <includes> are locked.
4616
         * Set all. <excludes> becomes <Fields> less <includes>. Remove <includes>. */
4617
0
        fields->all = 1;
4618
0
        for (i = 0; i < len; i++)
4619
0
        {
4620
0
          const char *s = pdf_array_get_text_string(ctx, f, i);
4621
0
          for (r = 0; r < fields->includes.len; r++)
4622
0
          {
4623
0
            if (!strcmp(s, fields->includes.list[r]))
4624
0
              break;
4625
0
          }
4626
0
          if (r == fields->includes.len)
4627
0
            char_list_append(ctx, &fields->excludes, s);
4628
0
        }
4629
0
        free_char_list(ctx, &fields->includes);
4630
0
      }
4631
0
    }
4632
0
  }
4633
0
}
4634
4635
static void
4636
find_locked_fields_value(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *v)
4637
0
{
4638
0
  pdf_obj *ref = pdf_dict_get(ctx, v, PDF_NAME(Reference));
4639
0
  int i, n;
4640
4641
0
  if (!ref)
4642
0
    return;
4643
4644
0
  n = pdf_array_len(ctx, ref);
4645
0
  for (i = 0; i < n; i++)
4646
0
  {
4647
0
    pdf_obj *sr = pdf_array_get(ctx, ref, i);
4648
0
    pdf_obj *tm, *tp, *type;
4649
4650
    /* Type is optional, but if it exists, it'd better be SigRef. */
4651
0
    type = pdf_dict_get(ctx, sr, PDF_NAME(Type));
4652
0
    if (type != NULL && !pdf_name_eq(ctx, type, PDF_NAME(SigRef)))
4653
0
      continue;
4654
0
    tm = pdf_dict_get(ctx, sr, PDF_NAME(TransformMethod));
4655
0
    tp = pdf_dict_get(ctx, sr, PDF_NAME(TransformParams));
4656
0
    if (pdf_name_eq(ctx, tm, PDF_NAME(DocMDP)))
4657
0
    {
4658
0
      int p = pdf_dict_get_int(ctx, tp, PDF_NAME(P));
4659
4660
0
      if (p == 0)
4661
0
        p = 2;
4662
0
      if (fields->p == 0)
4663
0
        fields->p = p;
4664
0
      else
4665
0
        fields->p = fz_mini(fields->p, p);
4666
0
    }
4667
0
    else if (pdf_name_eq(ctx, tm, PDF_NAME(FieldMDP)))
4668
0
      merge_lock_specification(ctx, fields, tp);
4669
0
  }
4670
0
}
4671
4672
static void
4673
find_locked_fields_aux(fz_context *ctx, pdf_obj *field, pdf_locked_fields *fields, pdf_obj *inherit_v, pdf_obj *inherit_ft)
4674
0
{
4675
0
  int i, n;
4676
4677
0
  if (!pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Type)), PDF_NAME(Annot)))
4678
0
    return;
4679
4680
0
  if (pdf_obj_marked(ctx, field))
4681
0
    return;
4682
4683
0
  fz_try(ctx)
4684
0
  {
4685
0
    pdf_obj *kids, *v, *ft;
4686
4687
0
    (void)pdf_mark_obj(ctx, field);
4688
4689
0
    v = pdf_dict_get(ctx, field, PDF_NAME(V));
4690
0
    if (v == NULL)
4691
0
      v = inherit_v;
4692
0
    ft = pdf_dict_get(ctx, field, PDF_NAME(FT));
4693
0
    if (ft == NULL)
4694
0
      ft = inherit_ft;
4695
4696
    /* We are looking for Widget annotations of type Sig that are
4697
     * signed (i.e. have a 'V' field). */
4698
0
    if (pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Subtype)), PDF_NAME(Widget)) &&
4699
0
      pdf_name_eq(ctx, ft, PDF_NAME(Sig)) &&
4700
0
      pdf_name_eq(ctx, pdf_dict_get(ctx, v, PDF_NAME(Type)), PDF_NAME(Sig)))
4701
0
    {
4702
      /* Signed Sig Widgets (i.e. ones with a 'V' field) need
4703
       * to have their lock field respected. */
4704
0
      merge_lock_specification(ctx, fields, pdf_dict_get(ctx, field, PDF_NAME(Lock)));
4705
4706
      /* Look for DocMDP and FieldMDP entries to see what
4707
       * flavours of alterations are allowed. */
4708
0
      find_locked_fields_value(ctx, fields, v);
4709
0
    }
4710
4711
    /* Recurse as required */
4712
0
    kids = pdf_dict_get(ctx, field, PDF_NAME(Kids));
4713
0
    if (kids)
4714
0
    {
4715
0
      n = pdf_array_len(ctx, kids);
4716
0
      for (i = 0; i < n; i++)
4717
0
        find_locked_fields_aux(ctx, pdf_array_get(ctx, kids, i), fields, v, ft);
4718
0
    }
4719
0
  }
4720
0
  fz_always(ctx)
4721
0
    pdf_unmark_obj(ctx, field);
4722
0
  fz_catch(ctx)
4723
0
    fz_rethrow(ctx);
4724
0
}
4725
4726
pdf_locked_fields *
4727
pdf_find_locked_fields(fz_context *ctx, pdf_document *doc, int version)
4728
0
{
4729
0
  pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields);
4730
0
  int o_xref_base = doc->xref_base;
4731
0
  doc->xref_base = version;
4732
4733
0
  fz_var(fields);
4734
4735
0
  fz_try(ctx)
4736
0
  {
4737
0
    pdf_obj *fobj = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm/Fields");
4738
0
    int i, len = pdf_array_len(ctx, fobj);
4739
4740
0
    if (len == 0)
4741
0
      break;
4742
4743
0
    for (i = 0; i < len; i++)
4744
0
      find_locked_fields_aux(ctx, pdf_array_get(ctx, fobj, i), fields, NULL, NULL);
4745
4746
    /* Add in any DocMDP referenced directly from the Perms dict. */
4747
0
    find_locked_fields_value(ctx, fields, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Perms/DocMDP"));
4748
0
  }
4749
0
  fz_always(ctx)
4750
0
    doc->xref_base = o_xref_base;
4751
0
  fz_catch(ctx)
4752
0
  {
4753
0
    pdf_drop_locked_fields(ctx, fields);
4754
0
    fz_rethrow(ctx);
4755
0
  }
4756
4757
0
  return fields;
4758
0
}
4759
4760
pdf_locked_fields *
4761
pdf_find_locked_fields_for_sig(fz_context *ctx, pdf_document *doc, pdf_obj *sig)
4762
0
{
4763
0
  pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields);
4764
4765
0
  fz_var(fields);
4766
4767
0
  fz_try(ctx)
4768
0
  {
4769
0
    pdf_obj *ref;
4770
0
    int i, len;
4771
4772
    /* Ensure it really is a sig */
4773
0
    if (!pdf_name_eq(ctx, pdf_dict_get(ctx, sig, PDF_NAME(Subtype)), PDF_NAME(Widget)) ||
4774
0
      !pdf_name_eq(ctx, pdf_dict_get_inheritable(ctx, sig, PDF_NAME(FT)), PDF_NAME(Sig)))
4775
0
      break;
4776
4777
    /* Check the locking details given in the V (i.e. what the signature value
4778
     * claims to lock). */
4779
0
    ref = pdf_dict_getp(ctx, sig, "V/Reference");
4780
0
    len = pdf_array_len(ctx, ref);
4781
0
    for (i = 0; i < len; i++)
4782
0
    {
4783
0
      pdf_obj *tp = pdf_dict_get(ctx, pdf_array_get(ctx, ref, i), PDF_NAME(TransformParams));
4784
0
      merge_lock_specification(ctx, fields, tp);
4785
0
    }
4786
4787
    /* Also, check the locking details given in the Signature definition. This may
4788
     * not strictly be necessary as it's supposed to be "what the form author told
4789
     * the signature that it should lock". A well-formed signature should lock
4790
     * at least that much (possibly with extra fields locked from the XFA). If the
4791
     * signature doesn't lock as much as it was told to, we should be suspicious
4792
     * of the signing application. It is not clear that this test is actually
4793
     * necessary, or in keeping with what Acrobat does. */
4794
0
    merge_lock_specification(ctx, fields, pdf_dict_get(ctx, sig, PDF_NAME(Lock)));
4795
0
  }
4796
0
  fz_catch(ctx)
4797
0
  {
4798
0
    pdf_drop_locked_fields(ctx, fields);
4799
0
    fz_rethrow(ctx);
4800
0
  }
4801
4802
0
  return fields;
4803
0
}
4804
4805
static int
4806
validate_locked_fields(fz_context *ctx, pdf_document *doc, int version, pdf_locked_fields *locked)
4807
0
{
4808
0
  int o_xref_base = doc->xref_base;
4809
0
  pdf_changes *changes;
4810
0
  int num_objs;
4811
0
  int i, n;
4812
0
  int all_indirects = 1;
4813
4814
0
  num_objs = doc->max_xref_len;
4815
0
  changes = fz_malloc_flexible(ctx, pdf_changes, obj_changes, num_objs);
4816
0
  changes->num_obj = num_objs;
4817
4818
0
  fz_try(ctx)
4819
0
  {
4820
0
    pdf_obj *acroform, *new_acroform, *old_acroform;
4821
0
    int len, acroform_num;
4822
4823
0
    doc->xref_base = version;
4824
4825
    /* Detect every object that has changed */
4826
0
    for (i = 1; i < num_objs; i++)
4827
0
    {
4828
0
      if (pdf_obj_changed_in_version(ctx, doc, i, version))
4829
0
        changes->obj_changes[i] = FIELD_CHANGED;
4830
0
    }
4831
4832
    /* FIXME: Compare PageTrees and NumberTrees (just to allow for them being regenerated
4833
     * and having produced stuff that represents the same stuff). */
4834
4835
    /* The metadata of a document may be regenerated. Allow for that. */
4836
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Metadata"), &filter_simple);
4837
4838
    /* The ModDate of document info may be regenerated. Allow for that. */
4839
    /* FIXME: We accept all changes in document info, when maybe we ought to just
4840
     * accept ModDate? */
4841
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Info"), &filter_simple);
4842
4843
    /* The Encryption dict may be rewritten for the new Xref. */
4844
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Encrypt"), &filter_simple);
4845
4846
    /* We have to accept certain changes in the top level AcroForms dict,
4847
     * so get the 2 versions... */
4848
0
    acroform = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm");
4849
0
    acroform_num = pdf_to_num(ctx, acroform);
4850
0
    new_acroform = pdf_resolve_indirect_chain(ctx, acroform);
4851
0
    doc->xref_base = version+1;
4852
0
    old_acroform = pdf_resolve_indirect_chain(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm"));
4853
0
    doc->xref_base = version;
4854
0
    n = pdf_dict_len(ctx, new_acroform);
4855
0
    for (i = 0; i < n; i++)
4856
0
    {
4857
0
      pdf_obj *key = pdf_dict_get_key(ctx, new_acroform, i);
4858
0
      pdf_obj *nval = pdf_dict_get(ctx, new_acroform, key);
4859
0
      pdf_obj *oval = pdf_dict_get(ctx, old_acroform, key);
4860
4861
0
      if (pdf_name_eq(ctx, key, PDF_NAME(Fields)))
4862
0
      {
4863
0
        int j;
4864
4865
0
        len = pdf_array_len(ctx, nval);
4866
0
        for (j = 0; j < len; j++)
4867
0
        {
4868
0
          pdf_obj *field = pdf_array_get(ctx, nval, j);
4869
0
          if (!pdf_is_indirect(ctx, field))
4870
0
            all_indirects = 0;
4871
0
          check_field(ctx, doc, changes, field, locked, "", NULL, NULL);
4872
0
        }
4873
0
      }
4874
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(SigFlags)))
4875
0
      {
4876
        /* Accept this */
4877
0
        changes->obj_changes[acroform_num] |= FIELD_CHANGE_VALID;
4878
0
      }
4879
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(DR)))
4880
0
      {
4881
        /* Accept any changes from within the Document Resources */
4882
0
        filter_changes_accepted(ctx, changes, nval, &filter_resources);
4883
0
      }
4884
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(XFA)))
4885
0
      {
4886
        /* Allow any changes within the XFA streams. */
4887
0
        filter_changes_accepted(ctx, changes, nval, &filter_xfa);
4888
0
      }
4889
0
      else if (pdf_objcmp(ctx, nval, oval))
4890
0
      {
4891
0
        changes->obj_changes[acroform_num] |= FIELD_CHANGE_INVALID;
4892
0
      }
4893
0
    }
4894
4895
    /* Allow for any object streams/XRefs to be changed. */
4896
0
    doc->xref_base = version+1;
4897
0
    for (i = 1; i < num_objs; i++)
4898
0
    {
4899
0
      pdf_obj *oobj, *otype;
4900
0
      if (changes->obj_changes[i] != FIELD_CHANGED)
4901
0
        continue;
4902
0
      if (!pdf_obj_exists(ctx, doc, i))
4903
0
      {
4904
        /* Not present this version - must be newly created, can't be a change. */
4905
0
        changes->obj_changes[i] |= FIELD_CHANGE_VALID;
4906
0
        continue;
4907
0
      }
4908
0
      oobj = pdf_load_object(ctx, doc, i);
4909
0
      otype = pdf_dict_get(ctx, oobj, PDF_NAME(Type));
4910
0
      if (pdf_name_eq(ctx, otype, PDF_NAME(ObjStm)) ||
4911
0
        pdf_name_eq(ctx, otype, PDF_NAME(XRef)))
4912
0
      {
4913
0
        changes->obj_changes[i] |= FIELD_CHANGE_VALID;
4914
0
      }
4915
0
      pdf_drop_obj(ctx, oobj);
4916
0
    }
4917
0
  }
4918
0
  fz_always(ctx)
4919
0
    doc->xref_base = o_xref_base;
4920
0
  fz_catch(ctx)
4921
0
  {
4922
0
    fz_free(ctx, changes);
4923
0
    fz_rethrow(ctx);
4924
0
  }
4925
4926
0
  for (i = 1; i < num_objs; i++)
4927
0
  {
4928
0
    if (changes->obj_changes[i] == FIELD_CHANGED)
4929
      /* Change with no reason */
4930
0
      break;
4931
0
    if (changes->obj_changes[i] & FIELD_CHANGE_INVALID)
4932
      /* Illegal Change */
4933
0
      break;
4934
0
  }
4935
4936
0
  fz_free(ctx, changes);
4937
4938
0
  return (i == num_objs) && all_indirects;
4939
0
}
4940
4941
int
4942
pdf_validate_changes(fz_context *ctx, pdf_document *doc, int version)
4943
0
{
4944
0
  int unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
4945
0
  int n = pdf_count_versions(ctx, doc);
4946
0
  pdf_locked_fields *locked = NULL;
4947
0
  int result;
4948
4949
0
  if (version < 0 || version >= n)
4950
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "There aren't that many changes to find in this document!");
4951
4952
  /* We are wanting to compare version+1 with version to make sure
4953
   * that the only changes made in going to version are conformant
4954
   * with what was allowed in version+1. The production of version
4955
   * might have involved signing a signature field and locking down
4956
   * more fields - this means that taking the list of locked things
4957
   * from version rather than version+1 will give us bad results! */
4958
0
  locked = pdf_find_locked_fields(ctx, doc, unsaved_versions+version+1);
4959
4960
0
  fz_try(ctx)
4961
0
  {
4962
0
    if (!locked->all && locked->includes.len == 0 && locked->p == 0)
4963
0
    {
4964
      /* If nothing is locked at all, then all changes are permissible. */
4965
0
      result = 1;
4966
0
    }
4967
0
    else
4968
0
      result = validate_locked_fields(ctx, doc, unsaved_versions+version, locked);
4969
0
  }
4970
0
  fz_always(ctx)
4971
0
    pdf_drop_locked_fields(ctx, locked);
4972
0
  fz_catch(ctx)
4973
0
    fz_rethrow(ctx);
4974
4975
0
  return result;
4976
0
}
4977
4978
int
4979
pdf_validate_change_history(fz_context *ctx, pdf_document *doc)
4980
0
{
4981
0
  int num_versions = pdf_count_versions(ctx, doc);
4982
0
  int v;
4983
4984
0
  if (num_versions < 2)
4985
0
    return 0; /* Unless there are at least 2 versions, there have been no updates. */
4986
4987
0
  for(v = num_versions - 2; v >= 0; v--)
4988
0
  {
4989
0
    if (!pdf_validate_changes(ctx, doc, v))
4990
0
      return v+1;
4991
0
  }
4992
0
  return 0;
4993
0
}
4994
4995
/* Return the version that obj appears in, or -1 for not found. */
4996
static int
4997
pdf_find_incremental_update_num_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
4998
0
{
4999
0
  pdf_xref *xref = NULL;
5000
0
  pdf_xref_subsec *sub;
5001
0
  int i, j;
5002
5003
0
  if (obj == NULL)
5004
0
    return -1;
5005
5006
  /* obj needs to be indirect for us to get a num out of it. */
5007
0
  i = pdf_to_num(ctx, obj);
5008
0
  if (i <= 0)
5009
0
    return -1;
5010
5011
  /* obj can't be indirect below, so resolve it here. */
5012
0
  obj = pdf_resolve_indirect_chain(ctx, obj);
5013
5014
  /* Find the first xref section where the entry is defined. */
5015
0
  for (j = 0; j < doc->num_xref_sections; j++)
5016
0
  {
5017
0
    xref = &doc->xref_sections[j];
5018
5019
0
    if (i < xref->num_objects)
5020
0
    {
5021
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
5022
0
      {
5023
0
        pdf_xref_entry *entry;
5024
5025
0
        if (i < sub->start || i >= sub->start + sub->len)
5026
0
          continue;
5027
5028
0
        entry = &sub->table[i - sub->start];
5029
0
        if (entry->obj == obj)
5030
0
          return j;
5031
0
      }
5032
0
    }
5033
0
  }
5034
0
  return -1;
5035
0
}
5036
5037
/*
 * Return the xref section index in which 'obj' is found, limited to at
 * most the total number of saved plus unsaved versions, or -1 if it is
 * not found.
 */
int pdf_find_version_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
{
  int section = pdf_find_incremental_update_num_for_obj(ctx, doc, obj);
  int limit;

  if (section == -1)
    return -1;

  limit = pdf_count_versions(ctx, doc) + pdf_count_unsaved_versions(ctx, doc);

  return (section > limit) ? limit : section;
}
5051
5052
/*
 * Check the updates made after a signature widget was signed against the
 * fields that signature locks.
 *
 * The locked set is taken from the widget as it appears in the version in
 * which it is found. Each later update (down to the first saved one) is
 * then validated in turn, stopping at the first that fails.
 *
 * Throws FZ_ERROR_ARGUMENT if the widget is not bound to a page.
 * Returns (index of the failing xref section + 1 - unsaved versions), which
 * is 0 when every examined update validates.
 *
 * NOTE(review): pdf_find_version_for_obj can return -1; that value is used
 * as doc->xref_base unchanged here and leads to a negative return value -
 * confirm whether callers can hit this.
 */
int pdf_validate_signature(fz_context *ctx, pdf_annot *widget)
{
  pdf_document *doc;
  pdf_locked_fields *locked = NULL;
  int unsaved, total, version, v;
  int saved_base;

  if (!widget->page)
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "annotation not bound to any page");

  doc = widget->page->doc;
  unsaved = pdf_count_unsaved_versions(ctx, doc);
  total = pdf_count_versions(ctx, doc) + unsaved;
  version = pdf_find_version_for_obj(ctx, doc, widget->obj);

  if (version > total-1)
    version = total-1;

  /* Read the lock definition from the object as it was when signed. */
  saved_base = doc->xref_base;
  doc->xref_base = version;

  fz_var(locked); /* Not really needed, but it stops warnings */

  fz_try(ctx)
  {
    locked = pdf_find_locked_fields_for_sig(ctx, doc, widget->obj);

    v = version-1;
    while (v >= unsaved)
    {
      doc->xref_base = v;
      if (!validate_locked_fields(ctx, doc, v, locked))
        break;
      v--;
    }
  }
  fz_always(ctx)
  {
    doc->xref_base = saved_base;
    pdf_drop_locked_fields(ctx, locked);
  }
  fz_catch(ctx)
    fz_rethrow(ctx);

  return v+1-unsaved;
}
5096
5097
/*
 * Report whether any examined version of the document looks like a pure
 * XFA form: one whose Root/AcroForm has an XFA entry but an empty (or
 * missing) Fields array.
 *
 * Versions are examined from (saved + unsaved) down to the number of
 * unsaved versions, stopping at the first match. doc->xref_base is
 * restored before returning. Returns 1 if such a version is found, else 0.
 */
int pdf_was_pure_xfa(fz_context *ctx, pdf_document *doc)
{
  int unsaved = pdf_count_unsaved_versions(ctx, doc);
  int saved = pdf_count_versions(ctx, doc);
  int saved_base = doc->xref_base;
  int pure_xfa = 0;
  int v;

  fz_var(pure_xfa);

  fz_try(ctx)
  {
    v = saved + unsaved;
    while (v >= unsaved)
    {
      pdf_obj *acroform;

      doc->xref_base = v;
      acroform = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm");

      /* An empty Root/AcroForm/Fields alongside a Root/AcroForm/XFA
       * entry means this was, at one time, a pure XFA form. */
      if (pdf_array_len(ctx, pdf_dict_get(ctx, acroform, PDF_NAME(Fields))) == 0 &&
        pdf_dict_get(ctx, acroform, PDF_NAME(XFA)) != NULL)
      {
        pure_xfa = 1;
        break;
      }
      v--;
    }
  }
  fz_always(ctx)
    doc->xref_base = saved_base;
  fz_catch(ctx)
    fz_rethrow(ctx);

  return pure_xfa;
}
5129
5130
/*
 * Allocate a new xref holding a single subsection that starts at object 0
 * and has one entry for each of the pdf_xref_len(doc) objects.
 *
 * On allocation failure everything allocated so far is freed and the
 * exception is rethrown.
 */
pdf_xref *pdf_new_local_xref(fz_context *ctx, pdf_document *doc)
{
  int n = pdf_xref_len(ctx, doc);
  pdf_xref *xref = fz_malloc_struct(ctx, pdf_xref);

  xref->num_objects = n;
  xref->subsec = NULL;
  xref->trailer = NULL;
  xref->pre_repair_trailer = NULL;
  xref->unsaved_sigs = NULL;
  xref->unsaved_sigs_end = NULL;

  fz_try(ctx)
  {
    /* Attach the subsection before allocating its table, so the
     * catch below can free it if the table allocation throws. */
    xref->subsec = fz_malloc_struct(ctx, pdf_xref_subsec);
    xref->subsec->start = 0;
    xref->subsec->len = n;
    xref->subsec->next = NULL;
    xref->subsec->table = fz_malloc_struct_array(ctx, n, pdf_xref_entry);
  }
  fz_catch(ctx)
  {
    fz_free(ctx, xref->subsec);
    fz_free(ctx, xref);
    fz_rethrow(ctx);
  }

  return xref;
}
5159
5160
/*
 * Release a local xref: drop its subsections, then free the xref itself.
 * Passing NULL is a no-op.
 */
void pdf_drop_local_xref(fz_context *ctx, pdf_xref *xref)
{
  if (xref != NULL)
  {
    pdf_drop_xref_subsec(ctx, xref);
    fz_free(ctx, xref);
  }
}
5169
5170
/*
 * Discard the document's local xref together with the local resources and
 * store entries, then clear doc->local_xref and set doc->resynth_required.
 */
void pdf_drop_local_xref_and_resources(fz_context *ctx, pdf_document *doc)
{
  /* Purge the local resources and store entries before the xref goes. */
  pdf_purge_local_resources(ctx, doc);
  pdf_purge_locals_from_store(ctx, doc);

  pdf_drop_local_xref(ctx, doc->local_xref);
  doc->local_xref = NULL;

  doc->resynth_required = 1;
}
5178
5179
void
5180
pdf_debug_doc_changes(fz_context *ctx, pdf_document *doc)
5181
0
{
5182
0
  int i, j;
5183
5184
0
  if (doc->num_incremental_sections == 0)
5185
0
    fz_write_printf(ctx, fz_stddbg(ctx), "No incremental xrefs");
5186
0
  else
5187
0
  {
5188
0
    for (i = 0; i < doc->num_incremental_sections; i++)
5189
0
    {
5190
0
      pdf_xref *xref = &doc->xref_sections[i];
5191
0
      pdf_xref_subsec *sub;
5192
5193
0
      fz_write_printf(ctx, fz_stddbg(ctx), "Incremental xref:\n");
5194
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
5195
0
      {
5196
0
        fz_write_printf(ctx, fz_stddbg(ctx), "  Objects %d->%d\n", sub->start, sub->start + sub->len - 1);
5197
0
        for (j = 0; j < sub->len; j++)
5198
0
        {
5199
0
          pdf_xref_entry *e = &sub->table[j];
5200
0
          if (e->type == 0)
5201
0
            continue;
5202
0
          fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type);
5203
0
          pdf_debug_obj(ctx, e->obj);
5204
0
          fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n");
5205
0
        }
5206
0
      }
5207
0
    }
5208
0
  }
5209
5210
0
  if (doc->local_xref == NULL)
5211
0
    fz_write_printf(ctx, fz_stddbg(ctx), "No local xref");
5212
0
  else
5213
0
  {
5214
0
    for (i = 0; i < doc->num_incremental_sections; i++)
5215
0
    {
5216
0
      pdf_xref *xref = doc->local_xref;
5217
0
      pdf_xref_subsec *sub;
5218
5219
0
      fz_write_printf(ctx, fz_stddbg(ctx), "Local xref (%sin force):\n", doc->local_xref_nesting == 0 ? "not " : "");
5220
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
5221
0
      {
5222
0
        fz_write_printf(ctx, fz_stddbg(ctx), "  Objects %d->%d\n", sub->start, sub->start + sub->len - 1);
5223
0
        for (j = 0; j < sub->len; j++)
5224
0
        {
5225
0
          pdf_xref_entry *e = &sub->table[j];
5226
0
          if (e->type == 0)
5227
0
            continue;
5228
0
          fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type);
5229
0
          pdf_debug_obj(ctx, e->obj);
5230
0
          fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n");
5231
0
        }
5232
0
      }
5233
0
    }
5234
0
  }
5235
5236
0
}
5237
5238
pdf_obj *
5239
pdf_metadata(fz_context *ctx, pdf_document *doc)
5240
0
{
5241
0
  int initial = doc->xref_base;
5242
0
  pdf_obj *obj = NULL;
5243
5244
0
  fz_var(obj);
5245
5246
0
  fz_try(ctx)
5247
0
  {
5248
0
    do
5249
0
    {
5250
0
      pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
5251
0
      obj = pdf_dict_get(ctx, root, PDF_NAME(Metadata));
5252
0
      if (obj)
5253
0
        break;
5254
0
      doc->xref_base++;
5255
0
    }
5256
0
    while (doc->xref_base < doc->num_xref_sections);
5257
0
  }
5258
0
  fz_always(ctx)
5259
0
    doc->xref_base = initial;
5260
0
  fz_catch(ctx)
5261
0
    fz_rethrow(ctx);
5262
5263
0
  return obj;
5264
0
}
5265
5266
/*
 * Return 1 if 'obj' is found in xref section 0 of its bound document and
 * that document has at least one incremental section; otherwise 0
 * (including when obj is not bound to a document).
 */
int pdf_obj_is_incremental(fz_context *ctx, pdf_obj *obj)
{
  pdf_document *doc = pdf_get_bound_document(ctx, obj);

  if (doc != NULL && doc->num_incremental_sections != 0)
    return pdf_find_incremental_update_num_for_obj(ctx, doc, obj) == 0;

  return 0;
}
5278
5279
/*
 * Pass every cached object held in the non-incremental xref sections to
 * pdf_drop_singleton_obj, storing back whatever it returns.
 *
 * Does nothing if 'doc' is NULL or a repair has been attempted on it.
 */
void pdf_minimize_document(fz_context *ctx, pdf_document *doc)
{
  int section;

  /* Don't throw anything away if we've done a repair! */
  if (doc == NULL || doc->repair_attempted)
    return;

  /* Start after the incremental sections: they hold all our changes,
   * so nothing in them may be discarded. */
  for (section = doc->num_incremental_sections; section < doc->num_xref_sections; section++)
  {
    pdf_xref_subsec *sub;

    for (sub = doc->xref_sections[section].subsec; sub; sub = sub->next)
    {
      int count = sub->len;
      int k;

      for (k = 0; k < count; k++)
      {
        pdf_xref_entry *entry = &sub->table[k];

        if (entry->obj != NULL)
          entry->obj = pdf_drop_singleton_obj(ctx, entry->obj);
      }
    }
  }
}
5308
5309
/*
 * Repair the document's xref by delegating to pdf_repair_xref_aux, with
 * pdf_prime_xref_index supplied as its callback argument.
 */
void pdf_repair_xref(fz_context *ctx, pdf_document *doc)
{
  pdf_repair_xref_aux(ctx, doc, pdf_prime_xref_index);
}