Coverage Report

Created: 2024-07-05 06:13

/src/mupdf/source/pdf/pdf-xref.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2024 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "pdf-annot-imp.h"
25
26
#include <assert.h>
27
#include <limits.h>
28
#include <string.h>
29
30
/* Debug tracing switch. The symbol is explicitly undefined here, so
 * DEBUGMESS() compiles to an empty statement. When the symbol is defined
 * instead, DEBUGMESS((ctx, fmt, ...)) forwards its parenthesised argument
 * list to fz_warn. */
#undef DEBUG_PROGESSIVE_ADVANCE

#if defined(DEBUG_PROGESSIVE_ADVANCE)
#define DEBUGMESS(A) do { fz_warn A; } while (0)
#else
#define DEBUGMESS(A) do { } while (0)
#endif
37
38
478k
/* ASCII decimal digit test that does not depend on the C locale.
 * The argument is parenthesised so that expressions built from operators of
 * lower precedence than the comparisons (e.g. `a | b`, `x ? y : z`) are
 * tested as a whole. The argument is still evaluated twice, so do not pass
 * an expression with side effects. */
#define isdigit(c) ((c) >= '0' && (c) <= '9')
39
40
/* Return 1 if ch is one of NUL, TAB, LF, FF, CR or SPACE; otherwise 0. */
static inline int iswhite(int ch)
{
  switch (ch)
  {
  case '\000':
  case '\011':
  case '\012':
  case '\014':
  case '\015':
  case '\040':
    return 1;
  default:
    return 0;
  }
}
46
47
/*
48
 * xref tables
49
 */
50
51
static void
52
pdf_drop_xref_subsec(fz_context *ctx, pdf_xref *xref)
53
8.06k
{
54
8.06k
  pdf_xref_subsec *sub = xref->subsec;
55
8.06k
  pdf_unsaved_sig *usig;
56
8.06k
  int e;
57
58
16.0k
  while (sub != NULL)
59
7.98k
  {
60
7.98k
    pdf_xref_subsec *next_sub = sub->next;
61
18.3M
    for (e = 0; e < sub->len; e++)
62
18.3M
    {
63
18.3M
      pdf_xref_entry *entry = &sub->table[e];
64
18.3M
      pdf_drop_obj(ctx, entry->obj);
65
18.3M
      fz_drop_buffer(ctx, entry->stm_buf);
66
18.3M
    }
67
7.98k
    fz_free(ctx, sub->table);
68
7.98k
    fz_free(ctx, sub);
69
7.98k
    sub = next_sub;
70
7.98k
  }
71
72
8.06k
  pdf_drop_obj(ctx, xref->pre_repair_trailer);
73
8.06k
  pdf_drop_obj(ctx, xref->trailer);
74
75
8.06k
  while ((usig = xref->unsaved_sigs) != NULL)
76
0
  {
77
0
    xref->unsaved_sigs = usig->next;
78
0
    pdf_drop_obj(ctx, usig->field);
79
0
    pdf_drop_signer(ctx, usig->signer);
80
0
    fz_free(ctx, usig);
81
0
  }
82
8.06k
}
83
84
/* Drop the contents of each of the num_xref_sections sections in the array,
 * then free the array itself. */
static void pdf_drop_xref_sections_imp(fz_context *ctx, pdf_document *doc, pdf_xref *xref_sections, int num_xref_sections)
{
  int remaining = num_xref_sections;
  pdf_xref *section = xref_sections;

  while (remaining-- > 0)
    pdf_drop_xref_subsec(ctx, section++);

  fz_free(ctx, xref_sections);
}
93
94
/* Drop both the saved and the live xref section arrays of the document and
 * reset the associated counters. */
static void pdf_drop_xref_sections(fz_context *ctx, pdf_document *doc)
{
  pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);
  pdf_drop_xref_sections_imp(ctx, doc, doc->xref_sections, doc->num_xref_sections);

  /* Leave no dangling pointers or stale counts behind. */
  doc->saved_xref_sections = doc->xref_sections = NULL;
  doc->saved_num_xref_sections = 0;
  doc->num_xref_sections = 0;
  doc->num_incremental_sections = 0;
}
105
106
static void
107
extend_xref_index(fz_context *ctx, pdf_document *doc, int newlen)
108
18.8k
{
109
18.8k
  int i;
110
111
18.8k
  doc->xref_index = fz_realloc_array(ctx, doc->xref_index, newlen, int);
112
18.1M
  for (i = doc->max_xref_len; i < newlen; i++)
113
18.1M
  {
114
18.1M
    doc->xref_index[i] = 0;
115
18.1M
  }
116
18.8k
  doc->max_xref_len = newlen;
117
18.8k
}
118
119
static void
120
resize_xref_sub(fz_context *ctx, pdf_xref *xref, int base, int newlen)
121
5.08k
{
122
5.08k
  pdf_xref_subsec *sub;
123
5.08k
  int i;
124
125
5.08k
  assert(xref != NULL);
126
5.08k
  sub = xref->subsec;
127
5.08k
  assert(sub->next == NULL && sub->start == base && sub->len+base == xref->num_objects);
128
5.08k
  assert(newlen+base > xref->num_objects);
129
130
5.08k
  sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
131
10.1k
  for (i = sub->len; i < newlen; i++)
132
5.08k
  {
133
5.08k
    sub->table[i].type = 0;
134
5.08k
    sub->table[i].ofs = 0;
135
5.08k
    sub->table[i].gen = 0;
136
5.08k
    sub->table[i].num = 0;
137
5.08k
    sub->table[i].stm_ofs = 0;
138
5.08k
    sub->table[i].stm_buf = NULL;
139
5.08k
    sub->table[i].obj = NULL;
140
5.08k
  }
141
5.08k
  sub->len = newlen;
142
5.08k
  if (newlen+base > xref->num_objects)
143
5.08k
    xref->num_objects = newlen+base;
144
5.08k
}
145
146
/* This is only ever called when we already have an incremental
147
 * xref. This means there will only be 1 subsec, and it will be
148
 * a complete subsec. */
149
static void pdf_resize_xref(fz_context *ctx, pdf_document *doc, int newlen)
150
0
{
151
0
  pdf_xref *xref = &doc->xref_sections[doc->xref_base];
152
153
0
  resize_xref_sub(ctx, xref, 0, newlen);
154
0
  if (doc->max_xref_len < newlen)
155
0
    extend_xref_index(ctx, doc, newlen);
156
0
}
157
158
/* Append one empty xref section to doc->xref_sections; it becomes the
 * section addressed by index num_xref_sections-1. */
static void pdf_populate_next_xref_level(fz_context *ctx, pdf_document *doc)
{
  int count = doc->num_xref_sections + 1;
  pdf_xref *added;

  doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, count, pdf_xref);
  doc->num_xref_sections = count;

  /* The reallocated slot is uninitialised: clear every field we own. */
  added = &doc->xref_sections[count - 1];
  added->num_objects = 0;
  added->subsec = NULL;
  added->trailer = NULL;
  added->pre_repair_trailer = NULL;
  added->unsaved_sigs = NULL;
  added->unsaved_sigs_end = NULL;
}
172
173
/* Return the trailer of the section selected by doc->xref_base (a borrowed
 * pointer), or NULL when the document has no xref sections. */
pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc)
{
  if (doc->xref_sections == NULL)
    return NULL;

  return doc->xref_sections[doc->xref_base].trailer;
}
180
181
/* Install 'trailer' (a new reference is taken) as the trailer of the xref
 * section currently being populated, i.e. the last one. If that section
 * already had a trailer, it is moved to pre_repair_trailer, whose previous
 * value is dropped. */
void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer)
{
  pdf_xref *populating = &doc->xref_sections[doc->num_xref_sections - 1];
  pdf_obj *previous = populating->trailer;

  if (previous != NULL)
  {
    pdf_drop_obj(ctx, populating->pre_repair_trailer);
    populating->pre_repair_trailer = previous;
  }
  populating->trailer = pdf_keep_obj(ctx, trailer);
}
192
193
/* Return the largest num_objects over the xref sections from doc->xref_base
 * onwards, also taking the local xref into account when one is active. */
int pdf_xref_len(fz_context *ctx, pdf_document *doc)
{
  int longest = 0;
  int s;

  if (doc->local_xref && doc->local_xref_nesting > 0)
    longest = doc->local_xref->num_objects;

  for (s = doc->xref_base; s < doc->num_xref_sections; s++)
    longest = fz_maxi(longest, doc->xref_sections[s].num_objects);

  return longest;
}
206
207
/* Ensure that the given xref has a single subsection
208
 * that covers the entire range. */
209
static void
210
ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num, int which)
211
18.5k
{
212
18.5k
  pdf_xref *xref = &doc->xref_sections[which];
213
18.5k
  pdf_xref_subsec *sub = xref->subsec;
214
18.5k
  pdf_xref_subsec *new_sub;
215
216
18.5k
  if (num < xref->num_objects)
217
732
    num = xref->num_objects;
218
219
18.5k
  if (sub != NULL && sub->next == NULL && sub->start == 0 && sub->len >= num)
220
748
    return;
221
222
17.7k
  new_sub = fz_malloc_struct(ctx, pdf_xref_subsec);
223
35.5k
  fz_try(ctx)
224
35.5k
  {
225
17.7k
    new_sub->table = fz_malloc_struct_array(ctx, num, pdf_xref_entry);
226
17.7k
    new_sub->start = 0;
227
17.7k
    new_sub->len = num;
228
17.7k
    new_sub->next = NULL;
229
17.7k
  }
230
35.5k
  fz_catch(ctx)
231
0
  {
232
0
    fz_free(ctx, new_sub);
233
0
    fz_rethrow(ctx);
234
0
  }
235
236
  /* Move objects over to the new subsection and destroy the old
237
   * ones */
238
17.7k
  sub = xref->subsec;
239
29.7k
  while (sub != NULL)
240
11.9k
  {
241
11.9k
    pdf_xref_subsec *next = sub->next;
242
11.9k
    int i;
243
244
18.0M
    for (i = 0; i < sub->len; i++)
245
17.9M
    {
246
17.9M
      new_sub->table[i+sub->start] = sub->table[i];
247
17.9M
    }
248
11.9k
    fz_free(ctx, sub->table);
249
11.9k
    fz_free(ctx, sub);
250
11.9k
    sub = next;
251
11.9k
  }
252
17.7k
  xref->num_objects = num;
253
17.7k
  xref->subsec = new_sub;
254
17.7k
  if (doc->max_xref_len < num)
255
16.9k
    extend_xref_index(ctx, doc, num);
256
17.7k
}
257
258
static pdf_xref_entry *
259
pdf_get_local_xref_entry(fz_context *ctx, pdf_document *doc, int num)
260
18.5k
{
261
18.5k
  pdf_xref *xref = doc->local_xref;
262
18.5k
  pdf_xref_subsec *sub;
263
264
18.5k
  if (xref == NULL || doc->local_xref_nesting == 0)
265
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Local xref not present!");
266
267
  /* Local xrefs only ever have 1 section, and it should be solid. */
268
18.5k
  sub = xref->subsec;
269
18.5k
  assert(sub && !sub->next);
270
18.5k
  if (num >= sub->start && num < sub->start + sub->len)
271
13.5k
    return &sub->table[num - sub->start];
272
273
  /* Expand the xref so we can return a pointer. */
274
5.08k
  resize_xref_sub(ctx, xref, 0, num+1);
275
5.08k
  sub = xref->subsec;
276
5.08k
  return &sub->table[num - sub->start];
277
18.5k
}
278
279
/* Return the entry for object 'num' within the xref section currently being
 * populated (the last one), creating the first section if the document has
 * none. When a local xref is active the entry comes from there instead.
 * Throws FZ_ERROR_ARGUMENT if num is negative or above
 * PDF_MAX_OBJECT_NUMBER. */
pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref_subsec *sub;
  int last;

  if (doc->num_xref_sections == 0)
  {
    doc->xref_sections = fz_malloc_struct(ctx, pdf_xref);
    doc->num_xref_sections = 1;
  }

  if (doc->local_xref && doc->local_xref_nesting > 0)
    return pdf_get_local_xref_entry(ctx, doc, num);

  /* Prevent accidental heap underflow */
  if (num < 0 || num > PDF_MAX_OBJECT_NUMBER)
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "object number out of range (%d)", num);

  /* Look for a subsection of the last section that already covers num. */
  last = doc->num_xref_sections-1;
  sub = doc->xref_sections[last].subsec;
  while (sub != NULL)
  {
    if (num >= sub->start && num < sub->start + sub->len)
      return &sub->table[num-sub->start];
    sub = sub->next;
  }

  /* Not covered by any subsection: solidify so that it is. */
  ensure_solid_xref(ctx, doc, num+1, last);
  sub = doc->xref_sections[doc->num_xref_sections-1].subsec;

  return &sub->table[num-sub->start];
}
314
315
/* It is vital that pdf_get_xref_entry_aux called with !solidify_if_needed
316
 * and a value object number, does NOT try/catch or throw. */
317
static
318
pdf_xref_entry *pdf_get_xref_entry_aux(fz_context *ctx, pdf_document *doc, int i, int solidify_if_needed)
319
22.5M
{
320
22.5M
  pdf_xref *xref = NULL;
321
22.5M
  pdf_xref_subsec *sub;
322
22.5M
  int j;
323
324
22.5M
  if (i < 0)
325
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Negative object number requested");
326
327
22.5M
  if (i < doc->max_xref_len)
328
22.3M
    j = doc->xref_index[i];
329
123k
  else
330
123k
    j = 0;
331
332
  /* If we have an active local xref, check there first. */
333
22.5M
  if (doc->local_xref && doc->local_xref_nesting > 0)
334
489k
  {
335
489k
    xref = doc->local_xref;
336
337
489k
    if (i < xref->num_objects)
338
489k
    {
339
767k
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
340
489k
      {
341
489k
        pdf_xref_entry *entry;
342
343
489k
        if (i < sub->start || i >= sub->start + sub->len)
344
0
          continue;
345
346
489k
        entry = &sub->table[i - sub->start];
347
489k
        if (entry->type)
348
212k
          return entry;
349
489k
      }
350
489k
    }
351
489k
  }
352
353
  /* We may be accessing an earlier version of the document using xref_base
354
   * and j may be an index into a later xref section */
355
22.2M
  if (doc->xref_base > j)
356
5
    j = doc->xref_base;
357
22.2M
  else
358
22.2M
    j = 0;
359
360
361
  /* Find the first xref section where the entry is defined. */
362
22.7M
  for (; j < doc->num_xref_sections; j++)
363
22.7M
  {
364
22.7M
    xref = &doc->xref_sections[j];
365
366
22.7M
    if (i < xref->num_objects)
367
22.7M
    {
368
26.2M
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
369
25.7M
      {
370
25.7M
        pdf_xref_entry *entry;
371
372
25.7M
        if (i < sub->start || i >= sub->start + sub->len)
373
3.44M
          continue;
374
375
22.3M
        entry = &sub->table[i - sub->start];
376
22.3M
        if (entry->type)
377
22.2M
        {
378
          /* Don't update xref_index if xref_base may have
379
           * influenced the value of j */
380
22.2M
          if (doc->xref_base == 0)
381
22.2M
            doc->xref_index[i] = j;
382
22.2M
          return entry;
383
22.2M
        }
384
22.3M
      }
385
22.7M
    }
386
22.7M
  }
387
388
  /* Didn't find the entry in any section. Return the entry from
389
   * the local_xref (if there is one active), or the final section. */
390
335
  if (doc->local_xref && doc->local_xref_nesting > 0)
391
0
  {
392
0
    if (xref == NULL || i < xref->num_objects)
393
0
    {
394
0
      xref = doc->local_xref;
395
0
      sub = xref->subsec;
396
0
      assert(sub != NULL && sub->next == NULL);
397
0
      if (i >= sub->start && i < sub->start + sub->len)
398
0
        return &sub->table[i - sub->start];
399
0
    }
400
401
    /* Expand the xref so we can return a pointer. */
402
0
    resize_xref_sub(ctx, xref, 0, i+1);
403
0
    sub = xref->subsec;
404
0
    return &sub->table[i - sub->start];
405
0
  }
406
407
335
  doc->xref_index[i] = 0;
408
335
  if (xref == NULL || i < xref->num_objects)
409
72
  {
410
72
    xref = &doc->xref_sections[doc->xref_base];
411
75
    for (sub = xref->subsec; sub != NULL; sub = sub->next)
412
72
    {
413
72
      if (i >= sub->start && i < sub->start + sub->len)
414
69
        return &sub->table[i - sub->start];
415
72
    }
416
72
  }
417
418
  /* Some really hairy code here. When we are reading the file in
419
   * initially, we read from 'newest' to 'oldest' (i.e. from 0 to
420
   * doc->num_xref_sections-1). Each section is created initially
421
   * with num_objects == 0 in it, and remains like that while we
422
   * are parsing the stream from the file. This is the only time
423
   * we'll ever have xref_sections with 0 objects in them. */
424
266
  if (doc->xref_sections[doc->num_xref_sections-1].num_objects == 0)
425
225
  {
426
    /* The oldest xref section has 0 objects in it. So we are
427
     * parsing an xref stream while loading. We don't want to
428
     * solidify the xref we are currently parsing for (as it'll
429
     * get very confused, and end up a different 'shape' in
430
     * memory to that which is in the file, and would hence
431
     * render 'fingerprinting' for snapshotting invalid) so
432
     * just give up at this point. */
433
225
    return NULL;
434
225
  }
435
436
41
  if (!solidify_if_needed)
437
2
    return NULL;
438
439
  /* At this point, we solidify the xref. This ensures that we
440
   * can return a pointer. This is the only case where this function
441
   * might throw an exception, and it will never happen when we are
442
   * working within a 'solid' xref. */
443
39
  ensure_solid_xref(ctx, doc, i+1, 0);
444
39
  xref = &doc->xref_sections[0];
445
39
  sub = xref->subsec;
446
39
  return &sub->table[i - sub->start];
447
41
}
448
449
/* Look up the xref entry for object i, solidifying the xref if that is what
 * it takes to return a slot. May still return NULL (see
 * pdf_get_xref_entry_aux). */
pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i)
{
  const int solidify = 1;

  return pdf_get_xref_entry_aux(ctx, doc, i, solidify);
}
453
454
/* Look up the xref entry for object i without ever solidifying the xref;
 * returns NULL when no slot is available. */
pdf_xref_entry *pdf_get_xref_entry_no_change(fz_context *ctx, pdf_document *doc, int i)
{
  const int solidify = 0;

  return pdf_get_xref_entry_aux(ctx, doc, i, solidify);
}
458
459
/* As pdf_get_xref_entry, but throws FZ_ERROR_ARGUMENT instead of returning
 * NULL. */
pdf_xref_entry *pdf_get_xref_entry_no_null(fz_context *ctx, pdf_document *doc, int i)
{
  pdf_xref_entry *found = pdf_get_xref_entry(ctx, doc, i);

  if (found == NULL)
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot find object in xref (%d 0 R), but not allowed to return NULL", i);

  return found;
}
466
467
void pdf_xref_entry_map(fz_context *ctx, pdf_document *doc, void (*fn)(fz_context *, pdf_xref_entry *, int, pdf_document *, void *), void *arg)
468
631
{
469
631
  int i, j;
470
631
  pdf_xref_subsec *sub;
471
631
  int xref_base = doc->xref_base;
472
473
1.26k
  fz_try(ctx)
474
1.26k
  {
475
    /* Map over any active local xref first. */
476
631
    if (doc->local_xref && doc->local_xref_nesting > 0)
477
0
    {
478
0
      pdf_xref *xref = doc->local_xref;
479
480
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
481
0
      {
482
0
        for (i = sub->start; i < sub->start + sub->len; i++)
483
0
        {
484
0
          pdf_xref_entry *entry = &sub->table[i - sub->start];
485
0
          if (entry->type)
486
0
            fn(ctx, entry, i, doc, arg);
487
0
        }
488
0
      }
489
0
    }
490
491
1.51k
    for (j = 0; j < doc->num_xref_sections; j++)
492
885
    {
493
885
      pdf_xref *xref = &doc->xref_sections[j];
494
885
      doc->xref_base = j;
495
496
2.03k
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
497
1.14k
      {
498
41.6k
        for (i = sub->start; i < sub->start + sub->len; i++)
499
40.4k
        {
500
40.4k
          pdf_xref_entry *entry = &sub->table[i - sub->start];
501
40.4k
          if (entry->type)
502
40.2k
            fn(ctx, entry, i, doc, arg);
503
40.4k
        }
504
1.14k
      }
505
885
    }
506
631
  }
507
1.26k
  fz_always(ctx)
508
631
  {
509
631
    doc->xref_base = xref_base;
510
631
  }
511
631
  fz_catch(ctx)
512
12
    fz_rethrow(ctx);
513
631
}
514
515
/*
516
  Ensure we have an incremental xref section where we can store
517
  updated versions of indirect objects. This is a new xref section
518
  consisting of a single xref subsection.
519
*/
520
static void ensure_incremental_xref(fz_context *ctx, pdf_document *doc)
521
126
{
522
  /* If there are as yet no incremental sections, or if the most recent
523
   * one has been used to sign a signature field, then we need a new one.
524
   * After a signing, any further document changes require a new increment */
525
126
  if ((doc->num_incremental_sections == 0 || doc->xref_sections[0].unsaved_sigs != NULL)
526
126
    && !doc->disallow_new_increments)
527
63
  {
528
63
    pdf_xref *xref = &doc->xref_sections[0];
529
63
    pdf_xref *pxref;
530
63
    pdf_xref_entry *new_table = fz_malloc_struct_array(ctx, xref->num_objects, pdf_xref_entry);
531
63
    pdf_xref_subsec *sub = NULL;
532
63
    pdf_obj *trailer = NULL;
533
63
    int i;
534
535
63
    fz_var(trailer);
536
63
    fz_var(sub);
537
126
    fz_try(ctx)
538
126
    {
539
63
      sub = fz_malloc_struct(ctx, pdf_xref_subsec);
540
63
      trailer = xref->trailer ? pdf_copy_dict(ctx, xref->trailer) : NULL;
541
63
      doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, pdf_xref);
542
63
      xref = &doc->xref_sections[0];
543
63
      pxref = &doc->xref_sections[1];
544
63
      memmove(pxref, xref, doc->num_xref_sections * sizeof(pdf_xref));
545
      /* xref->num_objects is already correct */
546
63
      xref->subsec = sub;
547
63
      sub = NULL;
548
63
      xref->trailer = trailer;
549
63
      xref->pre_repair_trailer = NULL;
550
63
      xref->unsaved_sigs = NULL;
551
63
      xref->unsaved_sigs_end = NULL;
552
63
      xref->subsec->next = NULL;
553
63
      xref->subsec->len = xref->num_objects;
554
63
      xref->subsec->start = 0;
555
63
      xref->subsec->table = new_table;
556
63
      doc->num_xref_sections++;
557
63
      doc->num_incremental_sections++;
558
63
    }
559
126
    fz_catch(ctx)
560
0
    {
561
0
      fz_free(ctx, sub);
562
0
      fz_free(ctx, new_table);
563
0
      pdf_drop_obj(ctx, trailer);
564
0
      fz_rethrow(ctx);
565
0
    }
566
567
    /* Update the xref_index */
568
9.26k
    for (i = 0; i < doc->max_xref_len; i++)
569
9.19k
    {
570
9.19k
      doc->xref_index[i]++;
571
9.19k
    }
572
63
  }
573
126
}
574
575
/* Used when altering a document */
576
pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i)
577
63
{
578
63
  pdf_xref *xref;
579
63
  pdf_xref_subsec *sub;
580
581
  /* Make a new final xref section if we haven't already */
582
63
  ensure_incremental_xref(ctx, doc);
583
584
63
  xref = &doc->xref_sections[doc->xref_base];
585
63
  if (i >= xref->num_objects)
586
0
    pdf_resize_xref(ctx, doc, i + 1);
587
588
63
  sub = xref->subsec;
589
63
  assert(sub != NULL && sub->next == NULL);
590
63
  assert(i >= sub->start && i < sub->start + sub->len);
591
63
  doc->xref_index[i] = 0;
592
63
  return &sub->table[i - sub->start];
593
63
}
594
595
/* Return non-zero if object 'num' has an in-use entry in the xref section
 * selected by doc->xref_base, which must consist of a single complete
 * subsection starting at 0. Object numbers outside [0, num_objects),
 * including negative ones, yield 0 rather than indexing outside the table. */
int pdf_xref_is_incremental(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref *xref = &doc->xref_sections[doc->xref_base];
  pdf_xref_subsec *sub = xref->subsec;

  assert(sub != NULL && sub->next == NULL && sub->len == xref->num_objects && sub->start == 0);

  if (num < 0 || num >= xref->num_objects)
    return 0;

  return sub->table[num].type != 0;
}
604
605
/* Used when clearing signatures. Removes the signature
606
from the list of unsaved signed signatures. */
607
void pdf_xref_remove_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field)
608
0
{
609
0
  int num = pdf_to_num(ctx, field);
610
0
  int idx = doc->xref_index[num];
611
0
  pdf_xref *xref = &doc->xref_sections[idx];
612
0
  pdf_unsaved_sig **usigptr = &xref->unsaved_sigs;
613
0
  pdf_unsaved_sig *usig = xref->unsaved_sigs;
614
615
0
  while (usig)
616
0
  {
617
0
    pdf_unsaved_sig **nextptr = &usig->next;
618
0
    pdf_unsaved_sig *next = usig->next;
619
620
0
    if (usig->field == field)
621
0
    {
622
0
      if (xref->unsaved_sigs_end == &usig->next)
623
0
      {
624
0
        if (usig->next)
625
0
          xref->unsaved_sigs_end = &usig->next->next;
626
0
        else
627
0
          xref->unsaved_sigs_end = NULL;
628
0
      }
629
0
      if (usigptr)
630
0
        *usigptr = usig->next;
631
632
0
      usig->next = NULL;
633
0
      pdf_drop_obj(ctx, usig->field);
634
0
      pdf_drop_signer(ctx, usig->signer);
635
0
      fz_free(ctx, usig);
636
637
0
      break;
638
0
    }
639
640
0
    usig = next;
641
0
    usigptr = nextptr;
642
0
  }
643
0
}
644
645
/* Append a record of (field, signer) to the unsaved-signature list of xref
 * section 0, taking a reference to each, so that contents and byte_range
 * can be updated with their correct values at saving time. */
void pdf_xref_store_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field, pdf_pkcs7_signer *signer)
{
  pdf_xref *head = &doc->xref_sections[0];
  pdf_unsaved_sig *node;

  node = fz_malloc_struct(ctx, pdf_unsaved_sig);
  node->field = pdf_keep_obj(ctx, field);
  node->signer = signer->keep(ctx, signer);
  node->next = NULL;

  /* A NULL end pointer means the list is empty: append at the head. */
  if (!head->unsaved_sigs_end)
    head->unsaved_sigs_end = &head->unsaved_sigs;

  *head->unsaved_sigs_end = node;
  head->unsaved_sigs_end = &node->next;
}
663
664
/* Return 1 if obj is the field of an unsaved signature recorded in any of
 * the incremental xref sections, 0 otherwise. Comparison is by pointer. */
int pdf_xref_obj_is_unsaved_signature(pdf_document *doc, pdf_obj *obj)
{
  int section = 0;

  while (section < doc->num_incremental_sections)
  {
    pdf_unsaved_sig *usig = doc->xref_sections[section].unsaved_sigs;

    while (usig)
    {
      if (usig->field == obj)
        return 1;
      usig = usig->next;
    }
    section++;
  }

  return 0;
}
681
682
/* Make xref section 0 a single solid subsection with room for at least
 * 'num' entries, creating an empty section first if the document has none. */
void pdf_ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num)
{
  if (!doc->num_xref_sections)
    pdf_populate_next_xref_level(ctx, doc);

  ensure_solid_xref(ctx, doc, num, 0);
}
689
690
/* Make sure object 'num' lives in the incremental xref section.
 *
 * Returns 1 if the object was moved there, 0 if it was already there, could
 * not be found, or 'num' lies outside the xref index. On a move, the new
 * entry takes over the original object pointer and stream buffer, while the
 * old entry is left holding a deep copy of the object and no stream buffer.
 * An entry of type 'o' becomes type 'n' with generation 0 in the new
 * section. */
int pdf_xref_ensure_incremental_object(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref_entry *new_entry, *old_entry;
  pdf_xref_subsec *sub = NULL;
  int i;
  pdf_obj *copy;

  /* Make sure we have created an xref section for incremental updates */
  ensure_incremental_xref(ctx, doc);

  /* xref_index has max_xref_len slots; an object number outside that
   * range cannot be looked up (and must not be used as an index). */
  if (num < 0 || num >= doc->max_xref_len)
    return 0;

  /* Search for the section that contains this object. The range test on
   * each subsection rejects sections that do not cover num. */
  for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
  {
    pdf_xref *xref = &doc->xref_sections[i];

    for (sub = xref->subsec; sub != NULL; sub = sub->next)
    {
      if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
        break;
    }
    if (sub != NULL)
      break;
  }
  /* sub == NULL implies we did not find it */

  /* If we don't find it, or it's already in the incremental section, return */
  if (i == 0 || sub == NULL)
    return 0;

  copy = pdf_deep_copy_obj(ctx, sub->table[num - sub->start].obj);

  /* Move the object to the incremental section */
  i = doc->xref_index[num];
  doc->xref_index[num] = 0;
  old_entry = &sub->table[num - sub->start];
  fz_try(ctx)
    new_entry = pdf_get_incremental_xref_entry(ctx, doc, num);
  fz_catch(ctx)
  {
    /* Undo the index change so the document is left as we found it. */
    pdf_drop_obj(ctx, copy);
    doc->xref_index[num] = i;
    fz_rethrow(ctx);
  }
  *new_entry = *old_entry;
  if (new_entry->type == 'o')
  {
    new_entry->type = 'n';
    new_entry->gen = 0;
  }
  /* Better keep a copy. We must override the old entry with
   * the copy because the caller may be holding a reference to
   * the original and expect it to end up in the new entry */
  old_entry->obj = copy;
  old_entry->stm_buf = NULL;

  return 1;
}
749
750
/* Make sure object 'num' has an entry in the local xref.
 *
 * Does nothing if the local xref already has an in-use entry for it, if the
 * object cannot be found in any document xref section, or if 'num' lies
 * outside the xref index. Otherwise the entry is copied into the local xref:
 * the local entry takes over the original object pointer (with no stream
 * buffer) and the document's entry is left holding a deep copy. An entry of
 * type 'o' becomes type 'n' with generation 0 in the local xref. */
void pdf_xref_ensure_local_object(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_xref_entry *new_entry, *old_entry;
  pdf_xref_subsec *sub = NULL;
  int i;
  pdf_xref *xref;
  pdf_obj *copy;

  /* Is it in the local section already? */
  xref = doc->local_xref;
  for (sub = xref->subsec; sub != NULL; sub = sub->next)
  {
    if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
      break;
  }
  /* If we found it, it's in the local section already. */
  if (sub != NULL)
    return;

  /* xref_index has max_xref_len slots; an object number outside that
   * range cannot be looked up (and must not be used as an index). */
  if (num < 0 || num >= doc->max_xref_len)
    return;

  /* Search for the section that contains this object. The range test on
   * each subsection rejects sections that do not cover num. */
  for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
  {
    xref = &doc->xref_sections[i];

    for (sub = xref->subsec; sub != NULL; sub = sub->next)
    {
      if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
        break;
    }
    if (sub != NULL)
      break;
  }
  /* sub == NULL implies we did not find it */
  if (sub == NULL)
    return; /* No object to find */

  copy = pdf_deep_copy_obj(ctx, sub->table[num - sub->start].obj);

  /* Copy the object to the local section */
  i = doc->xref_index[num];
  doc->xref_index[num] = 0;
  old_entry = &sub->table[num - sub->start];
  fz_try(ctx)
    new_entry = pdf_get_local_xref_entry(ctx, doc, num);
  fz_catch(ctx)
  {
    /* Undo the index change so the document is left as we found it. */
    pdf_drop_obj(ctx, copy);
    doc->xref_index[num] = i;
    fz_rethrow(ctx);
  }
  /* The struct copy hands the original object pointer to the local
   * entry. */
  *new_entry = *old_entry;
  if (new_entry->type == 'o')
  {
    new_entry->type = 'n';
    new_entry->gen = 0;
  }
  /* old entry is incremental and may have changes.
   * Better keep a copy. We must override the old entry with
   * the copy because the caller may be holding a reference to
   * the original and expect it to end up in the new entry */
  old_entry->obj = copy;
  new_entry->stm_buf = NULL; /* FIXME */
}
818
819
/* Replace all of the document's xref sections with a single section whose
 * only subsection is the caller-supplied table 'entries' of n entries (the
 * table pointer is stored as-is, not copied). The current trailer is carried
 * over, and the xref index is replaced by a zeroed one of n slots. */
void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n)
{
  int *fresh_index = NULL;
  pdf_xref *section = NULL;
  pdf_xref_subsec *sub;

  fz_var(fresh_index);
  fz_var(section);

  /* Do every allocation up front so that nothing below can fail midway. */
  fz_try(ctx)
  {
    fresh_index = fz_calloc(ctx, n, sizeof(int));
    section = fz_malloc_struct(ctx, pdf_xref);
    sub = fz_malloc_struct(ctx, pdf_xref_subsec);
  }
  fz_catch(ctx)
  {
    fz_free(ctx, section);
    fz_free(ctx, fresh_index);
    fz_rethrow(ctx);
  }

  sub->start = 0;
  sub->len = n;
  sub->table = entries;

  section->num_objects = n;
  section->subsec = sub;
  /* Take the trailer reference before the old sections are dropped. */
  section->trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));

  /* The new table completely replaces the previous separate sections */
  pdf_drop_xref_sections(ctx, doc);

  doc->xref_sections = section;
  doc->num_xref_sections = 1;
  doc->num_incremental_sections = 0;
  doc->xref_base = 0;
  doc->disallow_new_increments = 0;
  doc->max_xref_len = n;

  fz_free(ctx, doc->xref_index);
  doc->xref_index = fresh_index;
}
862
863
/* Set the current xref sections aside as the document's "saved" sections
 * (dropping any previously saved ones) and start again with a single fresh
 * section that carries over the current trailer. The local xref and
 * resources are dropped as well. */
void pdf_forget_xref(fz_context *ctx, pdf_document *doc)
{
  /* Hold on to the trailer; the sections that own it are about to move. */
  pdf_obj *kept_trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));

  pdf_drop_local_xref_and_resources(ctx, doc);

  if (doc->saved_xref_sections != NULL)
    pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);

  doc->saved_xref_sections = doc->xref_sections;
  doc->saved_num_xref_sections = doc->num_xref_sections;

  doc->xref_sections = NULL;
  doc->num_xref_sections = 0;
  doc->num_incremental_sections = 0;
  doc->startxref = 0;
  doc->xref_base = 0;
  doc->disallow_new_increments = 0;

  /* Asking for entry 0 creates the first section as a side effect. */
  fz_try(ctx)
  {
    pdf_get_populating_xref_entry(ctx, doc, 0);
  }
  fz_catch(ctx)
  {
    pdf_drop_obj(ctx, kept_trailer);
    fz_rethrow(ctx);
  }

  /* Set the trailer of the final xref section. */
  doc->xref_sections[0].trailer = kept_trailer;
}
895
896
/*
897
 * magic version tag and startxref
898
 */
899
900
int
901
pdf_version(fz_context *ctx, pdf_document *doc)
902
0
{
903
0
  int version = doc->version;
904
0
  fz_try(ctx)
905
0
  {
906
0
    pdf_obj *obj = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Version), NULL);
907
0
    const char *str = pdf_to_name(ctx, obj);
908
0
    if (*str)
909
0
      version = 10 * (fz_atof(str) + 0.05f);
910
0
  }
911
0
  fz_catch(ctx)
912
0
  {
913
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
914
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
915
0
    fz_report_error(ctx);
916
0
    fz_warn(ctx, "Ignoring broken Root/Version number.");
917
0
  }
918
0
  return version;
919
0
}
920
921
static void
922
pdf_load_version(fz_context *ctx, pdf_document *doc)
923
6.63k
{
924
6.63k
  char buf[20];
925
926
6.63k
  fz_seek(ctx, doc->file, 0, SEEK_SET);
927
6.63k
  fz_read_line(ctx, doc->file, buf, sizeof buf);
928
6.63k
  if (strlen(buf) < 5 || (memcmp(buf, "%PDF-", 5) != 0 && memcmp(buf, "%FDF-", 5) != 0))
929
4.71k
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize version marker");
930
931
1.92k
  if (buf[1] == 'F')
932
3
    doc->is_fdf = 1;
933
934
1.92k
  doc->version = 10 * (fz_atof(buf+5) + 0.05f);
935
1.92k
  if (doc->version < 10 || doc->version > 17)
936
209
    if (doc->version != 20)
937
207
      fz_warn(ctx, "unknown PDF version: %d.%d", doc->version / 10, doc->version % 10);
938
1.92k
}
939
940
static void
941
pdf_read_start_xref(fz_context *ctx, pdf_document *doc)
942
1.91k
{
943
1.91k
  unsigned char buf[1024];
944
1.91k
  size_t i, n;
945
1.91k
  int64_t t;
946
947
1.91k
  fz_seek(ctx, doc->file, 0, SEEK_END);
948
949
1.91k
  doc->file_size = fz_tell(ctx, doc->file);
950
951
1.91k
  t = fz_maxi64(0, doc->file_size - (int64_t)sizeof buf);
952
1.91k
  fz_seek(ctx, doc->file, t, SEEK_SET);
953
954
1.91k
  n = fz_read(ctx, doc->file, buf, sizeof buf);
955
1.91k
  if (n < 9)
956
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find startxref");
957
958
1.91k
  i = n - 9;
959
1.91k
  do
960
600k
  {
961
600k
    if (memcmp(buf + i, "startxref", 9) == 0)
962
1.26k
    {
963
1.26k
      i += 9;
964
3.25k
      while (i < n && iswhite(buf[i]))
965
1.98k
        i ++;
966
1.26k
      doc->startxref = 0;
967
6.81k
      while (i < n && isdigit(buf[i]))
968
5.55k
      {
969
5.55k
        if (doc->startxref >= INT64_MAX/10)
970
1
          fz_throw(ctx, FZ_ERROR_LIMIT, "startxref too large");
971
5.55k
        doc->startxref = doc->startxref * 10 + (buf[i++] - '0');
972
5.55k
      }
973
1.26k
      if (doc->startxref != 0)
974
1.24k
        return;
975
21
      break;
976
1.26k
    }
977
600k
  } while (i-- > 0);
978
979
675
  fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find startxref");
980
1.91k
}
981
982
/*
 * Consume bytes from the stream for as long as the next byte has a value
 * of 32 (space) or below. Stops without consuming at EOF or at the first
 * byte above 32.
 */
void fz_skip_space(fz_context *ctx, fz_stream *stm)
{
  for (;;)
  {
    int next = fz_peek_byte(ctx, stm);
    if (next == EOF || next > 32)
      break;
    (void)fz_read_byte(ctx, stm);
  }
}
993
994
/*
 * Consume 'str' from the stream if the upcoming bytes match it.
 * Returns 0 when the whole string was matched and consumed, 1 on the
 * first mismatch or EOF. Bytes matched before a mismatch stay consumed.
 */
int fz_skip_string(fz_context *ctx, fz_stream *stm, const char *str)
{
  const char *want;

  for (want = str; *want != '\0'; want++)
  {
    int have = fz_peek_byte(ctx, stm);
    if (have == EOF || have != *want)
      return 1;
    (void)fz_read_byte(ctx, stm);
  }

  return 0;
}
1005
1006
/*
1007
 * trailer dictionary
1008
 */
1009
1010
static int
1011
pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc)
1012
610
{
1013
610
  int len;
1014
610
  char *s;
1015
610
  int64_t t;
1016
610
  pdf_token tok;
1017
610
  int c;
1018
610
  int size = 0;
1019
610
  int64_t ofs;
1020
610
  pdf_obj *trailer = NULL;
1021
610
  size_t n;
1022
610
  pdf_lexbuf *buf = &doc->lexbuf.base;
1023
610
  pdf_obj *obj = NULL;
1024
1025
610
  fz_var(trailer);
1026
1027
  /* Record the current file read offset so that we can reinstate it */
1028
610
  ofs = fz_tell(ctx, doc->file);
1029
1030
610
  fz_skip_space(ctx, doc->file);
1031
610
  if (fz_skip_string(ctx, doc->file, "xref"))
1032
1
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find xref marker");
1033
609
  fz_skip_space(ctx, doc->file);
1034
1035
2.43k
  while (1)
1036
2.43k
  {
1037
2.43k
    c = fz_peek_byte(ctx, doc->file);
1038
2.43k
    if (!isdigit(c))
1039
601
      break;
1040
1041
1.83k
    fz_read_line(ctx, doc->file, buf->scratch, buf->size);
1042
1.83k
    s = buf->scratch;
1043
1.83k
    fz_strsep(&s, " "); /* ignore start */
1044
1.83k
    if (!s)
1045
6
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref subsection length missing");
1046
1.82k
    len = fz_atoi(fz_strsep(&s, " "));
1047
1.82k
    if (len < 0)
1048
2
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref subsection length must be positive");
1049
1050
    /* broken pdfs where the section is not on a separate line */
1051
1.82k
    if (s && *s != '\0')
1052
61
      fz_seek(ctx, doc->file, -(2 + (int)strlen(s)), SEEK_CUR);
1053
1054
1.82k
    t = fz_tell(ctx, doc->file);
1055
1.82k
    if (t < 0)
1056
0
      fz_throw(ctx, FZ_ERROR_SYSTEM, "cannot tell in file");
1057
1058
    /* Spec says xref entries should be 20 bytes, but it's not infrequent
1059
     * to see 19, in particular for some PCLm drivers. Cope. */
1060
1.82k
    if (len > 0)
1061
1.69k
    {
1062
1.69k
      n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, 20);
1063
1.69k
      if (n < 19)
1064
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "malformed xref table");
1065
1.69k
      if (n == 20 && buf->scratch[19] > 32)
1066
33
        n = 19;
1067
1.69k
    }
1068
134
    else
1069
134
      n = 20;
1070
1071
1.82k
    if (len > (int64_t)((INT64_MAX - t) / n))
1072
0
      fz_throw(ctx, FZ_ERROR_LIMIT, "xref has too many entries");
1073
1074
1.82k
    fz_seek(ctx, doc->file, t + n * (int64_t)len, SEEK_SET);
1075
1.82k
  }
1076
1077
1.20k
  fz_try(ctx)
1078
1.20k
  {
1079
601
    tok = pdf_lex(ctx, doc->file, buf);
1080
601
    if (tok != PDF_TOK_TRAILER)
1081
15
      fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer marker");
1082
1083
586
    tok = pdf_lex(ctx, doc->file, buf);
1084
586
    if (tok != PDF_TOK_OPEN_DICT)
1085
2
      fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer dictionary");
1086
1087
584
    trailer = pdf_parse_dict(ctx, doc, doc->file, buf);
1088
1089
584
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
1090
584
    if (pdf_is_indirect(ctx, obj))
1091
2
      fz_throw(ctx, FZ_ERROR_FORMAT, "trailer Size entry is indirect");
1092
1093
582
    size = pdf_dict_get_int(ctx, trailer, PDF_NAME(Size));
1094
582
    if (size < 0 || size > PDF_MAX_OBJECT_NUMBER + 1)
1095
1
      fz_throw(ctx, FZ_ERROR_FORMAT, "trailer Size entry out of range");
1096
582
  }
1097
1.20k
  fz_always(ctx)
1098
601
  {
1099
601
    pdf_drop_obj(ctx, trailer);
1100
601
  }
1101
601
  fz_catch(ctx)
1102
22
  {
1103
22
    fz_rethrow(ctx);
1104
22
  }
1105
1106
559
  fz_seek(ctx, doc->file, ofs, SEEK_SET);
1107
1108
559
  return size;
1109
581
}
1110
1111
static pdf_xref_entry *
1112
pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, int start, int len)
1113
2.43k
{
1114
2.43k
  pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-1];
1115
2.43k
  pdf_xref_subsec *sub, *extend = NULL;
1116
2.43k
  int num_objects;
1117
2.43k
  int solidify = 0;
1118
1119
2.43k
  if (len == 0)
1120
30
    return NULL;
1121
1122
  /* Different cases here.
1123
   * Case 1) We might be asking for a subsection (or a subset of a
1124
   *         subsection) that we already have - Just return it.
1125
   * Case 2) We might be asking for a subsection that overlaps (or
1126
   *         extends) a subsection we already have - extend the existing one.
1127
   * Case 3) We might be asking for a subsection that overlaps multiple
1128
   *         existing subsections - solidify the whole set.
1129
   * Case 4) We might be asking for a completely new subsection - just
1130
   *         allocate it.
1131
   */
1132
1133
  /* Sanity check */
1134
10.2k
  for (sub = xref->subsec; sub != NULL; sub = sub->next)
1135
7.88k
  {
1136
7.88k
    if (start >= sub->start && start <= sub->start + sub->len)
1137
687
    {
1138
      /* 'start' is in (or immediately after) 'sub' */
1139
687
      if (start + len <= sub->start + sub->len)
1140
14
      {
1141
        /* And so is start+len-1 - just return this! Case 1. */
1142
14
        return &sub->table[start-sub->start];
1143
14
      }
1144
      /* So we overlap with sub. */
1145
673
      if (extend == NULL)
1146
673
      {
1147
        /* Maybe we can extend sub? */
1148
673
        extend = sub;
1149
673
      }
1150
0
      else
1151
0
      {
1152
        /* OK, so we've already found an overlapping one. We'll need to solidify. Case 3. */
1153
0
        solidify = 1;
1154
0
        break;
1155
0
      }
1156
673
    }
1157
7.19k
    else if (start + len > sub->start && start + len < sub->start + sub->len)
1158
1
    {
1159
      /* The end of the start+len range is in 'sub'. */
1160
      /* For now, we won't support extending sub backwards. Just take this as
1161
       * needing to solidify. Case 3. */
1162
1
      solidify = 1;
1163
1
      break;
1164
1
    }
1165
7.19k
    else if (start < sub->start && start + len >= sub->start + sub->len)
1166
0
    {
1167
      /* The end of the start+len range is beyond 'sub'. */
1168
      /* For now, we won't support extending sub backwards. Just take this as
1169
       * needing to solidify. Another variant of case 3. */
1170
0
      solidify = 1;
1171
0
      break;
1172
0
    }
1173
7.88k
  }
1174
1175
2.39k
  num_objects = xref->num_objects;
1176
2.39k
  if (num_objects < start + len)
1177
2.36k
    num_objects = start + len;
1178
1179
2.39k
  if (solidify)
1180
1
  {
1181
    /* Case 3: Solidify the xref */
1182
1
    ensure_solid_xref(ctx, doc, num_objects, doc->num_xref_sections-1);
1183
1
    xref = &doc->xref_sections[doc->num_xref_sections-1];
1184
1
    sub = xref->subsec;
1185
1
  }
1186
2.38k
  else if (extend)
1187
673
  {
1188
    /* Case 2: Extend the subsection */
1189
673
    int newlen = start + len - extend->start;
1190
673
    sub = extend;
1191
673
    sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
1192
673
    memset(&sub->table[sub->len], 0, sizeof(pdf_xref_entry) * (newlen - sub->len));
1193
673
    sub->len = newlen;
1194
673
    if (xref->num_objects < sub->start + sub->len)
1195
666
      xref->num_objects = sub->start + sub->len;
1196
673
    if (doc->max_xref_len < sub->start + sub->len)
1197
634
      extend_xref_index(ctx, doc, sub->start + sub->len);
1198
673
  }
1199
1.71k
  else
1200
1.71k
  {
1201
    /* Case 4 */
1202
1.71k
    sub = fz_malloc_struct(ctx, pdf_xref_subsec);
1203
3.43k
    fz_try(ctx)
1204
3.43k
    {
1205
1.71k
      sub->table = fz_malloc_struct_array(ctx, len, pdf_xref_entry);
1206
1.71k
      sub->start = start;
1207
1.71k
      sub->len = len;
1208
1.71k
      sub->next = xref->subsec;
1209
1.71k
      xref->subsec = sub;
1210
1.71k
    }
1211
3.43k
    fz_catch(ctx)
1212
0
    {
1213
0
      fz_free(ctx, sub);
1214
0
      fz_rethrow(ctx);
1215
0
    }
1216
1.71k
    if (xref->num_objects < num_objects)
1217
1.70k
      xref->num_objects = num_objects;
1218
1.71k
    if (doc->max_xref_len < num_objects)
1219
1.23k
      extend_xref_index(ctx, doc, num_objects);
1220
1.71k
  }
1221
2.39k
  return &sub->table[start-sub->start];
1222
2.39k
}
1223
1224
static inline void
1225
validate_object_number_range(fz_context *ctx, int first, int len, const char *what)
1226
2.43k
{
1227
2.43k
  if (first < 0 || first > PDF_MAX_OBJECT_NUMBER)
1228
1
    fz_throw(ctx, FZ_ERROR_FORMAT, "first object number in %s out of range", what);
1229
2.43k
  if (len < 0 || len > PDF_MAX_OBJECT_NUMBER)
1230
1
    fz_throw(ctx, FZ_ERROR_FORMAT, "number of objects in %s out of range", what);
1231
2.43k
  if (len > 0 && len - 1 > PDF_MAX_OBJECT_NUMBER - first)
1232
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "last object number in %s out of range", what);
1233
2.43k
}
1234
1235
static pdf_obj *
1236
pdf_read_old_xref(fz_context *ctx, pdf_document *doc)
1237
610
{
1238
610
  int start, len, c, i, xref_len, carried;
1239
610
  fz_stream *file = doc->file;
1240
610
  pdf_xref_entry *table;
1241
610
  pdf_token tok;
1242
610
  size_t n;
1243
610
  char *s, *e;
1244
610
  pdf_lexbuf *buf = &doc->lexbuf.base;
1245
1246
610
  xref_len = pdf_xref_size_from_old_trailer(ctx, doc);
1247
1248
610
  fz_skip_space(ctx, doc->file);
1249
610
  if (fz_skip_string(ctx, doc->file, "xref"))
1250
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find xref marker");
1251
610
  fz_skip_space(ctx, doc->file);
1252
1253
2.15k
  while (1)
1254
2.12k
  {
1255
2.12k
    c = fz_peek_byte(ctx, file);
1256
2.12k
    if (!isdigit(c))
1257
544
      break;
1258
1259
1.57k
    fz_read_line(ctx, file, buf->scratch, buf->size);
1260
1.57k
    s = buf->scratch;
1261
1.57k
    start = fz_atoi(fz_strsep(&s, " "));
1262
1.57k
    len = fz_atoi(fz_strsep(&s, " "));
1263
1264
    /* broken pdfs where the section is not on a separate line */
1265
1.57k
    if (s && *s != '\0')
1266
9
    {
1267
9
      fz_warn(ctx, "broken xref subsection. proceeding anyway.");
1268
9
      fz_seek(ctx, file, -(2 + (int)strlen(s)), SEEK_CUR);
1269
9
    }
1270
1271
1.57k
    validate_object_number_range(ctx, start, len, "xref subsection");
1272
1273
    /* broken pdfs where size in trailer undershoots entries in xref sections */
1274
1.57k
    if (start + len > xref_len)
1275
797
    {
1276
797
      fz_warn(ctx, "broken xref subsection, proceeding anyway.");
1277
797
    }
1278
1279
1.57k
    table = pdf_xref_find_subsection(ctx, doc, start, len);
1280
1281
    /* Xref entries SHOULD be 20 bytes long, but we see 19 byte
1282
     * ones more frequently than we'd like (e.g. PCLm drivers).
1283
     * Cope with this by 'carrying' data forward. */
1284
1.57k
    carried = 0;
1285
26.1k
    for (i = 0; i < len; i++)
1286
24.6k
    {
1287
24.6k
      pdf_xref_entry *entry = &table[i];
1288
24.6k
      n = fz_read(ctx, file, (unsigned char *) buf->scratch + carried, 20-carried);
1289
24.6k
      if (n != (size_t)(20-carried))
1290
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "unexpected EOF in xref table");
1291
24.6k
      n += carried;
1292
24.6k
      buf->scratch[n] = '\0';
1293
24.6k
      if (!entry->type)
1294
24.5k
      {
1295
24.5k
        s = buf->scratch;
1296
24.5k
        e = s + n;
1297
1298
24.5k
        entry->num = start + i;
1299
1300
        /* broken pdfs where line start with white space */
1301
24.7k
        while (s < e && iswhite(*s))
1302
200
          s++;
1303
1304
24.5k
        if (s == e || !isdigit(*s))
1305
6
          fz_throw(ctx, FZ_ERROR_FORMAT, "xref offset missing");
1306
270k
        while (s < e && isdigit(*s))
1307
245k
          entry->ofs = entry->ofs * 10 + *s++ - '0';
1308
1309
49.2k
        while (s < e && iswhite(*s))
1310
24.6k
          s++;
1311
24.5k
        if (s == e || !isdigit(*s))
1312
18
          fz_throw(ctx, FZ_ERROR_FORMAT, "xref generation number missing");
1313
147k
        while (s < e && isdigit(*s))
1314
122k
          entry->gen = entry->gen * 10 + *s++ - '0';
1315
1316
49.1k
        while (s < e && iswhite(*s))
1317
24.5k
          s++;
1318
24.5k
        if (s == e || (*s != 'f' && *s != 'n' && *s != 'o'))
1319
11
          fz_throw(ctx, FZ_ERROR_FORMAT, "unexpected xref type: 0x%x (%d %d R)", s == e ? 0 : *s, entry->num, entry->gen);
1320
24.5k
        entry->type = *s++;
1321
1322
        /* If the last byte of our buffer isn't an EOL (or space), carry one byte forward */
1323
24.5k
        carried = buf->scratch[19] > 32;
1324
24.5k
        if (carried)
1325
42
          buf->scratch[0] = buf->scratch[19];
1326
24.5k
      }
1327
24.6k
    }
1328
1.54k
    if (carried)
1329
10
      fz_unread_byte(ctx, file);
1330
1.54k
  }
1331
1332
575
  tok = pdf_lex(ctx, file, buf);
1333
575
  if (tok != PDF_TOK_TRAILER)
1334
2
    fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer marker");
1335
1336
573
  tok = pdf_lex(ctx, file, buf);
1337
573
  if (tok != PDF_TOK_OPEN_DICT)
1338
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer dictionary");
1339
1340
573
  doc->last_xref_was_old_style = 1;
1341
1342
573
  return pdf_parse_dict(ctx, doc, file, buf);
1343
573
}
1344
1345
static void
1346
pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
1347
858
{
1348
858
  pdf_xref_entry *table;
1349
858
  int i, n;
1350
1351
858
  validate_object_number_range(ctx, i0, i1, "xref subsection");
1352
1353
858
  table = pdf_xref_find_subsection(ctx, doc, i0, i1);
1354
27.0k
  for (i = i0; i < i0 + i1; i++)
1355
26.1k
  {
1356
26.1k
    pdf_xref_entry *entry = &table[i-i0];
1357
26.1k
    int a = 0;
1358
26.1k
    int64_t b = 0;
1359
26.1k
    int c = 0;
1360
1361
26.1k
    if (fz_is_eof(ctx, stm))
1362
19
      fz_throw(ctx, FZ_ERROR_FORMAT, "truncated xref stream");
1363
1364
52.3k
    for (n = 0; n < w0; n++)
1365
26.1k
      a = (a << 8) + fz_read_byte(ctx, stm);
1366
646M
    for (n = 0; n < w1; n++)
1367
646M
      b = (b << 8) + fz_read_byte(ctx, stm);
1368
2.92M
    for (n = 0; n < w2; n++)
1369
2.89M
      c = (c << 8) + fz_read_byte(ctx, stm);
1370
1371
26.1k
    if (!entry->type)
1372
26.1k
    {
1373
26.1k
      int t = w0 ? a : 1;
1374
26.1k
      entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
1375
26.1k
      entry->ofs = w1 ? b : 0;
1376
26.1k
      entry->gen = w2 ? c : 0;
1377
26.1k
      entry->num = i;
1378
26.1k
    }
1379
26.1k
  }
1380
1381
839
  doc->last_xref_was_old_style = 0;
1382
839
}
1383
1384
/* Entered with file locked, remains locked throughout. */
1385
static pdf_obj *
1386
pdf_read_new_xref(fz_context *ctx, pdf_document *doc)
1387
629
{
1388
629
  fz_stream *stm = NULL;
1389
629
  pdf_obj *trailer = NULL;
1390
629
  pdf_obj *index = NULL;
1391
629
  pdf_obj *obj = NULL;
1392
629
  int gen, num = 0;
1393
629
  int64_t ofs, stm_ofs;
1394
629
  int size, w0, w1, w2;
1395
629
  int t;
1396
1397
629
  fz_var(trailer);
1398
629
  fz_var(stm);
1399
1400
1.25k
  fz_try(ctx)
1401
1.25k
  {
1402
629
    ofs = fz_tell(ctx, doc->file);
1403
629
    trailer = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stm_ofs, NULL);
1404
629
    if (num == 0)
1405
1
      fz_throw(ctx, FZ_ERROR_FORMAT, "Trailer object number cannot be 0\n");
1406
629
  }
1407
1.25k
  fz_catch(ctx)
1408
78
  {
1409
78
    pdf_drop_obj(ctx, trailer);
1410
78
    fz_rethrow(ctx);
1411
78
  }
1412
1413
1.10k
  fz_try(ctx)
1414
1.10k
  {
1415
551
    pdf_xref_entry *entry;
1416
1417
551
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
1418
551
    if (!obj)
1419
3
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream missing Size entry (%d 0 R)", num);
1420
1421
548
    size = pdf_to_int(ctx, obj);
1422
1423
548
    obj = pdf_dict_get(ctx, trailer, PDF_NAME(W));
1424
548
    if (!obj)
1425
1
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream missing W entry (%d  R)", num);
1426
1427
547
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 0)))
1428
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object type field width an indirect object");
1429
547
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 1)))
1430
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object field 2 width an indirect object");
1431
547
    if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 2)))
1432
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object field 3 width an indirect object");
1433
1434
547
    if (doc->file_reading_linearly && pdf_dict_get(ctx, trailer, PDF_NAME(Encrypt)))
1435
0
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Cannot read linearly with encryption");
1436
1437
547
    w0 = pdf_array_get_int(ctx, obj, 0);
1438
547
    w1 = pdf_array_get_int(ctx, obj, 1);
1439
547
    w2 = pdf_array_get_int(ctx, obj, 2);
1440
1441
547
    if (w0 < 0)
1442
1
      fz_warn(ctx, "xref stream objects have corrupt type");
1443
547
    if (w1 < 0)
1444
1
      fz_warn(ctx, "xref stream objects have corrupt offset");
1445
547
    if (w2 < 0)
1446
0
      fz_warn(ctx, "xref stream objects have corrupt generation");
1447
1448
547
    w0 = w0 < 0 ? 0 : w0;
1449
547
    w1 = w1 < 0 ? 0 : w1;
1450
547
    w2 = w2 < 0 ? 0 : w2;
1451
1452
547
    index = pdf_dict_get(ctx, trailer, PDF_NAME(Index));
1453
1454
547
    stm = pdf_open_stream_with_offset(ctx, doc, num, trailer, stm_ofs);
1455
1456
547
    if (!index)
1457
218
    {
1458
218
      pdf_read_new_xref_section(ctx, doc, stm, 0, size, w0, w1, w2);
1459
218
    }
1460
329
    else
1461
329
    {
1462
329
      int n = pdf_array_len(ctx, index);
1463
969
      for (t = 0; t < n; t += 2)
1464
640
      {
1465
640
        int i0 = pdf_array_get_int(ctx, index, t + 0);
1466
640
        int i1 = pdf_array_get_int(ctx, index, t + 1);
1467
640
        pdf_read_new_xref_section(ctx, doc, stm, i0, i1, w0, w1, w2);
1468
640
      }
1469
329
    }
1470
547
    entry = pdf_get_populating_xref_entry(ctx, doc, num);
1471
547
    entry->ofs = ofs;
1472
547
    entry->gen = gen;
1473
547
    entry->num = num;
1474
547
    entry->stm_ofs = stm_ofs;
1475
547
    pdf_drop_obj(ctx, entry->obj);
1476
547
    entry->obj = pdf_keep_obj(ctx, trailer);
1477
547
    entry->type = 'n';
1478
547
    pdf_set_obj_parent(ctx, trailer, num);
1479
547
  }
1480
1.10k
  fz_always(ctx)
1481
551
  {
1482
551
    fz_drop_stream(ctx, stm);
1483
551
  }
1484
551
  fz_catch(ctx)
1485
26
  {
1486
26
    pdf_drop_obj(ctx, trailer);
1487
26
    fz_rethrow(ctx);
1488
26
  }
1489
1490
520
  return trailer;
1491
546
}
1492
1493
static pdf_obj *
1494
pdf_read_xref(fz_context *ctx, pdf_document *doc, int64_t ofs)
1495
1.61k
{
1496
1.61k
  pdf_obj *trailer;
1497
1.61k
  int c;
1498
1499
1.61k
  fz_seek(ctx, doc->file, ofs, SEEK_SET);
1500
1501
19.8k
  while (iswhite(fz_peek_byte(ctx, doc->file)))
1502
18.2k
    fz_read_byte(ctx, doc->file);
1503
1504
1.61k
  c = fz_peek_byte(ctx, doc->file);
1505
1.61k
  if (c == 'x')
1506
610
    trailer = pdf_read_old_xref(ctx, doc);
1507
1.00k
  else if (isdigit(c))
1508
629
    trailer = pdf_read_new_xref(ctx, doc);
1509
372
  else
1510
372
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize xref format");
1511
1512
1.23k
  return trailer;
1513
1.61k
}
1514
1515
static int64_t
1516
read_xref_section(fz_context *ctx, pdf_document *doc, int64_t ofs)
1517
1.59k
{
1518
1.59k
  pdf_obj *trailer = NULL;
1519
1.59k
  pdf_obj *prevobj;
1520
1.59k
  int64_t xrefstmofs = 0;
1521
1.59k
  int64_t prevofs = 0;
1522
1523
1.59k
  trailer = pdf_read_xref(ctx, doc, ofs);
1524
2.09k
  fz_try(ctx)
1525
2.09k
  {
1526
1.04k
    pdf_set_populating_xref_trailer(ctx, doc, trailer);
1527
1528
    /* FIXME: do we overwrite free entries properly? */
1529
    /* FIXME: Does this work properly with progression? */
1530
1.04k
    xrefstmofs = pdf_to_int64(ctx, pdf_dict_get(ctx, trailer, PDF_NAME(XRefStm)));
1531
1.04k
    if (xrefstmofs)
1532
21
    {
1533
21
      if (xrefstmofs < 0)
1534
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "negative xref stream offset");
1535
1536
      /*
1537
        Read the XRefStm stream, but throw away the resulting trailer. We do not
1538
        follow any Prev tag therein, as specified on Page 108 of the PDF reference
1539
        1.7
1540
      */
1541
21
      pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs));
1542
21
    }
1543
1544
1.04k
    prevobj = pdf_dict_get(ctx, trailer, PDF_NAME(Prev));
1545
1.04k
    if (pdf_is_int(ctx, prevobj))
1546
348
    {
1547
348
      prevofs = pdf_to_int64(ctx, prevobj);
1548
348
      if (prevofs <= 0)
1549
1
        fz_throw(ctx, FZ_ERROR_FORMAT, "invalid offset for previous xref section");
1550
348
    }
1551
1.04k
  }
1552
2.09k
  fz_always(ctx)
1553
1.04k
    pdf_drop_obj(ctx, trailer);
1554
1.04k
  fz_catch(ctx)
1555
3
    fz_rethrow(ctx);
1556
1557
1.58k
  return prevofs;
1558
1.58k
}
1559
1560
static void
1561
pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, int64_t ofs, int read_previous)
1562
1.24k
{
1563
1.24k
  int i, len, cap;
1564
1.24k
  int64_t *offsets;
1565
1.24k
  int populated = 0;
1566
1.24k
  int size, xref_len;
1567
1568
1.24k
  len = 0;
1569
1.24k
  cap = 10;
1570
1.24k
  offsets = fz_malloc_array(ctx, cap, int64_t);
1571
1572
1.24k
  fz_var(populated);
1573
1.24k
  fz_var(offsets);
1574
1575
2.48k
  fz_try(ctx)
1576
2.48k
  {
1577
2.83k
    while(ofs)
1578
1.59k
    {
1579
2.07k
      for (i = 0; i < len; i ++)
1580
488
      {
1581
488
        if (offsets[i] == ofs)
1582
0
          break;
1583
488
      }
1584
1.59k
      if (i < len)
1585
0
      {
1586
0
        fz_warn(ctx, "ignoring xref section recursion at offset %d", (int)ofs);
1587
0
        break;
1588
0
      }
1589
1.59k
      if (len == cap)
1590
0
      {
1591
0
        cap *= 2;
1592
0
        offsets = fz_realloc_array(ctx, offsets, cap, int64_t);
1593
0
      }
1594
1.59k
      offsets[len++] = ofs;
1595
1596
1.59k
      pdf_populate_next_xref_level(ctx, doc);
1597
1.59k
      populated = 1;
1598
1.59k
      ofs = read_xref_section(ctx, doc, ofs);
1599
1.59k
      if (!read_previous)
1600
0
        break;
1601
1.59k
    }
1602
1603
    /* For pathological files, such as chinese-example.pdf, where the original
1604
     * xref in the file is highly fragmented, we can safely solidify it here
1605
     * with no ill effects. */
1606
1.24k
    ensure_solid_xref(ctx, doc, 0, doc->num_xref_sections-1);
1607
1608
1.24k
    size = pdf_dict_get_int(ctx, pdf_trailer(ctx, doc), PDF_NAME(Size));
1609
1.24k
    xref_len = pdf_xref_len(ctx, doc);
1610
1.24k
    if (xref_len > size)
1611
67
      fz_throw(ctx, FZ_ERROR_FORMAT, "incorrect number of xref entries in trailer, repairing");
1612
1.24k
  }
1613
2.48k
  fz_always(ctx)
1614
1.24k
  {
1615
1.24k
    fz_free(ctx, offsets);
1616
1.24k
  }
1617
1.24k
  fz_catch(ctx)
1618
612
  {
1619
    /* Undo pdf_populate_next_xref_level if we've done that already. */
1620
612
    if (populated)
1621
612
    {
1622
612
      pdf_drop_xref_subsec(ctx, &doc->xref_sections[doc->num_xref_sections - 1]);
1623
612
      doc->num_xref_sections--;
1624
612
    }
1625
612
    fz_rethrow(ctx);
1626
612
  }
1627
1.17k
}
1628
1629
static void
1630
pdf_prime_xref_index(fz_context *ctx, pdf_document *doc)
1631
6.46k
{
1632
6.46k
  int i, j;
1633
6.46k
  int *idx = doc->xref_index;
1634
1635
13.1k
  for (i = doc->num_xref_sections-1; i >= 0; i--)
1636
6.71k
  {
1637
6.71k
    pdf_xref *xref = &doc->xref_sections[i];
1638
6.71k
    pdf_xref_subsec *subsec = xref->subsec;
1639
13.7k
    while (subsec != NULL)
1640
6.98k
    {
1641
6.98k
      int start = subsec->start;
1642
6.98k
      int end = subsec->start + subsec->len;
1643
18.0M
      for (j = start; j < end; j++)
1644
18.0M
      {
1645
18.0M
        char t = subsec->table[j-start].type;
1646
18.0M
        if (t != 0 && t != 'f')
1647
132k
          idx[j] = i;
1648
18.0M
      }
1649
1650
6.98k
      subsec = subsec->next;
1651
6.98k
    }
1652
6.71k
  }
1653
6.46k
}
1654
1655
static void
1656
check_xref_entry_offsets(fz_context *ctx, pdf_xref_entry *entry, int i, pdf_document *doc, void *arg)
1657
40.2k
{
1658
40.2k
  int xref_len = (int)(intptr_t)arg;
1659
1660
40.2k
  if (entry->type == 'n')
1661
22.6k
  {
1662
    /* Special case code: "0000000000 * n" means free,
1663
     * according to some producers (inc Quartz) */
1664
22.6k
    if (entry->ofs == 0)
1665
31
      entry->type = 'f';
1666
22.6k
    else if (entry->ofs <= 0 || entry->ofs >= doc->file_size)
1667
6
      fz_throw(ctx, FZ_ERROR_FORMAT, "object offset out of range: %d (%d 0 R)", (int)entry->ofs, i);
1668
22.6k
  }
1669
17.5k
  else if (entry->type == 'o')
1670
11.0k
  {
1671
    /* Read this into a local variable here, because pdf_get_xref_entry
1672
     * may solidify the xref, hence invalidating "entry", meaning we
1673
     * need a stashed value for the throw. */
1674
11.0k
    int64_t ofs = entry->ofs;
1675
11.0k
    if (ofs <= 0 || ofs >= xref_len || pdf_get_xref_entry_no_null(ctx, doc, ofs)->type != 'n')
1676
6
      fz_throw(ctx, FZ_ERROR_FORMAT, "invalid reference to an objstm that does not exist: %d (%d 0 R)", (int)ofs, i);
1677
11.0k
  }
1678
40.2k
}
1679
1680
/*
1681
 * load xref tables from pdf
1682
 *
1683
 * File locked on entry, throughout and on exit.
1684
 */
1685
1686
static void
1687
pdf_load_xref(fz_context *ctx, pdf_document *doc)
1688
1.91k
{
1689
1.91k
  int xref_len;
1690
1.91k
  pdf_xref_entry *entry;
1691
1692
1.91k
  pdf_read_start_xref(ctx, doc);
1693
1694
1.91k
  pdf_read_xref_sections(ctx, doc, doc->startxref, 1);
1695
1696
1.91k
  if (pdf_xref_len(ctx, doc) == 0)
1697
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "found xref was empty");
1698
1699
1.91k
  pdf_prime_xref_index(ctx, doc);
1700
1701
1.91k
  entry = pdf_get_xref_entry_no_null(ctx, doc, 0);
1702
  /* broken pdfs where first object is missing */
1703
1.91k
  if (!entry->type)
1704
6
  {
1705
6
    entry->type = 'f';
1706
6
    entry->gen = 65535;
1707
6
    entry->num = 0;
1708
6
  }
1709
  /* broken pdfs where first object is not free */
1710
1.91k
  else if (entry->type != 'f')
1711
4
    fz_warn(ctx, "first object in xref is not free");
1712
1713
  /* broken pdfs where object offsets are out of range */
1714
1.91k
  xref_len = pdf_xref_len(ctx, doc);
1715
1.91k
  pdf_xref_entry_map(ctx, doc, check_xref_entry_offsets, (void *)(intptr_t)xref_len);
1716
1.91k
}
1717
1718
static void
1719
pdf_check_linear(fz_context *ctx, pdf_document *doc)
1720
1.91k
{
1721
1.91k
  pdf_obj *dict = NULL;
1722
1.91k
  pdf_obj *o;
1723
1.91k
  int num, gen;
1724
1.91k
  int64_t stmofs;
1725
1726
1.91k
  fz_var(dict);
1727
1728
3.83k
  fz_try(ctx)
1729
3.83k
  {
1730
1.91k
    dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL);
1731
1.91k
    if (!pdf_is_dict(ctx, dict))
1732
13
      break;
1733
1.90k
    o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
1734
1.90k
    if (o == NULL)
1735
1.27k
      break;
1736
627
    if (pdf_to_int(ctx, o) != 1)
1737
1
      break;
1738
626
    doc->has_linearization_object = 1;
1739
626
  }
1740
3.83k
  fz_always(ctx)
1741
1.91k
    pdf_drop_obj(ctx, dict);
1742
1.91k
  fz_catch(ctx)
1743
318
  {
1744
    /* Silently swallow this error. */
1745
318
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1746
318
    fz_report_error(ctx);
1747
318
  }
1748
1.91k
}
1749
1750
static void
1751
pdf_load_linear(fz_context *ctx, pdf_document *doc)
1752
0
{
1753
0
  pdf_obj *dict = NULL;
1754
0
  pdf_obj *hint = NULL;
1755
0
  pdf_obj *o;
1756
0
  int num, gen, lin, len;
1757
0
  int64_t stmofs;
1758
1759
0
  fz_var(dict);
1760
0
  fz_var(hint);
1761
1762
0
  fz_try(ctx)
1763
0
  {
1764
0
    pdf_xref_entry *entry;
1765
1766
0
    dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL);
1767
0
    if (!pdf_is_dict(ctx, dict))
1768
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to read linearized dictionary");
1769
0
    o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
1770
0
    if (o == NULL)
1771
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to read linearized dictionary");
1772
0
    lin = pdf_to_int(ctx, o);
1773
0
    if (lin != 1)
1774
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected version of Linearized tag (%d)", lin);
1775
0
    doc->has_linearization_object = 1;
1776
0
    len = pdf_dict_get_int(ctx, dict, PDF_NAME(L));
1777
0
    if (len != doc->file_length)
1778
0
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "File has been updated since linearization");
1779
1780
0
    pdf_read_xref_sections(ctx, doc, fz_tell(ctx, doc->file), 0);
1781
1782
0
    doc->linear_page_count = pdf_dict_get_int(ctx, dict, PDF_NAME(N));
1783
0
    doc->linear_page_refs = fz_realloc_array(ctx, doc->linear_page_refs, doc->linear_page_count, pdf_obj *);
1784
0
    memset(doc->linear_page_refs, 0, doc->linear_page_count * sizeof(pdf_obj*));
1785
0
    doc->linear_obj = dict;
1786
0
    doc->linear_pos = fz_tell(ctx, doc->file);
1787
0
    doc->linear_page1_obj_num = pdf_dict_get_int(ctx, dict, PDF_NAME(O));
1788
0
    doc->linear_page_refs[0] = pdf_new_indirect(ctx, doc, doc->linear_page1_obj_num, 0);
1789
0
    doc->linear_page_num = 0;
1790
0
    hint = pdf_dict_get(ctx, dict, PDF_NAME(H));
1791
0
    doc->hint_object_offset = pdf_array_get_int(ctx, hint, 0);
1792
0
    doc->hint_object_length = pdf_array_get_int(ctx, hint, 1);
1793
1794
0
    entry = pdf_get_populating_xref_entry(ctx, doc, 0);
1795
0
    entry->type = 'f';
1796
0
  }
1797
0
  fz_catch(ctx)
1798
0
  {
1799
0
    pdf_drop_obj(ctx, dict);
1800
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1801
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1802
0
    fz_report_error(ctx);
1803
    /* Drop back to non linearized reading mode */
1804
0
    doc->file_reading_linearly = 0;
1805
0
  }
1806
0
}
1807
1808
/*
1809
 * Initialize and load xref tables.
1810
 * If password is not null, try to decrypt.
1811
 */
1812
1813
static void
1814
pdf_init_document(fz_context *ctx, pdf_document *doc)
1815
6.63k
{
1816
6.63k
  pdf_obj *encrypt, *id;
1817
6.63k
  int repaired = 0;
1818
1819
13.2k
  fz_try(ctx)
1820
13.2k
  {
1821
    /* Check to see if we should work in progressive mode */
1822
6.63k
    if (doc->file->progressive)
1823
0
    {
1824
0
      doc->file_reading_linearly = 1;
1825
0
      fz_seek(ctx, doc->file, 0, SEEK_END);
1826
0
      doc->file_length = fz_tell(ctx, doc->file);
1827
0
      if (doc->file_length < 0)
1828
0
        doc->file_length = 0;
1829
0
      fz_seek(ctx, doc->file, 0, SEEK_SET);
1830
0
    }
1831
1832
6.63k
    pdf_load_version(ctx, doc);
1833
1834
6.63k
    if (doc->is_fdf)
1835
3
    {
1836
3
      doc->file_reading_linearly = 0;
1837
3
      repaired = 1;
1838
3
      break; /* skip to end of try/catch */
1839
3
    }
1840
1841
    /* Try to load the linearized file if we are in progressive
1842
     * mode. */
1843
6.63k
    if (doc->file_reading_linearly)
1844
0
      pdf_load_linear(ctx, doc);
1845
6.63k
    else
1846
      /* Even if we're not in progressive mode, check to see
1847
       * if the file claims to be linearized. This is important
1848
       * for checking signatures later on. */
1849
6.63k
      pdf_check_linear(ctx, doc);
1850
1851
    /* If we aren't in progressive mode (or the linear load failed
1852
     * and has set us back to non-progressive mode), load normally.
1853
     */
1854
6.63k
    if (!doc->file_reading_linearly)
1855
1.91k
      pdf_load_xref(ctx, doc);
1856
6.63k
  }
1857
13.2k
  fz_catch(ctx)
1858
6.01k
  {
1859
6.01k
    pdf_drop_xref_sections(ctx, doc);
1860
6.01k
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1861
6.01k
    doc->file_reading_linearly = 0;
1862
6.01k
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1863
6.01k
    fz_report_error(ctx);
1864
6.01k
    fz_warn(ctx, "trying to repair broken xref");
1865
6.01k
    repaired = 1;
1866
6.01k
  }
1867
1868
13.2k
  fz_try(ctx)
1869
13.2k
  {
1870
6.63k
    if (repaired)
1871
6.01k
    {
1872
      /* pdf_repair_xref may access xref_index, so reset it properly */
1873
6.01k
      if (doc->xref_index)
1874
201
        memset(doc->xref_index, 0, sizeof(int) * doc->max_xref_len);
1875
6.01k
      pdf_repair_xref(ctx, doc);
1876
6.01k
      pdf_prime_xref_index(ctx, doc);
1877
6.01k
    }
1878
1879
6.63k
    encrypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
1880
6.63k
    id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
1881
6.63k
    if (pdf_is_dict(ctx, encrypt))
1882
115
      doc->crypt = pdf_new_crypt(ctx, encrypt, id);
1883
1884
    /* Allow lazy clients to read encrypted files with a blank password */
1885
6.63k
    (void)pdf_authenticate_password(ctx, doc, "");
1886
1887
6.63k
    if (repaired)
1888
5.77k
    {
1889
5.77k
      pdf_repair_trailer(ctx, doc);
1890
5.77k
    }
1891
6.63k
  }
1892
13.2k
  fz_catch(ctx)
1893
246
  {
1894
246
    fz_rethrow(ctx);
1895
246
  }
1896
6.63k
}
1897
1898
/*
 * After an xref repair, make sure the trailer has Root and Info
 * entries by scanning the xref for a Catalog object and for an object
 * carrying Creator or Producer keys. When the document is encrypted,
 * the xref cache is cleared afterwards and the Encrypt and ID objects
 * are re-cached with decryption disabled.
 */
void pdf_repair_trailer(fz_context *ctx, pdf_document *doc)
{
	int have_root, have_info;
	pdf_obj *ref;
	pdf_obj *candidate = NULL;
	int i;

	/* The xref length is sampled before the object streams are repaired. */
	int xref_len = pdf_xref_len(ctx, doc);
	pdf_repair_obj_stms(ctx, doc);

	have_root = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)) != NULL);
	have_info = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)) != NULL);

	fz_var(candidate);

	fz_try(ctx)
	{
		/* Scan from the end so we have a better chance of finding
		 * newer objects if there are multiple instances of Info and
		 * Root objects.
		 */
		for (i = xref_len - 1; i > 0; --i)
		{
			pdf_xref_entry *entry;

			/* Stop as soon as both keys are accounted for. */
			if (have_info && have_root)
				break;

			entry = pdf_get_xref_entry_no_null(ctx, doc, i);
			if (entry->type == 0 || entry->type == 'f')
			{
				continue;
			}

			fz_try(ctx)
			{
				candidate = pdf_load_object(ctx, doc, i);
			}
			fz_catch(ctx)
			{
				fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
				fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
				fz_report_error(ctx);
				fz_warn(ctx, "ignoring broken object (%d 0 R)", i);
				continue;
			}

			if (!have_root && pdf_dict_get(ctx, candidate, PDF_NAME(Type)) == PDF_NAME(Catalog))
			{
				ref = pdf_new_indirect(ctx, doc, i, 0);
				pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), ref);
				have_root = 1;
			}

			if (!have_info &&
				(pdf_dict_get(ctx, candidate, PDF_NAME(Creator)) ||
				 pdf_dict_get(ctx, candidate, PDF_NAME(Producer))))
			{
				ref = pdf_new_indirect(ctx, doc, i, 0);
				pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), ref);
				have_info = 1;
			}

			pdf_drop_obj(ctx, candidate);
			candidate = NULL;
		}
	}
	fz_always(ctx)
	{
		/* ensure that strings are not used in their repaired, non-decrypted form */
		if (doc->crypt)
		{
			pdf_crypt *saved_crypt;

			pdf_clear_xref(ctx, doc);

			/* ensure that Encryption dictionary and ID are cached without decryption,
			   otherwise a decrypted Encryption dictionary and ID may be used when saving
			   the PDF causing it to be inconsistent (since strings/streams are encrypted
			   with the actual encryption key, not the decrypted encryption key). */
			saved_crypt = doc->crypt;
			doc->crypt = NULL;
			fz_try(ctx)
			{
				(void) pdf_resolve_indirect(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt)));
				(void) pdf_resolve_indirect(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID)));
			}
			fz_always(ctx)
			{
				/* Put the crypt context back whether or not the lookups threw. */
				doc->crypt = saved_crypt;
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, candidate);
		fz_rethrow(ctx);
	}
}
1996
1997
void
1998
pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc)
1999
6.63k
{
2000
6.63k
  if (doc == NULL)
2001
0
    return;
2002
6.63k
  fz_drop_xml(ctx, doc->xfa);
2003
6.63k
  doc->xfa = NULL;
2004
6.63k
}
2005
2006
static void
2007
pdf_drop_document_imp(fz_context *ctx, pdf_document *doc)
2008
6.63k
{
2009
6.63k
  int i;
2010
2011
6.63k
  fz_defer_reap_start(ctx);
2012
2013
  /* Type3 glyphs in the glyph cache can contain pdf_obj pointers
2014
   * that we are about to destroy. Simplest solution is to bin the
2015
   * glyph cache at this point. */
2016
13.2k
  fz_try(ctx)
2017
13.2k
    fz_purge_glyph_cache(ctx);
2018
13.2k
  fz_catch(ctx)
2019
0
  {
2020
    /* Swallow error, but continue dropping */
2021
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2022
0
    fz_report_error(ctx);
2023
0
  }
2024
2025
6.63k
  pdf_set_doc_event_callback(ctx, doc, NULL, NULL, NULL);
2026
6.63k
  pdf_drop_js(ctx, doc->js);
2027
2028
6.63k
  pdf_drop_journal(ctx, doc->journal);
2029
2030
6.63k
  pdf_drop_resource_tables(ctx, doc);
2031
2032
6.63k
  pdf_drop_local_xref(ctx, doc->local_xref);
2033
2034
6.63k
  pdf_drop_xref_sections(ctx, doc);
2035
6.63k
  fz_free(ctx, doc->xref_index);
2036
2037
6.63k
  fz_drop_stream(ctx, doc->file);
2038
6.63k
  pdf_drop_crypt(ctx, doc->crypt);
2039
2040
6.63k
  pdf_drop_obj(ctx, doc->linear_obj);
2041
6.63k
  if (doc->linear_page_refs)
2042
0
  {
2043
0
    for (i=0; i < doc->linear_page_count; i++)
2044
0
      pdf_drop_obj(ctx, doc->linear_page_refs[i]);
2045
2046
0
    fz_free(ctx, doc->linear_page_refs);
2047
0
  }
2048
2049
6.63k
  fz_free(ctx, doc->hint_page);
2050
6.63k
  fz_free(ctx, doc->hint_shared_ref);
2051
6.63k
  fz_free(ctx, doc->hint_shared);
2052
6.63k
  fz_free(ctx, doc->hint_obj_offsets);
2053
2054
6.85k
  for (i=0; i < doc->num_type3_fonts; i++)
2055
215
  {
2056
430
    fz_try(ctx)
2057
430
      fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
2058
430
    fz_always(ctx)
2059
215
      fz_drop_font(ctx, doc->type3_fonts[i]);
2060
215
    fz_catch(ctx)
2061
0
    {
2062
      /* Swallow error, but continue dropping */
2063
0
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2064
0
      fz_report_error(ctx);
2065
0
    }
2066
215
  }
2067
2068
6.63k
  fz_free(ctx, doc->type3_fonts);
2069
2070
6.63k
  pdf_drop_ocg(ctx, doc);
2071
2072
6.63k
  pdf_empty_store(ctx, doc);
2073
2074
6.63k
  pdf_lexbuf_fin(ctx, &doc->lexbuf.base);
2075
2076
6.63k
  fz_drop_colorspace(ctx, doc->oi);
2077
2078
22.8k
  for (i = 0; i < doc->orphans_count; i++)
2079
16.2k
    pdf_drop_obj(ctx, doc->orphans[i]);
2080
2081
6.63k
  fz_free(ctx, doc->orphans);
2082
2083
6.63k
  pdf_drop_page_tree_internal(ctx, doc);
2084
2085
6.63k
  fz_defer_reap_end(ctx);
2086
2087
6.63k
  pdf_invalidate_xfa(ctx, doc);
2088
6.63k
}
2089
2090
void
2091
pdf_drop_document(fz_context *ctx, pdf_document *doc)
2092
16.7k
{
2093
16.7k
  fz_drop_document(ctx, &doc->super);
2094
16.7k
}
2095
2096
pdf_document *
2097
pdf_keep_document(fz_context *ctx, pdf_document *doc)
2098
16.7k
{
2099
16.7k
  return (pdf_document *)fz_keep_document(ctx, &doc->super);
2100
16.7k
}
2101
2102
/*
2103
 * compressed object streams
2104
 */
2105
2106
/*
2107
  Do not hold pdf_xref_entry's over call to this function as they
2108
  may be invalidated!
2109
*/
2110
static pdf_xref_entry *
2111
pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, pdf_lexbuf *buf, int target)
2112
11.9k
{
2113
11.9k
  fz_stream *stm = NULL;
2114
11.9k
  pdf_obj *objstm = NULL;
2115
11.9k
  int *numbuf = NULL;
2116
11.9k
  int64_t *ofsbuf = NULL;
2117
2118
11.9k
  pdf_obj *obj;
2119
11.9k
  int64_t first;
2120
11.9k
  int count;
2121
11.9k
  int i;
2122
11.9k
  pdf_token tok;
2123
11.9k
  pdf_xref_entry *ret_entry = NULL;
2124
11.9k
  int ret_idx;
2125
11.9k
  int xref_len;
2126
11.9k
  int found;
2127
11.9k
  fz_stream *sub = NULL;
2128
2129
11.9k
  fz_var(numbuf);
2130
11.9k
  fz_var(ofsbuf);
2131
11.9k
  fz_var(objstm);
2132
11.9k
  fz_var(stm);
2133
11.9k
  fz_var(sub);
2134
2135
23.9k
  fz_try(ctx)
2136
23.9k
  {
2137
11.9k
    objstm = pdf_load_object(ctx, doc, num);
2138
2139
11.9k
    if (pdf_obj_marked(ctx, objstm))
2140
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "recursive object stream lookup");
2141
11.9k
  }
2142
23.9k
  fz_catch(ctx)
2143
50
  {
2144
50
    pdf_drop_obj(ctx, objstm);
2145
50
    fz_rethrow(ctx);
2146
50
  }
2147
2148
23.8k
  fz_try(ctx)
2149
23.8k
  {
2150
11.9k
    (void)pdf_mark_obj(ctx, objstm);
2151
2152
11.9k
    count = pdf_dict_get_int(ctx, objstm, PDF_NAME(N));
2153
11.9k
    first = pdf_dict_get_int(ctx, objstm, PDF_NAME(First));
2154
2155
11.9k
    if (count < 0 || count > PDF_MAX_OBJECT_NUMBER)
2156
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "number of objects in object stream out of range");
2157
2158
11.9k
    numbuf = fz_calloc(ctx, count, sizeof(*numbuf));
2159
11.9k
    ofsbuf = fz_calloc(ctx, count, sizeof(*ofsbuf));
2160
2161
11.9k
    xref_len = pdf_xref_len(ctx, doc);
2162
2163
11.9k
    found = 0;
2164
2165
11.9k
    stm = pdf_open_stream_number(ctx, doc, num);
2166
549k
    for (i = 0; i < count; i++)
2167
539k
    {
2168
539k
      tok = pdf_lex(ctx, stm, buf);
2169
539k
      if (tok != PDF_TOK_INT)
2170
1.40k
        fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt object stream (%d 0 R)", num);
2171
538k
      numbuf[found] = buf->i;
2172
2173
538k
      tok = pdf_lex(ctx, stm, buf);
2174
538k
      if (tok != PDF_TOK_INT)
2175
702
        fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt object stream (%d 0 R)", num);
2176
537k
      ofsbuf[found] = buf->i;
2177
2178
537k
      if (numbuf[found] <= 0 || numbuf[found] >= xref_len)
2179
80.8k
        fz_warn(ctx, "object stream object out of range, skipping");
2180
456k
      else
2181
456k
        found++;
2182
537k
    }
2183
2184
9.84k
    ret_idx = -1;
2185
219k
    for (i = 0; i < found; i++)
2186
209k
    {
2187
209k
      pdf_xref_entry *entry;
2188
209k
      uint64_t length;
2189
209k
      int64_t offset;
2190
2191
209k
      offset = first + ofsbuf[i];
2192
209k
      if (i+1 < found)
2193
207k
        length = ofsbuf[i+1] - ofsbuf[i];
2194
2.35k
      else
2195
2.35k
        length = UINT64_MAX;
2196
2197
209k
      sub = fz_open_null_filter(ctx, stm, length, offset);
2198
2199
209k
      obj = pdf_parse_stm_obj(ctx, doc, sub, buf);
2200
209k
      fz_drop_stream(ctx, sub);
2201
209k
      sub = NULL;
2202
2203
209k
      entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[i]);
2204
2205
209k
      pdf_set_obj_parent(ctx, obj, numbuf[i]);
2206
2207
      /* We may have set entry->type to be 'O' from being 'o' to avoid nasty
2208
       * recursions in pdf_cache_object. Accept the type being 'O' here. */
2209
209k
      if ((entry->type == 'o' || entry->type == 'O') && entry->ofs == num)
2210
201k
      {
2211
        /* If we already have an entry for this object,
2212
         * we'd like to drop it and use the new one -
2213
         * but this means that anyone currently holding
2214
         * a pointer to the old one will be left with a
2215
         * stale pointer. Instead, we drop the new one
2216
         * and trust that the old one is correct. */
2217
201k
        if (entry->obj)
2218
168k
        {
2219
168k
          if (pdf_objcmp(ctx, entry->obj, obj))
2220
35
            fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]);
2221
168k
          pdf_drop_obj(ctx, obj);
2222
168k
        }
2223
33.3k
        else
2224
33.3k
        {
2225
33.3k
          entry->obj = obj;
2226
33.3k
          fz_drop_buffer(ctx, entry->stm_buf);
2227
33.3k
          entry->stm_buf = NULL;
2228
33.3k
        }
2229
201k
        if (numbuf[i] == target)
2230
1.86k
          ret_idx = i;
2231
201k
      }
2232
8.15k
      else
2233
8.15k
      {
2234
8.15k
        pdf_drop_obj(ctx, obj);
2235
8.15k
      }
2236
209k
    }
2237
    /* Parsing our way through the stream can cause the xref to be
2238
     * solidified, which will move an entry. We therefore can't
2239
     * read the entry for returning until no more parsing is to be
2240
     * done. Thus we end up reading this entry twice. */
2241
9.84k
    if (ret_idx >= 0)
2242
1.83k
      ret_entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[ret_idx]);
2243
9.84k
  }
2244
23.8k
  fz_always(ctx)
2245
11.9k
  {
2246
11.9k
    fz_drop_stream(ctx, stm);
2247
11.9k
    fz_drop_stream(ctx, sub);
2248
11.9k
    fz_free(ctx, ofsbuf);
2249
11.9k
    fz_free(ctx, numbuf);
2250
11.9k
    pdf_unmark_obj(ctx, objstm);
2251
11.9k
    pdf_drop_obj(ctx, objstm);
2252
11.9k
  }
2253
11.9k
  fz_catch(ctx)
2254
9.50k
  {
2255
9.50k
    fz_rethrow(ctx);
2256
9.50k
  }
2257
339
  return ret_entry;
2258
9.84k
}
2259
2260
/*
2261
 * object loading
2262
 */
2263
static int
2264
pdf_obj_read(fz_context *ctx, pdf_document *doc, int64_t *offset, int *nump, pdf_obj **page)
2265
0
{
2266
0
  pdf_lexbuf *buf = &doc->lexbuf.base;
2267
0
  int num, gen, tok;
2268
0
  int64_t numofs, genofs, stmofs, tmpofs, newtmpofs;
2269
0
  int xref_len;
2270
0
  pdf_xref_entry *entry;
2271
2272
0
  numofs = *offset;
2273
0
  fz_seek(ctx, doc->file, numofs, SEEK_SET);
2274
2275
  /* We expect to read 'num' here */
2276
0
  tok = pdf_lex(ctx, doc->file, buf);
2277
0
  genofs = fz_tell(ctx, doc->file);
2278
0
  if (tok != PDF_TOK_INT)
2279
0
  {
2280
    /* Failed! */
2281
0
    DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
2282
0
    *offset = genofs;
2283
0
    return tok == PDF_TOK_EOF;
2284
0
  }
2285
0
  *nump = num = buf->i;
2286
2287
  /* We expect to read 'gen' here */
2288
0
  tok = pdf_lex(ctx, doc->file, buf);
2289
0
  tmpofs = fz_tell(ctx, doc->file);
2290
0
  if (tok != PDF_TOK_INT)
2291
0
  {
2292
    /* Failed! */
2293
0
    DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
2294
0
    *offset = tmpofs;
2295
0
    return tok == PDF_TOK_EOF;
2296
0
  }
2297
0
  gen = buf->i;
2298
2299
  /* We expect to read 'obj' here */
2300
0
  do
2301
0
  {
2302
0
    tmpofs = fz_tell(ctx, doc->file);
2303
0
    tok = pdf_lex(ctx, doc->file, buf);
2304
0
    if (tok == PDF_TOK_OBJ)
2305
0
      break;
2306
0
    if (tok != PDF_TOK_INT)
2307
0
    {
2308
0
      DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
2309
0
      *offset = fz_tell(ctx, doc->file);
2310
0
      return tok == PDF_TOK_EOF;
2311
0
    }
2312
0
    DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
2313
0
    *nump = num = gen;
2314
0
    numofs = genofs;
2315
0
    gen = buf->i;
2316
0
    genofs = tmpofs;
2317
0
  }
2318
0
  while (1);
2319
2320
  /* Now we read the actual object */
2321
0
  xref_len = pdf_xref_len(ctx, doc);
2322
2323
  /* When we are reading a progressive file, we typically see:
2324
   *    File Header
2325
   *    obj m (Linearization params)
2326
   *    xref #1 (refers to objects m-n)
2327
   *    obj m+1
2328
   *    ...
2329
   *    obj n
2330
   *    obj 1
2331
   *    ...
2332
   *    obj n-1
2333
   *    xref #2
2334
   *
2335
   * The linearisation params are read elsewhere, hence
2336
   * whenever we read an object it should just go into the
2337
   * previous xref.
2338
   */
2339
0
  tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs, NULL);
2340
2341
0
  do /* So we can break out of it */
2342
0
  {
2343
0
    if (num <= 0 || num >= xref_len)
2344
0
    {
2345
0
      fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
2346
0
      break;
2347
0
    }
2348
0
    if (gen != 0)
2349
0
    {
2350
0
      fz_warn(ctx, "Unexpected non zero generation number in linearized file");
2351
0
    }
2352
0
    entry = pdf_get_populating_xref_entry(ctx, doc, num);
2353
0
    if (entry->type != 0)
2354
0
    {
2355
0
      DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
2356
0
      break;
2357
0
    }
2358
0
    if (page && *page)
2359
0
    {
2360
0
      DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
2361
0
      if (!entry->obj)
2362
0
        entry->obj = pdf_keep_obj(ctx, *page);
2363
2364
0
      if (doc->linear_page_refs[doc->linear_page_num] == NULL)
2365
0
        doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(ctx, doc, num, gen);
2366
0
    }
2367
0
    else
2368
0
    {
2369
0
      DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
2370
0
    }
2371
0
    entry->type = 'n';
2372
0
    entry->gen = gen; // XXX: was 0
2373
0
    entry->num = num;
2374
0
    entry->ofs = numofs;
2375
0
    entry->stm_ofs = stmofs;
2376
0
  }
2377
0
  while (0);
2378
0
  if (page && *page)
2379
0
    doc->linear_page_num++;
2380
2381
0
  if (tok == PDF_TOK_ENDOBJ)
2382
0
  {
2383
0
    *offset = fz_tell(ctx, doc->file);
2384
0
  }
2385
0
  else
2386
0
  {
2387
0
    *offset = newtmpofs;
2388
0
  }
2389
0
  return 0;
2390
0
}
2391
2392
static void
2393
pdf_load_hinted_page(fz_context *ctx, pdf_document *doc, int pagenum)
2394
0
{
2395
0
  pdf_obj *page = NULL;
2396
2397
0
  if (!doc->hints_loaded || !doc->linear_page_refs)
2398
0
    return;
2399
2400
0
  if (doc->linear_page_refs[pagenum])
2401
0
    return;
2402
2403
0
  fz_var(page);
2404
2405
0
  fz_try(ctx)
2406
0
  {
2407
0
    int num = doc->hint_page[pagenum].number;
2408
0
    page = pdf_load_object(ctx, doc, num);
2409
0
    if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, page, PDF_NAME(Type))))
2410
0
    {
2411
      /* We have found the page object! */
2412
0
      DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
2413
0
      doc->linear_page_refs[pagenum] = pdf_new_indirect(ctx, doc, num, 0);
2414
0
    }
2415
0
  }
2416
0
  fz_always(ctx)
2417
0
    pdf_drop_obj(ctx, page);
2418
0
  fz_catch(ctx)
2419
0
  {
2420
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2421
    /* Swallow the error and proceed as normal */
2422
0
    fz_report_error(ctx);
2423
0
  }
2424
0
}
2425
2426
static int
2427
read_hinted_object(fz_context *ctx, pdf_document *doc, int num)
2428
0
{
2429
  /* Try to find the object using our hint table. Find the closest
2430
   * object <= the one we want that has a hint and read forward from
2431
   * there. */
2432
0
  int expected = num;
2433
0
  int curr_pos;
2434
0
  int64_t start, offset;
2435
2436
0
  while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
2437
0
    expected--;
2438
0
  if (expected != num)
2439
0
    DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", expected, num));
2440
0
  if (expected == 0) /* No hints found, just bail */
2441
0
    return 0;
2442
2443
0
  curr_pos = fz_tell(ctx, doc->file);
2444
0
  offset = doc->hint_obj_offsets[expected];
2445
2446
0
  fz_var(expected);
2447
2448
0
  fz_try(ctx)
2449
0
  {
2450
0
    int found;
2451
2452
    /* Try to read forward from there */
2453
0
    do
2454
0
    {
2455
0
      start = offset;
2456
0
      DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
2457
0
      pdf_obj_read(ctx, doc, &offset, &found, 0);
2458
0
      DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
2459
0
      if (found <= expected)
2460
0
      {
2461
        /* We found the right one (or one earlier than
2462
         * we expected). Update the hints. */
2463
0
        doc->hint_obj_offsets[expected] = offset;
2464
0
        doc->hint_obj_offsets[found] = start;
2465
0
        doc->hint_obj_offsets[found+1] = offset;
2466
        /* Retry with the next one */
2467
0
        expected = found+1;
2468
0
      }
2469
0
      else
2470
0
      {
2471
        /* We found one later than we expected. */
2472
0
        doc->hint_obj_offsets[expected] = 0;
2473
0
        doc->hint_obj_offsets[found] = start;
2474
0
        doc->hint_obj_offsets[found+1] = offset;
2475
0
        while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
2476
0
          expected--;
2477
0
        if (expected == 0) /* No hints found, we give up */
2478
0
          break;
2479
0
      }
2480
0
    }
2481
0
    while (found != num);
2482
0
  }
2483
0
  fz_always(ctx)
2484
0
  {
2485
0
    fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
2486
0
  }
2487
0
  fz_catch(ctx)
2488
0
  {
2489
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2490
    /* FIXME: Currently we ignore the hint. Perhaps we should
2491
     * drop back to non-hinted operation here. */
2492
0
    doc->hint_obj_offsets[expected] = 0;
2493
0
    fz_rethrow(ctx);
2494
0
  }
2495
0
  return expected != 0;
2496
0
}
2497
2498
pdf_obj *
2499
pdf_load_unencrypted_object(fz_context *ctx, pdf_document *doc, int num)
2500
0
{
2501
0
  pdf_xref_entry *x;
2502
2503
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2504
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2505
2506
0
  x = pdf_get_xref_entry_no_null(ctx, doc, num);
2507
0
  if (x->type == 'n')
2508
0
  {
2509
0
    fz_seek(ctx, doc->file, x->ofs, SEEK_SET);
2510
0
    return pdf_parse_ind_obj(ctx, doc, doc->file, NULL, NULL, NULL, NULL);
2511
0
  }
2512
0
  return NULL;
2513
0
}
2514
2515
pdf_xref_entry *
2516
pdf_cache_object(fz_context *ctx, pdf_document *doc, int num)
2517
3.92M
{
2518
3.92M
  pdf_xref_entry *x;
2519
3.92M
  int rnum, rgen, try_repair;
2520
2521
3.92M
  fz_var(try_repair);
2522
2523
3.92M
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2524
37.2k
    fz_throw(ctx, FZ_ERROR_FORMAT, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2525
2526
3.89M
object_updated:
2527
3.89M
  try_repair = 0;
2528
3.89M
  rnum = num;
2529
2530
3.89M
  x = pdf_get_xref_entry(ctx, doc, num);
2531
3.89M
  if (x == NULL)
2532
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find object in xref (%d 0 R)", num);
2533
2534
3.89M
  if (x->obj != NULL)
2535
3.60M
    return x;
2536
2537
291k
  if (x->type == 'f')
2538
92.4k
  {
2539
92.4k
    x->obj = PDF_NULL;
2540
92.4k
  }
2541
199k
  else if (x->type == 'n')
2542
186k
  {
2543
186k
    fz_seek(ctx, doc->file, x->ofs, SEEK_SET);
2544
2545
373k
    fz_try(ctx)
2546
373k
    {
2547
186k
      x->obj = pdf_parse_ind_obj(ctx, doc, doc->file,
2548
186k
          &rnum, &rgen, &x->stm_ofs, &try_repair);
2549
186k
    }
2550
373k
    fz_catch(ctx)
2551
91.1k
    {
2552
91.1k
      fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2553
91.1k
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2554
91.1k
      if (!try_repair)
2555
91.1k
        fz_rethrow(ctx);
2556
86
      else
2557
86
        fz_report_error(ctx);
2558
91.1k
    }
2559
2560
95.8k
    if (!try_repair && rnum != num)
2561
11
    {
2562
11
      pdf_drop_obj(ctx, x->obj);
2563
11
      x->type = 'f';
2564
11
      x->ofs = -1;
2565
11
      x->gen = 0;
2566
11
      x->num = 0;
2567
11
      x->stm_ofs = 0;
2568
11
      x->obj = NULL;
2569
11
      try_repair = (doc->repair_attempted == 0);
2570
11
    }
2571
2572
95.8k
    if (try_repair)
2573
97
    {
2574
97
perform_repair:
2575
194
      fz_try(ctx)
2576
194
      {
2577
97
        pdf_repair_xref(ctx, doc);
2578
97
        pdf_prime_xref_index(ctx, doc);
2579
97
        pdf_repair_obj_stms(ctx, doc);
2580
97
        pdf_repair_trailer(ctx, doc);
2581
97
      }
2582
194
      fz_catch(ctx)
2583
46
      {
2584
46
        fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2585
46
        fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2586
46
        fz_rethrow_if(ctx, FZ_ERROR_REPAIRED);
2587
46
        fz_report_error(ctx);
2588
46
        if (rnum == num)
2589
40
          fz_throw(ctx, FZ_ERROR_FORMAT, "cannot parse object (%d 0 R)", num);
2590
6
        else
2591
6
          fz_throw(ctx, FZ_ERROR_FORMAT, "found object (%d 0 R) instead of (%d 0 R)", rnum, num);
2592
46
      }
2593
51
      goto object_updated;
2594
97
    }
2595
2596
95.7k
    if (doc->crypt)
2597
2.51k
      pdf_crypt_obj(ctx, doc->crypt, x->obj, x->num, x->gen);
2598
95.7k
  }
2599
12.0k
  else if (x->type == 'o')
2600
11.9k
  {
2601
11.9k
    if (!x->obj)
2602
11.9k
    {
2603
11.9k
      pdf_xref_entry *orig_x = x;
2604
11.9k
      pdf_xref_entry *ox = x; /* This init is unused, but it shuts warnings up. */
2605
11.9k
      orig_x->type = 'O'; /* Mark this node so we know we're recursing. */
2606
23.9k
      fz_try(ctx)
2607
23.9k
        x = pdf_load_obj_stm(ctx, doc, x->ofs, &doc->lexbuf.base, num);
2608
23.9k
      fz_always(ctx)
2609
11.9k
      {
2610
        /* Most of the time ox == orig_x, but if pdf_load_obj_stm performed a
2611
         * repair, it may not be. It is safe to call pdf_get_xref_entry_no_change
2612
         * here, as it does not try/catch. */
2613
11.9k
        ox = pdf_get_xref_entry_no_change(ctx, doc, num);
2614
        /* Bug 706762: ox can be NULL if the object went away during a repair. */
2615
11.9k
        if (ox && ox->type == 'O')
2616
11.9k
          ox->type = 'o'; /* Not recursing any more. */
2617
11.9k
      }
2618
11.9k
      fz_catch(ctx)
2619
9.55k
        fz_rethrow(ctx);
2620
2.44k
      if (x == NULL)
2621
612
        fz_throw(ctx, FZ_ERROR_FORMAT, "cannot load object stream containing object (%d 0 R)", num);
2622
1.83k
      if (!x->obj)
2623
0
      {
2624
0
        x->type = 'f';
2625
0
        if (ox)
2626
0
          ox->type = 'f';
2627
0
        if (doc->repair_attempted)
2628
0
          fz_throw(ctx, FZ_ERROR_FORMAT, "object (%d 0 R) was not found in its object stream", num);
2629
0
        goto perform_repair;
2630
0
      }
2631
1.83k
    }
2632
11.9k
  }
2633
100
  else if (doc->hint_obj_offsets && read_hinted_object(ctx, doc, num))
2634
0
  {
2635
0
    goto object_updated;
2636
0
  }
2637
100
  else if (doc->file_length && doc->linear_pos < doc->file_length)
2638
0
  {
2639
0
    fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d 0 R) - not loaded yet?", num);
2640
0
  }
2641
100
  else
2642
100
  {
2643
100
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find object in xref (%d 0 R)", num);
2644
100
  }
2645
2646
190k
  pdf_set_obj_parent(ctx, x->obj, num);
2647
190k
  return x;
2648
291k
}
2649
2650
pdf_obj *
2651
pdf_load_object(fz_context *ctx, pdf_document *doc, int num)
2652
141k
{
2653
141k
  pdf_xref_entry *entry = pdf_cache_object(ctx, doc, num);
2654
141k
  return pdf_keep_obj(ctx, entry->obj);
2655
141k
}
2656
2657
pdf_obj *
2658
pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref)
2659
3.52M
{
2660
3.52M
  if (pdf_is_indirect(ctx, ref))
2661
3.52M
  {
2662
3.52M
    pdf_document *doc = pdf_get_indirect_document(ctx, ref);
2663
3.52M
    int num = pdf_to_num(ctx, ref);
2664
3.52M
    pdf_xref_entry *entry;
2665
2666
3.52M
    if (!doc)
2667
0
      return NULL;
2668
3.52M
    if (num <= 0)
2669
702
    {
2670
702
      fz_warn(ctx, "invalid indirect reference (%d 0 R)", num);
2671
702
      return NULL;
2672
702
    }
2673
2674
7.05M
    fz_try(ctx)
2675
7.05M
      entry = pdf_cache_object(ctx, doc, num);
2676
7.05M
    fz_catch(ctx)
2677
133k
    {
2678
133k
      fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2679
133k
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
2680
133k
      fz_rethrow_if(ctx, FZ_ERROR_REPAIRED);
2681
133k
      fz_report_error(ctx);
2682
133k
      fz_warn(ctx, "cannot load object (%d 0 R) into cache", num);
2683
133k
      return NULL;
2684
133k
    }
2685
2686
3.39M
    ref = entry->obj;
2687
3.39M
  }
2688
3.39M
  return ref;
2689
3.52M
}
2690
2691
pdf_obj *
2692
pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref)
2693
3.52M
{
2694
3.52M
  int sanity = 10;
2695
2696
7.05M
  while (pdf_is_indirect(ctx, ref))
2697
3.52M
  {
2698
3.52M
    if (--sanity == 0)
2699
102
    {
2700
102
      fz_warn(ctx, "too many indirections (possible indirection cycle involving %d 0 R)", pdf_to_num(ctx, ref));
2701
102
      return NULL;
2702
102
    }
2703
2704
3.52M
    ref = pdf_resolve_indirect(ctx, ref);
2705
3.52M
  }
2706
2707
3.52M
  return ref;
2708
3.52M
}
2709
2710
int
2711
pdf_count_objects(fz_context *ctx, pdf_document *doc)
2712
0
{
2713
0
  return pdf_xref_len(ctx, doc);
2714
0
}
2715
2716
int
2717
pdf_is_local_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
2718
115
{
2719
115
  pdf_xref *xref = doc->local_xref;
2720
115
  pdf_xref_subsec *sub;
2721
115
  int num;
2722
2723
115
  if (!pdf_is_indirect(ctx, obj))
2724
9
    return 0;
2725
2726
106
  if (xref == NULL)
2727
46
    return 0; /* no local xref present */
2728
2729
60
  num = pdf_to_num(ctx, obj);
2730
2731
  /* Local xrefs only ever have 1 section, and it should be solid. */
2732
60
  sub = xref->subsec;
2733
60
  if (num >= sub->start && num < sub->start + sub->len)
2734
60
    return sub->table[num - sub->start].type != 0;
2735
2736
0
  return 0;
2737
60
}
2738
2739
static int
2740
pdf_create_local_object(fz_context *ctx, pdf_document *doc)
2741
5.08k
{
2742
  /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
2743
5.08k
  pdf_xref_entry *entry;
2744
5.08k
  int num;
2745
2746
5.08k
  num = doc->local_xref->num_objects;
2747
2748
5.08k
  entry = pdf_get_local_xref_entry(ctx, doc, num);
2749
5.08k
  entry->type = 'f';
2750
5.08k
  entry->ofs = -1;
2751
5.08k
  entry->gen = 0;
2752
5.08k
  entry->num = num;
2753
5.08k
  entry->stm_ofs = 0;
2754
5.08k
  entry->stm_buf = NULL;
2755
5.08k
  entry->obj = NULL;
2756
5.08k
  return num;
2757
5.08k
}
2758
2759
int
2760
pdf_create_object(fz_context *ctx, pdf_document *doc)
2761
5.08k
{
2762
  /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
2763
5.08k
  pdf_xref_entry *entry;
2764
5.08k
  int num;
2765
2766
5.08k
  if (doc->local_xref && doc->local_xref_nesting > 0)
2767
5.08k
    return pdf_create_local_object(ctx, doc);
2768
2769
0
  num = pdf_xref_len(ctx, doc);
2770
2771
0
  if (num > PDF_MAX_OBJECT_NUMBER)
2772
0
    fz_throw(ctx, FZ_ERROR_LIMIT, "too many objects stored in pdf");
2773
2774
0
  entry = pdf_get_incremental_xref_entry(ctx, doc, num);
2775
0
  entry->type = 'f';
2776
0
  entry->ofs = -1;
2777
0
  entry->gen = 0;
2778
0
  entry->num = num;
2779
0
  entry->stm_ofs = 0;
2780
0
  entry->stm_buf = NULL;
2781
0
  entry->obj = NULL;
2782
2783
0
  pdf_add_journal_fragment(ctx, doc, num, NULL, NULL, 1);
2784
2785
0
  return num;
2786
0
}
2787
2788
static void
2789
pdf_delete_local_object(fz_context *ctx, pdf_document *doc, int num)
2790
0
{
2791
0
  pdf_xref_entry *x;
2792
2793
0
  if (doc->local_xref == NULL || doc->local_xref_nesting == 0)
2794
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "No local xref to delete from!");
2795
2796
0
  if (num <= 0 || num >= doc->local_xref->num_objects)
2797
0
  {
2798
0
    fz_warn(ctx, "local object out of range (%d 0 R); xref size %d", num, doc->local_xref->num_objects);
2799
0
    return;
2800
0
  }
2801
2802
0
  x = pdf_get_local_xref_entry(ctx, doc, num);
2803
2804
0
  fz_drop_buffer(ctx, x->stm_buf);
2805
0
  pdf_drop_obj(ctx, x->obj);
2806
2807
0
  x->type = 'f';
2808
0
  x->ofs = 0;
2809
0
  x->gen += 1;
2810
0
  x->num = 0;
2811
0
  x->stm_ofs = 0;
2812
0
  x->stm_buf = NULL;
2813
0
  x->obj = NULL;
2814
0
}
2815
2816
void
2817
pdf_delete_object(fz_context *ctx, pdf_document *doc, int num)
2818
0
{
2819
0
  pdf_xref_entry *x;
2820
0
  pdf_xref *xref;
2821
0
  int j;
2822
2823
0
  if (doc->local_xref && doc->local_xref_nesting > 0)
2824
0
  {
2825
0
    pdf_delete_local_object(ctx, doc, num);
2826
0
    return;
2827
0
  }
2828
2829
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2830
0
  {
2831
0
    fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2832
0
    return;
2833
0
  }
2834
2835
0
  x = pdf_get_incremental_xref_entry(ctx, doc, num);
2836
2837
0
  fz_drop_buffer(ctx, x->stm_buf);
2838
0
  pdf_drop_obj(ctx, x->obj);
2839
2840
0
  x->type = 'f';
2841
0
  x->ofs = 0;
2842
0
  x->gen += 1;
2843
0
  x->num = 0;
2844
0
  x->stm_ofs = 0;
2845
0
  x->stm_buf = NULL;
2846
0
  x->obj = NULL;
2847
2848
  /* Currently we've left a 'free' object in the incremental
2849
   * section. This is enough to cause us to think that the
2850
   * document has changes. Check back in the non-incremental
2851
   * sections to see if the last instance of the object there
2852
   * was free (or if this object never appeared). If so, we
2853
   * can mark this object as non-existent in the incremental
2854
   * xref. This is important so we can 'undo' back to emptiness
2855
   * after we save/when we reload a snapshot. */
2856
0
  for (j = 1; j < doc->num_xref_sections; j++)
2857
0
  {
2858
0
    xref = &doc->xref_sections[j];
2859
2860
0
    if (num < xref->num_objects)
2861
0
    {
2862
0
      pdf_xref_subsec *sub;
2863
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
2864
0
      {
2865
0
        pdf_xref_entry *entry;
2866
2867
0
        if (num < sub->start || num >= sub->start + sub->len)
2868
0
          continue;
2869
2870
0
        entry = &sub->table[num - sub->start];
2871
0
        if (entry->type)
2872
0
        {
2873
0
          if (entry->type == 'f')
2874
0
          {
2875
            /* It was free already! */
2876
0
            x->type = 0;
2877
0
            x->gen = 0;
2878
0
          }
2879
          /* It was a real object. */
2880
0
          return;
2881
0
        }
2882
0
      }
2883
0
    }
2884
0
  }
2885
  /* It never appeared before. */
2886
0
  x->type = 0;
2887
0
  x->gen = 0;
2888
0
}
2889
2890
static void
2891
pdf_update_local_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
2892
5.08k
{
2893
5.08k
  pdf_xref_entry *x;
2894
2895
5.08k
  if (doc->local_xref == NULL || doc->local_xref_nesting == 0)
2896
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't update local object without a local xref");
2897
2898
5.08k
  if (!newobj)
2899
0
  {
2900
0
    pdf_delete_local_object(ctx, doc, num);
2901
0
    return;
2902
0
  }
2903
2904
5.08k
  x = pdf_get_local_xref_entry(ctx, doc, num);
2905
2906
5.08k
  pdf_drop_obj(ctx, x->obj);
2907
2908
5.08k
  x->type = 'n';
2909
5.08k
  x->ofs = 0;
2910
5.08k
  x->obj = pdf_keep_obj(ctx, newobj);
2911
2912
5.08k
  pdf_set_obj_parent(ctx, newobj, num);
2913
5.08k
}
2914
2915
void
2916
pdf_update_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
2917
5.08k
{
2918
5.08k
  pdf_xref_entry *x;
2919
2920
5.08k
  if (!doc)
2921
0
    return;
2922
2923
5.08k
  if (doc->local_xref && doc->local_xref_nesting > 0)
2924
5.08k
  {
2925
5.08k
    pdf_update_local_object(ctx, doc, num, newobj);
2926
5.08k
    return;
2927
5.08k
  }
2928
2929
0
  if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2930
0
  {
2931
0
    fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2932
0
    return;
2933
0
  }
2934
2935
0
  if (!newobj)
2936
0
  {
2937
0
    pdf_delete_object(ctx, doc, num);
2938
0
    return;
2939
0
  }
2940
2941
0
  x = pdf_get_incremental_xref_entry(ctx, doc, num);
2942
2943
0
  pdf_drop_obj(ctx, x->obj);
2944
2945
0
  x->type = 'n';
2946
0
  x->ofs = 0;
2947
0
  x->obj = pdf_keep_obj(ctx, newobj);
2948
2949
0
  pdf_set_obj_parent(ctx, newobj, num);
2950
0
}
2951
2952
void
2953
pdf_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj, fz_buffer *newbuf, int compressed)
2954
4.51k
{
2955
4.51k
  int num;
2956
4.51k
  pdf_xref_entry *x;
2957
2958
4.51k
  if (pdf_is_indirect(ctx, obj))
2959
4.51k
    num = pdf_to_num(ctx, obj);
2960
0
  else
2961
0
    num = pdf_obj_parent_num(ctx, obj);
2962
2963
  /* Write the Length first, as this has the effect of moving the
2964
   * old object into the journal for undo. This also moves the
2965
   * stream buffer with it, keeping it consistent. */
2966
4.51k
  pdf_dict_put_int(ctx, obj, PDF_NAME(Length), fz_buffer_storage(ctx, newbuf, NULL));
2967
2968
4.51k
  if (doc->local_xref && doc->local_xref_nesting > 0)
2969
4.51k
  {
2970
4.51k
    x = pdf_get_local_xref_entry(ctx, doc, num);
2971
4.51k
  }
2972
0
  else
2973
0
  {
2974
0
    if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2975
0
    {
2976
0
      fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2977
0
      return;
2978
0
    }
2979
2980
0
    x = pdf_get_xref_entry_no_null(ctx, doc, num);
2981
0
  }
2982
2983
4.51k
  fz_drop_buffer(ctx, x->stm_buf);
2984
4.51k
  x->stm_buf = fz_keep_buffer(ctx, newbuf);
2985
2986
4.51k
  if (!compressed)
2987
4.51k
  {
2988
4.51k
    pdf_dict_del(ctx, obj, PDF_NAME(Filter));
2989
4.51k
    pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));
2990
4.51k
  }
2991
4.51k
}
2992
2993
int
2994
pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *buf, int size)
2995
0
{
2996
0
  if (!strcmp(key, FZ_META_FORMAT))
2997
0
  {
2998
0
    int version = pdf_version(ctx, doc);
2999
0
    return 1 + (int)fz_snprintf(buf, size, "PDF %d.%d", version/10, version % 10);
3000
0
  }
3001
3002
0
  if (!strcmp(key, FZ_META_ENCRYPTION))
3003
0
  {
3004
0
    if (doc->crypt)
3005
0
    {
3006
0
      const char *stream_method = pdf_crypt_stream_method(ctx, doc->crypt);
3007
0
      const char *string_method = pdf_crypt_string_method(ctx, doc->crypt);
3008
0
      if (stream_method == string_method)
3009
0
        return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit %s",
3010
0
            pdf_crypt_version(ctx, doc->crypt),
3011
0
            pdf_crypt_revision(ctx, doc->crypt),
3012
0
            pdf_crypt_length(ctx, doc->crypt),
3013
0
            pdf_crypt_string_method(ctx, doc->crypt));
3014
0
      else
3015
0
        return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit streams: %s strings: %s",
3016
0
            pdf_crypt_version(ctx, doc->crypt),
3017
0
            pdf_crypt_revision(ctx, doc->crypt),
3018
0
            pdf_crypt_length(ctx, doc->crypt),
3019
0
            pdf_crypt_stream_method(ctx, doc->crypt),
3020
0
            pdf_crypt_string_method(ctx, doc->crypt));
3021
0
    }
3022
0
    else
3023
0
      return 1 + (int)fz_strlcpy(buf, "None", size);
3024
0
  }
3025
3026
0
  if (strstr(key, "info:") == key)
3027
0
  {
3028
0
    pdf_obj *info;
3029
0
    const char *s;
3030
0
    int n;
3031
3032
0
    info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
3033
0
    if (!info)
3034
0
      return -1;
3035
3036
0
    info = pdf_dict_gets(ctx, info, key + 5);
3037
0
    if (!info)
3038
0
      return -1;
3039
3040
0
    s = pdf_to_text_string(ctx, info);
3041
0
    if (strlen(s) <= 0)
3042
0
      return -1;
3043
3044
0
    n = 1 + (int)fz_strlcpy(buf, s, size);
3045
0
    return n;
3046
0
  }
3047
3048
0
  return -1;
3049
0
}
3050
3051
void
3052
pdf_set_metadata(fz_context *ctx, pdf_document *doc, const char *key, const char *value)
3053
0
{
3054
3055
0
  pdf_obj *info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
3056
3057
0
  pdf_begin_operation(ctx, doc, "Set Metadata");
3058
3059
0
  fz_try(ctx)
3060
0
  {
3061
    /* Ensure we have an Info dictionary. */
3062
0
    if (!pdf_is_dict(ctx, info))
3063
0
    {
3064
0
      info = pdf_add_new_dict(ctx, doc, 8);
3065
0
      pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), info);
3066
0
    }
3067
3068
0
    if (!strcmp(key, FZ_META_INFO_TITLE))
3069
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Title), value);
3070
0
    else if (!strcmp(key, FZ_META_INFO_AUTHOR))
3071
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Author), value);
3072
0
    else if (!strcmp(key, FZ_META_INFO_SUBJECT))
3073
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Subject), value);
3074
0
    else if (!strcmp(key, FZ_META_INFO_KEYWORDS))
3075
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Keywords), value);
3076
0
    else if (!strcmp(key, FZ_META_INFO_CREATOR))
3077
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Creator), value);
3078
0
    else if (!strcmp(key, FZ_META_INFO_PRODUCER))
3079
0
      pdf_dict_put_text_string(ctx, info, PDF_NAME(Producer), value);
3080
0
    else if (!strcmp(key, FZ_META_INFO_CREATIONDATE))
3081
0
    {
3082
0
      int64_t time = pdf_parse_date(ctx, value);
3083
0
      if (time >= 0)
3084
0
        pdf_dict_put_date(ctx, info, PDF_NAME(CreationDate), time);
3085
0
    }
3086
0
    else if (!strcmp(key, FZ_META_INFO_MODIFICATIONDATE))
3087
0
    {
3088
0
      int64_t time = pdf_parse_date(ctx, value);
3089
0
      if (time >= 0)
3090
0
        pdf_dict_put_date(ctx, info, PDF_NAME(ModDate), time);
3091
0
    }
3092
3093
0
    if (!strncmp(key, FZ_META_INFO, strlen(FZ_META_INFO)))
3094
0
      key += strlen(FZ_META_INFO);
3095
0
    pdf_dict_put_text_string(ctx, info, pdf_new_name(ctx, key), value);
3096
0
    pdf_end_operation(ctx, doc);
3097
0
  }
3098
0
  fz_catch(ctx)
3099
0
  {
3100
0
    pdf_abandon_operation(ctx, doc);
3101
0
    fz_rethrow(ctx);
3102
0
  }
3103
0
}
3104
3105
static fz_link_dest
3106
pdf_resolve_link_imp(fz_context *ctx, fz_document *doc_, const char *uri)
3107
0
{
3108
0
  pdf_document *doc = (pdf_document*)doc_;
3109
0
  return pdf_resolve_link_dest(ctx, doc, uri);
3110
0
}
3111
3112
/*
	Build a URI string for an explicit link destination. The document
	argument is not used; the work is done by
	pdf_new_uri_from_explicit_dest().
*/
char *pdf_format_link_uri(fz_context *ctx, fz_document *doc, fz_link_dest dest)
{
	char *uri = pdf_new_uri_from_explicit_dest(ctx, dest);

	return uri;
}
3116
3117
/*
3118
  Initializers for the fz_document interface.
3119
3120
  The functions are split across two files to allow calls to a
3121
  version of the constructor that does not link in the interpreter.
3122
  The interpreter references the built-in font and cmap resources
3123
  which are quite big. Not linking those into the mutool binary
3124
  saves roughly 6MB of space.
3125
*/
3126
3127
static pdf_document *
3128
pdf_new_document(fz_context *ctx, fz_stream *file)
3129
6.63k
{
3130
6.63k
  pdf_document *doc = fz_new_derived_document(ctx, pdf_document);
3131
3132
6.63k
#ifndef NDEBUG
3133
6.63k
  {
3134
6.63k
    void pdf_verify_name_table_sanity(void);
3135
6.63k
    pdf_verify_name_table_sanity();
3136
6.63k
  }
3137
6.63k
#endif
3138
3139
6.63k
  doc->super.drop_document = (fz_document_drop_fn*)pdf_drop_document_imp;
3140
6.63k
  doc->super.get_output_intent = (fz_document_output_intent_fn*)pdf_document_output_intent;
3141
6.63k
  doc->super.needs_password = (fz_document_needs_password_fn*)pdf_needs_password;
3142
6.63k
  doc->super.authenticate_password = (fz_document_authenticate_password_fn*)pdf_authenticate_password;
3143
6.63k
  doc->super.has_permission = (fz_document_has_permission_fn*)pdf_has_permission;
3144
6.63k
  doc->super.outline_iterator = (fz_document_outline_iterator_fn*)pdf_new_outline_iterator;
3145
6.63k
  doc->super.resolve_link_dest = pdf_resolve_link_imp;
3146
6.63k
  doc->super.format_link_uri = pdf_format_link_uri;
3147
6.63k
  doc->super.count_pages = pdf_count_pages_imp;
3148
6.63k
  doc->super.load_page = pdf_load_page_imp;
3149
6.63k
  doc->super.page_label = pdf_page_label_imp;
3150
6.63k
  doc->super.lookup_metadata = (fz_document_lookup_metadata_fn*)pdf_lookup_metadata;
3151
6.63k
  doc->super.set_metadata = (fz_document_set_metadata_fn*)pdf_set_metadata;
3152
6.63k
  doc->super.run_structure = (fz_document_run_structure_fn *)pdf_run_document_structure;
3153
3154
6.63k
  pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE);
3155
6.63k
  doc->file = fz_keep_stream(ctx, file);
3156
3157
  /* Default to PDF-1.7 if the version header is missing and for new documents */
3158
6.63k
  doc->version = 17;
3159
3160
6.63k
  return doc;
3161
6.63k
}
3162
3163
pdf_document *
3164
pdf_open_document_with_stream(fz_context *ctx, fz_stream *file)
3165
6.63k
{
3166
6.63k
  pdf_document *doc = pdf_new_document(ctx, file);
3167
13.2k
  fz_try(ctx)
3168
13.2k
  {
3169
6.63k
    pdf_init_document(ctx, doc);
3170
6.63k
  }
3171
13.2k
  fz_catch(ctx)
3172
246
  {
3173
    /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */
3174
246
    char message[256];
3175
246
    int code;
3176
246
    fz_strlcpy(message, fz_convert_error(ctx, &code), sizeof message);
3177
246
    fz_drop_document(ctx, &doc->super);
3178
246
    fz_throw(ctx, code, "%s", message);
3179
246
  }
3180
6.39k
  return doc;
3181
6.63k
}
3182
3183
/* Uncomment the following to test progressive loading. */
3184
/* #define TEST_PROGRESSIVE_HACK */
3185
3186
pdf_document *
3187
pdf_open_document(fz_context *ctx, const char *filename)
3188
0
{
3189
0
  fz_stream *file = NULL;
3190
0
  pdf_document *doc = NULL;
3191
3192
0
  fz_var(file);
3193
0
  fz_var(doc);
3194
3195
0
  fz_try(ctx)
3196
0
  {
3197
0
    file = fz_open_file(ctx, filename);
3198
#ifdef TEST_PROGRESSIVE_HACK
3199
    file->progressive = 1;
3200
#endif
3201
0
    doc = pdf_new_document(ctx, file);
3202
0
    pdf_init_document(ctx, doc);
3203
0
  }
3204
0
  fz_always(ctx)
3205
0
  {
3206
0
    fz_drop_stream(ctx, file);
3207
0
  }
3208
0
  fz_catch(ctx)
3209
0
  {
3210
    /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */
3211
0
    char message[256];
3212
0
    int code;
3213
0
    fz_strlcpy(message, fz_convert_error(ctx, &code), sizeof message);
3214
0
    fz_drop_document(ctx, &doc->super);
3215
0
    fz_throw(ctx, code, "%s", message);
3216
0
  }
3217
3218
#ifdef TEST_PROGRESSIVE_HACK
3219
  if (doc->file_reading_linearly)
3220
  {
3221
    fz_try(ctx)
3222
      pdf_progressive_advance(ctx, doc, doc->linear_page_count-1);
3223
    fz_catch(ctx)
3224
    {
3225
      doc->file_reading_linearly = 0;
3226
      /* swallow the error */
3227
    }
3228
  }
3229
#endif
3230
3231
0
  return doc;
3232
0
}
3233
3234
static void
3235
pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum)
3236
0
{
3237
0
  fz_stream *stream = NULL;
3238
0
  pdf_obj *dict;
3239
3240
0
  fz_var(stream);
3241
0
  fz_var(dict);
3242
3243
0
  fz_try(ctx)
3244
0
  {
3245
0
    int i, j, least_num_page_objs, page_obj_num_bits;
3246
0
    int least_page_len, page_len_num_bits, shared_hint_offset;
3247
    /* int least_page_offset, page_offset_num_bits; */
3248
    /* int least_content_stream_len, content_stream_len_num_bits; */
3249
0
    int num_shared_obj_num_bits, shared_obj_num_bits;
3250
    /* int numerator_bits, denominator_bits; */
3251
0
    int shared;
3252
0
    int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
3253
0
    int shared_obj_count_total;
3254
0
    int least_shared_group_len, shared_group_len_num_bits;
3255
0
    int max_object_num = pdf_xref_len(ctx, doc);
3256
3257
0
    stream = pdf_open_stream_number(ctx, doc, objnum);
3258
0
    dict = pdf_get_xref_entry_no_null(ctx, doc, objnum)->obj;
3259
0
    if (dict == NULL || !pdf_is_dict(ctx, dict))
3260
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "malformed hint object");
3261
3262
0
    shared_hint_offset = pdf_dict_get_int(ctx, dict, PDF_NAME(S));
3263
3264
    /* Malloc the structures (use realloc to cope with the fact we
3265
     * may try this several times before enough data is loaded) */
3266
0
    doc->hint_page = fz_realloc_array(ctx, doc->hint_page, doc->linear_page_count+1, pdf_hint_page);
3267
0
    memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->linear_page_count+1));
3268
0
    doc->hint_obj_offsets = fz_realloc_array(ctx, doc->hint_obj_offsets, max_object_num, int64_t);
3269
0
    memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num);
3270
0
    doc->hint_obj_offsets_max = max_object_num;
3271
3272
    /* Read the page object hints table: Header first */
3273
0
    least_num_page_objs = fz_read_bits(ctx, stream, 32);
3274
    /* The following is sometimes a lie, but we read this version,
3275
     * as other table values are built from it. In
3276
     * pdf_reference17.pdf, this points to 2 objects before the
3277
     * first pages page object. */
3278
0
    doc->hint_page[0].offset = fz_read_bits(ctx, stream, 32);
3279
0
    if (doc->hint_page[0].offset > doc->hint_object_offset)
3280
0
      doc->hint_page[0].offset += doc->hint_object_length;
3281
0
    page_obj_num_bits = fz_read_bits(ctx, stream, 16);
3282
0
    least_page_len = fz_read_bits(ctx, stream, 32);
3283
0
    page_len_num_bits = fz_read_bits(ctx, stream, 16);
3284
0
    /* least_page_offset = */ (void) fz_read_bits(ctx, stream, 32);
3285
0
    /* page_offset_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
3286
0
    /* least_content_stream_len = */ (void) fz_read_bits(ctx, stream, 32);
3287
0
    /* content_stream_len_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
3288
0
    num_shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3289
0
    shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3290
0
    /* numerator_bits = */ (void) fz_read_bits(ctx, stream, 16);
3291
0
    /* denominator_bits = */ (void) fz_read_bits(ctx, stream, 16);
3292
3293
    /* Item 1: Page object numbers */
3294
0
    doc->hint_page[0].number = doc->linear_page1_obj_num;
3295
    /* We don't care about the number of objects in the first page */
3296
0
    (void)fz_read_bits(ctx, stream, page_obj_num_bits);
3297
0
    j = 1;
3298
0
    for (i = 1; i < doc->linear_page_count; i++)
3299
0
    {
3300
0
      int delta_page_objs = fz_read_bits(ctx, stream, page_obj_num_bits);
3301
3302
0
      doc->hint_page[i].number = j;
3303
0
      j += least_num_page_objs + delta_page_objs;
3304
0
    }
3305
0
    doc->hint_page[i].number = j; /* Not a real page object */
3306
0
    fz_sync_bits(ctx, stream);
3307
    /* Item 2: Page lengths */
3308
0
    j = doc->hint_page[0].offset;
3309
0
    for (i = 0; i < doc->linear_page_count; i++)
3310
0
    {
3311
0
      int delta_page_len = fz_read_bits(ctx, stream, page_len_num_bits);
3312
0
      int old = j;
3313
3314
0
      doc->hint_page[i].offset = j;
3315
0
      j += least_page_len + delta_page_len;
3316
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3317
0
        j += doc->hint_object_length;
3318
0
    }
3319
0
    doc->hint_page[i].offset = j;
3320
0
    fz_sync_bits(ctx, stream);
3321
    /* Item 3: Shared references */
3322
0
    shared = 0;
3323
0
    for (i = 0; i < doc->linear_page_count; i++)
3324
0
    {
3325
0
      int num_shared_objs = fz_read_bits(ctx, stream, num_shared_obj_num_bits);
3326
0
      doc->hint_page[i].index = shared;
3327
0
      shared += num_shared_objs;
3328
0
    }
3329
0
    doc->hint_page[i].index = shared;
3330
0
    doc->hint_shared_ref = fz_realloc_array(ctx, doc->hint_shared_ref, shared, int);
3331
0
    memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared);
3332
0
    fz_sync_bits(ctx, stream);
3333
    /* Item 4: Shared references */
3334
0
    for (i = 0; i < shared; i++)
3335
0
    {
3336
0
      int ref = fz_read_bits(ctx, stream, shared_obj_num_bits);
3337
0
      doc->hint_shared_ref[i] = ref;
3338
0
    }
3339
    /* Skip items 5,6,7 as we don't use them */
3340
3341
0
    fz_seek(ctx, stream, shared_hint_offset, SEEK_SET);
3342
3343
    /* Read the shared object hints table: Header first */
3344
0
    shared_obj_num = fz_read_bits(ctx, stream, 32);
3345
0
    shared_obj_offset = fz_read_bits(ctx, stream, 32);
3346
0
    if (shared_obj_offset > doc->hint_object_offset)
3347
0
      shared_obj_offset += doc->hint_object_length;
3348
0
    shared_obj_count_page1 = fz_read_bits(ctx, stream, 32);
3349
0
    shared_obj_count_total = fz_read_bits(ctx, stream, 32);
3350
0
    shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
3351
0
    least_shared_group_len = fz_read_bits(ctx, stream, 32);
3352
0
    shared_group_len_num_bits = fz_read_bits(ctx, stream, 16);
3353
3354
    /* Sanity check the references in Item 4 above to ensure we
3355
     * don't access out of range with malicious files. */
3356
0
    for (i = 0; i < shared; i++)
3357
0
    {
3358
0
      if (doc->hint_shared_ref[i] >= shared_obj_count_total)
3359
0
      {
3360
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "malformed hint stream (shared refs)");
3361
0
      }
3362
0
    }
3363
3364
0
    doc->hint_shared = fz_realloc_array(ctx, doc->hint_shared, shared_obj_count_total+1, pdf_hint_shared);
3365
0
    memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1));
3366
3367
    /* Item 1: Shared references */
3368
0
    j = doc->hint_page[0].offset;
3369
0
    for (i = 0; i < shared_obj_count_page1; i++)
3370
0
    {
3371
0
      int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
3372
0
      int old = j;
3373
0
      doc->hint_shared[i].offset = j;
3374
0
      j += off + least_shared_group_len;
3375
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3376
0
        j += doc->hint_object_length;
3377
0
    }
3378
    /* FIXME: We would have problems recreating the length of the
3379
     * last page 1 shared reference group. But we'll never need
3380
     * to, so ignore it. */
3381
0
    j = shared_obj_offset;
3382
0
    for (; i < shared_obj_count_total; i++)
3383
0
    {
3384
0
      int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
3385
0
      int old = j;
3386
0
      doc->hint_shared[i].offset = j;
3387
0
      j += off + least_shared_group_len;
3388
0
      if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
3389
0
        j += doc->hint_object_length;
3390
0
    }
3391
0
    doc->hint_shared[i].offset = j;
3392
0
    fz_sync_bits(ctx, stream);
3393
    /* Item 2: Signature flags: read these just so we can skip */
3394
0
    for (i = 0; i < shared_obj_count_total; i++)
3395
0
    {
3396
0
      doc->hint_shared[i].number = fz_read_bits(ctx, stream, 1);
3397
0
    }
3398
0
    fz_sync_bits(ctx, stream);
3399
    /* Item 3: Signatures: just skip */
3400
0
    for (i = 0; i < shared_obj_count_total; i++)
3401
0
    {
3402
0
      if (doc->hint_shared[i].number)
3403
0
      {
3404
0
        (void) fz_read_bits(ctx, stream, 128);
3405
0
      }
3406
0
    }
3407
0
    fz_sync_bits(ctx, stream);
3408
    /* Item 4: Shared object object numbers */
3409
0
    j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */
3410
0
    for (i = 0; i < shared_obj_count_page1; i++)
3411
0
    {
3412
0
      doc->hint_shared[i].number = j;
3413
0
      j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
3414
0
    }
3415
0
    j = shared_obj_num;
3416
0
    for (; i < shared_obj_count_total; i++)
3417
0
    {
3418
0
      doc->hint_shared[i].number = j;
3419
0
      j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
3420
0
    }
3421
0
    doc->hint_shared[i].number = j;
3422
3423
    /* Now, actually use the data we have gathered. */
3424
0
    for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++)
3425
0
    {
3426
0
      if (doc->hint_shared[i].number >= 0 && doc->hint_shared[i].number < max_object_num)
3427
0
        doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset;
3428
0
    }
3429
0
    for (i = 0; i < doc->linear_page_count; i++)
3430
0
    {
3431
0
      if (doc->hint_page[i].number >= 0 && doc->hint_page[i].number < max_object_num)
3432
0
        doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset;
3433
0
    }
3434
0
  }
3435
0
  fz_always(ctx)
3436
0
  {
3437
0
    fz_drop_stream(ctx, stream);
3438
0
  }
3439
0
  fz_catch(ctx)
3440
0
  {
3441
0
    fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
3442
    /* Don't try to load hints again */
3443
0
    doc->hints_loaded = 1;
3444
    /* We won't use the linearized object anymore. */
3445
0
    doc->file_reading_linearly = 0;
3446
0
    fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
3447
    /* Any other error becomes a TRYLATER */
3448
0
    fz_report_error(ctx);
3449
0
    fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
3450
0
  }
3451
0
  doc->hints_loaded = 1;
3452
0
}
3453
3454
static void
3455
pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
3456
0
{
3457
0
  pdf_lexbuf *buf = &doc->lexbuf.base;
3458
0
  int64_t curr_pos;
3459
3460
0
  curr_pos = fz_tell(ctx, doc->file);
3461
0
  fz_seek(ctx, doc->file, doc->hint_object_offset, SEEK_SET);
3462
0
  fz_try(ctx)
3463
0
  {
3464
0
    while (1)
3465
0
    {
3466
0
      pdf_obj *page = NULL;
3467
0
      int num, tok;
3468
3469
0
      tok = pdf_lex(ctx, doc->file, buf);
3470
0
      if (tok != PDF_TOK_INT)
3471
0
        break;
3472
0
      num = buf->i;
3473
0
      tok = pdf_lex(ctx, doc->file, buf);
3474
0
      if (tok != PDF_TOK_INT)
3475
0
        break;
3476
      /* Ignore gen = buf->i */
3477
0
      tok = pdf_lex(ctx, doc->file, buf);
3478
0
      if (tok != PDF_TOK_OBJ)
3479
0
        break;
3480
0
      (void)pdf_repair_obj(ctx, doc, buf, NULL, NULL, NULL, NULL, &page, NULL, NULL);
3481
0
      pdf_load_hints(ctx, doc, num);
3482
0
    }
3483
0
  }
3484
0
  fz_always(ctx)
3485
0
  {
3486
0
    fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
3487
0
  }
3488
0
  fz_catch(ctx)
3489
0
  {
3490
0
    fz_rethrow(ctx);
3491
0
  }
3492
0
}
3493
3494
/*
	Try to advance progressive (linearized) loading far enough to obtain
	page 'pagenum'.

	Throws FZ_ERROR_ARGUMENT if pagenum is outside
	[0, linear_page_count). If the whole file has already been consumed
	(linear_pos == file_length) the cached page reference is returned
	directly. Otherwise the hint object is loaded once it is reachable
	(only for pages after the first), objects are read from linear_pos
	until the end of file, and then the xref is loaded and the page tree
	checked.

	The file position is restored before returning. A FZ_ERROR_TRYLATER
	raised while reading is rethrown only if the requested page is still
	unavailable; all other errors are rethrown.

	Returns doc->linear_page_refs[pagenum].
*/
pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum)
{
	/* fz_tell() positions are stored as int64_t elsewhere in this file;
	 * a plain int would truncate them on large files. */
	int64_t curr_pos;
	pdf_obj *page = NULL;

	/* Validate pagenum before it is handed to any helper.
	 * NOTE(review): pdf_load_hinted_page is assumed not to need a call
	 * for out-of-range pages - confirm. */
	if (pagenum < 0 || pagenum >= doc->linear_page_count)
		fz_throw(ctx, FZ_ERROR_ARGUMENT, "page load out of range (%d of %d)", pagenum, doc->linear_page_count);

	pdf_load_hinted_page(ctx, doc, pagenum);

	if (doc->linear_pos == doc->file_length)
		return doc->linear_page_refs[pagenum];

	/* Only load hints once, and then only after we have got page 0 */
	if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset)
	{
		/* Found hint object */
		pdf_load_hint_object(ctx, doc);
	}

	DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
	curr_pos = fz_tell(ctx, doc->file);

	fz_var(page);

	fz_try(ctx)
	{
		int eof;
		do
		{
			int num;
			eof = pdf_obj_read(ctx, doc, &doc->linear_pos, &num, &page);
			pdf_drop_obj(ctx, page);
			page = NULL;
		}
		while (!eof);

		{
			pdf_obj *catalog;
			pdf_obj *pages;
			doc->linear_pos = doc->file_length;
			pdf_load_xref(ctx, doc);
			catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
			pages = pdf_dict_get(ctx, catalog, PDF_NAME(Pages));

			if (!pdf_is_dict(ctx, pages))
				fz_throw(ctx, FZ_ERROR_FORMAT, "missing page tree");
			break;
		}
	}
	fz_always(ctx)
	{
		fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, page);
		if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
		{
			if (doc->linear_page_refs[pagenum] == NULL)
			{
				/* Still not got a page */
				fz_rethrow(ctx);
			}
			// TODO: should we really swallow this error?
			fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
			fz_report_error(ctx);
		}
		else
			fz_rethrow(ctx);
	}

	return doc->linear_page_refs[pagenum];
}
3568
3569
/*
	Downcast a generic document to a pdf_document.

	Returns NULL if ptr is NULL or its count_pages callback is not the
	PDF implementation, i.e. it is not a PDF document.
*/
pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr)
{
	if (ptr == NULL || ptr->count_pages != pdf_count_pages_imp)
		return NULL;

	return (pdf_document *)ptr;
}
3573
3574
/*
	Downcast a generic page to a pdf_page.

	Returns NULL if ptr is NULL or its bound_page callback is not
	pdf_bound_page, i.e. it is not a PDF page.
*/
pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *ptr)
{
	if (ptr == NULL || ptr->bound_page != (fz_page_bound_page_fn*)pdf_bound_page)
		return NULL;

	return (pdf_page *)ptr;
}
3578
3579
/*
	Alternative name for pdf_document_from_fz_document(): returns the
	document as a pdf_document, or NULL if it is not a PDF document.
*/
pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc)
{
	pdf_document *pdf = pdf_document_from_fz_document(ctx, doc);

	return pdf;
}
3583
3584
pdf_obj *
3585
pdf_add_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
3586
5.08k
{
3587
5.08k
  pdf_document *orig_doc;
3588
5.08k
  int num;
3589
3590
5.08k
  orig_doc = pdf_get_bound_document(ctx, obj);
3591
5.08k
  if (orig_doc && orig_doc != doc)
3592
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "tried to add an object belonging to a different document");
3593
5.08k
  if (pdf_is_indirect(ctx, obj))
3594
0
    return pdf_keep_obj(ctx, obj);
3595
5.08k
  num = pdf_create_object(ctx, doc);
3596
5.08k
  pdf_update_object(ctx, doc, num, obj);
3597
5.08k
  return pdf_new_indirect(ctx, doc, num, 0);
3598
5.08k
}
3599
3600
pdf_obj *
3601
pdf_add_object_drop(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
3602
528
{
3603
528
  pdf_obj *ind = NULL;
3604
1.05k
  fz_try(ctx)
3605
1.05k
    ind = pdf_add_object(ctx, doc, obj);
3606
1.05k
  fz_always(ctx)
3607
528
    pdf_drop_obj(ctx, obj);
3608
528
  fz_catch(ctx)
3609
0
    fz_rethrow(ctx);
3610
528
  return ind;
3611
528
}
3612
3613
pdf_obj *
3614
pdf_add_new_dict(fz_context *ctx, pdf_document *doc, int initial)
3615
479
{
3616
479
  return pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, initial));
3617
479
}
3618
3619
pdf_obj *
3620
pdf_add_new_array(fz_context *ctx, pdf_document *doc, int initial)
3621
49
{
3622
49
  return pdf_add_object_drop(ctx, doc, pdf_new_array(ctx, doc, initial));
3623
49
}
3624
3625
pdf_obj *
3626
pdf_add_stream(fz_context *ctx, pdf_document *doc, fz_buffer *buf, pdf_obj *obj, int compressed)
3627
4.46k
{
3628
4.46k
  pdf_obj *ind;
3629
4.46k
  if (!obj)
3630
49
    ind = pdf_add_new_dict(ctx, doc, 4);
3631
4.41k
  else
3632
4.41k
    ind = pdf_add_object(ctx, doc, obj);
3633
8.92k
  fz_try(ctx)
3634
8.92k
    pdf_update_stream(ctx, doc, ind, buf, compressed);
3635
8.92k
  fz_catch(ctx)
3636
0
  {
3637
0
    pdf_drop_obj(ctx, ind);
3638
0
    fz_rethrow(ctx);
3639
0
  }
3640
4.46k
  return ind;
3641
4.46k
}
3642
3643
/*
	Create a new, empty PDF document that is not backed by a file.

	The document gets a trailer (Size 3) whose Root is a Catalog that
	points at an empty Pages node (Count 0, empty Kids array). On any
	failure the trailer and the document are dropped and the error is
	rethrown.
*/
pdf_document *pdf_create_document(fz_context *ctx)
{
	pdf_document *doc;
	pdf_obj *catalog;
	pdf_obj *page_tree;
	pdf_obj *trailer = NULL;

	fz_var(trailer);

	doc = pdf_new_document(ctx, NULL);
	fz_try(ctx)
	{
		/* Start from a blank xref state. */
		doc->file_size = 0;
		doc->startxref = 0;
		doc->xref_base = 0;
		doc->num_xref_sections = 0;
		doc->num_incremental_sections = 0;
		doc->disallow_new_increments = 0;
		pdf_get_populating_xref_entry(ctx, doc, 0);

		trailer = pdf_new_dict(ctx, doc, 2);
		pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), 3);

		/* The trailer takes over the new reference to the catalog. */
		catalog = pdf_add_new_dict(ctx, doc, 2);
		pdf_dict_put_drop(ctx, trailer, PDF_NAME(Root), catalog);
		pdf_dict_put(ctx, catalog, PDF_NAME(Type), PDF_NAME(Catalog));

		/* Likewise the catalog takes over the page tree reference. */
		page_tree = pdf_add_new_dict(ctx, doc, 3);
		pdf_dict_put_drop(ctx, catalog, PDF_NAME(Pages), page_tree);
		pdf_dict_put(ctx, page_tree, PDF_NAME(Type), PDF_NAME(Pages));
		pdf_dict_put_int(ctx, page_tree, PDF_NAME(Count), 0);
		pdf_dict_put_array(ctx, page_tree, PDF_NAME(Kids), 1);

		/* Set the trailer of the final xref section. */
		doc->xref_sections[0].trailer = trailer;
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, trailer);
		fz_drop_document(ctx, &doc->super);
		fz_rethrow(ctx);
	}

	return doc;
}
3683
3684
/* NULL-terminated list of file extensions listed in pdf_document_handler. */
static const char *pdf_extensions[] =
{
	"pdf", "fdf", "pclm", "ai",
	NULL /* terminator */
};
3692
3693
/* NULL-terminated list of MIME types listed in pdf_document_handler. */
static const char *pdf_mimetypes[] =
{
	"application/pdf", "application/PCLm",
	NULL /* terminator */
};
3699
3700
static int
3701
pdf_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir)
3702
8.84k
{
3703
8.84k
  const char *match = "%PDF-";
3704
8.84k
  const char *match2 = "%FDF-";
3705
8.84k
  int pos = 0;
3706
8.84k
  int n = 4096+5;
3707
8.84k
  int c;
3708
3709
8.84k
  if (stream == NULL)
3710
0
    return 0;
3711
3712
8.84k
  do
3713
6.69M
  {
3714
6.69M
    c = fz_read_byte(ctx, stream);
3715
6.69M
    if (c == EOF)
3716
3.72k
      return 0;
3717
6.69M
    if (c == match[pos] || c == match2[pos])
3718
37.3k
    {
3719
37.3k
      pos++;
3720
37.3k
      if (pos == 5)
3721
4.56k
        return 100;
3722
37.3k
    }
3723
6.65M
    else
3724
6.65M
    {
3725
      /* Restart matching, but recheck c against the start. */
3726
6.65M
      pos = (c == match[0]);
3727
6.65M
    }
3728
6.69M
  }
3729
6.68M
  while (--n > 0);
3730
3731
548
  return 0;
3732
8.84k
}
3733
3734
static fz_document *
3735
open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip)
3736
6.63k
{
3737
6.63k
  if (file == NULL)
3738
0
    return NULL;
3739
6.63k
  return (fz_document *)pdf_open_document_with_stream(ctx, file);
3740
6.63k
}
3741
3742
fz_document_handler pdf_document_handler =
3743
{
3744
  NULL,
3745
  open_document,
3746
  pdf_extensions,
3747
  pdf_mimetypes,
3748
  pdf_recognize_doc_content
3749
};
3750
3751
/*
 * Set the 'marked' flag on every xref entry, in every section and
 * subsection, that currently has a non-NULL obj.
 */
void pdf_mark_xref(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		for (; sub != NULL; sub = sub->next)
		{
			int idx;

			for (idx = 0; idx < sub->len; idx++)
			{
				if (sub->table[idx].obj)
					sub->table[idx].marked = 1;
			}
		}
	}
}
3773
3774
/*
 * Drop the objects held by xref entries where that is possible.
 *
 * An entry's obj is dropped and reset to NULL only when the entry has no
 * stm_buf and pdf_obj_refs reports exactly one reference to the object.
 */
void pdf_clear_xref(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		for (; sub != NULL; sub = sub->next)
		{
			int idx;

			for (idx = 0; idx < sub->len; idx++)
			{
				pdf_xref_entry *slot = &sub->table[idx];

				/* We cannot drop objects if the stream
				 * buffer has been updated */
				if (slot->obj == NULL || slot->stm_buf != NULL)
					continue;

				if (pdf_obj_refs(ctx, slot->obj) != 1)
					continue;

				pdf_drop_obj(ctx, slot->obj);
				slot->obj = NULL;
			}
		}
	}
}
3802
3803
/*
 * As pdf_clear_xref, but entries whose 'marked' flag is set are kept.
 *
 * An entry's obj is dropped and reset to NULL only when the entry has no
 * stm_buf, is not marked, and pdf_obj_refs reports exactly one reference.
 */
void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		for (; sub != NULL; sub = sub->next)
		{
			int idx;

			for (idx = 0; idx < sub->len; idx++)
			{
				pdf_xref_entry *slot = &sub->table[idx];

				/* We cannot drop objects if the stream buffer has
				 * been updated */
				if (slot->obj == NULL || slot->stm_buf != NULL)
					continue;

				if (slot->marked || pdf_obj_refs(ctx, slot->obj) != 1)
					continue;

				pdf_drop_obj(ctx, slot->obj);
				slot->obj = NULL;
			}
		}
	}
}
3832
3833
int
3834
pdf_count_versions(fz_context *ctx, pdf_document *doc)
3835
0
{
3836
0
  return doc->num_xref_sections-doc->num_incremental_sections-doc->has_linearization_object;
3837
0
}
3838
3839
int
3840
pdf_count_unsaved_versions(fz_context *ctx, pdf_document *doc)
3841
0
{
3842
0
  return doc->num_incremental_sections;
3843
0
}
3844
3845
int
3846
pdf_doc_was_linearized(fz_context *ctx, pdf_document *doc)
3847
0
{
3848
0
  return doc->has_linearization_object;
3849
0
}
3850
3851
/*
 * Report whether object number i has an entry with a non-zero type in any
 * xref section at or after the current xref_base.
 *
 * Throws FZ_ERROR_ARGUMENT for a negative object number.
 * Returns 1 if such an entry is found, 0 otherwise.
 */
static int pdf_obj_exists(fz_context *ctx, pdf_document *doc, int i)
{
	int sec;

	if (i < 0)
		fz_throw(ctx, FZ_ERROR_ARGUMENT, "Negative object number requested");

	/* NOTE(review): the bound is inclusive of max_xref_len; confirm that
	 * xref_index really has max_xref_len+1 usable slots. */
	sec = (i <= doc->max_xref_len) ? doc->xref_index[i] : 0;

	/* We may be accessing an earlier version of the document using xref_base
	 * and sec may be an index into a later xref section */
	if (sec < doc->xref_base)
		sec = doc->xref_base;

	/* Find the first xref section where the entry is defined. */
	for (; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref *xref = &doc->xref_sections[sec];
		pdf_xref_subsec *sub;

		if (i >= xref->num_objects)
			continue;

		for (sub = xref->subsec; sub != NULL; sub = sub->next)
		{
			int in_range = (i >= sub->start && i < sub->start + sub->len);

			if (in_range && sub->table[i - sub->start].type)
				return 1;
		}
	}

	return 0;
}
3889
3890
/* Bit flags OR-ed into pdf_changes.obj_changes[] entries. */
enum {
	FIELD_CHANGED = 1 << 0,
	FIELD_CHANGE_VALID = 1 << 1,
	FIELD_CHANGE_INVALID = 1 << 2
};
3895
3896
/*
 * Per-object change flags. obj_changes is indexed by object number and
 * holds FIELD_* bits.
 */
typedef struct
{
	int num_obj;		/* object count; presumably the usable length of obj_changes - confirm at the allocation site */
	int obj_changes[1];	/* declared with one element; indexed by object number */
} pdf_changes;
3901
3902
static int
3903
check_unchanged_between(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *nobj, pdf_obj *oobj)
3904
0
{
3905
0
  int marked = 0;
3906
0
  int changed = 0;
3907
3908
  /* Trivially identical => trivially unchanged. */
3909
0
  if (nobj == oobj)
3910
0
    return 0;
3911
3912
  /* Strictly speaking we shouldn't need to call fz_var,
3913
   * but I suspect static analysis tools are not smart
3914
   * enough to figure that out. */
3915
0
  fz_var(marked);
3916
3917
0
  if (pdf_is_indirect(ctx, nobj))
3918
0
  {
3919
0
    int o_xref_base = doc->xref_base;
3920
3921
    /* Both must be indirect if one is. */
3922
0
    if (!pdf_is_indirect(ctx, oobj))
3923
0
    {
3924
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3925
0
      return 1;
3926
0
    }
3927
3928
    /* Handle recursing back into ourselves. */
3929
0
    if (pdf_obj_marked(ctx, nobj))
3930
0
    {
3931
0
      if (pdf_obj_marked(ctx, oobj))
3932
0
        return 0;
3933
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3934
0
      return 1;
3935
0
    }
3936
0
    else if (pdf_obj_marked(ctx, oobj))
3937
0
    {
3938
0
      changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3939
0
      return 1;
3940
0
    }
3941
3942
0
    nobj = pdf_resolve_indirect_chain(ctx, nobj);
3943
0
    doc->xref_base = o_xref_base+1;
3944
0
    fz_try(ctx)
3945
0
    {
3946
0
      oobj = pdf_resolve_indirect_chain(ctx, oobj);
3947
0
      if (oobj != nobj)
3948
0
      {
3949
        /* Different objects, so lock them */
3950
0
        if (!pdf_obj_marked(ctx, nobj) && !pdf_obj_marked(ctx, oobj))
3951
0
        {
3952
0
          (void)pdf_mark_obj(ctx, nobj);
3953
0
          (void)pdf_mark_obj(ctx, oobj);
3954
0
          marked = 1;
3955
0
        }
3956
0
      }
3957
0
    }
3958
0
    fz_always(ctx)
3959
0
      doc->xref_base = o_xref_base;
3960
0
    fz_catch(ctx)
3961
0
      fz_rethrow(ctx);
3962
3963
0
    if (nobj == oobj)
3964
0
      return 0; /* Trivially identical */
3965
0
  }
3966
3967
0
  fz_var(changed);
3968
3969
0
  fz_try(ctx)
3970
0
  {
3971
0
    if (pdf_is_dict(ctx, nobj))
3972
0
    {
3973
0
      int i, n = pdf_dict_len(ctx, nobj);
3974
3975
0
      if (!pdf_is_dict(ctx, oobj) || n != pdf_dict_len(ctx, oobj))
3976
0
      {
3977
0
change_found:
3978
0
        changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
3979
0
        changed = 1;
3980
0
        break;
3981
0
      }
3982
3983
0
      for (i = 0; i < n; i++)
3984
0
      {
3985
0
        pdf_obj *key = pdf_dict_get_key(ctx, nobj, i);
3986
0
        pdf_obj *nval = pdf_dict_get(ctx, nobj, key);
3987
0
        pdf_obj *oval = pdf_dict_get(ctx, oobj, key);
3988
3989
0
        changed |= check_unchanged_between(ctx, doc, changes, nval, oval);
3990
0
      }
3991
0
    }
3992
0
    else if (pdf_is_array(ctx, nobj))
3993
0
    {
3994
0
      int i, n = pdf_array_len(ctx, nobj);
3995
3996
0
      if (!pdf_is_array(ctx, oobj) || n != pdf_array_len(ctx, oobj))
3997
0
        goto change_found;
3998
3999
0
      for (i = 0; i < n; i++)
4000
0
      {
4001
0
        pdf_obj *nval = pdf_array_get(ctx, nobj, i);
4002
0
        pdf_obj *oval = pdf_array_get(ctx, oobj, i);
4003
4004
0
        changed |= check_unchanged_between(ctx, doc, changes, nval, oval);
4005
0
      }
4006
0
    }
4007
0
    else if (pdf_objcmp(ctx, nobj, oobj))
4008
0
      goto change_found;
4009
0
  }
4010
0
  fz_always(ctx)
4011
0
  {
4012
0
    if (marked)
4013
0
    {
4014
0
      pdf_unmark_obj(ctx, nobj);
4015
0
      pdf_unmark_obj(ctx, oobj);
4016
0
    }
4017
0
  }
4018
0
  fz_catch(ctx)
4019
0
    fz_rethrow(ctx);
4020
4021
0
  return changed;
4022
0
}
4023
4024
/* Growable array of heap-allocated strings. */
typedef struct
{
	int max;	/* allocated slots in list */
	int len;	/* slots currently in use */
	char **list;	/* the strings themselves */
} char_list;
4030
4031
/* This structure is used to hold the definition of which fields
4032
 * are locked. */
4033
struct pdf_locked_fields
4034
{
4035
  int p;
4036
  int all;
4037
  char_list includes;
4038
  char_list excludes;
4039
};
4040
4041
static void
4042
free_char_list(fz_context *ctx, char_list *c)
4043
0
{
4044
0
  int i;
4045
4046
0
  if (c == NULL)
4047
0
    return;
4048
4049
0
  for (i = c->len-1; i >= 0; i--)
4050
0
    fz_free(ctx, c->list[i]);
4051
0
  fz_free(ctx, c->list);
4052
0
  c->len = 0;
4053
0
  c->max = 0;
4054
0
}
4055
4056
void
4057
pdf_drop_locked_fields(fz_context *ctx, pdf_locked_fields *fl)
4058
0
{
4059
0
  if (fl == NULL)
4060
0
    return;
4061
4062
0
  free_char_list(ctx, &fl->includes);
4063
0
  free_char_list(ctx, &fl->excludes);
4064
0
  fz_free(ctx, fl);
4065
0
}
4066
4067
static void
4068
char_list_append(fz_context *ctx, char_list *list, const char *s)
4069
0
{
4070
0
  if (list->len == list->max)
4071
0
  {
4072
0
    int n = list->max * 2;
4073
0
    if (n == 0) n = 4;
4074
4075
0
    list->list = fz_realloc_array(ctx, list->list, n, char *);
4076
0
    list->max = n;
4077
0
  }
4078
0
  list->list[list->len] = fz_strdup(ctx, s);
4079
0
  list->len++;
4080
0
}
4081
4082
int
4083
pdf_is_field_locked(fz_context *ctx, pdf_locked_fields *locked, const char *name)
4084
0
{
4085
0
  int i;
4086
4087
0
  if (locked->p == 1)
4088
0
  {
4089
    /* Permissions were set, and say that field changes are not to be allowed. */
4090
0
    return 1; /* Locked */
4091
0
  }
4092
4093
0
  if(locked->all)
4094
0
  {
4095
    /* The only way we might not be unlocked is if
4096
     * we are listed in the excludes. */
4097
0
    for (i = 0; i < locked->excludes.len; i++)
4098
0
      if (!strcmp(locked->excludes.list[i], name))
4099
0
        return 0;
4100
0
    return 1;
4101
0
  }
4102
4103
  /* The only way we can be locked is for us to be in the includes. */
4104
0
  for (i = 0; i < locked->includes.len; i++)
4105
0
    if (strcmp(locked->includes.list[i], name) == 0)
4106
0
      return 1;
4107
4108
  /* Anything else is unlocked */
4109
0
  return 0;
4110
0
}
4111
4112
/* Unfortunately, in C, there is no legal way to define a function
4113
 * type that returns itself. We therefore have to use a struct
4114
 * wrapper. */
4115
typedef struct filter_wrap
4116
{
4117
  struct filter_wrap (*func)(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
4118
} filter_wrap;
4119
4120
typedef struct filter_wrap (*filter_fn)(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
4121
4122
0
#define RETURN_FILTER(f) { filter_wrap rf; rf.func = (f); return rf; }
4123
4124
/* Terminal filter: always returns a NULL filter, whatever the key. */
static filter_wrap filter_simple(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	filter_wrap none;

	none.func = NULL;
	return none;
}
4128
4129
/*
 * Filter for a TransformParams dictionary: the keys listed below map to
 * filter_simple, every other key to a NULL filter.
 */
static filter_wrap filter_transformparams(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	int accepted =
		pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
		pdf_name_eq(ctx, key, PDF_NAME(P)) ||
		pdf_name_eq(ctx, key, PDF_NAME(V)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Document)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Msg)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Annots)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Form)) ||
		pdf_name_eq(ctx, key, PDF_NAME(FormEx)) ||
		pdf_name_eq(ctx, key, PDF_NAME(EF)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Action)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Fields));

	RETURN_FILTER(accepted ? &filter_simple : NULL);
}
4147
4148
/*
 * Filter for the entries of a signature Reference dictionary.
 * TransformParams is handed to filter_transformparams; the other listed
 * keys to filter_simple; anything else gets a NULL filter.
 */
static filter_wrap filter_reference(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	int is_plain =
		pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
		pdf_name_eq(ctx, key, PDF_NAME(TransformMethod)) ||
		pdf_name_eq(ctx, key, PDF_NAME(DigestMethod)) ||
		pdf_name_eq(ctx, key, PDF_NAME(DigestValue)) ||
		pdf_name_eq(ctx, key, PDF_NAME(DigestLocation));

	if (is_plain)
		RETURN_FILTER(&filter_simple);

	RETURN_FILTER(pdf_name_eq(ctx, key, PDF_NAME(TransformParams)) ? &filter_transformparams : NULL);
}
4160
4161
/*
 * Filter for the sub-dictionaries selected by filter_prop_build: the keys
 * listed below map to filter_simple, every other key to a NULL filter.
 */
static filter_wrap filter_prop_build_sub(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	int accepted =
		pdf_name_eq(ctx, key, PDF_NAME(Name)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Date)) ||
		pdf_name_eq(ctx, key, PDF_NAME(R)) ||
		pdf_name_eq(ctx, key, PDF_NAME(PreRelease)) ||
		pdf_name_eq(ctx, key, PDF_NAME(OS)) ||
		pdf_name_eq(ctx, key, PDF_NAME(NonEFontNoWarn)) ||
		pdf_name_eq(ctx, key, PDF_NAME(TrustedMode)) ||
		pdf_name_eq(ctx, key, PDF_NAME(V)) ||
		pdf_name_eq(ctx, key, PDF_NAME(REx)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Preview));

	RETURN_FILTER(accepted ? &filter_simple : NULL);
}
4176
4177
/*
 * Filter for a Prop_Build dictionary: Filter, PubSec, App and SigQ are
 * handed to filter_prop_build_sub; every other key gets a NULL filter.
 */
static filter_wrap filter_prop_build(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	int accepted =
		pdf_name_eq(ctx, key, PDF_NAME(Filter)) ||
		pdf_name_eq(ctx, key, PDF_NAME(PubSec)) ||
		pdf_name_eq(ctx, key, PDF_NAME(App)) ||
		pdf_name_eq(ctx, key, PDF_NAME(SigQ));

	RETURN_FILTER(accepted ? &filter_prop_build_sub : NULL);
}
4186
4187
/*
 * Filter for a field's V entry.
 *
 * Length on a stream dict, and the listed signature-dictionary keys, map
 * to filter_simple. Reference maps to filter_reference and Prop_Build to
 * filter_prop_build. Every other key gets a NULL filter.
 */
static filter_wrap filter_v(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	int is_sig_key;

	/* Text can point to a stream object */
	if (pdf_name_eq(ctx, key, PDF_NAME(Length)) && pdf_is_stream(ctx, dict))
		RETURN_FILTER(&filter_simple);

	/* Sigs point to a dict. */
	is_sig_key =
		pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Filter)) ||
		pdf_name_eq(ctx, key, PDF_NAME(SubFilter)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Contents)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Cert)) ||
		pdf_name_eq(ctx, key, PDF_NAME(ByteRange)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Changes)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Name)) ||
		pdf_name_eq(ctx, key, PDF_NAME(M)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Location)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Reason)) ||
		pdf_name_eq(ctx, key, PDF_NAME(ContactInfo)) ||
		pdf_name_eq(ctx, key, PDF_NAME(R)) ||
		pdf_name_eq(ctx, key, PDF_NAME(V)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Prop_AuthTime)) ||
		pdf_name_eq(ctx, key, PDF_NAME(Prop_AuthType));
	if (is_sig_key)
		RETURN_FILTER(&filter_simple);

	if (pdf_name_eq(ctx, key, PDF_NAME(Reference)))
		RETURN_FILTER(filter_reference);

	RETURN_FILTER(pdf_name_eq(ctx, key, PDF_NAME(Prop_Build)) ? filter_prop_build : NULL);
}
4216
4217
static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
4218
4219
/* Filter for an XObject resource dictionary: every key is handed to
 * filter_appearance. */
static filter_wrap filter_xobject_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	filter_wrap next;

	/* FIXME: Infinite recursion possible here? */
	next.func = &filter_appearance;
	return next;
}
4224
4225
/* Filter for a single font dictionary: always returns a NULL filter. */
static filter_wrap filter_font(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	filter_wrap none;

	/* In the example I've seen the /Name field was dropped, so we'll allow
	 * local changes, but none that follow an indirection. */
	none.func = NULL;
	return none;
}
4231
4232
/* FIXME: One idea here is to make filter_font_list and filter_xobject_list
4233
 * only accept NEW objects as changes. Will think about this. */
4234
/* Filter for a Font resource dictionary: every key is handed to
 * filter_font. */
static filter_wrap filter_font_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	filter_wrap next;

	next.func = &filter_font;
	return next;
}
4238
4239
/*
 * Filter for a Resources dictionary: XObject is handed to
 * filter_xobject_list, Font to filter_font_list, and every other key
 * gets a NULL filter.
 */
static filter_wrap filter_resources(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	if (pdf_name_eq(ctx, key, PDF_NAME(XObject)))
		RETURN_FILTER(&filter_xobject_list);

	RETURN_FILTER(pdf_name_eq(ctx, key, PDF_NAME(Font)) ? &filter_font_list : NULL);
}
4247
4248
/* Filter for an appearance stream dictionary: only Resources is
 * followed (via filter_resources); every other key gets a NULL filter. */
static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	RETURN_FILTER(pdf_name_eq(ctx, key, PDF_NAME(Resources)) ? &filter_resources : NULL);
}
4254
4255
/*
 * Filter for an AP dictionary: the N entry is handed to filter_appearance,
 * but only when its value is a stream. Every other key gets a NULL filter.
 */
static filter_wrap filter_ap(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	/* Just the /N entry for now. May need to add more later. */
	int is_normal_stream =
		pdf_name_eq(ctx, key, PDF_NAME(N)) &&
		pdf_is_stream(ctx, pdf_dict_get(ctx, dict, key));

	RETURN_FILTER(is_normal_stream ? &filter_appearance : NULL);
}
4262
4263
/* Filter for XFA data: when dict is a stream every key maps to
 * filter_simple; otherwise a NULL filter is returned. */
static filter_wrap filter_xfa(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
{
	/* Text can point to a stream object */
	RETURN_FILTER(pdf_is_stream(ctx, dict) ? &filter_simple : NULL);
}
4270
4271
static void
4272
filter_changes_accepted(fz_context *ctx, pdf_changes *changes, pdf_obj *obj, filter_fn filter)
4273
0
{
4274
0
  int obj_num;
4275
4276
0
  if (obj == NULL || pdf_obj_marked(ctx, obj))
4277
0
    return;
4278
4279
0
  obj_num = pdf_to_num(ctx, obj);
4280
4281
0
  fz_try(ctx)
4282
0
  {
4283
0
    if (obj_num != 0)
4284
0
    {
4285
0
      (void)pdf_mark_obj(ctx, obj);
4286
0
      changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4287
0
    }
4288
0
    if (filter == NULL)
4289
0
      break;
4290
0
    if (pdf_is_dict(ctx, obj))
4291
0
    {
4292
0
      int i, n = pdf_dict_len(ctx, obj);
4293
4294
0
      for (i = 0; i < n; i++)
4295
0
      {
4296
0
        pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
4297
0
        pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
4298
0
        filter_fn f = (filter(ctx, obj, key)).func;
4299
0
        if (f != NULL)
4300
0
          filter_changes_accepted(ctx, changes, val, f);
4301
0
      }
4302
0
    }
4303
0
    else if (pdf_is_array(ctx, obj))
4304
0
    {
4305
0
      int i, n = pdf_array_len(ctx, obj);
4306
4307
0
      for (i = 0; i < n; i++)
4308
0
      {
4309
0
        pdf_obj *val = pdf_array_get(ctx, obj, i);
4310
0
        filter_changes_accepted(ctx, changes, val, filter);
4311
0
      }
4312
0
    }
4313
0
  }
4314
0
  fz_always(ctx)
4315
0
    if (obj_num != 0)
4316
0
      pdf_unmark_obj(ctx, obj);
4317
0
  fz_catch(ctx)
4318
0
    fz_rethrow(ctx);
4319
0
}
4320
4321
static void
4322
check_field(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *obj, pdf_locked_fields *locked, const char *name_prefix, pdf_obj *new_v, pdf_obj *old_v)
4323
0
{
4324
0
  pdf_obj *old_obj, *new_obj, *n_v, *o_v;
4325
0
  int o_xref_base;
4326
0
  int obj_num;
4327
0
  char *field_name = NULL;
4328
4329
  /* All fields MUST be indirections, either in the Fields array
4330
   * or AcroForms, or in the Kids array of other Fields. */
4331
0
  if (!pdf_is_indirect(ctx, obj))
4332
0
    return;
4333
4334
0
  obj_num = pdf_to_num(ctx, obj);
4335
0
  o_xref_base = doc->xref_base;
4336
0
  new_obj = pdf_resolve_indirect_chain(ctx, obj);
4337
4338
  /* Similarly, all fields must be dicts */
4339
0
  if (!pdf_is_dict(ctx, new_obj))
4340
0
    return;
4341
4342
0
  if (pdf_obj_marked(ctx, obj))
4343
0
    return;
4344
4345
0
  fz_var(field_name);
4346
4347
0
  fz_try(ctx)
4348
0
  {
4349
0
    int i, len;
4350
0
    const char *name;
4351
0
    size_t n;
4352
0
    pdf_obj *t;
4353
0
    int is_locked;
4354
4355
0
    (void)pdf_mark_obj(ctx, obj);
4356
4357
    /* Do this within the try, so we can catch any problems */
4358
0
    doc->xref_base = o_xref_base+1;
4359
0
    old_obj = pdf_resolve_indirect_chain(ctx, obj);
4360
4361
0
    t = pdf_dict_get(ctx, old_obj, PDF_NAME(T));
4362
0
    if (t != NULL)
4363
0
    {
4364
0
      name = pdf_dict_get_text_string(ctx, old_obj, PDF_NAME(T));
4365
0
      n = strlen(name)+1;
4366
0
      if (*name_prefix)
4367
0
        n += 1 + strlen(name_prefix);
4368
0
      field_name = fz_malloc(ctx, n);
4369
0
      if (*name_prefix)
4370
0
      {
4371
0
        strcpy(field_name, name_prefix);
4372
0
        strcat(field_name, ".");
4373
0
      }
4374
0
      else
4375
0
        *field_name = 0;
4376
0
      strcat(field_name, name);
4377
0
      name_prefix = field_name;
4378
0
    }
4379
4380
0
    doc->xref_base = o_xref_base;
4381
4382
0
    if (!pdf_is_dict(ctx, old_obj))
4383
0
      break;
4384
4385
    /* Check V explicitly, allowing for it being inherited. */
4386
0
    n_v = pdf_dict_get(ctx, new_obj, PDF_NAME(V));
4387
0
    if (n_v == NULL)
4388
0
      n_v = new_v;
4389
0
    o_v = pdf_dict_get(ctx, old_obj, PDF_NAME(V));
4390
0
    if (o_v == NULL)
4391
0
      o_v = old_v;
4392
4393
0
    is_locked = pdf_is_field_locked(ctx, locked, name_prefix);
4394
0
    if (pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Type)), PDF_NAME(Annot)) &&
4395
0
      pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Subtype)), PDF_NAME(Widget)))
4396
0
    {
4397
0
      if (is_locked)
4398
0
      {
4399
        /* If locked, V must not change! */
4400
0
        if (check_unchanged_between(ctx, doc, changes, n_v, o_v))
4401
0
          changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID;
4402
0
      }
4403
0
      else
4404
0
      {
4405
        /* If not locked, V can change to be filled in! */
4406
0
        filter_changes_accepted(ctx, changes, n_v, &filter_v);
4407
0
        changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4408
0
      }
4409
0
    }
4410
4411
    /* Check all the fields in the new object are
4412
     * either the same as the old object, or are
4413
     * expected changes. */
4414
0
    len = pdf_dict_len(ctx, new_obj);
4415
0
    for (i = 0; i < len; i++)
4416
0
    {
4417
0
      pdf_obj *key = pdf_dict_get_key(ctx, new_obj, i);
4418
0
      pdf_obj *nval = pdf_dict_get(ctx, new_obj, key);
4419
0
      pdf_obj *oval = pdf_dict_get(ctx, old_obj, key);
4420
4421
      /* Kids arrays shouldn't change. */
4422
0
      if (pdf_name_eq(ctx, key, PDF_NAME(Kids)))
4423
0
      {
4424
0
        int j, m;
4425
4426
        /* Kids must be an array. If it's not, count it as a difference. */
4427
0
        if (!pdf_is_array(ctx, nval) || !pdf_is_array(ctx, oval))
4428
0
        {
4429
0
change_found:
4430
0
          changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID;
4431
0
          break;
4432
0
        }
4433
0
        m = pdf_array_len(ctx, nval);
4434
        /* Any change in length counts as a difference */
4435
0
        if (m != pdf_array_len(ctx, oval))
4436
0
          goto change_found;
4437
0
        for (j = 0; j < m; j++)
4438
0
        {
4439
0
          pdf_obj *nkid = pdf_array_get(ctx, nval, j);
4440
0
          pdf_obj *okid = pdf_array_get(ctx, oval, j);
4441
          /* Kids arrays are supposed to all be indirect. If they aren't,
4442
           * count it as a difference. */
4443
0
          if (!pdf_is_indirect(ctx, nkid) || !pdf_is_indirect(ctx, okid))
4444
0
            goto change_found;
4445
          /* For now at least, we'll count any change in number as a difference. */
4446
0
          if (pdf_to_num(ctx, nkid) != pdf_to_num(ctx, okid))
4447
0
            goto change_found;
4448
0
          check_field(ctx, doc, changes, nkid, locked, name_prefix, n_v, o_v);
4449
0
        }
4450
0
      }
4451
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(V)))
4452
0
      {
4453
        /* V is checked above */
4454
0
      }
4455
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(AP)))
4456
0
      {
4457
        /* If we're locked, then nothing can change. If not,
4458
         * we can change to be filled in. */
4459
0
        if (is_locked)
4460
0
          check_unchanged_between(ctx, doc, changes, nval, oval);
4461
0
        else
4462
0
          filter_changes_accepted(ctx, changes, nval, &filter_ap);
4463
0
      }
4464
      /* All other fields can't change */
4465
0
      else
4466
0
        check_unchanged_between(ctx, doc, changes, nval, oval);
4467
0
    }
4468
4469
    /* Now check all the fields in the old object to
4470
     * make sure none were dropped. */
4471
0
    len = pdf_dict_len(ctx, old_obj);
4472
0
    for (i = 0; i < len; i++)
4473
0
    {
4474
0
      pdf_obj *key = pdf_dict_get_key(ctx, old_obj, i);
4475
0
      pdf_obj *nval, *oval;
4476
4477
      /* V is checked above */
4478
0
      if (pdf_name_eq(ctx, key, PDF_NAME(V)))
4479
0
        continue;
4480
4481
0
      nval = pdf_dict_get(ctx, new_obj, key);
4482
0
      oval = pdf_dict_get(ctx, old_obj, key);
4483
4484
0
      if (nval == NULL && oval != NULL)
4485
0
        changes->obj_changes[pdf_to_num(ctx, nval)] |= FIELD_CHANGE_INVALID;
4486
0
    }
4487
0
    changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
4488
4489
0
  }
4490
0
  fz_always(ctx)
4491
0
  {
4492
0
    pdf_unmark_obj(ctx, obj);
4493
0
    fz_free(ctx, field_name);
4494
0
    doc->xref_base = o_xref_base;
4495
0
  }
4496
0
  fz_catch(ctx)
4497
0
    fz_rethrow(ctx);
4498
0
}
4499
4500
static int
4501
pdf_obj_changed_in_version(fz_context *ctx, pdf_document *doc, int num, int version)
4502
0
{
4503
0
  if (num < 0 || num > doc->max_xref_len)
4504
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Invalid object number requested");
4505
4506
0
  return version == doc->xref_index[num];
4507
0
}
4508
4509
static void
4510
merge_lock_specification(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *lock)
4511
0
{
4512
0
  pdf_obj *action;
4513
0
  int i, r, w;
4514
4515
0
  if (lock == NULL)
4516
0
    return;
4517
4518
0
  action = pdf_dict_get(ctx, lock, PDF_NAME(Action));
4519
4520
0
  if (pdf_name_eq(ctx, action, PDF_NAME(All)))
4521
0
  {
4522
    /* All fields locked means we don't need any stored
4523
     * includes/excludes. */
4524
0
    fields->all = 1;
4525
0
    free_char_list(ctx, &fields->includes);
4526
0
    free_char_list(ctx, &fields->excludes);
4527
0
  }
4528
0
  else
4529
0
  {
4530
0
    pdf_obj *f = pdf_dict_get(ctx, lock, PDF_NAME(Fields));
4531
0
    int len = pdf_array_len(ctx, f);
4532
4533
0
    if (pdf_name_eq(ctx, action, PDF_NAME(Include)))
4534
0
    {
4535
0
      if (fields->all)
4536
0
      {
4537
        /* Current state = "All except <excludes> are locked".
4538
         * We need to remove <Fields> from <excludes>. */
4539
0
        for (i = 0; i < len; i++)
4540
0
        {
4541
0
          const char *s = pdf_array_get_text_string(ctx, f, i);
4542
0
          int r, w;
4543
4544
0
          for (r = w = 0; r < fields->excludes.len; r++)
4545
0
          {
4546
0
            if (strcmp(s, fields->excludes.list[r]))
4547
0
              fields->excludes.list[w++] = fields->excludes.list[r];
4548
0
          }
4549
0
          fields->excludes.len = w;
4550
0
        }
4551
0
      }
4552
0
      else
4553
0
      {
4554
        /* Current state = <includes> are locked.
4555
         * We need to add <Fields> to <include> (avoiding repetition). */
4556
0
        for (i = 0; i < len; i++)
4557
0
        {
4558
0
          const char *s = pdf_array_get_text_string(ctx, f, i);
4559
4560
0
          for (r = 0; r < fields->includes.len; r++)
4561
0
          {
4562
0
            if (!strcmp(s, fields->includes.list[r]))
4563
0
              break;
4564
0
          }
4565
0
          if (r == fields->includes.len)
4566
0
            char_list_append(ctx, &fields->includes, s);
4567
0
        }
4568
0
      }
4569
0
    }
4570
0
    else if (pdf_name_eq(ctx, action, PDF_NAME(Exclude)))
4571
0
    {
4572
0
      if (fields->all)
4573
0
      {
4574
        /* Current state = "All except <excludes> are locked.
4575
         * We need to remove anything from <excludes> that isn't in <Fields>. */
4576
0
        for (r = w = 0; r < fields->excludes.len; r++)
4577
0
        {
4578
0
          for (i = 0; i < len; i++)
4579
0
          {
4580
0
            const char *s = pdf_array_get_text_string(ctx, f, i);
4581
0
            if (!strcmp(s, fields->excludes.list[r]))
4582
0
              break;
4583
0
          }
4584
0
          if (i != len) /* we found a match */
4585
0
            fields->excludes.list[w++] = fields->excludes.list[r];
4586
0
        }
4587
0
        fields->excludes.len = w;
4588
0
      }
4589
0
      else
4590
0
      {
4591
        /* Current state = <includes> are locked.
4592
         * Set all. <excludes> becomes <Fields> less <includes>. Remove <includes>. */
4593
0
        fields->all = 1;
4594
0
        for (i = 0; i < len; i++)
4595
0
        {
4596
0
          const char *s = pdf_array_get_text_string(ctx, f, i);
4597
0
          for (r = 0; r < fields->includes.len; r++)
4598
0
          {
4599
0
            if (!strcmp(s, fields->includes.list[r]))
4600
0
              break;
4601
0
          }
4602
0
          if (r == fields->includes.len)
4603
0
            char_list_append(ctx, &fields->excludes, s);
4604
0
        }
4605
0
        free_char_list(ctx, &fields->includes);
4606
0
      }
4607
0
    }
4608
0
  }
4609
0
}
4610
4611
static void
4612
find_locked_fields_value(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *v)
4613
0
{
4614
0
  pdf_obj *ref = pdf_dict_get(ctx, v, PDF_NAME(Reference));
4615
0
  int i, n;
4616
4617
0
  if (!ref)
4618
0
    return;
4619
4620
0
  n = pdf_array_len(ctx, ref);
4621
0
  for (i = 0; i < n; i++)
4622
0
  {
4623
0
    pdf_obj *sr = pdf_array_get(ctx, ref, i);
4624
0
    pdf_obj *tm, *tp, *type;
4625
4626
    /* Type is optional, but if it exists, it'd better be SigRef. */
4627
0
    type = pdf_dict_get(ctx, sr, PDF_NAME(Type));
4628
0
    if (type != NULL && !pdf_name_eq(ctx, type, PDF_NAME(SigRef)))
4629
0
      continue;
4630
0
    tm = pdf_dict_get(ctx, sr, PDF_NAME(TransformMethod));
4631
0
    tp = pdf_dict_get(ctx, sr, PDF_NAME(TransformParams));
4632
0
    if (pdf_name_eq(ctx, tm, PDF_NAME(DocMDP)))
4633
0
    {
4634
0
      int p = pdf_dict_get_int(ctx, tp, PDF_NAME(P));
4635
4636
0
      if (p == 0)
4637
0
        p = 2;
4638
0
      if (fields->p == 0)
4639
0
        fields->p = p;
4640
0
      else
4641
0
        fields->p = fz_mini(fields->p, p);
4642
0
    }
4643
0
    else if (pdf_name_eq(ctx, tm, PDF_NAME(FieldMDP)))
4644
0
      merge_lock_specification(ctx, fields, tp);
4645
0
  }
4646
0
}
4647
4648
static void
4649
find_locked_fields_aux(fz_context *ctx, pdf_obj *field, pdf_locked_fields *fields, pdf_obj *inherit_v, pdf_obj *inherit_ft)
4650
0
{
4651
0
  int i, n;
4652
4653
0
  if (!pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Type)), PDF_NAME(Annot)))
4654
0
    return;
4655
4656
0
  if (pdf_obj_marked(ctx, field))
4657
0
    return;
4658
4659
0
  fz_try(ctx)
4660
0
  {
4661
0
    pdf_obj *kids, *v, *ft;
4662
4663
0
    (void)pdf_mark_obj(ctx, field);
4664
4665
0
    v = pdf_dict_get(ctx, field, PDF_NAME(V));
4666
0
    if (v == NULL)
4667
0
      v = inherit_v;
4668
0
    ft = pdf_dict_get(ctx, field, PDF_NAME(FT));
4669
0
    if (ft == NULL)
4670
0
      ft = inherit_ft;
4671
4672
    /* We are looking for Widget annotations of type Sig that are
4673
     * signed (i.e. have a 'V' field). */
4674
0
    if (pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Subtype)), PDF_NAME(Widget)) &&
4675
0
      pdf_name_eq(ctx, ft, PDF_NAME(Sig)) &&
4676
0
      pdf_name_eq(ctx, pdf_dict_get(ctx, v, PDF_NAME(Type)), PDF_NAME(Sig)))
4677
0
    {
4678
      /* Signed Sig Widgets (i.e. ones with a 'V' field) need
4679
       * to have their lock field respected. */
4680
0
      merge_lock_specification(ctx, fields, pdf_dict_get(ctx, field, PDF_NAME(Lock)));
4681
4682
      /* Look for DocMDP and FieldMDP entries to see what
4683
       * flavours of alterations are allowed. */
4684
0
      find_locked_fields_value(ctx, fields, v);
4685
0
    }
4686
4687
    /* Recurse as required */
4688
0
    kids = pdf_dict_get(ctx, field, PDF_NAME(Kids));
4689
0
    if (kids)
4690
0
    {
4691
0
      n = pdf_array_len(ctx, kids);
4692
0
      for (i = 0; i < n; i++)
4693
0
        find_locked_fields_aux(ctx, pdf_array_get(ctx, kids, i), fields, v, ft);
4694
0
    }
4695
0
  }
4696
0
  fz_always(ctx)
4697
0
    pdf_unmark_obj(ctx, field);
4698
0
  fz_catch(ctx)
4699
0
    fz_rethrow(ctx);
4700
0
}
4701
4702
pdf_locked_fields *
4703
pdf_find_locked_fields(fz_context *ctx, pdf_document *doc, int version)
4704
0
{
4705
0
  pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields);
4706
0
  int o_xref_base = doc->xref_base;
4707
0
  doc->xref_base = version;
4708
4709
0
  fz_var(fields);
4710
4711
0
  fz_try(ctx)
4712
0
  {
4713
0
    pdf_obj *fobj = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm/Fields");
4714
0
    int i, len = pdf_array_len(ctx, fobj);
4715
4716
0
    if (len == 0)
4717
0
      break;
4718
4719
0
    for (i = 0; i < len; i++)
4720
0
      find_locked_fields_aux(ctx, pdf_array_get(ctx, fobj, i), fields, NULL, NULL);
4721
4722
    /* Add in any DocMDP referenced directly from the Perms dict. */
4723
0
    find_locked_fields_value(ctx, fields, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Perms/DocMDP"));
4724
0
  }
4725
0
  fz_always(ctx)
4726
0
    doc->xref_base = o_xref_base;
4727
0
  fz_catch(ctx)
4728
0
  {
4729
0
    pdf_drop_locked_fields(ctx, fields);
4730
0
    fz_rethrow(ctx);
4731
0
  }
4732
4733
0
  return fields;
4734
0
}
4735
4736
pdf_locked_fields *
4737
pdf_find_locked_fields_for_sig(fz_context *ctx, pdf_document *doc, pdf_obj *sig)
4738
0
{
4739
0
  pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields);
4740
4741
0
  fz_var(fields);
4742
4743
0
  fz_try(ctx)
4744
0
  {
4745
0
    pdf_obj *ref;
4746
0
    int i, len;
4747
4748
    /* Ensure it really is a sig */
4749
0
    if (!pdf_name_eq(ctx, pdf_dict_get(ctx, sig, PDF_NAME(Subtype)), PDF_NAME(Widget)) ||
4750
0
      !pdf_name_eq(ctx, pdf_dict_get_inheritable(ctx, sig, PDF_NAME(FT)), PDF_NAME(Sig)))
4751
0
      break;
4752
4753
    /* Check the locking details given in the V (i.e. what the signature value
4754
     * claims to lock). */
4755
0
    ref = pdf_dict_getp(ctx, sig, "V/Reference");
4756
0
    len = pdf_array_len(ctx, ref);
4757
0
    for (i = 0; i < len; i++)
4758
0
    {
4759
0
      pdf_obj *tp = pdf_dict_get(ctx, pdf_array_get(ctx, ref, i), PDF_NAME(TransformParams));
4760
0
      merge_lock_specification(ctx, fields, tp);
4761
0
    }
4762
4763
    /* Also, check the locking details given in the Signature definition. This may
4764
     * not strictly be necessary as it's supposed to be "what the form author told
4765
     * the signature that it should lock". A well-formed signature should lock
4766
     * at least that much (possibly with extra fields locked from the XFA). If the
4767
     * signature doesn't lock as much as it was told to, we should be suspicious
4768
     * of the signing application. It is not clear that this test is actually
4769
     * necessary, or in keeping with what Acrobat does. */
4770
0
    merge_lock_specification(ctx, fields, pdf_dict_get(ctx, sig, PDF_NAME(Lock)));
4771
0
  }
4772
0
  fz_catch(ctx)
4773
0
  {
4774
0
    pdf_drop_locked_fields(ctx, fields);
4775
0
    fz_rethrow(ctx);
4776
0
  }
4777
4778
0
  return fields;
4779
0
}
4780
4781
static int
4782
validate_locked_fields(fz_context *ctx, pdf_document *doc, int version, pdf_locked_fields *locked)
4783
0
{
4784
0
  int o_xref_base = doc->xref_base;
4785
0
  pdf_changes *changes;
4786
0
  int num_objs;
4787
0
  int i, n;
4788
0
  int all_indirects = 1;
4789
4790
0
  num_objs = doc->max_xref_len;
4791
0
  changes = Memento_label(fz_calloc(ctx, 1, sizeof(*changes) + sizeof(int)*(num_objs-1)), "pdf_changes");
4792
0
  changes->num_obj = num_objs;
4793
4794
0
  fz_try(ctx)
4795
0
  {
4796
0
    pdf_obj *acroform, *new_acroform, *old_acroform;
4797
0
    int len, acroform_num;
4798
4799
0
    doc->xref_base = version;
4800
4801
    /* Detect every object that has changed */
4802
0
    for (i = 1; i < num_objs; i++)
4803
0
    {
4804
0
      if (pdf_obj_changed_in_version(ctx, doc, i, version))
4805
0
        changes->obj_changes[i] = FIELD_CHANGED;
4806
0
    }
4807
4808
    /* FIXME: Compare PageTrees and NumberTrees (just to allow for them being regenerated
4809
     * and having produced stuff that represents the same stuff). */
4810
4811
    /* The metadata of a document may be regenerated. Allow for that. */
4812
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Metadata"), &filter_simple);
4813
4814
    /* The ModDate of document info may be regenerated. Allow for that. */
4815
    /* FIXME: We accept all changes in document info, when maybe we ought to just
4816
     * accept ModDate? */
4817
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Info"), &filter_simple);
4818
4819
    /* The Encryption dict may be rewritten for the new Xref. */
4820
0
    filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Encrypt"), &filter_simple);
4821
4822
    /* We have to accept certain changes in the top level AcroForms dict,
4823
     * so get the 2 versions... */
4824
0
    acroform = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm");
4825
0
    acroform_num = pdf_to_num(ctx, acroform);
4826
0
    new_acroform = pdf_resolve_indirect_chain(ctx, acroform);
4827
0
    doc->xref_base = version+1;
4828
0
    old_acroform = pdf_resolve_indirect_chain(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm"));
4829
0
    doc->xref_base = version;
4830
0
    n = pdf_dict_len(ctx, new_acroform);
4831
0
    for (i = 0; i < n; i++)
4832
0
    {
4833
0
      pdf_obj *key = pdf_dict_get_key(ctx, new_acroform, i);
4834
0
      pdf_obj *nval = pdf_dict_get(ctx, new_acroform, key);
4835
0
      pdf_obj *oval = pdf_dict_get(ctx, old_acroform, key);
4836
4837
0
      if (pdf_name_eq(ctx, key, PDF_NAME(Fields)))
4838
0
      {
4839
0
        int j;
4840
4841
0
        len = pdf_array_len(ctx, nval);
4842
0
        for (j = 0; j < len; j++)
4843
0
        {
4844
0
          pdf_obj *field = pdf_array_get(ctx, nval, j);
4845
0
          if (!pdf_is_indirect(ctx, field))
4846
0
            all_indirects = 0;
4847
0
          check_field(ctx, doc, changes, field, locked, "", NULL, NULL);
4848
0
        }
4849
0
      }
4850
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(SigFlags)))
4851
0
      {
4852
        /* Accept this */
4853
0
        changes->obj_changes[acroform_num] |= FIELD_CHANGE_VALID;
4854
0
      }
4855
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(DR)))
4856
0
      {
4857
        /* Accept any changes from within the Document Resources */
4858
0
        filter_changes_accepted(ctx, changes, nval, &filter_resources);
4859
0
      }
4860
0
      else if (pdf_name_eq(ctx, key, PDF_NAME(XFA)))
4861
0
      {
4862
        /* Allow any changes within the XFA streams. */
4863
0
        filter_changes_accepted(ctx, changes, nval, &filter_xfa);
4864
0
      }
4865
0
      else if (pdf_objcmp(ctx, nval, oval))
4866
0
      {
4867
0
        changes->obj_changes[acroform_num] |= FIELD_CHANGE_INVALID;
4868
0
      }
4869
0
    }
4870
4871
    /* Allow for any object streams/XRefs to be changed. */
4872
0
    doc->xref_base = version+1;
4873
0
    for (i = 1; i < num_objs; i++)
4874
0
    {
4875
0
      pdf_obj *oobj, *otype;
4876
0
      if (changes->obj_changes[i] != FIELD_CHANGED)
4877
0
        continue;
4878
0
      if (!pdf_obj_exists(ctx, doc, i))
4879
0
      {
4880
        /* Not present this version - must be newly created, can't be a change. */
4881
0
        changes->obj_changes[i] |= FIELD_CHANGE_VALID;
4882
0
        continue;
4883
0
      }
4884
0
      oobj = pdf_load_object(ctx, doc, i);
4885
0
      otype = pdf_dict_get(ctx, oobj, PDF_NAME(Type));
4886
0
      if (pdf_name_eq(ctx, otype, PDF_NAME(ObjStm)) ||
4887
0
        pdf_name_eq(ctx, otype, PDF_NAME(XRef)))
4888
0
      {
4889
0
        changes->obj_changes[i] |= FIELD_CHANGE_VALID;
4890
0
      }
4891
0
      pdf_drop_obj(ctx, oobj);
4892
0
    }
4893
0
  }
4894
0
  fz_always(ctx)
4895
0
    doc->xref_base = o_xref_base;
4896
0
  fz_catch(ctx)
4897
0
  {
4898
0
    fz_free(ctx, changes);
4899
0
    fz_rethrow(ctx);
4900
0
  }
4901
4902
0
  for (i = 1; i < num_objs; i++)
4903
0
  {
4904
0
    if (changes->obj_changes[i] == FIELD_CHANGED)
4905
      /* Change with no reason */
4906
0
      break;
4907
0
    if (changes->obj_changes[i] & FIELD_CHANGE_INVALID)
4908
      /* Illegal Change */
4909
0
      break;
4910
0
  }
4911
4912
0
  fz_free(ctx, changes);
4913
4914
0
  return (i == num_objs) && all_indirects;
4915
0
}
4916
4917
int
4918
pdf_validate_changes(fz_context *ctx, pdf_document *doc, int version)
4919
0
{
4920
0
  int unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
4921
0
  int n = pdf_count_versions(ctx, doc);
4922
0
  pdf_locked_fields *locked = NULL;
4923
0
  int result;
4924
4925
0
  if (version < 0 || version >= n)
4926
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "There aren't that many changes to find in this document!");
4927
4928
  /* We are wanting to compare version+1 with version to make sure
4929
   * that the only changes made in going to version are conformant
4930
   * with what was allowed in version+1. The production of version
4931
   * might have involved signing a signature field and locking down
4932
   * more fields - this means that taking the list of locked things
4933
   * from version rather than version+1 will give us bad results! */
4934
0
  locked = pdf_find_locked_fields(ctx, doc, unsaved_versions+version+1);
4935
4936
0
  fz_try(ctx)
4937
0
  {
4938
0
    if (!locked->all && locked->includes.len == 0 && locked->p == 0)
4939
0
    {
4940
      /* If nothing is locked at all, then all changes are permissible. */
4941
0
      result = 1;
4942
0
    }
4943
0
    else
4944
0
      result = validate_locked_fields(ctx, doc, unsaved_versions+version, locked);
4945
0
  }
4946
0
  fz_always(ctx)
4947
0
    pdf_drop_locked_fields(ctx, locked);
4948
0
  fz_catch(ctx)
4949
0
    fz_rethrow(ctx);
4950
4951
0
  return result;
4952
0
}
4953
4954
int
4955
pdf_validate_change_history(fz_context *ctx, pdf_document *doc)
4956
0
{
4957
0
  int num_versions = pdf_count_versions(ctx, doc);
4958
0
  int v;
4959
4960
0
  if (num_versions < 2)
4961
0
    return 0; /* Unless there are at least 2 versions, there have been no updates. */
4962
4963
0
  for(v = num_versions - 2; v >= 0; v--)
4964
0
  {
4965
0
    if (!pdf_validate_changes(ctx, doc, v))
4966
0
      return v+1;
4967
0
  }
4968
0
  return 0;
4969
0
}
4970
4971
/* Return the version that obj appears in, or -1 for not found. */
4972
static int
4973
pdf_find_incremental_update_num_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
4974
2
{
4975
2
  pdf_xref *xref = NULL;
4976
2
  pdf_xref_subsec *sub;
4977
2
  int i, j;
4978
4979
2
  if (obj == NULL)
4980
0
    return -1;
4981
4982
  /* obj needs to be indirect for us to get a num out of it. */
4983
2
  i = pdf_to_num(ctx, obj);
4984
2
  if (i <= 0)
4985
0
    return -1;
4986
4987
  /* obj can't be indirect below, so resolve it here. */
4988
2
  obj = pdf_resolve_indirect_chain(ctx, obj);
4989
4990
  /* Find the first xref section where the entry is defined. */
4991
2
  for (j = 0; j < doc->num_xref_sections; j++)
4992
2
  {
4993
2
    xref = &doc->xref_sections[j];
4994
4995
2
    if (i < xref->num_objects)
4996
2
    {
4997
2
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
4998
2
      {
4999
2
        pdf_xref_entry *entry;
5000
5001
2
        if (i < sub->start || i >= sub->start + sub->len)
5002
0
          continue;
5003
5004
2
        entry = &sub->table[i - sub->start];
5005
2
        if (entry->obj == obj)
5006
2
          return j;
5007
2
      }
5008
2
    }
5009
2
  }
5010
0
  return -1;
5011
2
}
5012
5013
/*
 * Return the xref section index in which 'obj' is found, clamped to the
 * total number of versions (saved plus unsaved), or -1 if it cannot be
 * found.
 */
int pdf_find_version_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
{
	int found = pdf_find_incremental_update_num_for_obj(ctx, doc, obj);
	int total;

	if (found == -1)
		return -1;

	total = pdf_count_versions(ctx, doc) + pdf_count_unsaved_versions(ctx, doc);

	/* Never report more than the number of versions that exist. */
	return (found > total) ? total : found;
}
5027
5028
/*
 * Check the updates made after a signature widget's version against the
 * fields that the signature locks.
 *
 * Throws FZ_ERROR_GENERIC if the widget is not bound to a page. The
 * locked-field set is computed with doc->xref_base set to the widget's
 * version (capped at the last version). Each later xref section, from
 * version-1 down to the number of unsaved versions, is then checked with
 * validate_locked_fields until one fails. doc->xref_base is restored on
 * exit.
 *
 * Returns (index at which the scan stopped) + 1 - unsaved_versions; this
 * is 0 when every checked section validated.
 */
int pdf_validate_signature(fz_context *ctx, pdf_annot *widget)
{
	pdf_document *doc;
	int unsaved_versions, num_versions, version, i;
	pdf_locked_fields *locked = NULL;
	int saved_base;

	if (!widget->page)
		fz_throw(ctx, FZ_ERROR_GENERIC, "annotation not bound to any page");

	doc = widget->page->doc;
	unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
	num_versions = pdf_count_versions(ctx, doc) + unsaved_versions;
	version = pdf_find_version_for_obj(ctx, doc, widget->obj);

	/* Cap the version at the last one that exists. */
	version = fz_mini(version, num_versions-1);

	/* Get the locked definition from the object when it was signed. */
	saved_base = doc->xref_base;
	doc->xref_base = version;

	fz_var(locked); /* Not really needed, but it stops warnings */

	fz_try(ctx)
	{
		locked = pdf_find_locked_fields_for_sig(ctx, doc, widget->obj);

		/* Walk forward in time (towards lower section indices) until a
		 * section fails validation or we reach the unsaved versions. */
		i = version-1;
		while (i >= unsaved_versions)
		{
			doc->xref_base = i;
			if (!validate_locked_fields(ctx, doc, i, locked))
				break;
			i--;
		}
	}
	fz_always(ctx)
	{
		doc->xref_base = saved_base;
		pdf_drop_locked_fields(ctx, locked);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);

	return i+1-unsaved_versions;
}
5072
5073
/*
 * Report whether any version of the document had a Root/AcroForm with an
 * XFA entry but an empty (or absent) Fields array.
 *
 * Versions are examined from (saved + unsaved) down to the number of
 * unsaved versions by temporarily changing doc->xref_base, which is
 * restored on exit. Returns 1 as soon as such a version is found, else 0.
 */
int pdf_was_pure_xfa(fz_context *ctx, pdf_document *doc)
{
	int num_unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
	int num_versions = pdf_count_versions(ctx, doc);
	int v;
	int saved_base = doc->xref_base;
	int pure_xfa = 0;

	fz_var(pure_xfa);

	fz_try(ctx)
	{
		for (v = num_versions + num_unsaved_versions; v >= num_unsaved_versions; v--)
		{
			pdf_obj *acroform;

			doc->xref_base = v;
			acroform = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm");

			/* If we find a version that had an empty Root/AcroForm/Fields, but had a
			 * Root/AcroForm/XFA entry, then we deduce that this was at one time a
			 * pure XFA form. */
			if (pdf_array_len(ctx, pdf_dict_get(ctx, acroform, PDF_NAME(Fields))) == 0 &&
				pdf_dict_get(ctx, acroform, PDF_NAME(XFA)) != NULL)
			{
				pure_xfa = 1;
				break; /* leaves the for loop only; no need to look further */
			}
		}
	}
	fz_always(ctx)
		doc->xref_base = saved_base;
	fz_catch(ctx)
		fz_rethrow(ctx);

	return pure_xfa;
}
5105
5106
/*
 * Allocate a new xref with a single subsection covering objects
 * 0..pdf_xref_len(doc)-1.
 *
 * The trailer pointers and unsaved signature list are set to NULL. If
 * allocating the subsection or its table fails, everything allocated so
 * far is freed and the exception is rethrown.
 */
pdf_xref *pdf_new_local_xref(fz_context *ctx, pdf_document *doc)
{
	int n = pdf_xref_len(ctx, doc);
	pdf_xref *xref = fz_malloc_struct(ctx, pdf_xref);

	xref->subsec = NULL;
	xref->num_objects = n;
	xref->trailer = NULL;
	xref->pre_repair_trailer = NULL;
	xref->unsaved_sigs = NULL;
	xref->unsaved_sigs_end = NULL;

	fz_try(ctx)
	{
		pdf_xref_subsec *sub = fz_malloc_struct(ctx, pdf_xref_subsec);

		/* Attach the subsection first so the catch block can free it if
		 * the table allocation throws. */
		xref->subsec = sub;
		sub->len = n;
		sub->start = 0;
		sub->table = fz_malloc_struct_array(ctx, n, pdf_xref_entry);
		sub->next = NULL;
	}
	fz_catch(ctx)
	{
		fz_free(ctx, xref->subsec);
		fz_free(ctx, xref);
		fz_rethrow(ctx);
	}

	return xref;
}
5135
5136
/*
 * Release a local xref: drop its subsections, then free the xref itself.
 * A NULL xref is accepted and ignored.
 */
void pdf_drop_local_xref(fz_context *ctx, pdf_xref *xref)
{
	if (xref != NULL)
	{
		pdf_drop_xref_subsec(ctx, xref);
		fz_free(ctx, xref);
	}
}
5145
5146
/*
 * Discard the document's local xref together with the resources that
 * depend on it: local font resources and local entries in the store are
 * purged first, then the local xref is dropped and cleared. Finally the
 * document is flagged as needing resynthesis.
 */
void pdf_drop_local_xref_and_resources(fz_context *ctx, pdf_document *doc)
{
	/* Purge anything that was built from the local xref before it goes. */
	pdf_purge_local_font_resources(ctx, doc);
	pdf_purge_locals_from_store(ctx, doc);

	/* Drop the local xref and forget the pointer. */
	pdf_drop_local_xref(ctx, doc->local_xref);
	doc->local_xref = NULL;

	doc->resynth_required = 1;
}
5154
5155
void
5156
pdf_debug_doc_changes(fz_context *ctx, pdf_document *doc)
5157
0
{
5158
0
  int i, j;
5159
5160
0
  if (doc->num_incremental_sections == 0)
5161
0
    fz_write_printf(ctx, fz_stddbg(ctx), "No incremental xrefs");
5162
0
  else
5163
0
  {
5164
0
    for (i = 0; i < doc->num_incremental_sections; i++)
5165
0
    {
5166
0
      pdf_xref *xref = &doc->xref_sections[i];
5167
0
      pdf_xref_subsec *sub;
5168
5169
0
      fz_write_printf(ctx, fz_stddbg(ctx), "Incremental xref:\n");
5170
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
5171
0
      {
5172
0
        fz_write_printf(ctx, fz_stddbg(ctx), "  Objects %d->%d\n", sub->start, sub->start + sub->len - 1);
5173
0
        for (j = 0; j < sub->len; j++)
5174
0
        {
5175
0
          pdf_xref_entry *e = &sub->table[j];
5176
0
          if (e->type == 0)
5177
0
            continue;
5178
0
          fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type);
5179
0
          pdf_debug_obj(ctx, e->obj);
5180
0
          fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n");
5181
0
        }
5182
0
      }
5183
0
    }
5184
0
  }
5185
5186
0
  if (doc->local_xref == NULL)
5187
0
    fz_write_printf(ctx, fz_stddbg(ctx), "No local xref");
5188
0
  else
5189
0
  {
5190
0
    for (i = 0; i < doc->num_incremental_sections; i++)
5191
0
    {
5192
0
      pdf_xref *xref = doc->local_xref;
5193
0
      pdf_xref_subsec *sub;
5194
5195
0
      fz_write_printf(ctx, fz_stddbg(ctx), "Local xref (%sin force):\n", doc->local_xref_nesting == 0 ? "not " : "");
5196
0
      for (sub = xref->subsec; sub != NULL; sub = sub->next)
5197
0
      {
5198
0
        fz_write_printf(ctx, fz_stddbg(ctx), "  Objects %d->%d\n", sub->start, sub->start + sub->len - 1);
5199
0
        for (j = 0; j < sub->len; j++)
5200
0
        {
5201
0
          pdf_xref_entry *e = &sub->table[j];
5202
0
          if (e->type == 0)
5203
0
            continue;
5204
0
          fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type);
5205
0
          pdf_debug_obj(ctx, e->obj);
5206
0
          fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n");
5207
0
        }
5208
0
      }
5209
0
    }
5210
0
  }
5211
5212
0
}
5213
5214
pdf_obj *
5215
pdf_metadata(fz_context *ctx, pdf_document *doc)
5216
0
{
5217
0
  int initial = doc->xref_base;
5218
0
  pdf_obj *obj = NULL;
5219
5220
0
  fz_var(obj);
5221
5222
0
  fz_try(ctx)
5223
0
  {
5224
0
    do
5225
0
    {
5226
0
      pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
5227
0
      obj = pdf_dict_get(ctx, root, PDF_NAME(Metadata));
5228
0
      if (obj)
5229
0
        break;
5230
0
      doc->xref_base++;
5231
0
    }
5232
0
    while (doc->xref_base < doc->num_xref_sections);
5233
0
  }
5234
0
  fz_always(ctx)
5235
0
    doc->xref_base = initial;
5236
0
  fz_catch(ctx)
5237
0
    fz_rethrow(ctx);
5238
5239
0
  return obj;
5240
0
}
5241
5242
/*
 * Return 1 if 'obj' is found in xref section 0 of its bound document,
 * and 0 otherwise. Objects with no bound document, or whose document has
 * no incremental sections, always yield 0.
 */
int pdf_obj_is_incremental(fz_context *ctx, pdf_obj *obj)
{
	pdf_document *doc = pdf_get_bound_document(ctx, obj);

	if (doc == NULL || doc->num_incremental_sections == 0)
		return 0;

	return pdf_find_incremental_update_num_for_obj(ctx, doc, obj) == 0;
}
5254
5255
/*
 * Pass every cached object in the non-incremental xref sections through
 * pdf_drop_singleton_obj, storing back whatever it returns.
 *
 * Does nothing for a NULL document or one on which a repair has been
 * attempted. Incremental sections are left untouched.
 */
void pdf_minimize_document(fz_context *ctx, pdf_document *doc)
{
	int section;

	/* Don't throw anything away if we've done a repair! */
	if (doc == NULL || doc->repair_attempted)
		return;

	/* Don't throw anything away in the incremental section, as that's where
	 * all our changes will be. */
	for (section = doc->num_incremental_sections; section < doc->num_xref_sections; section++)
	{
		pdf_xref_subsec *sub;

		for (sub = doc->xref_sections[section].subsec; sub; sub = sub->next)
		{
			int count = sub->len;
			int k;

			for (k = 0; k < count; k++)
			{
				pdf_xref_entry *entry = &sub->table[k];

				/* Only entries that currently hold an object are candidates. */
				if (entry->obj != NULL)
					entry->obj = pdf_drop_singleton_obj(ctx, entry->obj);
			}
		}
	}
}