Coverage Report

Created: 2025-01-11 06:55

/src/mupdf/source/pdf/pdf-write.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2024 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "pdf-annot-imp.h"
25
26
#include <zlib.h>
27
28
#include <assert.h>
29
#include <limits.h>
30
#include <string.h>
31
32
#include <stdio.h> /* for debug printing */
33
/* #define DEBUG_LINEARIZATION */
34
/* #define DEBUG_HEAP_SORT */
35
/* #define DEBUG_WRITING */
36
/* #define DEBUG_MARK_AND_SWEEP */
37
38
0
#define SIG_EXTRAS_SIZE (1024)
39
40
0
#define SLASH_BYTE_RANGE ("/ByteRange")
41
0
#define SLASH_CONTENTS ("/Contents")
42
0
#define SLASH_FILTER ("/Filter")
43
44
45
/*
46
  As part of linearization, we need to keep a list of what objects are used
47
  by what page. We do this by recording the objects used in a given page
48
  in a page_objects structure. We have a list of these structures (one per
49
  page) in the page_objects_list structure.
50
51
  The page_objects structure maintains a heap in the object array, so
52
  insertion takes log n time, and we can heapsort and dedupe at the end for
53
  a total worst case n log n time.
54
55
  The magic heap invariant is that:
56
    entry[n] >= entry[(n+1)*2-1] & entry[n] >= entry[(n+1)*2]
57
  or equivalently:
58
    entry[(n-1)>>1] >= entry[n]
59
60
  For a discussion of the heap data structure (and heapsort) see Kingston,
61
  "Algorithms and Data Structures".
62
*/
63
64
/*
  Per-page record: summary fields followed by a growable array of the
  object numbers inserted for the page. The record is allocated with
  room for 'cap' entries in 'object' (the declared [1] is only the
  first of them); 'len' entries are currently in use.
*/
typedef struct {
  int num_shared;
  int page_object_number;
  int num_objects;
  int min_ofs, max_ofs;
  /* Extensible list of objects used on this page */
  int cap, len;
  int object[1];
} page_objects;
75
76
typedef struct {
77
  int cap;
78
  int len;
79
  page_objects *page[1];
80
} page_objects_list;
81
82
typedef struct
83
{
84
  fz_output *out;
85
86
  int do_incremental;
87
  int do_tight;
88
  int do_ascii;
89
  int do_expand;
90
  int do_compress;
91
  int do_compress_images;
92
  int do_compress_fonts;
93
  int do_garbage;
94
  int do_linear;
95
  int do_clean;
96
  int do_encrypt;
97
  int dont_regenerate_id;
98
  int do_snapshot;
99
  int do_preserve_metadata;
100
  int do_use_objstms;
101
  int compression_effort;
102
103
  int list_len;
104
  int *use_list;
105
  int64_t *ofs_list;
106
  int *gen_list;
107
  int *renumber_map;
108
109
  int bias; /* when saving incrementally to a file with garbage before the version marker */
110
111
  /* The following extras are required for linearization */
112
  int *rev_renumber_map;
113
  int start;
114
  int64_t first_xref_offset;
115
  int64_t main_xref_offset;
116
  int64_t first_xref_entry_offset;
117
  int64_t file_len;
118
  int hints_shared_offset;
119
  int64_t hintstream_len;
120
  pdf_obj *linear_l;
121
  pdf_obj *linear_h0;
122
  pdf_obj *linear_h1;
123
  pdf_obj *linear_o;
124
  pdf_obj *linear_e;
125
  pdf_obj *linear_n;
126
  pdf_obj *linear_t;
127
  pdf_obj *hints_s;
128
  pdf_obj *hints_length;
129
  int hint_object_num;
130
  int page_count;
131
  page_objects_list *page_object_lists;
132
  int crypt_object_number;
133
  char opwd_utf8[128];
134
  char upwd_utf8[128];
135
  int permissions;
136
  pdf_crypt *crypt;
137
  pdf_obj *crypt_obj;
138
  pdf_obj *metadata;
139
} pdf_write_state;
140
141
/*
142
 * Constants for use with use_list.
143
 *
144
 * If use_list[num] = 0, then object num is unused.
145
 * If use_list[num] & PARAMS, then object num is the linearisation params obj.
146
 * If use_list[num] & CATALOGUE, then object num is used by the catalogue.
147
 * If use_list[num] & PAGE1, then object num is used by page 1.
148
 * If use_list[num] & SHARED, then object num is shared between pages.
149
 * If use_list[num] & PAGE_OBJECT then this must be the first object in a page.
150
 * If use_list[num] & OTHER_OBJECTS then this must appear in section 9.
151
 * Otherwise object num is used by page (use_list[num]>>USE_PAGE_SHIFT).
152
 */
153
/*
 * Bit flags stored in use_list[]. The low 8 bits are section flags;
 * everything above USE_PAGE_SHIFT holds a page number.
 */
enum
{
  USE_CATALOGUE = 1 << 1,
  USE_PAGE1 = 1 << 2,
  USE_SHARED = 1 << 3,
  USE_PARAMS = 1 << 4,
  USE_HINTS = 1 << 5,
  USE_PAGE_OBJECT = 1 << 6,
  USE_OTHER_OBJECTS = 1 << 7,
  USE_PAGE_SHIFT = 8,
  USE_PAGE_MASK = ~((1 << USE_PAGE_SHIFT) - 1)
};
165
166
static void
167
expand_lists(fz_context *ctx, pdf_write_state *opts, int num)
168
0
{
169
0
  int i;
170
171
  /* objects are numbered 0..num and maybe two additional objects for linearization */
172
0
  num += 3;
173
0
  if (num <= opts->list_len)
174
0
    return;
175
176
0
  opts->use_list = fz_realloc_array(ctx, opts->use_list, num, int);
177
0
  opts->ofs_list = fz_realloc_array(ctx, opts->ofs_list, num, int64_t);
178
0
  opts->gen_list = fz_realloc_array(ctx, opts->gen_list, num, int);
179
0
  opts->renumber_map = fz_realloc_array(ctx, opts->renumber_map, num, int);
180
0
  opts->rev_renumber_map = fz_realloc_array(ctx, opts->rev_renumber_map, num, int);
181
182
0
  for (i = opts->list_len; i < num; i++)
183
0
  {
184
0
    opts->use_list[i] = 0;
185
0
    opts->ofs_list[i] = 0;
186
0
    opts->gen_list[i] = 0;
187
0
    opts->renumber_map[i] = i;
188
0
    opts->rev_renumber_map[i] = i;
189
0
  }
190
0
  opts->list_len = num;
191
0
}
192
193
/*
194
 * page_objects and page_object_list handling functions
195
 */
196
static page_objects_list *
197
page_objects_list_create(fz_context *ctx)
198
0
{
199
0
  page_objects_list *pol = fz_calloc(ctx, 1, sizeof(*pol));
200
201
0
  pol->cap = 1;
202
0
  pol->len = 0;
203
0
  return pol;
204
0
}
205
206
static void
207
page_objects_list_destroy(fz_context *ctx, page_objects_list *pol)
208
0
{
209
0
  int i;
210
211
0
  if (!pol)
212
0
    return;
213
0
  for (i = 0; i < pol->len; i++)
214
0
  {
215
0
    fz_free(ctx, pol->page[i]);
216
0
  }
217
0
  fz_free(ctx, pol);
218
0
}
219
220
static void
221
page_objects_list_ensure(fz_context *ctx, page_objects_list **pol, int newcap)
222
0
{
223
0
  int oldcap = (*pol)->cap;
224
0
  if (newcap <= oldcap)
225
0
    return;
226
0
  *pol = fz_realloc(ctx, *pol, sizeof(page_objects_list) + (newcap-1)*sizeof(page_objects *));
227
0
  memset(&(*pol)->page[oldcap], 0, (newcap-oldcap)*sizeof(page_objects *));
228
0
  (*pol)->cap = newcap;
229
0
}
230
231
static page_objects *
232
page_objects_create(fz_context *ctx)
233
0
{
234
0
  int initial_cap = 8;
235
0
  page_objects *po = fz_calloc(ctx, 1, sizeof(*po) + (initial_cap-1) * sizeof(int));
236
237
0
  po->cap = initial_cap;
238
0
  po->len = 0;
239
0
  return po;
240
0
}
241
242
static void
243
page_objects_insert(fz_context *ctx, page_objects **ppo, int i)
244
0
{
245
0
  page_objects *po;
246
247
  /* Make a page_objects if we don't have one */
248
0
  if (*ppo == NULL)
249
0
    *ppo = page_objects_create(ctx);
250
251
0
  po = *ppo;
252
  /* page_objects insertion: extend the page_objects by 1, and put us on the end */
253
0
  if (po->len == po->cap)
254
0
  {
255
0
    po = fz_realloc(ctx, po, sizeof(page_objects) + (po->cap*2 - 1)*sizeof(int));
256
0
    po->cap *= 2;
257
0
    *ppo = po;
258
0
  }
259
0
  po->object[po->len++] = i;
260
0
}
261
262
static void
263
page_objects_list_insert(fz_context *ctx, pdf_write_state *opts, int page, int object)
264
0
{
265
0
  page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
266
0
  if (object >= opts->list_len)
267
0
    expand_lists(ctx, opts, object);
268
0
  if (opts->page_object_lists->len < page+1)
269
0
    opts->page_object_lists->len = page+1;
270
0
  page_objects_insert(ctx, &opts->page_object_lists->page[page], object);
271
0
}
272
273
static void
274
page_objects_list_set_page_object(fz_context *ctx, pdf_write_state *opts, int page, int object)
275
0
{
276
0
  page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
277
0
  if (object >= opts->list_len)
278
0
    expand_lists(ctx, opts, object);
279
0
  opts->page_object_lists->page[page]->page_object_number = object;
280
0
}
281
282
static void
283
page_objects_sort(fz_context *ctx, page_objects *po)
284
0
{
285
0
  int i, j;
286
0
  int n = po->len;
287
288
  /* Step 1: Make a heap */
289
  /* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */
290
0
  for (i = 1; i < n; i++)
291
0
  {
292
    /* Now bubble backwards to maintain heap invariant */
293
0
    j = i;
294
0
    while (j != 0)
295
0
    {
296
0
      int tmp;
297
0
      int k = (j-1)>>1;
298
0
      if (po->object[k] >= po->object[j])
299
0
        break;
300
0
      tmp = po->object[k];
301
0
      po->object[k] = po->object[j];
302
0
      po->object[j] = tmp;
303
0
      j = k;
304
0
    }
305
0
  }
306
307
  /* Step 2: Heap sort */
308
  /* Invariant: valid heap in [0..i), sorted list in [i..n) */
309
  /* Initially: i = n */
310
0
  for (i = n-1; i > 0; i--)
311
0
  {
312
    /* Swap the maximum (0th) element from the page_objects into its place
313
     * in the sorted list (position i). */
314
0
    int tmp = po->object[0];
315
0
    po->object[0] = po->object[i];
316
0
    po->object[i] = tmp;
317
    /* Now, the page_objects is invalid because the 0th element is out
318
     * of place. Bubble it until the page_objects is valid. */
319
0
    j = 0;
320
0
    while (1)
321
0
    {
322
      /* Children are k and k+1 */
323
0
      int k = (j+1)*2-1;
324
      /* If both children out of the page_objects, we're done */
325
0
      if (k > i-1)
326
0
        break;
327
      /* If both are in the page_objects, pick the larger one */
328
0
      if (k < i-1 && po->object[k] < po->object[k+1])
329
0
        k++;
330
      /* If j is bigger than k (i.e. both of its children),
331
       * we're done */
332
0
      if (po->object[j] > po->object[k])
333
0
        break;
334
0
      tmp = po->object[k];
335
0
      po->object[k] = po->object[j];
336
0
      po->object[j] = tmp;
337
0
      j = k;
338
0
    }
339
0
  }
340
0
}
341
342
static int
343
order_ge(int ui, int uj)
344
0
{
345
  /*
346
  For linearization, we need to order the sections as follows:
347
348
    Remaining pages         (Part 7)
349
    Shared objects          (Part 8)
350
    Objects not associated with any page    (Part 9)
351
    Any "other" objects
352
              (Header)(Part 1)
353
    (Linearization params)        (Part 2)
354
          (1st page Xref/Trailer) (Part 3)
355
    Catalogue (and other document level objects)  (Part 4)
356
    First page          (Part 6)
357
    (Primary Hint stream)     (*) (Part 5)
358
    Any free objects
359
360
  Note, this is NOT the same order they appear in
361
  the final file!
362
363
  (*) The PDF reference gives us the option of putting the hint stream
364
  after the first page, and we take it, for simplicity.
365
  */
366
367
  /* If the 2 objects are in the same section, then page object comes first. */
368
0
  if (((ui ^ uj) & ~USE_PAGE_OBJECT) == 0)
369
0
    return ((ui & USE_PAGE_OBJECT) == 0);
370
  /* Put unused objects last */
371
0
  else if (ui == 0)
372
0
    return 1;
373
0
  else if (uj == 0)
374
0
    return 0;
375
  /* Put the hint stream before that... */
376
0
  else if (ui & USE_HINTS)
377
0
    return 1;
378
0
  else if (uj & USE_HINTS)
379
0
    return 0;
380
  /* Put page 1 before that... */
381
0
  else if (ui & USE_PAGE1)
382
0
    return 1;
383
0
  else if (uj & USE_PAGE1)
384
0
    return 0;
385
  /* Put the catalogue before that... */
386
0
  else if (ui & USE_CATALOGUE)
387
0
    return 1;
388
0
  else if (uj & USE_CATALOGUE)
389
0
    return 0;
390
  /* Put the linearization params before that... */
391
0
  else if (ui & USE_PARAMS)
392
0
    return 1;
393
0
  else if (uj & USE_PARAMS)
394
0
    return 0;
395
  /* Put other objects before that */
396
0
  else if (ui & USE_OTHER_OBJECTS)
397
0
    return 1;
398
0
  else if (uj & USE_OTHER_OBJECTS)
399
0
    return 0;
400
  /* Put shared objects before that... */
401
0
  else if (ui & USE_SHARED)
402
0
    return 1;
403
0
  else if (uj & USE_SHARED)
404
0
    return 0;
405
  /* And otherwise, order by the page number on which
406
   * they are used. */
407
0
  return (ui>>USE_PAGE_SHIFT) >= (uj>>USE_PAGE_SHIFT);
408
0
}
409
410
/*
 * Heapsort the indices in list[0..n) in place, ordered by their values:
 * entry x is ranked by val[list[x]] using the comparison ge(a, b),
 * which returns nonzero when a sorts at or after b.
 */
static void
heap_sort(int *list, int n, const int *val, int (*ge)(int, int))
{
  int end, parent, child, held;

#ifdef DEBUG_HEAP_SORT
  fprintf(stderr, "Initially:\n");
  for (end = 0; end < n; end++)
  {
    fprintf(stderr, "%d: %d %x\n", end, list[end], val[list[end]]);
  }
#endif
  /* Phase 1: build a heap. list[0..end) is a valid heap; sift the
   * entry at 'end' up until its parent is ge it. */
  for (end = 1; end < n; end++)
  {
    for (child = end; child != 0; child = parent)
    {
      parent = (child - 1) >> 1;
      if (ge(val[list[parent]], val[list[child]]))
        break;
      held = list[parent];
      list[parent] = list[child];
      list[child] = held;
    }
  }
#ifdef DEBUG_HEAP_SORT
  fprintf(stderr, "Valid heap:\n");
  for (end = 0; end < n; end++)
  {
    int left = (end+1)*2-1;
    fprintf(stderr, "%d: %d %x ", end, list[end], val[list[end]]);
    if (left < n)
    {
      if (ge(val[list[end]], val[list[left]]))
        fprintf(stderr, "OK ");
      else
        fprintf(stderr, "BAD ");
    }
    if (left+1 < n)
    {
      if (ge(val[list[end]], val[list[left+1]]))
        fprintf(stderr, "OK\n");
      else
        fprintf(stderr, "BAD\n");
    }
    else
        fprintf(stderr, "\n");
  }
#endif

  /* Phase 2: list[0..end] is a heap and list(end..n) is sorted. Move
   * the maximum into position 'end', then sift the new root down. */
  for (end = n - 1; end > 0; end--)
  {
    held = list[0];
    list[0] = list[end];
    list[end] = held;

    parent = 0;
    for (;;)
    {
      /* Children are child and child+1 */
      child = 2 * parent + 1;
      /* No children left inside the heap: done */
      if (child >= end)
        break;
      /* Both children inside the heap: pick the larger one */
      if (child + 1 < end && ge(val[list[child+1]], val[list[child]]))
        child++;
      /* Parent ge its largest child: done */
      if (ge(val[list[parent]], val[list[child]]))
        break;
      held = list[child];
      list[child] = list[parent];
      list[parent] = held;
      parent = child;
    }
  }
#ifdef DEBUG_HEAP_SORT
  fprintf(stderr, "Sorted:\n");
  for (end = 0; end < n; end++)
  {
    fprintf(stderr, "%d: %d %x ", end, list[end], val[list[end]]);
    if (end+1 < n)
    {
      if (ge(val[list[end+1]], val[list[end]]))
        fprintf(stderr, "OK");
      else
        fprintf(stderr, "BAD");
    }
    fprintf(stderr, "\n");
  }
#endif
}
515
516
static void
517
page_objects_dedupe(fz_context *ctx, page_objects *po)
518
0
{
519
0
  int i, j;
520
0
  int n = po->len-1;
521
522
0
  for (i = 0; i < n; i++)
523
0
  {
524
0
    if (po->object[i] == po->object[i+1])
525
0
      break;
526
0
  }
527
0
  j = i; /* j points to the last valid one */
528
0
  i++; /* i points to the first one we haven't looked at */
529
0
  for (; i < n; i++)
530
0
  {
531
0
    if (po->object[j] != po->object[i])
532
0
      po->object[++j] = po->object[i];
533
0
  }
534
0
  po->len = j+1;
535
0
}
536
537
static void
538
page_objects_list_sort_and_dedupe(fz_context *ctx, page_objects_list *pol)
539
0
{
540
0
  int i;
541
0
  int n = pol->len;
542
543
0
  for (i = 0; i < n; i++)
544
0
  {
545
0
    page_objects_sort(ctx, pol->page[i]);
546
0
    page_objects_dedupe(ctx, pol->page[i]);
547
0
  }
548
0
}
549
550
#ifdef DEBUG_LINEARIZATION
551
static void
552
page_objects_dump(pdf_write_state *opts)
553
{
554
  page_objects_list *pol = opts->page_object_lists;
555
  int i, j;
556
557
  for (i = 0; i < pol->len; i++)
558
  {
559
    page_objects *p = pol->page[i];
560
    fprintf(stderr, "Page %d\n", i+1);
561
    for (j = 0; j < p->len; j++)
562
    {
563
      int o = p->object[j];
564
      fprintf(stderr, "\tObject %d: use=%x\n", o, opts->use_list[o]);
565
    }
566
    fprintf(stderr, "Byte range=%d->%d\n", p->min_ofs, p->max_ofs);
567
    fprintf(stderr, "Number of objects=%d, Number of shared objects=%d\n", p->num_objects, p->num_shared);
568
    fprintf(stderr, "Page object number=%d\n", p->page_object_number);
569
  }
570
}
571
572
static void
573
objects_dump(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
574
{
575
  int i;
576
577
  for (i=0; i < pdf_xref_len(ctx, doc); i++)
578
  {
579
    fprintf(stderr, "Object %d use=%x offset=%d\n", i, opts->use_list[i], (int)opts->ofs_list[i]);
580
  }
581
}
582
#endif
583
584
/*
585
 * Garbage collect objects not reachable from the trailer.
586
 */
587
588
/*
 * If object 'num' is a stream whose /Length entry is an indirect
 * reference, replace that entry with the integer it resolves to.
 * Non-stream objects are left untouched.
 */
static void bake_stream_length(fz_context *ctx, pdf_document *doc, int num)
{
  pdf_obj *obj = NULL;
  pdf_obj *length;

  if (!pdf_obj_num_is_stream(ctx, doc, num))
    return;

  fz_var(obj);
  fz_try(ctx)
  {
    obj = pdf_load_object(ctx, doc, num);
    length = pdf_dict_get(ctx, obj, PDF_NAME(Length));
    if (pdf_is_indirect(ctx, length))
      pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_to_int(ctx, length));
  }
  fz_always(ctx)
    pdf_drop_obj(ctx, obj);
  fz_catch(ctx)
    fz_rethrow(ctx);
}
608
609
/* Mark a reference. If it's been marked already, return NULL (as no further
610
 * processing is required). If it's not, return the resolved object so
611
 * that we can continue our recursive marking. If it's a duff reference
612
 * return the fact so that we can remove the reference at source.
613
 */
614
/*
 * Mark the object referenced by 'obj' as used.
 *
 * Returns NULL with *duff = 0 if it was already marked (nothing more
 * to do), or the resolved object with *duff = 0 if it was newly marked
 * so the caller can keep walking. *duff is set to 1 when the reference
 * is bad: an object number outside 1..xref_len-1, or one that resolves
 * to nothing or to null (in which case the mark is undone).
 */
static pdf_obj *markref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj, int *duff)
{
  int num = pdf_to_num(ctx, obj);
  int xref_len = pdf_xref_len(ctx, doc);
  pdf_obj *target;

  if (num <= 0 || num >= xref_len)
  {
    *duff = 1;
    return NULL;
  }

  expand_lists(ctx, opts, xref_len);
  *duff = 0;

  /* Already marked: no further processing required. */
  if (opts->use_list[num] != 0)
    return NULL;

  /* Mark before resolving. */
  opts->use_list[num] = 1;

  target = pdf_resolve_indirect(ctx, obj);
  if (target == NULL || pdf_is_null(ctx, target))
  {
    opts->use_list[num] = 0;
    *duff = 1;
  }

  return target;
}
640
641
#ifdef DEBUG_MARK_AND_SWEEP
642
/* Current nesting level of markobj(), used to indent debug output. */
static int depth = 0;

/*
 * Print 'depth' spaces to stdout, at most 16 at a time.
 *
 * Works on a local copy so the global nesting counter is left intact
 * for the depth++/depth-- bookkeeping done by the caller.
 */
static
void indent(void)
{
  int remaining = depth;

  while (remaining > 0)
  {
    int d = remaining;
    if (d > 16)
      d = 16;
    /* Point d characters before the end of a 16-space string. */
    printf("%s", &"                "[16-d]);
    remaining -= d;
  }
}
656
#define DEBUGGING_MARKING(A) do { A; } while (0)
657
#else
658
0
#define DEBUGGING_MARKING(A) do { } while (0)
659
#endif
660
661
/* Recursively mark an object. If any references found are duff, then
662
 * replace them with nulls. */
663
/*
 * Recursively mark everything reachable from 'obj' as used.
 *
 * Returns 1 if 'obj' is (or chains to) a bad reference, so the caller
 * can overwrite it; returns 0 otherwise. Bad references found inside
 * dictionaries and arrays are replaced with null in place.
 */
static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
{
  int idx, count;

  DEBUGGING_MARKING(depth++);

  /* Follow the chain of references, marking each object on the way. */
  while (pdf_is_indirect(ctx, obj))
  {
    int duff;
    DEBUGGING_MARKING(indent(); printf("Marking object %d\n", pdf_to_num(ctx, obj)));
    obj = markref(ctx, doc, opts, obj, &duff);
    if (!duff)
      continue;
    DEBUGGING_MARKING(depth--);
    return 1;
  }

  if (pdf_is_dict(ctx, obj))
  {
    count = pdf_dict_len(ctx, obj);
    for (idx = 0; idx < count; idx++)
    {
      DEBUGGING_MARKING(indent(); printf("DICT[%d/%d] = %s\n", idx, count, pdf_to_name(ctx, pdf_dict_get_key(ctx, obj, idx))));
      if (!markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, idx)))
        continue;
      pdf_dict_put_val_null(ctx, obj, idx);
    }
  }
  else if (pdf_is_array(ctx, obj))
  {
    count = pdf_array_len(ctx, obj);
    for (idx = 0; idx < count; idx++)
    {
      DEBUGGING_MARKING(indent(); printf("ARRAY[%d/%d]\n", idx, count));
      if (!markobj(ctx, doc, opts, pdf_array_get(ctx, obj, idx)))
        continue;
      pdf_array_put(ctx, obj, idx, PDF_NULL);
    }
  }

  DEBUGGING_MARKING(depth--);

  return 0;
}
707
708
/*
709
 * Scan for and remove duplicate objects (slow)
710
 */
711
712
/*
 * Scan for duplicate objects (slow: every used object is compared with
 * every used object that precedes it).
 *
 * When object 'num' equals an earlier object 'other', 'num' is mapped
 * onto 'other' in renumber_map and marked unused. With do_garbage >= 4
 * the deep comparison is used. Returns 1 if any duplicate was found.
 */
static int removeduplicateobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
  int num, other;
  int xref_len = pdf_xref_len(ctx, doc);
  int changed = 0;

  /* After this, every index below xref_len is valid in the lists. */
  expand_lists(ctx, opts, xref_len);

  for (num = 1; num < xref_len; num++)
  {
    /* Loop-invariant for the scan below, so test it once up front. */
    if (!opts->use_list[num])
      continue;

    /* Only compare an object to objects preceding it */
    for (other = 1; other < num; other++)
    {
      pdf_obj *a, *b;
      int differ;

      if (!opts->use_list[other])
        continue;

      /* TODO: resolve indirect references to see if we can omit them */

      a = pdf_get_xref_entry_no_null(ctx, doc, num)->obj;
      b = pdf_get_xref_entry_no_null(ctx, doc, other)->obj;
      if (opts->do_garbage >= 4)
        differ = pdf_objcmp_deep(ctx, a, b);
      else
        differ = pdf_objcmp(ctx, a, b);
      if (differ)
        continue;

      /* Keep the lowest numbered object; since other < num, that is
       * always 'other', and 'num' is the one retired. */
      opts->renumber_map[num] = other;
      opts->renumber_map[other] = other;
      opts->rev_renumber_map[other] = num; /* Either will do */
      opts->use_list[num] = 0;

      /* One duplicate was found, do not look for another */
      changed = 1;
      break;
    }
  }

  return changed;
}
760
761
/*
762
 * Renumber objects sequentially so the xref is more compact
763
 *
764
 * This code assumes that any opts->renumber_map[n] <= n for all n.
765
 */
766
767
/*
 * Rewrite renumber_map in place so that used objects get consecutive
 * numbers starting at 1. Unused objects map to 0, and objects already
 * redirected to another object pick up that object's new number.
 *
 * Relies on renumber_map[n] <= n for all n.
 */
static void compactxref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
  int xref_len = pdf_xref_len(ctx, doc);
  int next_free = 1;
  int num;

  if (opts->list_len < xref_len)
    expand_lists(ctx, opts, xref_len-1);

  for (num = 1; num < xref_len; num++)
  {
    int target = opts->renumber_map[num];

    if (!opts->use_list[target])
    {
      /* Not used: map it to zero */
      opts->renumber_map[num] = 0;
    }
    else if (target == num)
    {
      /* Used and not moved: give it the next compact number. */
      opts->rev_renumber_map[next_free] = opts->rev_renumber_map[num];
      opts->renumber_map[num] = next_free++;
    }
    else
    {
      /* Used and moved. It must have moved down, so the place it
       * moved to already holds its final number. */
      opts->renumber_map[num] = opts->renumber_map[target];
    }
  }
}
805
806
/*
807
 * Update indirect objects according to renumbering established when
808
 * removing duplicate objects and compacting the xref.
809
 */
810
811
/*
 * Walk 'obj' (a dictionary or array; anything else is ignored) and
 * rewrite every indirect reference in it according to renumber_map.
 * References that are out of range or map to 0 become null. Direct
 * values are recursed into.
 */
static void renumberobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
{
  int xref_len = pdf_xref_len(ctx, doc);
  int idx, count;

  if (pdf_is_dict(ctx, obj))
  {
    count = pdf_dict_len(ctx, obj);
    for (idx = 0; idx < count; idx++)
    {
      pdf_obj *key = pdf_dict_get_key(ctx, obj, idx);
      pdf_obj *val = pdf_dict_get_val(ctx, obj, idx);
      int o;

      if (!pdf_is_indirect(ctx, val))
      {
        renumberobj(ctx, doc, opts, val);
        continue;
      }

      o = pdf_to_num(ctx, val);
      if (o < xref_len && o > 0 && opts->renumber_map[o] != 0)
        val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
      else
        val = PDF_NULL;
      pdf_dict_put_drop(ctx, obj, key, val);
    }
  }
  else if (pdf_is_array(ctx, obj))
  {
    count = pdf_array_len(ctx, obj);
    for (idx = 0; idx < count; idx++)
    {
      pdf_obj *val = pdf_array_get(ctx, obj, idx);
      int o;

      if (!pdf_is_indirect(ctx, val))
      {
        renumberobj(ctx, doc, opts, val);
        continue;
      }

      o = pdf_to_num(ctx, val);
      if (o < xref_len && o > 0 && opts->renumber_map[o] != 0)
        val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
      else
        val = PDF_NULL;
      pdf_array_put_drop(ctx, obj, idx, val);
    }
  }
}
861
862
/*
 * Apply renumber_map to the whole document: rewrite references in the
 * trailer and in every surviving object, move the used xref entries
 * into a new table at their new numbers, install that table, and reset
 * renumber_map to the identity. use_list is replaced by a copy indexed
 * by the new numbers.
 */
static void renumberobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
  pdf_xref_entry *table = NULL;
  int xref_len = pdf_xref_len(ctx, doc);
  int *fresh_use;
  int highest;
  int num;

  expand_lists(ctx, opts, xref_len);
  fresh_use = fz_calloc(ctx, opts->list_len, sizeof(int));

  fz_var(table);
  fz_try(ctx)
  {
    /* Apply renumber map to indirect references in all objects in xref */
    renumberobj(ctx, doc, opts, pdf_trailer(ctx, doc));
    for (num = 0; num < xref_len; num++)
    {
      pdf_obj *obj;
      int to = opts->renumber_map[num];

      /* If object is going to be dropped, don't bother renumbering */
      if (to == 0)
        continue;

      obj = pdf_get_xref_entry_no_null(ctx, doc, num)->obj;

      if (!pdf_is_indirect(ctx, obj))
      {
        renumberobj(ctx, doc, opts, obj);
        continue;
      }

      /* The entry itself is a reference: replace it with a
       * reference to the new number. */
      obj = pdf_new_indirect(ctx, doc, to, 0);
      fz_try(ctx)
        pdf_update_object(ctx, doc, num, obj);
      fz_always(ctx)
        pdf_drop_obj(ctx, obj);
      fz_catch(ctx)
        fz_rethrow(ctx);
    }

    /* Create new table for the reordered, compacted xref */
    table = Memento_label(fz_malloc_array(ctx, xref_len + 3, pdf_xref_entry), "pdf_xref_entries");
    table[0] = *pdf_get_xref_entry_no_null(ctx, doc, 0);

    /* Move used objects into the new compacted xref */
    highest = 0;
    for (num = 1; num < xref_len; num++)
    {
      pdf_xref_entry *e;
      int to;

      if (!opts->use_list[num])
      {
        /* Unused: release whatever the old entry was holding. */
        e = pdf_get_xref_entry_no_null(ctx, doc, num);
        pdf_drop_obj(ctx, e->obj);
        e->obj = NULL;
        fz_drop_buffer(ctx, e->stm_buf);
        e->stm_buf = NULL;
        continue;
      }

      to = opts->renumber_map[num];
      if (highest < to)
        highest = to;
      e = pdf_get_xref_entry_no_null(ctx, doc, num);
      table[to] = *e;
      if (e->obj)
        pdf_set_obj_parent(ctx, e->obj, to);
      /* The new table now holds these pointers; clear the old ones. */
      e->obj = NULL;
      e->stm_buf = NULL;
      fresh_use[to] = opts->use_list[num];
    }

    pdf_replace_xref(ctx, doc, table, highest + 1);
    table = NULL;
  }
  fz_catch(ctx)
  {
    fz_free(ctx, table);
    fz_free(ctx, fresh_use);
    fz_rethrow(ctx);
  }
  fz_free(ctx, opts->use_list);
  opts->use_list = fresh_use;

  /* Everything has its final number now: back to the identity map. */
  for (num = 1; num < xref_len; num++)
  {
    opts->renumber_map[num] = num;
  }
}
953
954
/* Translate every object number stored in the per-page lists (and each
 * page's page_object_number) through renumber_map. */
static void page_objects_list_renumber(pdf_write_state *opts)
{
  const int *map = opts->renumber_map;
  int pg, k;

  for (pg = 0; pg < opts->page_object_lists->len; pg++)
  {
    page_objects *rec = opts->page_object_lists->page[pg];

    k = 0;
    while (k < rec->len)
    {
      rec->object[k] = map[rec->object[k]];
      k++;
    }
    rec->page_object_number = map[rec->page_object_number];
  }
}
968
969
static void
970
swap_indirect_obj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj **obj)
971
0
{
972
0
  pdf_obj *o = pdf_new_indirect(ctx, doc, opts->renumber_map[pdf_to_num(ctx, *obj)], 0);
973
974
0
  pdf_drop_obj(ctx, *obj);
975
0
  *obj = o;
976
0
}
977
978
static void
979
renumber_pages(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
980
0
{
981
0
  fz_page *page;
982
983
0
  for (page = doc->super.open; page != NULL; page = page->next)
984
0
  {
985
0
    pdf_page *ppage = (pdf_page *)page;
986
0
    pdf_annot *annot;
987
0
    swap_indirect_obj(ctx, doc, opts, &ppage->obj);
988
989
0
    for (annot = ppage->annots; annot != NULL; annot = annot->next)
990
0
      swap_indirect_obj(ctx, doc, opts, &annot->obj);
991
0
    for (annot = ppage->widgets; annot != NULL; annot = annot->next)
992
0
      swap_indirect_obj(ctx, doc, opts, &annot->obj);
993
0
  }
994
0
}
995
996
static void
997
mark_all(fz_context *ctx, pdf_document *doc, pdf_mark_list *list, pdf_write_state *opts, pdf_obj *val, int flag, int page)
998
0
{
999
0
  if (pdf_mark_list_push(ctx, list, val))
1000
0
    return;
1001
1002
0
  if (pdf_is_indirect(ctx, val))
1003
0
  {
1004
0
    int num = pdf_to_num(ctx, val);
1005
0
    int bits = flag;
1006
0
    if (num >= opts->list_len)
1007
0
      expand_lists(ctx, opts, num);
1008
0
    if (page >= 0)
1009
0
      page_objects_list_insert(ctx, opts, page, num);
1010
0
    if (opts->use_list[num] & USE_PAGE_MASK)
1011
      /* Already used */
1012
0
      bits = USE_SHARED;
1013
0
    if ((opts->use_list[num] | bits) == opts->use_list[num])
1014
0
    {
1015
      /* Been here already */
1016
0
      pdf_mark_list_pop(ctx, list);
1017
0
      return;
1018
0
    }
1019
0
    opts->use_list[num] |= bits;
1020
0
  }
1021
1022
0
  if (pdf_is_dict(ctx, val))
1023
0
  {
1024
0
    int i, n;
1025
0
    n = pdf_dict_len(ctx, val);
1026
1027
0
    for (i = 0; i < n; i++)
1028
0
    {
1029
0
      pdf_obj *v = pdf_dict_get_val(ctx, val, i);
1030
0
      pdf_obj *type = pdf_dict_get(ctx, v, PDF_NAME(Type));
1031
1032
      /* Don't walk through the Page tree, or direct to a page. */
1033
0
      if (pdf_name_eq(ctx, PDF_NAME(Pages), type) || pdf_name_eq(ctx, PDF_NAME(Page), type))
1034
0
        continue;
1035
1036
0
      mark_all(ctx, doc, list, opts, v, flag, page);
1037
0
    }
1038
0
  }
1039
0
  else if (pdf_is_array(ctx, val))
1040
0
  {
1041
0
    int i, n = pdf_array_len(ctx, val);
1042
1043
0
    for (i = 0; i < n; i++)
1044
0
    {
1045
0
      pdf_obj *v = pdf_array_get(ctx, val, i);
1046
0
      pdf_obj *type = pdf_dict_get(ctx, v, PDF_NAME(Type));
1047
1048
      /* Don't walk through the Page tree, or direct to a page. */
1049
0
      if (pdf_name_eq(ctx, PDF_NAME(Pages), type) || pdf_name_eq(ctx, PDF_NAME(Page), type))
1050
0
        continue;
1051
1052
0
      mark_all(ctx, doc, list, opts, v, flag, page);
1053
0
    }
1054
0
  }
1055
0
  pdf_mark_list_pop(ctx, list);
1056
0
}
1057
1058
static int
1059
mark_pages(fz_context *ctx, pdf_document *doc, pdf_mark_list *list, pdf_write_state *opts, pdf_obj *val, int pagenum)
1060
0
{
1061
0
  if (pdf_mark_list_push(ctx, list, val))
1062
0
    return pagenum;
1063
1064
0
  if (pdf_is_dict(ctx, val))
1065
0
  {
1066
0
    if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, val, PDF_NAME(Type))))
1067
0
    {
1068
0
      int num = pdf_to_num(ctx, val);
1069
0
      pdf_mark_list_pop(ctx, list);
1070
1071
0
      mark_all(ctx, doc, list, opts, val, pagenum == 0 ? USE_PAGE1 : (pagenum<<USE_PAGE_SHIFT), pagenum);
1072
0
      page_objects_list_set_page_object(ctx, opts, pagenum, num);
1073
0
      pagenum++;
1074
0
      opts->use_list[num] |= USE_PAGE_OBJECT;
1075
0
      return pagenum;
1076
0
    }
1077
0
    else
1078
0
    {
1079
0
      int i, n = pdf_dict_len(ctx, val);
1080
1081
0
      for (i = 0; i < n; i++)
1082
0
      {
1083
0
        pdf_obj *key = pdf_dict_get_key(ctx, val, i);
1084
0
        pdf_obj *obj = pdf_dict_get_val(ctx, val, i);
1085
1086
0
        if (pdf_name_eq(ctx, PDF_NAME(Kids), key))
1087
0
          pagenum = mark_pages(ctx, doc, list, opts, obj, pagenum);
1088
0
        else
1089
0
          mark_all(ctx, doc, list, opts, obj, USE_CATALOGUE, -1);
1090
0
      }
1091
1092
0
      if (pdf_is_indirect(ctx, val))
1093
0
      {
1094
0
        int num = pdf_to_num(ctx, val);
1095
0
        opts->use_list[num] |= USE_CATALOGUE;
1096
0
      }
1097
0
    }
1098
0
  }
1099
0
  else if (pdf_is_array(ctx, val))
1100
0
  {
1101
0
    int i, n = pdf_array_len(ctx, val);
1102
1103
0
    for (i = 0; i < n; i++)
1104
0
    {
1105
0
      pagenum = mark_pages(ctx, doc, list, opts, pdf_array_get(ctx, val, i), pagenum);
1106
0
    }
1107
0
    if (pdf_is_indirect(ctx, val))
1108
0
    {
1109
0
      int num = pdf_to_num(ctx, val);
1110
0
      opts->use_list[num] |= USE_CATALOGUE;
1111
0
    }
1112
0
  }
1113
0
  pdf_mark_list_pop(ctx, list);
1114
1115
0
  return pagenum;
1116
0
}
1117
1118
static void
1119
mark_root(fz_context *ctx, pdf_document *doc, pdf_mark_list *list, pdf_write_state *opts, pdf_obj *dict)
1120
0
{
1121
0
  int i, n = pdf_dict_len(ctx, dict);
1122
1123
0
  if (pdf_mark_list_push(ctx, list, dict))
1124
0
    return;
1125
1126
0
  if (pdf_is_indirect(ctx, dict))
1127
0
  {
1128
0
    int num = pdf_to_num(ctx, dict);
1129
0
    opts->use_list[num] |= USE_CATALOGUE;
1130
0
  }
1131
1132
0
  for (i = 0; i < n; i++)
1133
0
  {
1134
0
    pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1135
0
    pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1136
1137
0
    if (pdf_name_eq(ctx, PDF_NAME(Pages), key))
1138
0
      opts->page_count = mark_pages(ctx, doc, list, opts, val, 0);
1139
0
    else if (pdf_name_eq(ctx, PDF_NAME(Names), key))
1140
0
      mark_all(ctx, doc, list, opts, val, USE_OTHER_OBJECTS, -1);
1141
0
    else if (pdf_name_eq(ctx, PDF_NAME(Dests), key))
1142
0
      mark_all(ctx, doc, list, opts, val, USE_OTHER_OBJECTS, -1);
1143
0
    else if (pdf_name_eq(ctx, PDF_NAME(Outlines), key))
1144
0
    {
1145
0
      int section;
1146
      /* Look at PageMode to decide whether to
1147
       * USE_OTHER_OBJECTS or USE_PAGE1 here. */
1148
0
      if (pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(PageMode)), PDF_NAME(UseOutlines)))
1149
0
        section = USE_PAGE1;
1150
0
      else
1151
0
        section = USE_OTHER_OBJECTS;
1152
0
      mark_all(ctx, doc, list, opts, val, section, -1);
1153
0
    }
1154
0
    else
1155
0
      mark_all(ctx, doc, list, opts, val, USE_CATALOGUE, -1);
1156
0
  }
1157
0
  pdf_mark_list_pop(ctx, list);
1158
0
}
1159
1160
static void
1161
mark_trailer(fz_context *ctx, pdf_document *doc, pdf_mark_list *list, pdf_write_state *opts, pdf_obj *dict)
1162
0
{
1163
0
  int i, n = pdf_dict_len(ctx, dict);
1164
1165
0
  if (pdf_mark_list_push(ctx, list, dict))
1166
0
    return;
1167
1168
0
  for (i = 0; i < n; i++)
1169
0
  {
1170
0
    pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1171
0
    pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1172
1173
0
    if (pdf_name_eq(ctx, PDF_NAME(Root), key))
1174
0
      mark_root(ctx, doc, list, opts, val);
1175
0
    else
1176
0
      mark_all(ctx, doc, list, opts, val, USE_CATALOGUE, -1);
1177
0
  }
1178
0
  pdf_mark_list_pop(ctx, list);
1179
0
}
1180
1181
static void
1182
add_linearization_objs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1183
0
{
1184
0
  pdf_obj *params_obj = NULL;
1185
0
  pdf_obj *params_ref = NULL;
1186
0
  pdf_obj *hint_obj = NULL;
1187
0
  pdf_obj *hint_ref = NULL;
1188
0
  pdf_obj *o;
1189
0
  int params_num, hint_num;
1190
1191
0
  fz_var(params_obj);
1192
0
  fz_var(params_ref);
1193
0
  fz_var(hint_obj);
1194
0
  fz_var(hint_ref);
1195
1196
0
  fz_try(ctx)
1197
0
  {
1198
0
    pdf_xref_entry *xe;
1199
1200
    /* Linearization params */
1201
0
    params_obj = pdf_new_dict(ctx, doc, 10);
1202
0
    params_ref = pdf_add_object(ctx, doc, params_obj);
1203
0
    params_num = pdf_to_num(ctx, params_ref);
1204
1205
0
    opts->use_list[params_num] = USE_PARAMS;
1206
0
    opts->renumber_map[params_num] = params_num;
1207
0
    opts->rev_renumber_map[params_num] = params_num;
1208
0
    opts->gen_list[params_num] = 0;
1209
0
    pdf_dict_put_real(ctx, params_obj, PDF_NAME(Linearized), 1.0f);
1210
0
    opts->linear_l = pdf_new_int(ctx, INT_MIN);
1211
0
    pdf_dict_put(ctx, params_obj, PDF_NAME(L), opts->linear_l);
1212
0
    opts->linear_h0 = pdf_new_int(ctx, INT_MIN);
1213
0
    o = pdf_dict_put_array(ctx, params_obj, PDF_NAME(H), 2);
1214
0
    pdf_array_push(ctx, o, opts->linear_h0);
1215
0
    opts->linear_h1 = pdf_new_int(ctx, INT_MIN);
1216
0
    pdf_array_push(ctx, o, opts->linear_h1);
1217
0
    opts->linear_o = pdf_new_int(ctx, INT_MIN);
1218
0
    pdf_dict_put(ctx, params_obj, PDF_NAME(O), opts->linear_o);
1219
0
    opts->linear_e = pdf_new_int(ctx, INT_MIN);
1220
0
    pdf_dict_put(ctx, params_obj, PDF_NAME(E), opts->linear_e);
1221
0
    opts->linear_n = pdf_new_int(ctx, INT_MIN);
1222
0
    pdf_dict_put(ctx, params_obj, PDF_NAME(N), opts->linear_n);
1223
0
    opts->linear_t = pdf_new_int(ctx, INT_MIN);
1224
0
    pdf_dict_put(ctx, params_obj, PDF_NAME(T), opts->linear_t);
1225
0
    pdf_dict_put_int(ctx, params_obj, PDF_NAME(P), 0);
1226
1227
    /* Primary hint stream */
1228
0
    hint_obj = pdf_new_dict(ctx, doc, 10);
1229
0
    hint_ref = pdf_add_object(ctx, doc, hint_obj);
1230
0
    hint_num = pdf_to_num(ctx, hint_ref);
1231
1232
0
    opts->hint_object_num = hint_num;
1233
0
    opts->use_list[hint_num] = USE_HINTS;
1234
0
    opts->renumber_map[hint_num] = hint_num;
1235
0
    opts->rev_renumber_map[hint_num] = hint_num;
1236
0
    opts->gen_list[hint_num] = 0;
1237
0
    opts->hints_s = pdf_new_int(ctx, INT_MIN);
1238
0
    pdf_dict_put(ctx, hint_obj, PDF_NAME(S), opts->hints_s);
1239
    /* FIXME: Do we have thumbnails? Do a T entry */
1240
    /* FIXME: Do we have outlines? Do an O entry */
1241
    /* FIXME: Do we have article threads? Do an A entry */
1242
    /* FIXME: Do we have named destinations? Do a E entry */
1243
    /* FIXME: Do we have interactive forms? Do a V entry */
1244
    /* FIXME: Do we have document information? Do an I entry */
1245
    /* FIXME: Do we have logical structure hierarchy? Do a C entry */
1246
    /* FIXME: Do L, Page Label hint table */
1247
0
    pdf_dict_put(ctx, hint_obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
1248
0
    opts->hints_length = pdf_new_int(ctx, INT_MIN);
1249
0
    pdf_dict_put(ctx, hint_obj, PDF_NAME(Length), opts->hints_length);
1250
0
    xe = pdf_get_xref_entry_no_null(ctx, doc, hint_num);
1251
0
    xe->stm_ofs = 0;
1252
    /* Empty stream, required so that we write the object as
1253
     * a stream during the first pass. Without this, offsets
1254
     * for the xref will be wrong. */
1255
0
    xe->stm_buf = fz_new_buffer(ctx, 1);
1256
0
  }
1257
0
  fz_always(ctx)
1258
0
  {
1259
0
    pdf_drop_obj(ctx, params_obj);
1260
0
    pdf_drop_obj(ctx, params_ref);
1261
0
    pdf_drop_obj(ctx, hint_ref);
1262
0
    pdf_drop_obj(ctx, hint_obj);
1263
0
  }
1264
0
  fz_catch(ctx)
1265
0
  {
1266
0
    fz_rethrow(ctx);
1267
0
  }
1268
0
}
1269
1270
static void
1271
lpr_inherit_res_contents(fz_context *ctx, pdf_mark_list *list, int cycle, pdf_obj *res, pdf_obj *dict, pdf_obj *text)
1272
0
{
1273
0
  pdf_obj *o, *r;
1274
0
  int i, n;
1275
1276
  /* If the parent node doesn't have an entry of this type, give up. */
1277
0
  o = pdf_dict_get(ctx, dict, text);
1278
0
  if (!o)
1279
0
    return;
1280
1281
0
  if (!cycle)
1282
0
    cycle = pdf_mark_list_check(ctx, list, o);
1283
1284
  /* If the resources dict we are building doesn't have an entry of this
1285
   * type yet, then just copy it (ensuring it's not a reference) */
1286
0
  r = pdf_dict_get(ctx, res, text);
1287
0
  if (r == NULL)
1288
0
  {
1289
    /* Only copy the dict if to do so would not cause a cycle! */
1290
0
    if (!cycle)
1291
0
    {
1292
0
      if (pdf_is_dict(ctx, o))
1293
0
        o = pdf_copy_dict(ctx, o);
1294
0
      else if (pdf_is_array(ctx, o))
1295
0
        o = pdf_copy_array(ctx, o);
1296
0
      else
1297
0
        o = NULL;
1298
0
      if (o)
1299
0
        pdf_dict_put_drop(ctx, res, text, o);
1300
0
    }
1301
0
    else if (o)
1302
0
      pdf_dict_put(ctx, res, text, o);
1303
0
    return;
1304
0
  }
1305
1306
  /* Otherwise we need to merge o into r */
1307
0
  if (pdf_is_dict(ctx, o))
1308
0
  {
1309
0
    n = pdf_dict_len(ctx, o);
1310
0
    for (i = 0; i < n; i++)
1311
0
    {
1312
0
      pdf_obj *key = pdf_dict_get_key(ctx, o, i);
1313
0
      pdf_obj *val = pdf_dict_get_val(ctx, o, i);
1314
1315
0
      if (pdf_dict_get(ctx, r, key))
1316
0
        continue;
1317
0
      pdf_dict_put(ctx, r, key, val);
1318
0
    }
1319
0
  }
1320
0
}
1321
1322
static void
1323
lpr_inherit_res(fz_context *ctx, pdf_mark_list *list, pdf_obj *node, int depth, pdf_obj *dict)
1324
0
{
1325
0
  while (1)
1326
0
  {
1327
0
    pdf_obj *o;
1328
0
    int cycle;
1329
1330
0
    node = pdf_dict_get(ctx, node, PDF_NAME(Parent));
1331
0
    depth--;
1332
0
    if (!node || depth < 0)
1333
0
      break;
1334
1335
0
    cycle = pdf_mark_list_push(ctx, list, node);
1336
0
    o = pdf_dict_get(ctx, node, PDF_NAME(Resources));
1337
0
    if (o)
1338
0
    {
1339
0
      int cycle2 = cycle;
1340
0
      if (!cycle)
1341
0
        cycle2 = pdf_mark_list_push(ctx, list, o);
1342
0
      lpr_inherit_res_contents(ctx, list, cycle2, dict, o, PDF_NAME(ExtGState));
1343
0
      lpr_inherit_res_contents(ctx, list, cycle2, dict, o, PDF_NAME(ColorSpace));
1344
0
      lpr_inherit_res_contents(ctx, list, cycle2, dict, o, PDF_NAME(Pattern));
1345
0
      lpr_inherit_res_contents(ctx, list, cycle2, dict, o, PDF_NAME(Shading));
1346
0
      lpr_inherit_res_contents(ctx, list, cycle2, dict, o, PDF_NAME(XObject));
1347
0
      lpr_inherit_res_contents(ctx, list, cycle2, dict, o, PDF_NAME(Font));
1348
0
      lpr_inherit_res_contents(ctx, list, cycle2, dict, o, PDF_NAME(ProcSet));
1349
0
      lpr_inherit_res_contents(ctx, list, cycle2, dict, o, PDF_NAME(Properties));
1350
0
      if (!cycle2)
1351
0
        pdf_mark_list_pop(ctx, list);
1352
0
    }
1353
0
    if (!cycle)
1354
0
      pdf_mark_list_pop(ctx, list);
1355
0
  }
1356
0
}
1357
1358
static pdf_obj *
1359
lpr_inherit(fz_context *ctx, pdf_mark_list *list, pdf_obj *node, char *text, int depth)
1360
0
{
1361
0
  do
1362
0
  {
1363
0
    pdf_obj *o = pdf_dict_gets(ctx, node, text);
1364
1365
0
    if (o)
1366
0
    {
1367
      /* Watch for cycling here. If we do hit a cycle, then take
1368
       * care NOT to resolve the indirection to avoid creating direct
1369
       * object cycles. */
1370
0
      if (pdf_mark_list_push(ctx, list, o))
1371
0
        return o;
1372
1373
0
      pdf_mark_list_pop(ctx, list);
1374
0
      return pdf_resolve_indirect(ctx, o);
1375
0
    }
1376
0
    node = pdf_dict_get(ctx, node, PDF_NAME(Parent));
1377
0
    depth--;
1378
0
  }
1379
0
  while (depth >= 0 && node);
1380
1381
0
  return NULL;
1382
0
}
1383
1384
static int
1385
lpr(fz_context *ctx, pdf_document *doc, pdf_mark_list *list, pdf_obj *node, int depth, int page)
1386
0
{
1387
0
  pdf_obj *kids;
1388
0
  pdf_obj *o = NULL;
1389
0
  int i, n;
1390
1391
0
  if (pdf_mark_list_push(ctx, list, node))
1392
0
    return page;
1393
1394
0
  fz_var(o);
1395
1396
0
  fz_try(ctx)
1397
0
  {
1398
0
    if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, node, PDF_NAME(Type))))
1399
0
    {
1400
0
      pdf_obj *r; /* r is deliberately not cleaned up */
1401
1402
      /* Copy resources down to the child */
1403
0
      o = pdf_keep_obj(ctx, pdf_dict_get(ctx, node, PDF_NAME(Resources)));
1404
0
      if (!o)
1405
0
      {
1406
0
        o = pdf_keep_obj(ctx, pdf_new_dict(ctx, doc, 2));
1407
0
        pdf_dict_put(ctx, node, PDF_NAME(Resources), o);
1408
0
      }
1409
0
      lpr_inherit_res(ctx, list, node, depth, o);
1410
0
      r = lpr_inherit(ctx, list, node, "MediaBox", depth);
1411
0
      if (r)
1412
0
        pdf_dict_put(ctx, node, PDF_NAME(MediaBox), r);
1413
0
      r = lpr_inherit(ctx, list, node, "CropBox", depth);
1414
0
      if (r)
1415
0
        pdf_dict_put(ctx, node, PDF_NAME(CropBox), r);
1416
0
      r = lpr_inherit(ctx, list, node, "BleedBox", depth);
1417
0
      if (r)
1418
0
        pdf_dict_put(ctx, node, PDF_NAME(BleedBox), r);
1419
0
      r = lpr_inherit(ctx, list, node, "TrimBox", depth);
1420
0
      if (r)
1421
0
        pdf_dict_put(ctx, node, PDF_NAME(TrimBox), r);
1422
0
      r = lpr_inherit(ctx, list, node, "ArtBox", depth);
1423
0
      if (r)
1424
0
        pdf_dict_put(ctx, node, PDF_NAME(ArtBox), r);
1425
0
      r = lpr_inherit(ctx, list, node, "Rotate", depth);
1426
0
      if (r)
1427
0
        pdf_dict_put(ctx, node, PDF_NAME(Rotate), r);
1428
0
      page++;
1429
0
    }
1430
0
    else
1431
0
    {
1432
0
      kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
1433
0
      n = pdf_array_len(ctx, kids);
1434
0
      for(i = 0; i < n; i++)
1435
0
      {
1436
0
        page = lpr(ctx, doc, list, pdf_array_get(ctx, kids, i), depth+1, page);
1437
0
      }
1438
0
      pdf_dict_del(ctx, node, PDF_NAME(Resources));
1439
0
      pdf_dict_del(ctx, node, PDF_NAME(MediaBox));
1440
0
      pdf_dict_del(ctx, node, PDF_NAME(CropBox));
1441
0
      pdf_dict_del(ctx, node, PDF_NAME(BleedBox));
1442
0
      pdf_dict_del(ctx, node, PDF_NAME(TrimBox));
1443
0
      pdf_dict_del(ctx, node, PDF_NAME(ArtBox));
1444
0
      pdf_dict_del(ctx, node, PDF_NAME(Rotate));
1445
0
    }
1446
0
  }
1447
0
  fz_always(ctx)
1448
0
  {
1449
0
    pdf_mark_list_pop(ctx, list);
1450
0
    pdf_drop_obj(ctx, o);
1451
0
  }
1452
0
  fz_catch(ctx)
1453
0
    fz_rethrow(ctx);
1454
1455
0
  return page;
1456
0
}
1457
1458
static void
1459
pdf_localise_page_resources(fz_context *ctx, pdf_document *doc, pdf_mark_list *list)
1460
0
{
1461
0
  if (doc->resources_localised)
1462
0
    return;
1463
1464
0
  lpr(ctx, doc, list, pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Pages), NULL), 0, 0);
1465
1466
0
  doc->resources_localised = 1;
1467
0
}
1468
1469
static void
1470
linearize(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1471
0
{
1472
0
  int i;
1473
0
  int n = pdf_xref_len(ctx, doc) + 2;
1474
0
  int *reorder;
1475
0
  int *rev_renumber_map;
1476
0
  pdf_mark_list list;
1477
1478
0
  pdf_mark_list_init(ctx, &list);
1479
0
  opts->page_object_lists = page_objects_list_create(ctx);
1480
1481
  /* Ensure that every page has local references of its resources */
1482
0
  fz_try(ctx)
1483
0
  {
1484
    /* FIXME: We could 'thin' the resources according to what is actually
1485
     * required for each page, but this would require us to run the page
1486
     * content streams. */
1487
0
    pdf_localise_page_resources(ctx, doc, &list);
1488
1489
    /* Walk the objects for each page, marking which ones are used, where */
1490
0
    memset(opts->use_list, 0, n * sizeof(int));
1491
0
    mark_trailer(ctx, doc, &list, opts, pdf_trailer(ctx, doc));
1492
0
  }
1493
0
  fz_always(ctx)
1494
0
    pdf_mark_list_free(ctx, &list);
1495
0
  fz_catch(ctx)
1496
0
    fz_rethrow(ctx);
1497
1498
  /* Add new objects required for linearization */
1499
0
  add_linearization_objs(ctx, doc, opts);
1500
1501
#ifdef DEBUG_WRITING
1502
  fprintf(stderr, "Usage calculated:\n");
1503
  for (i=0; i < pdf_xref_len(ctx, doc); i++)
1504
  {
1505
    fprintf(stderr, "%d: use=%d\n", i, opts->use_list[i]);
1506
  }
1507
#endif
1508
1509
  /* Allocate/init the structures used for renumbering the objects */
1510
0
  reorder = fz_calloc(ctx, n, sizeof(int));
1511
0
  rev_renumber_map = fz_calloc(ctx, n, sizeof(int));
1512
0
  for (i = 0; i < n; i++)
1513
0
  {
1514
0
    reorder[i] = i;
1515
0
  }
1516
1517
  /* Heap sort the reordering */
1518
0
  heap_sort(reorder+1, n-1, opts->use_list, &order_ge);
1519
1520
#ifdef DEBUG_WRITING
1521
  fprintf(stderr, "Reordered:\n");
1522
  for (i=1; i < pdf_xref_len(ctx, doc); i++)
1523
  {
1524
    fprintf(stderr, "%d: use=%d\n", i, opts->use_list[reorder[i]]);
1525
  }
1526
#endif
1527
1528
  /* Find the split point */
1529
0
  for (i = 1; (opts->use_list[reorder[i]] & USE_PARAMS) == 0; i++) {}
1530
0
  opts->start = i;
1531
1532
  /* Roll the reordering into the renumber_map */
1533
0
  for (i = 0; i < n; i++)
1534
0
  {
1535
0
    opts->renumber_map[reorder[i]] = i;
1536
0
    rev_renumber_map[i] = opts->rev_renumber_map[reorder[i]];
1537
0
  }
1538
0
  fz_free(ctx, opts->rev_renumber_map);
1539
0
  opts->rev_renumber_map = rev_renumber_map;
1540
0
  fz_free(ctx, reorder);
1541
1542
  /* Apply the renumber_map */
1543
0
  page_objects_list_renumber(opts);
1544
0
  renumberobjs(ctx, doc, opts);
1545
0
  renumber_pages(ctx, doc, opts);
1546
1547
0
  page_objects_list_sort_and_dedupe(ctx, opts->page_object_lists);
1548
0
}
1549
1550
static void
1551
update_linearization_params(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1552
0
{
1553
0
  int64_t offset;
1554
0
  pdf_set_int(ctx, opts->linear_l, opts->file_len);
1555
  /* Primary hint stream offset (of object, not stream!) */
1556
0
  pdf_set_int(ctx, opts->linear_h0, opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
1557
  /* Primary hint stream length (of object, not stream!) */
1558
0
  offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
1559
0
  pdf_set_int(ctx, opts->linear_h1, offset - opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
1560
  /* Object number of first pages page object (the first object of page 0) */
1561
0
  pdf_set_int(ctx, opts->linear_o, opts->page_object_lists->page[0]->object[0]);
1562
  /* Offset of end of first page (first page is followed by primary
1563
   * hint stream (object n-1) then remaining pages (object 1...). The
1564
   * primary hint stream counts as part of the first pages data, I think.
1565
   */
1566
0
  offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
1567
0
  pdf_set_int(ctx, opts->linear_e, offset);
1568
  /* Number of pages in document */
1569
0
  pdf_set_int(ctx, opts->linear_n, opts->page_count);
1570
  /* Offset of first entry in main xref table */
1571
0
  pdf_set_int(ctx, opts->linear_t, opts->first_xref_entry_offset + opts->hintstream_len);
1572
  /* Offset of shared objects hint table in the primary hint stream */
1573
0
  pdf_set_int(ctx, opts->hints_s, opts->hints_shared_offset);
1574
  /* Primary hint stream length */
1575
0
  pdf_set_int(ctx, opts->hints_length, opts->hintstream_len);
1576
0
}
1577
1578
/*
1579
 * Make sure we have loaded objects from object streams.
1580
 */
1581
1582
/*
 * Convert every xref entry of type 'o' (object held in an object stream)
 * into a regular 'n' entry, loading the object first unless a repair has
 * been attempted. Entries that fail to load are turned into 'f' entries,
 * and the error is reported rather than propagated (system errors are
 * still rethrown).
 */
static void preloadobjstms(fz_context *ctx, pdf_document *doc)
{
  int num;
  pdf_xref_entry *entry = NULL;
  /* If we have attempted a repair, then everything will have been
   * loaded already. Bug 707112: But we do still need to mark all our
   * 'o' objects as being something else. */
  int load = doc->repair_attempted ? 0 : 1;

  fz_var(num);
  fz_var(entry);

  /* xref_len may change due to repair, so check it every iteration */
  for (num = 0; num < pdf_xref_len(ctx, doc); num++)
  {
    fz_try(ctx)
    {
      while (num < pdf_xref_len(ctx, doc))
      {
        entry = pdf_get_xref_entry_no_null(ctx, doc, num);
        if (entry->type == 'o')
        {
          if (load)
            pdf_drop_obj(ctx, pdf_load_object(ctx, doc, num));

          /* The object is no longer an objstm one. It's a regular object
           * held in memory. Previously we used gen to hold the index of
           * the obj in the objstm, so reset this to 0. */
          entry->type = 'n';
          entry->gen = 0;
        }
        entry = NULL;
        num++;
      }
    }
    fz_catch(ctx)
    {
      /* We need to clear the type even in the event of an error, lest we
       * hit an assert later. Bug 707110. */
      if (entry && entry->type == 'o')
      {
        entry->type = 'f';
        entry->gen = 0;
      }
      /* Ignore the error, so we can carry on trying to load. */
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
      fz_report_error(ctx);
    }
  }
}
1639
1640
/*
1641
 * Save streams and objects to the output
1642
 */
1643
1644
/*
 * Decide whether the stream described by obj, with len bytes of data,
 * is an uncompressed 1 bit per pixel image.
 *
 * obj must have Subtype Image, and len must be exactly Height rows of
 * (Width + 7) / 8 bytes. In addition the image must either be an
 * ImageMask, or have an integer BitsPerComponent of 1 and a DeviceGray
 * ColorSpace.
 *
 * Returns 1 if so, 0 otherwise. *w and *h receive Width and Height once
 * the Subtype check has passed; callers should only rely on them when 1
 * is returned.
 */
static int is_bitmap_stream(fz_context *ctx, pdf_obj *obj, size_t len, int *w, int *h)
{
  pdf_obj *bpc;
  pdf_obj *cs;
  size_t stride;
  if (pdf_dict_get(ctx, obj, PDF_NAME(Subtype)) != PDF_NAME(Image))
    return 0;
  *w = pdf_dict_get_int(ctx, obj, PDF_NAME(Width));
  *h = pdf_dict_get_int(ctx, obj, PDF_NAME(Height));
  /* The dimensions come from the file. Negative values could satisfy
   * the size check below by wrapping around, and a huge width would
   * overflow the stride computation, so reject both. */
  if (*w < 0 || *h < 0 || *w > INT_MAX - 7)
    return 0;
  stride = (size_t)((*w + 7) >> 3);
  /* Check stride * height == len without risking overflow in the
   * multiplication. */
  if (stride == 0 || *h == 0)
  {
    if (len != 0)
      return 0;
  }
  else if (len % stride != 0 || len / stride != (size_t)*h)
    return 0;
  if (pdf_dict_get_bool(ctx, obj, PDF_NAME(ImageMask)))
  {
    return 1;
  }
  else
  {
    bpc = pdf_dict_get(ctx, obj, PDF_NAME(BitsPerComponent));
    if (!pdf_is_int(ctx, bpc))
      return 0;
    if (pdf_to_int(ctx, bpc) != 1)
      return 0;
    cs = pdf_dict_get(ctx, obj, PDF_NAME(ColorSpace));
    if (!pdf_name_eq(ctx, cs, PDF_NAME(DeviceGray)))
      return 0;
    return 1;
  }
}
1673
1674
/* Return non-zero if c is a byte value treated as binary: anything below
 * 32 other than newline, carriage return and tab, or anything above 127. */
static inline int isbinary(int c)
{
  switch (c)
  {
  case '\n':
  case '\r':
  case '\t':
    return 0;
  default:
    return c < 32 || c > 127;
  }
}
1680
1681
/* Return 1 if any of the len bytes at data is binary (see isbinary),
 * 0 otherwise. */
static int isbinarystream(fz_context *ctx, const unsigned char *data, size_t len)
{
  while (len-- > 0)
  {
    if (isbinary(*data++))
      return 1;
  }
  return 0;
}
1689
1690
/*
 * Hex-encode the n bytes at p into a new buffer: two lowercase hex digits
 * per byte, a newline after every 32 input bytes, and a terminating '>'.
 */
static fz_buffer *hexbuf(fz_context *ctx, const unsigned char *p, size_t n)
{
  static const char digits[17] = "0123456789abcdef";
  /* 2 digits per byte, one newline per full group of 32, one '>'. */
  size_t size = n * 2 + (n / 32) + 1;
  unsigned char *out = Memento_label(fz_malloc(ctx, size), "hexbuf");
  fz_buffer *buf = fz_new_buffer_from_data(ctx, out, size);
  size_t i;

  for (i = 0; i < n; i++)
  {
    unsigned char byte = p[i];

    *out++ = digits[byte >> 4];
    *out++ = digits[byte & 15];
    if ((i & 31) == 31)
      *out++ = '\n';
  }

  *out = '>';

  return buf;
}
1714
1715
/*
 * Record in the stream dictionary dict that the data has been hex
 * encoded, by putting ASCIIHexDecode at the front of its Filter entry
 * and a matching null at the front of DecodeParms where needed.
 */
static void addhexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
{
  pdf_obj *filter = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
  pdf_obj *parms = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
  pdf_obj *filter_array = NULL;
  pdf_obj *parms_array = NULL;

  fz_var(filter_array);
  fz_var(parms_array);

  fz_try(ctx)
  {
    if (pdf_is_name(ctx, filter))
    {
      /* Single filter: turn it into [ ASCIIHexDecode filter ]. */
      filter_array = pdf_new_array(ctx, doc, 2);
      pdf_array_push(ctx, filter_array, PDF_NAME(ASCIIHexDecode));
      pdf_array_push(ctx, filter_array, filter);
      filter = filter_array;
      if (pdf_is_dict(ctx, parms))
      {
        /* ... and its parameters into [ null parms ]. */
        parms_array = pdf_new_array(ctx, doc, 2);
        pdf_array_push(ctx, parms_array, PDF_NULL);
        pdf_array_push(ctx, parms_array, parms);
        parms = parms_array;
      }
    }
    else if (pdf_is_array(ctx, filter))
    {
      /* Filter list: insert at the head of the existing arrays. */
      pdf_array_insert(ctx, filter, PDF_NAME(ASCIIHexDecode), 0);
      if (pdf_is_array(ctx, parms))
        pdf_array_insert(ctx, parms, PDF_NULL, 0);
    }
    else
    {
      /* No usable filter entry: hex is the only filter. */
      filter = PDF_NAME(ASCIIHexDecode);
    }

    pdf_dict_put(ctx, dict, PDF_NAME(Filter), filter);
    if (parms)
      pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), parms);
  }
  fz_always(ctx)
  {
    pdf_drop_obj(ctx, filter_array);
    pdf_drop_obj(ctx, parms_array);
  }
  fz_catch(ctx)
    fz_rethrow(ctx);
}
1763
1764
/*
 * Compress the n bytes at p with zlib and return the result as a new
 * buffer, resized to the compressed length.
 *
 * An effort of 0 selects zlib's default compression level; any other
 * value is scaled so that 100 maps to Z_BEST_COMPRESSION.
 *
 * Throws FZ_ERROR_LIMIT if n does not fit in a uLong, and
 * FZ_ERROR_LIBRARY if zlib reports a failure.
 */
static fz_buffer *deflatebuf(fz_context *ctx, const unsigned char *p, size_t n, int effort)
{
  uLong src_len = (uLong)n;
  uLongf out_len;
  size_t capacity;
  unsigned char *storage;
  fz_buffer *result;
  int level;

  if (n != (size_t)src_len)
    fz_throw(ctx, FZ_ERROR_LIMIT, "Buffer too large to deflate");

  /* Allocate the worst case size up front; the buffer owns the storage. */
  capacity = compressBound(src_len);
  storage = Memento_label(fz_malloc(ctx, capacity), "pdf_write_deflate");
  result = fz_new_buffer_from_data(ctx, storage, capacity);
  out_len = (uLongf)capacity;

  level = (effort == 0) ? Z_DEFAULT_COMPRESSION : effort * Z_BEST_COMPRESSION / 100;

  if (compress2(storage, &out_len, p, src_len, level) != Z_OK)
  {
    fz_drop_buffer(ctx, result);
    fz_throw(ctx, FZ_ERROR_LIBRARY, "cannot deflate buffer");
  }

  /* Trim the buffer down to what was actually produced. */
  fz_try(ctx)
    fz_resize_buffer(ctx, result, out_len);
  fz_catch(ctx)
  {
    fz_drop_buffer(ctx, result);
    fz_rethrow(ctx);
  }
  return result;
}
1800
1801
/*
 * Remove a leading ASCIIHexDecode filter (and its DecodeParms slot) from
 * the stream dictionary dict. A filter array left with a single entry is
 * unpacked to that entry; an empty one is removed altogether.
 *
 * Returns 1 if an ASCIIHexDecode filter was removed, 0 otherwise.
 */
static int striphexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
{
  pdf_obj *filter = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
  pdf_obj *parms = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
  int stripped = 0;
  int remaining;

  if (!pdf_is_array(ctx, filter))
  {
    /* Not a filter list: hex can only be the sole filter. */
    if (filter != PDF_NAME(ASCIIHexDecode))
      return 0;
    pdf_dict_del(ctx, dict, PDF_NAME(Filter));
    pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
    return 1;
  }

  /* Remove ASCIIHexDecode from head of filter list */
  if (pdf_array_get(ctx, filter, 0) == PDF_NAME(ASCIIHexDecode))
  {
    stripped = 1;
    pdf_array_delete(ctx, filter, 0);
    if (pdf_is_array(ctx, parms))
      pdf_array_delete(ctx, parms, 0);
  }

  remaining = pdf_array_len(ctx, filter);
  if (remaining == 1)
  {
    /* Unpack array if only one filter remains */
    filter = pdf_array_get(ctx, filter, 0);
    pdf_dict_put(ctx, dict, PDF_NAME(Filter), filter);
    if (parms)
    {
      parms = pdf_array_get(ctx, parms, 0);
      pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), parms);
    }
  }
  else if (remaining == 0)
  {
    /* Remove array if no filters remain */
    pdf_dict_del(ctx, dict, PDF_NAME(Filter));
    pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
  }

  return stripped;
}
1846
1847
/*
 * Decode the n bytes of hex data at p by reading them through an
 * fz_open_ahxd filter, and return the decoded bytes as a new buffer.
 */
static fz_buffer *unhexbuf(fz_context *ctx, const unsigned char *p, size_t n)
{
  fz_stream *source = NULL;
  fz_stream *decoder = NULL;
  fz_buffer *decoded = NULL;

  fz_var(source);
  fz_var(decoder);

  fz_try(ctx)
  {
    source = fz_open_memory(ctx, p, n);
    decoder = fz_open_ahxd(ctx, source);
    /* n/2 is passed as the third argument to fz_read_all. */
    decoded = fz_read_all(ctx, decoder, n/2);
  }
  fz_always(ctx)
  {
    fz_drop_stream(ctx, decoder);
    fz_drop_stream(ctx, source);
  }
  fz_catch(ctx)
    fz_rethrow(ctx);

  return decoded;
}
1869
1870
/* Write callback: arg is the fz_output that receives the len bytes at data. */
static void write_data(fz_context *ctx, void *arg, const unsigned char *data, size_t len)
{
  fz_output *out = (fz_output *)arg;

  fz_write_data(ctx, out, data, len);
}
1874
1875
/*
 * Write stream object num/gen to opts->out, starting from its raw
 * (still filtered) stream data.
 *
 * obj_orig is the stream dictionary; a copy is modified and written, so
 * the Filter, DecodeParms and Length entries of obj_orig's own dictionary
 * are not replaced here.
 *
 * If do_deflate is set, a leading ASCIIHexDecode filter is stripped and
 * the data decoded; data that then has no filter is compressed, as CCITT
 * G4 for 1 bit images or with deflate otherwise. If opts->do_ascii is set
 * and the data is binary, it is hex encoded. If unenc is set the object
 * is written unencrypted, otherwise it is encrypted with opts->crypt.
 */
static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
{
  fz_buffer *tmp_unhex = NULL, *tmp_comp = NULL, *tmp_hex = NULL, *buf = NULL;
  pdf_obj *obj = NULL;
  pdf_obj *dp;
  size_t len;
  unsigned char *data;
  int w, h;

  /* Everything assigned inside fz_try and dropped in fz_always must be
   * declared with fz_var, tmp_unhex included. */
  fz_var(buf);
  fz_var(tmp_unhex);
  fz_var(tmp_comp);
  fz_var(tmp_hex);
  fz_var(obj);

  fz_try(ctx)
  {
    buf = pdf_load_raw_stream_number(ctx, doc, num);
    obj = pdf_copy_dict(ctx, obj_orig);

    len = fz_buffer_storage(ctx, buf, &data);

    /* Undo any hex encoding first, so the real data can be compressed. */
    if (do_deflate && striphexfilter(ctx, doc, obj))
    {
      tmp_unhex = unhexbuf(ctx, data, len);
      len = fz_buffer_storage(ctx, tmp_unhex, &data);
    }

    /* Only compress data that has no filter applied. */
    if (do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME(Filter)))
    {
      if (is_bitmap_stream(ctx, obj, len, &w, &h))
      {
        tmp_comp = fz_compress_ccitt_fax_g4(ctx, data, w, h, (w+7)>>3);
        pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
        dp = pdf_dict_put_dict(ctx, obj, PDF_NAME(DecodeParms), 1);
        pdf_dict_put_int(ctx, dp, PDF_NAME(K), -1);
        pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), w);
      }
      else
      {
        tmp_comp = deflatebuf(ctx, data, len, opts->compression_effort);
        pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
      }
      len = fz_buffer_storage(ctx, tmp_comp, &data);
    }

    /* ASCII output requested: hex encode anything binary. */
    if (opts->do_ascii && isbinarystream(ctx, data, len))
    {
      tmp_hex = hexbuf(ctx, data, len);
      len = fz_buffer_storage(ctx, tmp_hex, &data);
      addhexfilter(ctx, doc, obj);
    }

    fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);

    if (unenc)
    {
      pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
      pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
      fz_write_string(ctx, opts->out, "\nstream\n");
      fz_write_data(ctx, opts->out, data, len);
    }
    else
    {
      /* Length must describe the data as it will be after encryption. */
      pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, len));
      pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen, NULL);
      fz_write_string(ctx, opts->out, "\nstream\n");
      pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
    }

    fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
  }
  fz_always(ctx)
  {
    fz_drop_buffer(ctx, tmp_unhex);
    fz_drop_buffer(ctx, tmp_hex);
    fz_drop_buffer(ctx, tmp_comp);
    fz_drop_buffer(ctx, buf);
    pdf_drop_obj(ctx, obj);
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }
}
1959
1960
/*
  Write stream object 'num' to opts->out after loading its contents with
  pdf_load_stream_number.

  A copy of obj_orig is used as the stream dictionary; its Filter and
  DecodeParms entries are removed because the loaded data is written without
  the original filters.

  do_deflate: when non-zero the data is recompressed; bitmap streams (as
    judged by is_bitmap_stream) use CCITT G4, everything else uses Flate.
  unenc: when non-zero the dictionary and data are written unencrypted,
    otherwise they are encrypted with opts->crypt.

  If opts->do_ascii is set and the (possibly recompressed) data looks
  binary, it is additionally hex encoded.

  Any exception is rethrown after the temporary buffers and the dictionary
  copy have been dropped.
*/
static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
{
  fz_buffer *buf = NULL, *tmp_comp = NULL, *tmp_hex = NULL;
  pdf_obj *obj = NULL;
  pdf_obj *dp;
  size_t len;
  unsigned char *data;
  int w, h;

  fz_var(buf);
  fz_var(tmp_comp);
  fz_var(tmp_hex);
  fz_var(obj);

  fz_try(ctx)
  {
    buf = pdf_load_stream_number(ctx, doc, num);
    obj = pdf_copy_dict(ctx, obj_orig);
    pdf_dict_del(ctx, obj, PDF_NAME(Filter));
    pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));

    len = fz_buffer_storage(ctx, buf, &data);
    if (do_deflate)
    {
      if (is_bitmap_stream(ctx, obj, len, &w, &h))
      {
        /* Rows are byte aligned, hence a stride of (w+7)>>3. */
        tmp_comp = fz_compress_ccitt_fax_g4(ctx, data, w, h, (w+7)>>3);
        pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
        dp = pdf_dict_put_dict(ctx, obj, PDF_NAME(DecodeParms), 1);
        pdf_dict_put_int(ctx, dp, PDF_NAME(K), -1);
        pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), w);
      }
      else
      {
        tmp_comp = deflatebuf(ctx, data, len, opts->compression_effort);
        pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
      }
      /* From here on, data/len refer to the compressed bytes. */
      len = fz_buffer_storage(ctx, tmp_comp, &data);
    }

    if (opts->do_ascii && isbinarystream(ctx, data, len))
    {
      tmp_hex = hexbuf(ctx, data, len);
      len = fz_buffer_storage(ctx, tmp_hex, &data);
      addhexfilter(ctx, doc, obj);
    }

    fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);

    if (unenc)
    {
      pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
      pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
      fz_write_string(ctx, opts->out, "\nstream\n");
      fz_write_data(ctx, opts->out, data, len);
    }
    else
    {
      /* Pass the full size_t length, as copystream does; narrowing it to
       * int would record a wrong Length for very large streams. */
      pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, len));
      pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen, NULL);
      fz_write_string(ctx, opts->out, "\nstream\n");
      pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
    }

    fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
  }
  fz_always(ctx)
  {
    fz_drop_buffer(ctx, tmp_hex);
    fz_drop_buffer(ctx, tmp_comp);
    fz_drop_buffer(ctx, buf);
    pdf_drop_obj(ctx, obj);
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }
}
2038
2039
/*
  Return 1 if the name object s is one of the filter names that this file
  treats as belonging to image data (CCITT fax, DCT, run length, JBIG2 or
  JPX, including the abbreviated CCF/DCT/RL forms), otherwise 0.

  The comparison is by identity against the predefined PDF_NAME constants.
*/
static int is_image_filter(pdf_obj *s)
{
  if (s == PDF_NAME(CCITTFaxDecode) || s == PDF_NAME(CCF))
    return 1;
  if (s == PDF_NAME(DCTDecode) || s == PDF_NAME(DCT))
    return 1;
  if (s == PDF_NAME(RunLengthDecode) || s == PDF_NAME(RL))
    return 1;
  if (s == PDF_NAME(JBIG2Decode))
    return 1;
  if (s == PDF_NAME(JPXDecode))
    return 1;
  return 0;
}
2048
2049
/*
  Return 1 if the Filter value o names an image filter.

  o may be a single name, or an array in which case any one matching
  element is enough. Anything else (including NULL) yields 0.
*/
static int filter_implies_image(fz_context *ctx, pdf_obj *o)
{
  int idx, count;

  if (pdf_is_name(ctx, o))
    return is_image_filter(o);

  if (!pdf_is_array(ctx, o))
    return 0;

  count = pdf_array_len(ctx, o);
  idx = 0;
  while (idx < count)
  {
    if (is_image_filter(pdf_array_get(ctx, o, idx)))
      return 1;
    idx++;
  }
  return 0;
}
2063
2064
/*
  Return 1 if the Filter value o is the JPXDecode name, or an array
  containing it; otherwise 0.
*/
static int is_jpx_filter(fz_context *ctx, pdf_obj *o)
{
  int idx, count;

  if (o == PDF_NAME(JPXDecode))
    return 1;

  if (!pdf_is_array(ctx, o))
    return 0;

  count = pdf_array_len(ctx, o);
  idx = 0;
  while (idx < count)
  {
    if (pdf_array_get(ctx, o, idx) == PDF_NAME(JPXDecode))
      return 1;
    idx++;
  }
  return 0;
}
2078
2079
/*
  Heuristically decide whether the stream dictionary obj describes an image.

  Returns 1 when any of the following holds:
   - Type is XObject and Subtype is Image,
   - the Filter entry names an image filter,
   - both Width and Height entries are present.
  Returns 0 otherwise.
*/
static int is_image_stream(fz_context *ctx, pdf_obj *obj)
{
  pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));

  /* Subtype is only looked up once Type has matched. */
  if (pdf_name_eq(ctx, type, PDF_NAME(XObject)) &&
    pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), PDF_NAME(Image)))
    return 1;

  if (filter_implies_image(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Filter))))
    return 1;

  if (pdf_dict_get(ctx, obj, PDF_NAME(Width)) == NULL)
    return 0;
  if (pdf_dict_get(ctx, obj, PDF_NAME(Height)) == NULL)
    return 0;
  return 1;
}
2091
2092
/*
  Heuristically decide whether the stream dictionary obj belongs to a font.

  Returns 1 when Type is Font or FontDescriptor, when any of Length1,
  Length2 or Length3 is present, or when Subtype is Type1C or
  CIDFontType0C. Returns 0 otherwise.
*/
static int is_font_stream(fz_context *ctx, pdf_obj *obj)
{
  pdf_obj *type, *subtype;

  type = pdf_dict_get(ctx, obj, PDF_NAME(Type));
  if (pdf_name_eq(ctx, type, PDF_NAME(Font)) ||
    pdf_name_eq(ctx, type, PDF_NAME(FontDescriptor)))
    return 1;

  /* Embedded font programs carry Length1..3 entries. */
  if (pdf_dict_get(ctx, obj, PDF_NAME(Length1)) != NULL ||
    pdf_dict_get(ctx, obj, PDF_NAME(Length2)) != NULL ||
    pdf_dict_get(ctx, obj, PDF_NAME(Length3)) != NULL)
    return 1;

  subtype = pdf_dict_get(ctx, obj, PDF_NAME(Subtype));
  if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1C)) ||
    pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0C)))
    return 1;

  return 0;
}
2111
2112
/*
  Return 1 if the Filter entry of the stream dictionary obj is, or
  contains, JPXDecode; otherwise 0.
*/
static int is_jpx_stream(fz_context *ctx, pdf_obj *obj)
{
  pdf_obj *filter = pdf_dict_get(ctx, obj, PDF_NAME(Filter));

  return is_jpx_filter(ctx, filter) ? 1 : 0;
}
2119
2120
2121
/*
  Return 1 if obj is a dictionary with Type Metadata and Subtype XML,
  otherwise 0. Subtype is only examined once Type has matched.
*/
static int is_xml_metadata(fz_context *ctx, pdf_obj *obj)
{
  pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));

  if (!pdf_name_eq(ctx, type, PDF_NAME(Metadata)))
    return 0;
  if (!pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), PDF_NAME(XML)))
    return 0;
  return 1;
}
2128
2129
/*
  Write object 'num' (with generation 'gen') to opts->out.

  skip_xrefs: when non-zero, dictionaries with Type XRef are not written.
  unenc: when non-zero the object is written unencrypted. It is forced to 1
    when opts->do_encrypt is PDF_ENCRYPT_NONE.

  Dictionaries with Type ObjStm are not written unless opts->do_use_objstms
  is set. Any object skipped this way is also marked unused in
  opts->use_list (when that list exists).

  Stream objects are handed to expandstream or copystream; the
  compress/expand decision starts from opts->do_compress/opts->do_expand
  and is then overridden for images, fonts, XML metadata and JPX streams.
  Other objects are printed directly.

  Any exception is rethrown after the loaded object has been dropped.
*/
static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int gen, int skip_xrefs, int unenc)
{
  pdf_obj *obj = NULL;
  int do_deflate = 0;
  int do_expand = 0;
  int skip = 0;

  fz_var(obj);

  if (opts->do_encrypt == PDF_ENCRYPT_NONE)
    unenc = 1;

  fz_try(ctx)
  {
    obj = pdf_load_object(ctx, doc, num);

    /* skip ObjStm and XRef objects */
    if (pdf_is_dict(ctx, obj))
    {
      pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));
      if (type == PDF_NAME(ObjStm) && !opts->do_use_objstms)
      {
        if (opts->use_list)
          opts->use_list[num] = 0;
        skip = 1;
      }
      if (skip_xrefs && type == PDF_NAME(XRef))
      {
        if (opts->use_list)
          opts->use_list[num] = 0;
        skip = 1;
      }
    }

    if (!skip)
    {
      if (pdf_obj_num_is_stream(ctx, doc, num))
      {
        do_deflate = opts->do_compress;
        do_expand = opts->do_expand;
        /* Later checks take precedence over earlier ones. */
        if (opts->do_compress_images && is_image_stream(ctx, obj))
          do_deflate = 1, do_expand = 0;
        if (opts->do_compress_fonts && is_font_stream(ctx, obj))
          do_deflate = 1, do_expand = 0;
        if (is_xml_metadata(ctx, obj))
          do_deflate = 0, do_expand = 0;
        if (is_jpx_stream(ctx, obj))
          do_deflate = 0, do_expand = 0;

        /* The hint stream object is never expanded. */
        if (do_expand && num != opts->hint_object_num)
          expandstream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
        else
          copystream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
      }
      else
      {
        fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
        /* A NULL crypt makes the printer emit the object unencrypted. */
        pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, unenc ? NULL : opts->crypt, num, gen, NULL);
        fz_write_string(ctx, opts->out, "\nendobj\n\n");
      }
    }
  }
  fz_always(ctx)
  {
    pdf_drop_obj(ctx, obj);
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }
}
2203
2204
/*
  Write one subsection of an old-style xref table covering object numbers
  from..to-1: a "first count" header line followed by one fixed-width entry
  per object. Entries are marked 'n' when opts->use_list[num] is non-zero
  and 'f' otherwise; offsets are written relative to opts->bias.
*/
static void writexrefsubsect(fz_context *ctx, pdf_write_state *opts, int from, int to)
{
  int n = from;

  fz_write_printf(ctx, opts->out, "%d %d\n", from, to - from);
  while (n < to)
  {
    if (!opts->use_list[n])
      fz_write_printf(ctx, opts->out, "%010lu %05d f \n", opts->ofs_list[n] - opts->bias, opts->gen_list[n]);
    else
      fz_write_printf(ctx, opts->out, "%010lu %05d n \n", opts->ofs_list[n] - opts->bias, opts->gen_list[n]);
    n++;
  }
}
2217
2218
/*
  Write an old-style cross reference table, its trailer dictionary and the
  closing startxref/%%EOF marker to opts->out.

  from, to: range of object numbers (to exclusive) to list. When saving
    incrementally only runs of objects for which pdf_xref_is_incremental
    is true are written, one subsection per run.
  first: non-zero if Info, Root, ID, Encrypt (and Root/Metadata) should be
    copied into a freshly built trailer (non-incremental case only).
  main_xref_offset: if non-zero, stored as Prev in a freshly built trailer.
  startxref: offset written after "startxref" (less opts->bias).

  Side effects: records opts->first_xref_entry_offset, sets
  doc->last_xref_was_old_style, and in the incremental case updates the
  document's own trailer and (unless snapshotting) doc->startxref.
*/
static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t main_xref_offset, int64_t startxref)
{
  pdf_obj *trailer = NULL;

  fz_write_string(ctx, opts->out, "xref\n");
  opts->first_xref_entry_offset = fz_tell_output(ctx, opts->out);

  if (!opts->do_incremental)
  {
    writexrefsubsect(ctx, opts, from, to);
  }
  else
  {
    int run_start, run_end;

    for (run_start = from; run_start < to; run_start = run_end)
    {
      /* Skip over objects that are not part of the incremental update... */
      while (run_start < to && !pdf_xref_is_incremental(ctx, doc, run_start))
        run_start++;

      /* ...then find the end of the run of objects that are. */
      run_end = run_start;
      while (run_end < to && pdf_xref_is_incremental(ctx, doc, run_end))
        run_end++;

      if (run_end > run_start)
        writexrefsubsect(ctx, opts, run_start, run_end);
    }
  }

  fz_write_string(ctx, opts->out, "\n");

  fz_var(trailer);

  fz_try(ctx)
  {
    if (opts->do_incremental)
    {
      /* Reuse (and modify in place) the document's trailer. */
      trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
      pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), pdf_xref_len(ctx, doc));
      pdf_dict_put_int(ctx, trailer, PDF_NAME(Prev), doc->startxref);
      pdf_dict_del(ctx, trailer, PDF_NAME(XRefStm));
      if (!opts->do_snapshot)
        doc->startxref = startxref - opts->bias;
    }
    else
    {
      trailer = pdf_new_dict(ctx, doc, 5);

      pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), to);

      if (first)
      {
        pdf_obj *copied[3] = { PDF_NAME(Info), PDF_NAME(Root), PDF_NAME(ID) };
        pdf_obj *otrailer = pdf_trailer(ctx, doc);
        int k;

        /* Carry over whichever of these entries the old trailer has. */
        for (k = 0; k < 3; k++)
        {
          pdf_obj *val = pdf_dict_get(ctx, otrailer, copied[k]);
          if (val)
            pdf_dict_put(ctx, trailer, copied[k], val);
        }

        /* The encryption dictionary is kept in the writer state to handle
           the encryption dictionary object being renumbered during repair.*/
        if (opts->crypt_obj)
        {
          /* If the encryption dictionary used to be an indirect reference from the trailer,
             store it the same way in the trailer in the saved file. */
          if (pdf_is_indirect(ctx, opts->crypt_obj))
            pdf_dict_put_indirect(ctx, trailer, PDF_NAME(Encrypt), opts->crypt_object_number);
          else
            pdf_dict_put(ctx, trailer, PDF_NAME(Encrypt), opts->crypt_obj);
        }

        if (opts->metadata)
          pdf_dict_putp(ctx, trailer, "Root/Metadata", opts->metadata);
      }
      if (main_xref_offset != 0)
        pdf_dict_put_int(ctx, trailer, PDF_NAME(Prev), main_xref_offset);
    }

    fz_write_string(ctx, opts->out, "trailer\n");
    /* Trailer is NOT encrypted */
    pdf_print_obj(ctx, opts->out, trailer, opts->do_tight, opts->do_ascii);
    fz_write_string(ctx, opts->out, "\n");

    fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref - opts->bias);

    doc->last_xref_was_old_style = 1;
  }
  fz_always(ctx)
  {
    pdf_drop_obj(ctx, trailer);
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }
}
2325
2326
/*
  Add one subsection covering object numbers from..to-1 to an xref stream
  under construction.

  The pair (from, to - from) is appended to the Index array, and one 6 byte
  record per object is appended to fzbuf: a 1 byte type, a 4 byte big-endian
  second field and a 1 byte third field.

  Type is 0 when opts->use_list[num] is zero, 2 when the xref entry type is
  'o', and 1 otherwise. Only type 1 entries have opts->bias subtracted from
  the stored offset.

  NOTE(review): the second and third fields are narrowed to int and then to
  4 bytes / 1 byte respectively; larger values are silently truncated.
*/
static void writexrefstreamsubsect(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *index, fz_buffer *fzbuf, int from, int to)
{
  int num;

  pdf_array_push_int(ctx, index, from);
  pdf_array_push_int(ctx, index, to - from);
  for (num = from; num < to; num++)
  {
    pdf_xref_entry *entry = pdf_get_xref_entry_no_null(ctx, doc, num);
    int kind;
    int field2 = opts->ofs_list[num];
    int field3 = opts->gen_list[num];

    if (opts->use_list[num] == 0)
    {
      kind = 0; /* Free */
    }
    else if (entry->type == 'o')
    {
      kind = 2; /* Object Stream */
    }
    else
    {
      kind = 1; /* Object */
      field2 = opts->ofs_list[num] - opts->bias;
    }

    fz_append_byte(ctx, fzbuf, kind);
    fz_append_byte(ctx, fzbuf, field2>>24);
    fz_append_byte(ctx, fzbuf, field2>>16);
    fz_append_byte(ctx, fzbuf, field2>>8);
    fz_append_byte(ctx, fzbuf, field2);
    fz_append_byte(ctx, fzbuf, field3);
  }
}
2362
2363
/*
  Write a cross reference stream object followed by the closing
  startxref/%%EOF marker to opts->out.

  A new object is created in the document to hold the xref stream, so the
  range written is from..to inclusive of that new object (to is bumped by
  one). When saving incrementally only runs of objects for which
  pdf_xref_is_incremental is true are listed.

  first: non-zero if Info, Root, ID and Encrypt should be copied from the
    document trailer into the stream dictionary.
  main_xref_offset: if non-zero (and not saving incrementally), stored as
    Prev after subtracting opts->bias.
  startxref: offset written after "startxref" (less opts->bias).

  The stream object itself is written with writeobject's unenc argument
  set, i.e. unencrypted. When snapshotting, the temporary object is deleted
  again afterwards. On return doc->last_xref_was_old_style is cleared.
*/
static void writexrefstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t main_xref_offset, int64_t startxref)
{
  fz_buffer *records = NULL;
  pdf_obj *dict = NULL;
  pdf_obj *w = NULL;
  pdf_obj *index;
  int num;

  fz_var(dict);
  fz_var(w);
  fz_var(records);
  fz_try(ctx)
  {
    num = pdf_create_object(ctx, doc);
    expand_lists(ctx, opts, num);

    dict = pdf_new_dict(ctx, doc, 6);
    pdf_update_object(ctx, doc, num, dict);

    opts->first_xref_entry_offset = fz_tell_output(ctx, opts->out);

    /* Account for the xref stream object itself. */
    to++;

    if (first)
    {
      pdf_obj *copied[3] = { PDF_NAME(Info), PDF_NAME(Root), PDF_NAME(ID) };
      int k;

      /* Carry over whichever of these entries the trailer has. */
      for (k = 0; k < 3; k++)
      {
        pdf_obj *val = pdf_dict_get(ctx, pdf_trailer(ctx, doc), copied[k]);
        if (val)
          pdf_dict_put(ctx, dict, copied[k], val);
      }

      /* The encryption dictionary is kept in the writer state to handle
         the encryption dictionary object being renumbered during repair.*/
      if (opts->crypt_obj)
      {
        /* If the encryption dictionary used to be an indirect reference from the trailer,
           store it the same way in the xref stream in the saved file. */
        if (pdf_is_indirect(ctx, opts->crypt_obj))
          pdf_dict_put_indirect(ctx, dict, PDF_NAME(Encrypt), opts->crypt_object_number);
        else
          pdf_dict_put(ctx, dict, PDF_NAME(Encrypt), opts->crypt_obj);
      }
    }

    pdf_dict_put_int(ctx, dict, PDF_NAME(Size), to);

    if (!opts->do_incremental)
    {
      if (main_xref_offset != 0)
        pdf_dict_put_int(ctx, dict, PDF_NAME(Prev), main_xref_offset - opts->bias);
    }
    else
    {
      pdf_dict_put_int(ctx, dict, PDF_NAME(Prev), doc->startxref);
      if (!opts->do_snapshot)
        doc->startxref = startxref - opts->bias;
    }

    pdf_dict_put(ctx, dict, PDF_NAME(Type), PDF_NAME(XRef));

    /* Field widths matching the records built by writexrefstreamsubsect. */
    w = pdf_new_array(ctx, doc, 3);
    pdf_dict_put(ctx, dict, PDF_NAME(W), w);
    pdf_array_push_int(ctx, w, 1);
    pdf_array_push_int(ctx, w, 4);
    pdf_array_push_int(ctx, w, 1);

    index = pdf_new_array(ctx, doc, 2);
    pdf_dict_put_drop(ctx, dict, PDF_NAME(Index), index);

    /* opts->gen_list[num] is already initialized by fz_calloc. */
    opts->use_list[num] = 1;
    opts->ofs_list[num] = opts->first_xref_entry_offset;

    records = fz_new_buffer(ctx, (1 + 4 + 1) * (to-from));

    if (!opts->do_incremental)
    {
      writexrefstreamsubsect(ctx, doc, opts, index, records, from, to);
    }
    else
    {
      int run_start, run_end;

      for (run_start = from; run_start < to; run_start = run_end)
      {
        /* Skip over objects that are not part of the incremental update... */
        while (run_start < to && !pdf_xref_is_incremental(ctx, doc, run_start))
          run_start++;

        /* ...then find the end of the run of objects that are. */
        run_end = run_start;
        while (run_end < to && pdf_xref_is_incremental(ctx, doc, run_end))
          run_end++;

        if (run_end > run_start)
          writexrefstreamsubsect(ctx, doc, opts, index, records, run_start, run_end);
      }
    }

    pdf_update_stream(ctx, doc, dict, records, 0);

    writeobject(ctx, doc, opts, num, 0, 0, 1);
    fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref - opts->bias);

    if (opts->do_snapshot)
      pdf_delete_object(ctx, doc, num);
  }
  fz_always(ctx)
  {
    pdf_drop_obj(ctx, dict);
    pdf_drop_obj(ctx, w);
    fz_drop_buffer(ctx, records);
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }

  doc->last_xref_was_old_style = 0;
}
2491
2492
static void
2493
padto(fz_context *ctx, fz_output *out, int64_t target)
2494
0
{
2495
0
  int64_t pos = fz_tell_output(ctx, out);
2496
2497
0
  assert(pos <= target);
2498
0
  while (pos < target)
2499
0
  {
2500
0
    fz_write_byte(ctx, out, '\n');
2501
0
    pos++;
2502
0
  }
2503
0
}
2504
2505
static void
2506
dowriteobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int pass)
2507
0
{
2508
0
  pdf_xref_entry *entry = pdf_get_xref_entry_no_null(ctx, doc, num);
2509
0
  int gen = opts->gen_list ? opts->gen_list[num] : 0;
2510
0
  if (entry->type == 'f')
2511
0
    gen = entry->gen;
2512
0
  if (entry->type == 'n')
2513
0
    gen = entry->gen;
2514
2515
  /* If we are renumbering, then make sure all generation numbers are
2516
   * zero (except object 0 which must be free, and have a gen number of
2517
   * 65535). Changing the generation numbers (and indeed object numbers)
2518
   * will break encryption - so only do this if we are renumbering
2519
   * anyway. */
2520
0
  if (opts->do_garbage >= 2)
2521
0
    gen = (num == 0 ? 65535 : 0);
2522
2523
  /* For objects in object streams, the gen number gives us the index of
2524
   * the object within the stream. */
2525
0
  if (entry->type == 'o')
2526
0
    gen = entry->gen;
2527
2528
0
  if (opts->gen_list)
2529
0
    opts->gen_list[num] = gen;
2530
2531
0
  if (opts->do_garbage && !opts->use_list[num])
2532
0
    return;
2533
2534
0
  if (entry->type == 'o' && (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num)))
2535
0
  {
2536
0
    assert(opts->do_use_objstms);
2537
0
    opts->ofs_list[num] = entry->ofs;
2538
0
    return;
2539
0
  }
2540
2541
0
  if (entry->type == 'n')
2542
0
  {
2543
0
    if (pass > 0)
2544
0
      padto(ctx, opts->out, opts->ofs_list[num]);
2545
0
    if (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num))
2546
0
    {
2547
0
      if (opts->ofs_list)
2548
0
        opts->ofs_list[num] = fz_tell_output(ctx, opts->out);
2549
0
      writeobject(ctx, doc, opts, num, gen, 1, num == opts->crypt_object_number);
2550
0
    }
2551
0
  }
2552
0
  else if (opts->use_list)
2553
0
    opts->use_list[num] = 0;
2554
0
}
2555
2556
static void
2557
writeobjects(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int pass)
2558
0
{
2559
0
  int num;
2560
0
  int xref_len = pdf_xref_len(ctx, doc);
2561
2562
0
  if (!opts->do_incremental)
2563
0
  {
2564
0
    int version = pdf_version(ctx, doc);
2565
0
    fz_write_printf(ctx, opts->out, "%%PDF-%d.%d\n", version / 10, version % 10);
2566
0
    fz_write_string(ctx, opts->out, "%\xC2\xB5\xC2\xB6\n\n");
2567
0
  }
2568
2569
0
  dowriteobject(ctx, doc, opts, opts->start, pass);
2570
2571
0
  if (opts->do_linear)
2572
0
  {
2573
    /* Write first xref */
2574
0
    if (pass == 0)
2575
0
      opts->first_xref_offset = fz_tell_output(ctx, opts->out);
2576
0
    else
2577
0
      padto(ctx, opts->out, opts->first_xref_offset);
2578
0
    writexref(ctx, doc, opts, opts->start, pdf_xref_len(ctx, doc), 1, opts->main_xref_offset, 0);
2579
0
  }
2580
2581
0
  for (num = opts->start+1; num < xref_len; num++)
2582
0
    dowriteobject(ctx, doc, opts, num, pass);
2583
0
  if (opts->do_linear && pass == 1)
2584
0
  {
2585
0
    int64_t offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
2586
0
    padto(ctx, opts->out, offset);
2587
0
  }
2588
0
  for (num = 1; num < opts->start; num++)
2589
0
  {
2590
0
    if (pass == 1)
2591
0
      opts->ofs_list[num] += opts->hintstream_len;
2592
0
    dowriteobject(ctx, doc, opts, num, pass);
2593
0
  }
2594
0
}
2595
2596
/*
  Return the number of bits needed to represent x, i.e. the smallest i
  for which (1<<i) > x. For example 1 -> 1, 3 -> 2, 4 -> 3.

  Returns 0 for x <= 0. Also returns 0 when the answer would need every
  non-sign bit of an int (x >= 2^30 with a 32 bit int).
*/
static int
my_log2(int x)
{
  const int sign_bit = (int)(sizeof(int) * CHAR_BIT) - 1;
  int i = 0;

  if (x <= 0)
    return 0;

  /* Check the bound before shifting: 1<<sign_bit is undefined behaviour
   * for a signed int, so it must never be evaluated. */
  while (i < sign_bit && (1<<i) <= x)
    i++;

  if (i >= sign_bit)
    return 0;

  return i;
}
2613
2614
static int64_t
2615
offset_of_first_used_obj_after(const pdf_write_state *opts, int i, int len)
2616
0
{
2617
  /* The objects in the file are laid out as:
2618
   *
2619
   * start
2620
   * ...
2621
   * len-1
2622
   * 1
2623
   * ...
2624
   * start-1
2625
   *
2626
   * But, some may not be present...
2627
   */
2628
0
  do
2629
0
  {
2630
0
    i++;
2631
0
    if (i == len)
2632
0
      i = 1;
2633
0
    if (i == opts->start)
2634
0
      return opts->main_xref_offset;
2635
0
  }
2636
0
  while (opts->use_list[i] == 0);
2637
2638
0
  return opts->ofs_list[i];
2639
0
}
2640
2641
static void
2642
make_page_offset_hints(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, fz_buffer *buf)
2643
0
{
2644
0
  int i, j;
2645
0
  int min_objs_per_page, max_objs_per_page;
2646
0
  int min_page_length, max_page_length;
2647
0
  int objs_per_page_bits;
2648
0
  int min_shared_object, max_shared_object;
2649
0
  int max_shared_object_refs = 0;
2650
0
  int min_shared_length, max_shared_length;
2651
0
  page_objects **pop = &opts->page_object_lists->page[0];
2652
0
  int page_len_bits, shared_object_bits, shared_object_id_bits;
2653
0
  int shared_length_bits;
2654
0
  int xref_len = pdf_xref_len(ctx, doc);
2655
2656
0
  min_shared_object = pdf_xref_len(ctx, doc);
2657
0
  max_shared_object = 1;
2658
0
  min_shared_length = opts->file_len;
2659
0
  max_shared_length = 0;
2660
0
  for (i=1; i < xref_len; i++)
2661
0
  {
2662
0
    int min, max, page;
2663
2664
0
    min = opts->ofs_list[i];
2665
0
    max = offset_of_first_used_obj_after(opts, i, xref_len);
2666
2667
0
    assert(max > min);
2668
2669
0
    if (opts->use_list[i] & USE_SHARED)
2670
0
    {
2671
0
      page = -1;
2672
0
      if (i < min_shared_object)
2673
0
        min_shared_object = i;
2674
0
      if (i > max_shared_object)
2675
0
        max_shared_object = i;
2676
0
      if (min_shared_length > max - min)
2677
0
        min_shared_length = max - min;
2678
0
      if (max_shared_length < max - min)
2679
0
        max_shared_length = max - min;
2680
0
    }
2681
0
    else if (opts->use_list[i] & (USE_CATALOGUE | USE_HINTS | USE_PARAMS))
2682
0
      page = -1;
2683
0
    else if (opts->use_list[i] & USE_PAGE1)
2684
0
    {
2685
0
      page = 0;
2686
0
      if (min_shared_length > max - min)
2687
0
        min_shared_length = max - min;
2688
0
      if (max_shared_length < max - min)
2689
0
        max_shared_length = max - min;
2690
0
    }
2691
0
    else if (opts->use_list[i] == 0)
2692
0
      page = -1;
2693
0
    else
2694
0
      page = opts->use_list[i]>>USE_PAGE_SHIFT;
2695
2696
0
    if (page >= 0)
2697
0
    {
2698
0
      pop[page]->num_objects++;
2699
0
      if (pop[page]->min_ofs > min)
2700
0
        pop[page]->min_ofs = min;
2701
0
      if (pop[page]->max_ofs < max)
2702
0
        pop[page]->max_ofs = max;
2703
0
    }
2704
0
  }
2705
2706
0
  min_objs_per_page = max_objs_per_page = pop[0]->num_objects;
2707
0
  min_page_length = max_page_length = pop[0]->max_ofs - pop[0]->min_ofs;
2708
0
  for (i=1; i < opts->page_count; i++)
2709
0
  {
2710
0
    int tmp;
2711
0
    if (min_objs_per_page > pop[i]->num_objects)
2712
0
      min_objs_per_page = pop[i]->num_objects;
2713
0
    if (max_objs_per_page < pop[i]->num_objects)
2714
0
      max_objs_per_page = pop[i]->num_objects;
2715
0
    tmp = pop[i]->max_ofs - pop[i]->min_ofs;
2716
0
    if (tmp < min_page_length)
2717
0
      min_page_length = tmp;
2718
0
    if (tmp > max_page_length)
2719
0
      max_page_length = tmp;
2720
0
  }
2721
2722
0
  for (i=0; i < opts->page_count; i++)
2723
0
  {
2724
0
    int count = 0;
2725
0
    page_objects *po = opts->page_object_lists->page[i];
2726
0
    for (j = 0; j < po->len; j++)
2727
0
    {
2728
0
      if (i == 0 && opts->use_list[po->object[j]] & USE_PAGE1)
2729
0
        count++;
2730
0
      else if (i != 0 && opts->use_list[po->object[j]] & USE_SHARED)
2731
0
        count++;
2732
0
    }
2733
0
    po->num_shared = count;
2734
0
    if (i == 0 || count > max_shared_object_refs)
2735
0
      max_shared_object_refs = count;
2736
0
  }
2737
0
  if (min_shared_object > max_shared_object)
2738
0
    min_shared_object = max_shared_object = 0;
2739
2740
  /* Table F.3 - Header */
2741
  /* Header Item 1: Least number of objects in a page */
2742
0
  fz_append_bits(ctx, buf, min_objs_per_page, 32);
2743
  /* Header Item 2: Location of first pages page object */
2744
0
  fz_append_bits(ctx, buf, opts->ofs_list[pop[0]->page_object_number], 32);
2745
  /* Header Item 3: Number of bits required to represent the difference
2746
   * between the greatest and least number of objects in a page. */
2747
0
  objs_per_page_bits = my_log2(max_objs_per_page - min_objs_per_page);
2748
0
  fz_append_bits(ctx, buf, objs_per_page_bits, 16);
2749
  /* Header Item 4: Least length of a page. */
2750
0
  fz_append_bits(ctx, buf, min_page_length, 32);
2751
  /* Header Item 5: Number of bits needed to represent the difference
2752
   * between the greatest and least length of a page. */
2753
0
  page_len_bits = my_log2(max_page_length - min_page_length);
2754
0
  fz_append_bits(ctx, buf, page_len_bits, 16);
2755
  /* Header Item 6: Least offset to start of content stream (Acrobat
2756
   * sets this to always be 0) */
2757
0
  fz_append_bits(ctx, buf, 0, 32);
2758
  /* Header Item 7: Number of bits needed to represent the difference
2759
   * between the greatest and least offset to content stream (Acrobat
2760
   * sets this to always be 0) */
2761
0
  fz_append_bits(ctx, buf, 0, 16);
2762
  /* Header Item 8: Least content stream length. (Acrobat
2763
   * sets this to always be 0) */
2764
0
  fz_append_bits(ctx, buf, 0, 32);
2765
  /* Header Item 9: Number of bits needed to represent the difference
2766
   * between the greatest and least content stream length (Acrobat
2767
   * sets this to always be the same as item 5) */
2768
0
  fz_append_bits(ctx, buf, page_len_bits, 16);
2769
  /* Header Item 10: Number of bits needed to represent the greatest
2770
   * number of shared object references. */
2771
0
  shared_object_bits = my_log2(max_shared_object_refs);
2772
0
  fz_append_bits(ctx, buf, shared_object_bits, 16);
2773
  /* Header Item 11: Number of bits needed to represent the greatest
2774
   * shared object identifier. */
2775
0
  shared_object_id_bits = my_log2(max_shared_object - min_shared_object + pop[0]->num_shared);
2776
0
  fz_append_bits(ctx, buf, shared_object_id_bits, 16);
2777
  /* Header Item 12: Number of bits needed to represent the numerator
2778
   * of the fractions. We always send 0. */
2779
0
  fz_append_bits(ctx, buf, 0, 16);
2780
  /* Header Item 13: Number of bits needed to represent the denominator
2781
   * of the fractions. We always send 0. */
2782
0
  fz_append_bits(ctx, buf, 0, 16);
2783
2784
  /* Table F.4 - Page offset hint table (per page) */
2785
  /* Item 1: A number that, when added to the least number of objects
2786
   * on a page, gives the number of objects in the page. */
2787
0
  for (i = 0; i < opts->page_count; i++)
2788
0
  {
2789
0
    fz_append_bits(ctx, buf, pop[i]->num_objects - min_objs_per_page, objs_per_page_bits);
2790
0
  }
2791
0
  fz_append_bits_pad(ctx, buf);
2792
  /* Item 2: A number that, when added to the least page length, gives
2793
   * the length of the page in bytes. */
2794
0
  for (i = 0; i < opts->page_count; i++)
2795
0
  {
2796
0
    fz_append_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
2797
0
  }
2798
0
  fz_append_bits_pad(ctx, buf);
2799
  /* Item 3: The number of shared objects referenced from the page. */
2800
0
  for (i = 0; i < opts->page_count; i++)
2801
0
  {
2802
0
    fz_append_bits(ctx, buf, pop[i]->num_shared, shared_object_bits);
2803
0
  }
2804
0
  fz_append_bits_pad(ctx, buf);
2805
  /* Item 4: Shared object id for each shared object ref in every page.
2806
   * Spec says "not for page 1", but acrobat does send page 1's - all
2807
   * as zeros. */
2808
0
  for (i = 0; i < opts->page_count; i++)
2809
0
  {
2810
0
    for (j = 0; j < pop[i]->len; j++)
2811
0
    {
2812
0
      int o = pop[i]->object[j];
2813
0
      if (i == 0 && opts->use_list[o] & USE_PAGE1)
2814
0
        fz_append_bits(ctx, buf, 0 /* o - pop[0]->page_object_number */, shared_object_id_bits);
2815
0
      if (i != 0 && opts->use_list[o] & USE_SHARED)
2816
0
        fz_append_bits(ctx, buf, o - min_shared_object + pop[0]->num_shared, shared_object_id_bits);
2817
0
    }
2818
0
  }
2819
0
  fz_append_bits_pad(ctx, buf);
2820
  /* Item 5: Numerator of fractional position for each shared object reference. */
2821
  /* We always send 0 in 0 bits */
2822
  /* Item 6: A number that, when added to the least offset to the start
2823
   * of the content stream (F.3 Item 6), gives the offset in bytes of
2824
   * start of the pages content stream object relative to the beginning
2825
   * of the page. Always 0 in 0 bits. */
2826
  /* Item 7: A number that, when added to the least content stream length
2827
   * (F.3 Item 8), gives the length of the pages content stream object.
2828
   * Always == Item 2 as least content stream length = least page stream
2829
   * length.
2830
   */
2831
0
  for (i = 0; i < opts->page_count; i++)
2832
0
  {
2833
0
    fz_append_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
2834
0
  }
2835
2836
  /* Pad, and then do shared object hint table */
2837
0
  fz_append_bits_pad(ctx, buf);
2838
0
  opts->hints_shared_offset = (int)fz_buffer_storage(ctx, buf, NULL);
2839
2840
  /* Table F.5: */
2841
  /* Header Item 1: Object number of the first object in the shared
2842
   * objects section. */
2843
0
  fz_append_bits(ctx, buf, min_shared_object, 32);
2844
  /* Header Item 2: Location of first object in the shared objects
2845
   * section. */
2846
0
  fz_append_bits(ctx, buf, opts->ofs_list[min_shared_object], 32);
2847
  /* Header Item 3: The number of shared object entries for the first
2848
   * page. */
2849
0
  fz_append_bits(ctx, buf, pop[0]->num_shared, 32);
2850
  /* Header Item 4: The number of shared object entries for the shared
2851
   * objects section + first page. */
2852
0
  fz_append_bits(ctx, buf, max_shared_object - min_shared_object + pop[0]->num_shared, 32);
2853
  /* Header Item 5: The number of bits needed to represent the greatest
2854
   * number of objects in a shared object group (Always 0). */
2855
0
  fz_append_bits(ctx, buf, 0, 16);
2856
  /* Header Item 6: The least length of a shared object group in bytes. */
2857
0
  fz_append_bits(ctx, buf, min_shared_length, 32);
2858
  /* Header Item 7: The number of bits required to represent the
2859
   * difference between the greatest and least length of a shared object
2860
   * group. */
2861
0
  shared_length_bits = my_log2(max_shared_length - min_shared_length);
2862
0
  fz_append_bits(ctx, buf, shared_length_bits, 16);
2863
2864
  /* Table F.6 */
2865
  /* Item 1: Shared object group length (page 1 objects) */
2866
0
  for (j = 0; j < pop[0]->len; j++)
2867
0
  {
2868
0
    int o = pop[0]->object[j];
2869
0
    int64_t min, max;
2870
0
    min = opts->ofs_list[o];
2871
0
    if (o == opts->start-1)
2872
0
      max = opts->main_xref_offset;
2873
0
    else if (o < xref_len-1)
2874
0
      max = opts->ofs_list[o+1];
2875
0
    else
2876
0
      max = opts->ofs_list[1];
2877
0
    if (opts->use_list[o] & USE_PAGE1)
2878
0
      fz_append_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
2879
0
  }
2880
  /* Item 1: Shared object group length (shared objects) */
2881
0
  for (i = min_shared_object; i <= max_shared_object; i++)
2882
0
  {
2883
0
    int min, max;
2884
0
    min = opts->ofs_list[i];
2885
0
    if (i == opts->start-1)
2886
0
      max = opts->main_xref_offset;
2887
0
    else if (i < xref_len-1)
2888
0
      max = opts->ofs_list[i+1];
2889
0
    else
2890
0
      max = opts->ofs_list[1];
2891
0
    fz_append_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
2892
0
  }
2893
0
  fz_append_bits_pad(ctx, buf);
2894
2895
  /* Item 2: MD5 presence flags */
2896
0
  for (i = max_shared_object - min_shared_object + pop[0]->num_shared; i > 0; i--)
2897
0
  {
2898
0
    fz_append_bits(ctx, buf, 0, 1);
2899
0
  }
2900
0
  fz_append_bits_pad(ctx, buf);
2901
  /* Item 3: MD5 sums (not present) */
2902
0
  fz_append_bits_pad(ctx, buf);
2903
  /* Item 4: Number of objects in the group (not present) */
2904
0
}
2905
2906
static void
2907
make_hint_stream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
2908
0
{
2909
0
  fz_buffer *buf;
2910
0
  pdf_obj *obj = NULL;
2911
2912
0
  fz_var(obj);
2913
2914
0
  buf = fz_new_buffer(ctx, 100);
2915
0
  fz_try(ctx)
2916
0
  {
2917
0
    make_page_offset_hints(ctx, doc, opts, buf);
2918
0
    obj = pdf_load_object(ctx, doc, pdf_xref_len(ctx, doc)-1);
2919
0
    pdf_update_stream(ctx, doc, obj, buf, 0);
2920
0
    opts->hintstream_len = (int64_t)fz_buffer_storage(ctx, buf, NULL);
2921
0
  }
2922
0
  fz_always(ctx)
2923
0
  {
2924
0
    pdf_drop_obj(ctx, obj);
2925
0
    fz_drop_buffer(ctx, buf);
2926
0
  }
2927
0
  fz_catch(ctx)
2928
0
    fz_rethrow(ctx);
2929
0
}
2930
2931
#ifdef DEBUG_WRITING
/* Debug aid (only built with DEBUG_WRITING): print, for every object number
 * in the xref, the offset and use flags currently recorded in opts. */
static void dump_object_details(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
  int i;
  int n = pdf_xref_len(ctx, doc);

  for (i = 0; i < n; i++)
  {
    /* The offset is cast to long long so the specifier matches on every
     * platform; "%ld" is wrong wherever the entry is wider than long
     * (ofs_list is presumably int64_t, as other code here reads it that way). */
    fprintf(stderr, "%d@%lld: use=%d\n", i, (long long)opts->ofs_list[i], opts->use_list[i]);
  }
}
#endif
2942
2943
/* For every incremental xref section that has unsaved signatures, append
 * placeholder entries to each signature's /V /ByteRange array. The array is
 * first written with dummy values and overwritten later, so its initial
 * form must already occupy enough space in the file. */
static void presize_unsaved_signature_byteranges(fz_context *ctx, pdf_document *doc)
{
  int section;

  for (section = 0; section < doc->num_incremental_sections; section++)
  {
    pdf_xref *xref = &doc->xref_sections[section];
    pdf_unsaved_sig *sig;
    int sig_count = 0;
    int pairs;

    if (!xref->unsaved_sigs)
      continue;

    for (sig = xref->unsaved_sigs; sig; sig = sig->next)
      sig_count++;

    /* One byte segment before the first signature, one after the last,
     * and one between each consecutive pair: sig_count + 1 in total. */
    pairs = sig_count + 1;

    for (sig = xref->unsaved_sigs; sig; sig = sig->next)
    {
      pdf_obj *byte_range = pdf_dict_getl(ctx, sig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL);
      int remaining;

      /* Each segment is an (offset, length) pair of worst-case width. */
      for (remaining = pairs; remaining > 0; remaining--)
      {
        pdf_array_push_int(ctx, byte_range, INT_MAX);
        pdf_array_push_int(ctx, byte_range, INT_MAX);
      }
    }
  }
}
2979
2980
/* Finish writing the unsaved signatures of every incremental xref section.
 *
 * For each section (visited from the highest index down to 0) that has
 * unsaved signatures this:
 *  - reads back the written file to locate each signature's /ByteRange and
 *    /Contents values,
 *  - builds the real ByteRange array and stores a copy in every signature
 *    field,
 *  - overwrites the placeholder ByteRange text in the file, padded with
 *    spaces,
 *  - writes each digest via pdf_write_digest,
 *  - frees the section's unsaved signature records.
 *
 * Throws FZ_ERROR_FORMAT if the markers cannot be located in the expected
 * order, or if the ByteRange text does not fit the scratch buffer. */
static void complete_signatures(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
  pdf_obj *byte_range = NULL;
  char *buf = NULL, *ptr;
  int s;
  fz_stream *stm = NULL;

  fz_var(byte_range);
  fz_var(stm);
  fz_var(buf);

  fz_try(ctx)
  {
    for (s = 0; s < doc->num_incremental_sections; s++)
    {
      pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - 1];

      if (xref->unsaved_sigs)
      {
        pdf_unsaved_sig *usig;
        size_t buf_size = 0;
        size_t i;
        size_t last_end;

        /* Size the scratch buffer from the largest digest any signer
         * may produce (doubled), plus slack for the surrounding
         * dictionary text. */
        for (usig = xref->unsaved_sigs; usig; usig = usig->next)
        {
          size_t size = usig->signer->max_digest_size(ctx, usig->signer);
          buf_size = fz_maxz(buf_size, size);
        }

        buf_size = buf_size * 2 + SIG_EXTRAS_SIZE;

        buf = fz_calloc(ctx, buf_size, 1);

        stm = fz_stream_from_output(ctx, opts->out);
        /* Locate the byte ranges and contents in the saved file */
        for (usig = xref->unsaved_sigs; usig; usig = usig->next)
        {
          char *bstr, *cstr, *fstr;
          size_t bytes_read;
          int pnum = pdf_obj_parent_num(ctx, pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL));
          fz_seek(ctx, stm, opts->ofs_list[pnum], SEEK_SET);
          /* SIG_EXTRAS_SIZE is an arbitrary value and its addition above to buf_size
           * could cause an attempt to read off the end of the file. That's not an
           * error, but we need to keep track of how many bytes are read and search
           * for markers only in defined data */
          bytes_read = fz_read(ctx, stm, (unsigned char *)buf, buf_size);
          assert(bytes_read <= buf_size);

          bstr = fz_memmem(buf, bytes_read, SLASH_BYTE_RANGE, sizeof(SLASH_BYTE_RANGE)-1);
          cstr = fz_memmem(buf, bytes_read, SLASH_CONTENTS, sizeof(SLASH_CONTENTS)-1);
          fstr = fz_memmem(buf, bytes_read, SLASH_FILTER, sizeof(SLASH_FILTER)-1);

          /* The three keys must all be present, in this order. */
          if (!(bstr && cstr && fstr && bstr < cstr && cstr < fstr))
            fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to determine byte ranges while writing signature");

          /* Convert buffer-relative positions to absolute file offsets. */
          usig->byte_range_start = bstr - buf + sizeof(SLASH_BYTE_RANGE)-1 + opts->ofs_list[pnum];
          usig->byte_range_end = cstr - buf + opts->ofs_list[pnum];
          usig->contents_start = cstr - buf + sizeof(SLASH_CONTENTS)-1 + opts->ofs_list[pnum];
          usig->contents_end = fstr - buf + opts->ofs_list[pnum];
        }

        fz_drop_stream(ctx, stm);
        stm = NULL;

        /* Recreate ByteRange with correct values. */
        byte_range = pdf_new_array(ctx, doc, 4);

        /* One (offset, length) pair per gap between signature contents,
         * followed by a final pair running to the end of the section. */
        last_end = 0;
        for (usig = xref->unsaved_sigs; usig; usig = usig->next)
        {
          pdf_array_push_int(ctx, byte_range, last_end);
          pdf_array_push_int(ctx, byte_range, usig->contents_start - last_end);
          last_end = usig->contents_end;
        }
        pdf_array_push_int(ctx, byte_range, last_end);
        pdf_array_push_int(ctx, byte_range, xref->end_ofs - last_end);

        /* Copy the new ByteRange to the other unsaved signatures */
        for (usig = xref->unsaved_sigs; usig; usig = usig->next)
          pdf_dict_putl_drop(ctx, usig->field, pdf_copy_array(ctx, byte_range), PDF_NAME(V), PDF_NAME(ByteRange), NULL);

        /* Write the byte range into buf, padding with spaces */
        ptr = pdf_sprint_obj(ctx, buf, buf_size, &i, byte_range, 1, 0);
        if (ptr != buf)
        {
          /* The text did not fit in buf. This should never happen, but
           * if it did, buf would not hold the text and buf_size - i
           * below could wrap around, so bail out rather than pad. */
          fz_free(ctx, ptr);
          fz_throw(ctx, FZ_ERROR_FORMAT, "ByteRange too large while writing signature");
        }
        memset(buf+i, ' ', buf_size-i);

        /* Write the byte range to the file */
        for (usig = xref->unsaved_sigs; usig; usig = usig->next)
        {
          fz_seek_output(ctx, opts->out, usig->byte_range_start, SEEK_SET);
          fz_write_data(ctx, opts->out, buf, usig->byte_range_end - usig->byte_range_start);
        }

        /* Write the digests into the file */
        for (usig = xref->unsaved_sigs; usig; usig = usig->next)
          pdf_write_digest(ctx, opts->out, byte_range, usig->field, usig->contents_start, usig->contents_end - usig->contents_start, usig->signer);

        /* delete the unsaved_sigs records */
        while ((usig = xref->unsaved_sigs) != NULL)
        {
          xref->unsaved_sigs = usig->next;
          pdf_drop_obj(ctx, usig->field);
          pdf_drop_signer(ctx, usig->signer);
          fz_free(ctx, usig);
        }

        xref->unsaved_sigs_end = NULL;

        pdf_drop_obj(ctx, byte_range);
        byte_range = NULL;

        fz_free(ctx, buf);
        buf = NULL;
      }
    }
  }
  fz_always(ctx)
  {
    pdf_drop_obj(ctx, byte_range);
  }
  fz_catch(ctx)
  {
    /* stm and buf are only still set if we failed part way through. */
    fz_drop_stream(ctx, stm);
    fz_free(ctx, buf);
    fz_rethrow(ctx);
  }
}
3109
3110
/* Run the content stream filter over every page of the document and over
 * each annotation on those pages.
 *
 * sanitize: when non-zero, the sanitize filter is added to the filter list.
 * ascii, newlines: passed straight through to the filter options. */
static void clean_content_streams(fz_context *ctx, pdf_document *doc, int sanitize, int ascii, int newlines)
{
  int page_count = pdf_count_pages(ctx, doc);
  int pno;

  pdf_sanitize_filter_options sanitize_opts = { 0 };
  pdf_filter_factory factories[2] = { 0 };
  pdf_filter_options filter_opts = { 0 };

  /* The second factory entry stays zeroed. */
  factories[0].filter = pdf_new_sanitize_filter;
  factories[0].options = &sanitize_opts;

  filter_opts.recurse = 1;
  filter_opts.ascii = ascii;
  filter_opts.newlines = newlines;
  if (sanitize)
    filter_opts.filters = factories;
  else
    filter_opts.filters = NULL;

  for (pno = 0; pno < page_count; pno++)
  {
    pdf_page *page = pdf_load_page(ctx, doc, pno);
    pdf_annot *annot;

    fz_try(ctx)
    {
      pdf_filter_page_contents(ctx, doc, page, &filter_opts);

      annot = pdf_first_annot(ctx, page);
      while (annot != NULL)
      {
        pdf_filter_annot_contents(ctx, doc, annot, &filter_opts);
        annot = pdf_next_annot(ctx, annot);
      }
    }
    fz_always(ctx)
    {
      fz_drop_page(ctx, &page->super);
    }
    fz_catch(ctx)
    {
      fz_rethrow(ctx);
    }
  }
}
3145
3146
/* Initialise the pdf_write_state, used dynamically during the write, from the static
3147
 * pdf_write_options, passed into pdf_save_document */
3148
static void initialise_write_state(fz_context *ctx, pdf_document *doc, const pdf_write_options *in_opts, pdf_write_state *opts)
3149
0
{
3150
0
  int xref_len = pdf_xref_len(ctx, doc);
3151
3152
0
  opts->do_incremental = in_opts->do_incremental;
3153
0
  opts->do_ascii = in_opts->do_ascii;
3154
0
  opts->do_tight = !in_opts->do_pretty;
3155
0
  opts->do_expand = in_opts->do_decompress;
3156
0
  opts->do_compress = in_opts->do_compress;
3157
0
  opts->do_compress_images = in_opts->do_compress_images;
3158
0
  opts->do_compress_fonts = in_opts->do_compress_fonts;
3159
0
  opts->do_snapshot = in_opts->do_snapshot;
3160
0
  opts->compression_effort = in_opts->compression_effort;
3161
0
  if (opts->compression_effort < 0)
3162
0
    opts->compression_effort = 0;
3163
0
  else if (opts->compression_effort > 100)
3164
0
    opts->compression_effort = 100;
3165
3166
0
  opts->do_garbage = in_opts->do_garbage;
3167
0
  opts->do_linear = in_opts->do_linear;
3168
0
  opts->do_clean = in_opts->do_clean;
3169
0
  opts->do_encrypt = in_opts->do_encrypt;
3170
0
  opts->dont_regenerate_id = in_opts->dont_regenerate_id;
3171
0
  opts->do_preserve_metadata = in_opts->do_preserve_metadata;
3172
0
  opts->do_use_objstms = in_opts->do_use_objstms;
3173
0
  opts->start = 0;
3174
0
  opts->main_xref_offset = INT_MIN;
3175
3176
0
  opts->permissions = in_opts->permissions;
3177
0
  memcpy(opts->opwd_utf8, in_opts->opwd_utf8, nelem(opts->opwd_utf8));
3178
0
  memcpy(opts->upwd_utf8, in_opts->upwd_utf8, nelem(opts->upwd_utf8));
3179
3180
  /* We deliberately make these arrays long enough to cope with
3181
  * 1 to n access rather than 0..n-1, and add space for 2 new
3182
  * extra entries that may be required for linearization. */
3183
0
  opts->list_len = 0;
3184
0
  opts->use_list = NULL;
3185
0
  opts->ofs_list = NULL;
3186
0
  opts->gen_list = NULL;
3187
0
  opts->renumber_map = NULL;
3188
0
  opts->rev_renumber_map = NULL;
3189
3190
0
  expand_lists(ctx, opts, xref_len);
3191
0
}
3192
3193
/* Free the resources held by the dynamic write options */
3194
static void finalise_write_state(fz_context *ctx, pdf_write_state *opts)
3195
0
{
3196
0
  fz_free(ctx, opts->use_list);
3197
0
  fz_free(ctx, opts->ofs_list);
3198
0
  fz_free(ctx, opts->gen_list);
3199
0
  fz_free(ctx, opts->renumber_map);
3200
0
  fz_free(ctx, opts->rev_renumber_map);
3201
0
  pdf_drop_obj(ctx, opts->linear_l);
3202
0
  pdf_drop_obj(ctx, opts->linear_h0);
3203
0
  pdf_drop_obj(ctx, opts->linear_h1);
3204
0
  pdf_drop_obj(ctx, opts->linear_o);
3205
0
  pdf_drop_obj(ctx, opts->linear_e);
3206
0
  pdf_drop_obj(ctx, opts->linear_n);
3207
0
  pdf_drop_obj(ctx, opts->linear_t);
3208
0
  pdf_drop_obj(ctx, opts->hints_s);
3209
0
  pdf_drop_obj(ctx, opts->hints_length);
3210
0
  page_objects_list_destroy(ctx, opts->page_object_lists);
3211
0
}
3212
3213
/* Default save options: every flag is off, no password is set and all
 * permission bits are set (~0). The initializer is positional, so the
 * per-line comments must be kept in step with the member order of
 * pdf_write_options. Members not listed here are implicitly zero
 * (presumably do_preserve_metadata, do_use_objstms and compression_effort,
 * which initialise_write_state reads -- confirm against the struct). */
const pdf_write_options pdf_default_write_options = {
3214
  0, /* do_incremental */
3215
  0, /* do_pretty */
3216
  0, /* do_ascii */
3217
  0, /* do_compress */
3218
  0, /* do_compress_images */
3219
  0, /* do_compress_fonts */
3220
  0, /* do_decompress */
3221
  0, /* do_garbage */
3222
  0, /* do_linear */
3223
  0, /* do_clean */
3224
  0, /* do_sanitize */
3225
  0, /* do_appearance */
3226
  0, /* do_encrypt */
3227
  0, /* dont_regenerate_id */
3228
  ~0, /* permissions */
3229
  "", /* opwd_utf8[128] */
3230
  "", /* upwd_utf8[128] */
3231
  0 /* do_snapshot */
3232
};
3233
3234
/* Options used when writing a snapshot. They differ from
 * pdf_default_write_options in exactly three members: do_incremental,
 * dont_regenerate_id and do_snapshot are all 1. The initializer is
 * positional, so the per-line comments must be kept in step with the
 * member order of pdf_write_options. */
static const pdf_write_options pdf_snapshot_write_options = {
3235
  1, /* do_incremental */
3236
  0, /* do_pretty */
3237
  0, /* do_ascii */
3238
  0, /* do_compress */
3239
  0, /* do_compress_images */
3240
  0, /* do_compress_fonts */
3241
  0, /* do_decompress */
3242
  0, /* do_garbage */
3243
  0, /* do_linear */
3244
  0, /* do_clean */
3245
  0, /* do_sanitize */
3246
  0, /* do_appearance */
3247
  0, /* do_encrypt */
3248
  1, /* dont_regenerate_id */
3249
  ~0, /* permissions */
3250
  "", /* opwd_utf8[128] */
3251
  "", /* upwd_utf8[128] */
3252
  1 /* do_snapshot */
3253
};
3254
3255
/* Help text describing the option string accepted by
 * pdf_parse_write_options. The "or garbage=..." lines continue the
 * "garbage:" entry and must directly follow it. */
const char *fz_pdf_write_options_usage =
  "PDF output options:\n"
  "\tdecompress: decompress all streams (except compress-fonts/images)\n"
  "\tcompress: compress all streams\n"
  "\tcompress-fonts: compress embedded fonts\n"
  "\tcompress-images: compress images\n"
  "\tcompression-effort=NUMBER: compression effort (0 to 100)\n"
  "\tascii: ASCII hex encode binary streams\n"
  "\tpretty: pretty-print objects with indentation\n"
  "\tlinearize: optimize for web browsers\n"
  "\tclean: pretty-print graphics commands in content streams\n"
  "\tsanitize: sanitize graphics commands in content streams\n"
  "\tgarbage: garbage collect unused objects\n"
  "\tor garbage=compact: ... and compact cross reference table\n"
  "\tor garbage=deduplicate: ... and remove duplicate objects\n"
  "\tincremental: write changes as incremental update\n"
  "\tobjstms: use object streams and cross reference streams\n"
  "\tappearance=yes|all: synthesize just missing, or all, annotation/widget appearance streams\n"
  "\tcontinue-on-error: continue saving the document even if there is an error\n"
  "\tdecrypt: write unencrypted document\n"
  "\tencrypt=rc4-40|rc4-128|aes-128|aes-256: write encrypted document\n"
  "\tpermissions=NUMBER: document permissions to grant when encrypting\n"
  "\tuser-password=PASSWORD: password required to read document\n"
  "\towner-password=PASSWORD: password required to edit document\n"
  "\tregenerate-id: (default yes) regenerate document id\n"
  "\n";
3278
3279
pdf_write_options *
3280
pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args)
3281
0
{
3282
0
  const char *val;
3283
3284
0
  memset(opts, 0, sizeof *opts);
3285
3286
0
  if (fz_has_option(ctx, args, "decompress", &val))
3287
0
    opts->do_decompress = fz_option_eq(val, "yes");
3288
0
  if (fz_has_option(ctx, args, "compress", &val))
3289
0
    opts->do_compress = fz_option_eq(val, "yes");
3290
0
  if (fz_has_option(ctx, args, "compress-fonts", &val))
3291
0
    opts->do_compress_fonts = fz_option_eq(val, "yes");
3292
0
  if (fz_has_option(ctx, args, "compress-images", &val))
3293
0
    opts->do_compress_images = fz_option_eq(val, "yes");
3294
0
  if (fz_has_option(ctx, args, "compression-effort", &val))
3295
0
    opts->compression_effort = fz_atoi(val);
3296
0
  if (fz_has_option(ctx, args, "ascii", &val))
3297
0
    opts->do_ascii = fz_option_eq(val, "yes");
3298
0
  if (fz_has_option(ctx, args, "pretty", &val))
3299
0
    opts->do_pretty = fz_option_eq(val, "yes");
3300
0
  if (fz_has_option(ctx, args, "linearize", &val))
3301
0
    opts->do_linear = fz_option_eq(val, "yes");
3302
0
  if (fz_has_option(ctx, args, "clean", &val))
3303
0
    opts->do_clean = fz_option_eq(val, "yes");
3304
0
  if (fz_has_option(ctx, args, "sanitize", &val))
3305
0
    opts->do_sanitize = fz_option_eq(val, "yes");
3306
0
  if (fz_has_option(ctx, args, "incremental", &val))
3307
0
    opts->do_incremental = fz_option_eq(val, "yes");
3308
0
  if (fz_has_option(ctx, args, "objstms", &val))
3309
0
    opts->do_use_objstms = fz_option_eq(val, "yes");
3310
0
  if (fz_has_option(ctx, args, "regenerate-id", &val))
3311
0
    opts->dont_regenerate_id = fz_option_eq(val, "no");
3312
0
  if (fz_has_option(ctx, args, "decrypt", &val))
3313
0
    opts->do_encrypt = fz_option_eq(val, "yes") ? PDF_ENCRYPT_NONE : PDF_ENCRYPT_KEEP;
3314
0
  if (fz_has_option(ctx, args, "encrypt", &val))
3315
0
  {
3316
0
    opts->do_encrypt = PDF_ENCRYPT_UNKNOWN;
3317
0
    if (fz_option_eq(val, "none") || fz_option_eq(val, "no"))
3318
0
      opts->do_encrypt = PDF_ENCRYPT_NONE;
3319
0
    if (fz_option_eq(val, "keep"))
3320
0
      opts->do_encrypt = PDF_ENCRYPT_KEEP;
3321
0
    if (fz_option_eq(val, "rc4-40") || fz_option_eq(val, "yes"))
3322
0
      opts->do_encrypt = PDF_ENCRYPT_RC4_40;
3323
0
    if (fz_option_eq(val, "rc4-128"))
3324
0
      opts->do_encrypt = PDF_ENCRYPT_RC4_128;
3325
0
    if (fz_option_eq(val, "aes-128"))
3326
0
      opts->do_encrypt = PDF_ENCRYPT_AES_128;
3327
0
    if (fz_option_eq(val, "aes-256"))
3328
0
      opts->do_encrypt = PDF_ENCRYPT_AES_256;
3329
0
  }
3330
0
  if (fz_has_option(ctx, args, "owner-password", &val))
3331
0
    fz_copy_option(ctx, val, opts->opwd_utf8, nelem(opts->opwd_utf8));
3332
0
  if (fz_has_option(ctx, args, "user-password", &val))
3333
0
    fz_copy_option(ctx, val, opts->upwd_utf8, nelem(opts->upwd_utf8));
3334
0
  if (fz_has_option(ctx, args, "permissions", &val))
3335
0
    opts->permissions = fz_atoi(val);
3336
0
  else
3337
0
    opts->permissions = ~0;
3338
0
  if (fz_has_option(ctx, args, "garbage", &val))
3339
0
  {
3340
0
    if (fz_option_eq(val, "yes"))
3341
0
      opts->do_garbage = 1;
3342
0
    else if (fz_option_eq(val, "compact"))
3343
0
      opts->do_garbage = 2;
3344
0
    else if (fz_option_eq(val, "deduplicate"))
3345
0
      opts->do_garbage = 3;
3346
0
    else
3347
0
      opts->do_garbage = fz_atoi(val);
3348
0
  }
3349
0
  if (fz_has_option(ctx, args, "appearance", &val))
3350
0
  {
3351
0
    if (fz_option_eq(val, "yes"))
3352
0
      opts->do_appearance = 1;
3353
0
    else if (fz_option_eq(val, "all"))
3354
0
      opts->do_appearance = 2;
3355
0
  }
3356
3357
0
  return opts;
3358
0
}
3359
3360
/* Return 1 if the document may be saved incrementally, 0 otherwise.
 * Incremental saving is refused once a repair has been attempted on the
 * document or it has been redacted. */
int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc)
{
  int blocked = doc->repair_attempted || doc->redacted;

  return !blocked;
}
3368
3369
static void
3370
prepare_for_save(fz_context *ctx, pdf_document *doc, const pdf_write_options *in_opts)
3371
0
{
3372
  /* Rewrite (and possibly sanitize) the operator streams */
3373
0
  if (in_opts->do_clean || in_opts->do_sanitize)
3374
0
  {
3375
0
    pdf_begin_operation(ctx, doc, "Clean content streams");
3376
0
    fz_try(ctx)
3377
0
    {
3378
0
      clean_content_streams(ctx, doc, in_opts->do_sanitize, in_opts->do_ascii, in_opts->do_pretty);
3379
0
      pdf_end_operation(ctx, doc);
3380
0
    }
3381
0
    fz_catch(ctx)
3382
0
    {
3383
0
      pdf_abandon_operation(ctx, doc);
3384
0
      fz_rethrow(ctx);
3385
0
    }
3386
0
  }
3387
3388
  /* When saving a PDF with signatures the file will
3389
  first be written once, then the file will have its
3390
  digests and byte ranges calculated and and then the
3391
  signature dictionary containing them will be updated
3392
  both in memory and in the saved file. By setting this
3393
  flag we avoid a new xref section from being created when
3394
  the signature dictionary is updated. */
3395
0
  doc->save_in_progress = 1;
3396
3397
0
  if (!in_opts->do_snapshot)
3398
0
    presize_unsaved_signature_byteranges(ctx, doc);
3399
0
}
3400
3401
static pdf_obj *
3402
new_identity(fz_context *ctx, pdf_document *doc)
3403
0
{
3404
0
  unsigned char rnd[32];
3405
0
  pdf_obj *id;
3406
3407
0
  fz_memrnd(ctx, rnd, nelem(rnd));
3408
3409
0
  id = pdf_dict_put_array(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID), 2);
3410
0
  pdf_array_push_string(ctx, id, (char *) rnd + 0, nelem(rnd) / 2);
3411
0
  pdf_array_push_string(ctx, id, (char *) rnd + 16, nelem(rnd) / 2);
3412
3413
0
  return id;
3414
0
}
3415
3416
static void
3417
change_identity(fz_context *ctx, pdf_document *doc, pdf_obj *id)
3418
0
{
3419
0
  unsigned char rnd[16];
3420
0
  if (pdf_array_len(ctx, id) >= 2)
3421
0
  {
3422
    /* Update second half of ID array with new random data. */
3423
0
    fz_memrnd(ctx, rnd, 16);
3424
0
    pdf_array_put_string(ctx, id, 1, (char *)rnd, 16);
3425
0
  }
3426
0
}
3427
3428
static void
3429
create_encryption_dictionary(fz_context *ctx, pdf_document *doc, pdf_crypt *crypt)
3430
0
{
3431
0
  unsigned char *o, *u;
3432
0
  pdf_obj *encrypt;
3433
0
  int r;
3434
3435
0
  r = pdf_crypt_revision(ctx, crypt);
3436
3437
0
  encrypt = pdf_dict_put_dict(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt), 10);
3438
3439
0
  pdf_dict_put_name(ctx, encrypt, PDF_NAME(Filter), "Standard");
3440
0
  pdf_dict_put_int(ctx, encrypt, PDF_NAME(R), r);
3441
0
  pdf_dict_put_int(ctx, encrypt, PDF_NAME(V), pdf_crypt_version(ctx, crypt));
3442
0
  pdf_dict_put_int(ctx, encrypt, PDF_NAME(Length), pdf_crypt_length(ctx, crypt));
3443
0
  pdf_dict_put_int(ctx, encrypt, PDF_NAME(P), pdf_crypt_permissions(ctx, crypt));
3444
0
  pdf_dict_put_bool(ctx, encrypt, PDF_NAME(EncryptMetadata), pdf_crypt_encrypt_metadata(ctx, crypt));
3445
3446
0
  o = pdf_crypt_owner_password(ctx, crypt);
3447
0
  u = pdf_crypt_user_password(ctx, crypt);
3448
3449
0
  if (r < 4)
3450
0
  {
3451
0
    pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
3452
0
    pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
3453
0
  }
3454
0
  else if (r == 4)
3455
0
  {
3456
0
    pdf_obj *cf;
3457
3458
0
    pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
3459
0
    pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
3460
3461
0
    cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
3462
0
    cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
3463
0
    pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
3464
0
    pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV2");
3465
0
    pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 16);
3466
0
    pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
3467
0
    pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
3468
0
  }
3469
0
  else if (r == 6)
3470
0
  {
3471
0
    unsigned char *oe = pdf_crypt_owner_encryption(ctx, crypt);
3472
0
    unsigned char *ue = pdf_crypt_user_encryption(ctx, crypt);
3473
0
    pdf_obj *cf;
3474
3475
0
    pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
3476
0
    pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
3477
3478
0
    cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
3479
0
    cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
3480
0
    pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
3481
0
    pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV3");
3482
0
    pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 32);
3483
0
    pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 48);
3484
0
    pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 48);
3485
0
    pdf_dict_put_string(ctx, encrypt, PDF_NAME(OE), (char *) oe, 32);
3486
0
    pdf_dict_put_string(ctx, encrypt, PDF_NAME(UE), (char *) ue, 32);
3487
0
    pdf_dict_put_string(ctx, encrypt, PDF_NAME(Perms), (char *) pdf_crypt_permissions_encryption(ctx, crypt), 16);
3488
0
  }
3489
0
}
3490
3491
static void
3492
ensure_initial_incremental_contents(fz_context *ctx, fz_stream *in, fz_output *out, int64_t len)
3493
0
{
3494
0
  fz_stream *verify;
3495
0
  unsigned char buf0[4096];
3496
0
  unsigned char buf1[4096];
3497
0
  size_t n0, n1;
3498
0
  int64_t off = 0;
3499
0
  int same;
3500
3501
0
  if (!in)
3502
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "no input file for incremental write");
3503
3504
0
  verify = fz_stream_from_output(ctx, out);
3505
3506
0
  fz_try(ctx)
3507
0
  {
3508
    /* Compare current contents of output file (in case we append) */
3509
0
    if (verify)
3510
0
    {
3511
0
      do
3512
0
      {
3513
0
        int64_t read = sizeof(buf0);
3514
0
        if (off + read > len)
3515
0
          read = len - off;
3516
0
        fz_seek(ctx, in, off, SEEK_SET);
3517
0
        n0 = fz_read(ctx, in, buf0, read);
3518
0
        fz_seek(ctx, verify, off, SEEK_SET);
3519
0
        n1 = fz_read(ctx, verify, buf1, read);
3520
0
        same = (n0 == n1 && !memcmp(buf0, buf1, n0));
3521
0
        off += (int64_t)n0;
3522
0
      }
3523
0
      while (same && n0 > 0 && off < len);
3524
3525
0
      if (same)
3526
0
      {
3527
0
        fz_seek_output(ctx, out, len, SEEK_SET);
3528
0
        fz_truncate_output(ctx, out);
3529
0
        break; /* return from try */
3530
0
      }
3531
3532
0
      fz_seek_output(ctx, out, 0, SEEK_SET);
3533
0
    }
3534
3535
    /* Copy old contents into new file */
3536
0
    fz_seek(ctx, in, 0, SEEK_SET);
3537
0
    off = 0;
3538
0
    do
3539
0
    {
3540
0
      int64_t read = sizeof(buf0);
3541
0
      if (off + read > len)
3542
0
        read = len - off;
3543
0
      n0 = fz_read(ctx, in, buf0, read);
3544
0
      if (n0)
3545
0
        fz_write_data(ctx, out, buf0, n0);
3546
0
      off += n0;
3547
0
    }
3548
0
    while (n0 > 0 && off < len);
3549
3550
0
    if (verify)
3551
0
    {
3552
0
      fz_truncate_output(ctx, out);
3553
0
      fz_seek_output(ctx, out, 0, SEEK_END);
3554
0
    }
3555
0
  }
3556
0
  fz_always(ctx)
3557
0
    fz_drop_stream(ctx, verify);
3558
0
  fz_catch(ctx)
3559
0
    fz_rethrow(ctx);
3560
0
}
3561
3562
0
/* Limits at which objstm_gather flushes the object stream being built:
 * the number of gathered objects, and the content length in bytes.
 * The shift is parenthesized so the macro expands safely inside any
 * larger expression. */
#define OBJSTM_MAXOBJS 256
#define OBJSTM_MAXLEN (1<<24)
3564
3565
/* State carried across objstm_gather calls while objects are collected
 * into an object stream, and consumed by flush_gathered. */
typedef struct
3566
{
3567
  pdf_write_state *opts; /* Write state whose use/ofs/gen lists are updated on flush. */
3568
  int n; /* Number of objects gathered so far; reset to 0 by a flush. */
3569
  int objnum[OBJSTM_MAXOBJS]; /* Object number of each gathered object. */
3570
  size_t len[OBJSTM_MAXOBJS]; /* Bytes each gathered object added to content_buf. */
3571
  fz_buffer *content_buf; /* Printed form of the gathered objects; created on demand. */
3572
  fz_output *content_out; /* Output that writes into content_buf; created on demand. */
3573
  int root_num; /* Object number of the trailer's Root; never gathered. */
3574
  int info_num; /* Object number of the trailer's Info; never gathered. */
3575
  int sep; /* Reset to 0 by a flush; no other use is visible in this part of the file. */
3576
} objstm_gather_data;
3577
3578
static void
3579
flush_gathered(fz_context *ctx, pdf_document *doc, objstm_gather_data *data)
3580
0
{
3581
0
  pdf_obj *obj;
3582
0
  pdf_obj *ref = NULL;
3583
0
  fz_buffer *newbuf = NULL;
3584
0
  fz_output *out = NULL;
3585
0
  int i;
3586
3587
0
  if (data->n == 0)
3588
0
    return;
3589
3590
0
  obj = pdf_new_dict(ctx, doc, 4);
3591
3592
0
  fz_var(ref);
3593
0
  fz_var(newbuf);
3594
0
  fz_var(out);
3595
3596
0
  fz_try(ctx)
3597
0
  {
3598
0
    size_t pos = 0, first;
3599
0
    int num;
3600
0
    newbuf = fz_new_buffer(ctx, 128);
3601
3602
0
    out = fz_new_output_with_buffer(ctx, newbuf);
3603
3604
0
    for (i = 0; i < data->n; i++)
3605
0
    {
3606
0
      fz_write_printf(ctx, out, "%d %d ", data->objnum[i], pos);
3607
0
      pos += data->len[i];
3608
0
    }
3609
3610
0
    fz_close_output(ctx, out);
3611
0
    first = fz_tell_output(ctx, out);
3612
0
    fz_drop_output(ctx, out);
3613
0
    out = NULL;
3614
3615
0
    pdf_dict_put_int(ctx, obj, PDF_NAME(First), first);
3616
0
    pdf_dict_put_int(ctx, obj, PDF_NAME(N), data->n);
3617
0
    pdf_dict_put(ctx, obj, PDF_NAME(Type), PDF_NAME(ObjStm));
3618
3619
0
    fz_close_output(ctx, data->content_out);
3620
0
    fz_append_buffer(ctx, newbuf, data->content_buf);
3621
3622
0
    doc->xref_base = 0; /* Might have been reset by our caller */
3623
0
    ref = pdf_add_object(ctx, doc, obj);
3624
0
    pdf_update_stream(ctx, doc, ref, newbuf, 0);
3625
3626
0
    num = pdf_to_num(ctx, ref);
3627
0
    expand_lists(ctx, data->opts, num);
3628
0
    data->opts->use_list[num] = 1;
3629
3630
    /* Update all the xref entries for the objects to point into this stream. */
3631
0
    for (i = 0; i < data->n; i++)
3632
0
    {
3633
0
      pdf_xref_entry *x = pdf_get_xref_entry_no_null(ctx, doc, data->objnum[i]);
3634
0
      x->ofs = num; /* ofs = which objstm is this in */
3635
0
      x->gen = i; /* gen = nth entry in the objstm */
3636
0
      data->opts->ofs_list[data->objnum[i]] = i;
3637
0
      data->opts->gen_list[data->objnum[i]] = i;
3638
0
    }
3639
3640
0
    data->n = 0;
3641
0
    data->sep = 0;
3642
0
  }
3643
0
  fz_always(ctx)
3644
0
  {
3645
0
    fz_drop_output(ctx, data->content_out);
3646
0
    data->content_out = NULL;
3647
0
    fz_drop_buffer(ctx, data->content_buf);
3648
0
    data->content_buf = NULL;
3649
0
    pdf_drop_obj(ctx, obj);
3650
0
    pdf_drop_obj(ctx, ref);
3651
0
    fz_drop_buffer(ctx, newbuf);
3652
0
    fz_drop_output(ctx, out);
3653
0
  }
3654
0
  fz_catch(ctx)
3655
0
    fz_rethrow(ctx);
3656
0
}
3657
3658
static void
3659
objstm_gather(fz_context *ctx, pdf_xref_entry *x, int i, pdf_document *doc, objstm_gather_data *data)
3660
0
{
3661
0
  size_t olen, len;
3662
3663
0
  if (i == data->root_num || i == data->info_num)
3664
0
    return;
3665
3666
  /* Ensure the object is loaded! */
3667
0
  if (i == 0)
3668
0
    return; /* pdf_cache_object does not like being called for i == 0 which should be free. */
3669
0
  pdf_cache_object(ctx, doc, i);
3670
3671
0
  if (x->type != 'n' || x->stm_buf != NULL || x->stm_ofs != 0 || x->gen != 0)
3672
0
    return; /* Stream objects, objects with generation number != 0 cannot be put in objstms */
3673
0
  if (i == data->opts->crypt_object_number)
3674
0
    return; /* Encryption dictionaries can also not be put in objstms */
3675
3676
  /* If we are writing incrementally, then only the last one can be gathered. */
3677
0
  if (data->opts->do_incremental && !pdf_obj_is_incremental(ctx, x->obj))
3678
0
    return;
3679
3680
  /* FIXME: Can we do a pass through to check for such objects more exactly? */
3681
0
  if (pdf_is_int(ctx, x->obj))
3682
0
    return; /* In case it's a Length value. */
3683
0
  if (pdf_is_indirect(ctx, x->obj))
3684
0
    return; /* Bare indirect references are not allowed. */
3685
0
  if (data->opts->do_linear && pdf_is_dict(ctx, x->obj))
3686
0
  {
3687
0
    pdf_obj *type = pdf_dict_get(ctx, x->obj, PDF_NAME(Type));
3688
0
    if (pdf_name_eq(ctx, type, PDF_NAME(Pages)) ||
3689
0
      pdf_name_eq(ctx, type, PDF_NAME(Page)))
3690
0
      return;
3691
0
  }
3692
3693
0
  if (data->content_buf == NULL)
3694
0
    data->content_buf = fz_new_buffer(ctx, 128);
3695
0
  if (data->content_out == NULL)
3696
0
    data->content_out = fz_new_output_with_buffer(ctx, data->content_buf);
3697
3698
0
  olen = data->content_buf->len;
3699
0
  pdf_print_encrypted_obj(ctx, data->content_out, x->obj, 1, 0, NULL, 0, 0, NULL);
3700
0
  data->objnum[data->n] = i;
3701
0
  len = data->content_buf->len;
3702
0
  data->len[data->n] = len - olen;
3703
0
  x->type = 'o';
3704
0
  x->gen = data->n;
3705
0
  data->n++;
3706
0
  if (data->n == OBJSTM_MAXOBJS || len > OBJSTM_MAXLEN)
3707
0
    flush_gathered(ctx, doc, data);
3708
0
}
3709
3710
static void
3711
gather_to_objstms(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int xref_len)
3712
0
{
3713
0
  int count, num;
3714
0
  objstm_gather_data data = { 0 };
3715
3716
0
  data.opts = opts;
3717
0
  data.root_num = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)));
3718
0
  data.info_num = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)));
3719
3720
0
  count = pdf_xref_len(ctx, doc);
3721
0
  for (num = 1; num < count; ++num)
3722
0
  {
3723
0
    pdf_xref_entry *x = pdf_get_xref_entry_no_change(ctx, doc, num);
3724
0
    if (x)
3725
0
      objstm_gather(ctx, x, num, doc, &data);
3726
0
  }
3727
3728
0
  flush_gathered(ctx, doc, &data);
3729
0
}
3730
3731
static void
3732
do_pdf_save_document(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, const pdf_write_options *in_opts)
3733
0
{
3734
0
  int lastfree;
3735
0
  int num;
3736
0
  int xref_len;
3737
0
  pdf_obj *id1, *id = NULL;
3738
0
  int changed;
3739
3740
0
  if (in_opts->do_incremental)
3741
0
  {
3742
0
    ensure_initial_incremental_contents(ctx, doc->file, opts->out, doc->file_size);
3743
3744
    /* If no changes, nothing more to write */
3745
0
    if (!pdf_has_unsaved_changes(ctx, doc))
3746
0
    {
3747
0
      doc->save_in_progress = 0;
3748
0
      return;
3749
0
    }
3750
3751
0
    fz_write_string(ctx, opts->out, "\n");
3752
0
  }
3753
3754
0
  xref_len = pdf_xref_len(ctx, doc);
3755
3756
0
  pdf_begin_operation(ctx, doc, "Save document");
3757
0
  fz_try(ctx)
3758
0
  {
3759
0
    initialise_write_state(ctx, doc, in_opts, opts);
3760
3761
0
    if (!opts->dont_regenerate_id)
3762
0
    {
3763
      /* Update second half of ID array if it exists. */
3764
0
      id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
3765
0
      if (id)
3766
0
        change_identity(ctx, doc, id);
3767
0
    }
3768
3769
    /* Remove encryption dictionary if saving without encryption. */
3770
0
    if (opts->do_encrypt == PDF_ENCRYPT_NONE)
3771
0
    {
3772
0
      assert(!in_opts->do_snapshot);
3773
0
      pdf_dict_del(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
3774
0
    }
3775
3776
    /* Keep encryption dictionary if saving with old encryption. */
3777
0
    else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
3778
0
    {
3779
0
      opts->crypt = doc->crypt;
3780
0
    }
3781
3782
    /* Create encryption dictionary if saving with new encryption. */
3783
0
    else
3784
0
    {
3785
0
      assert(!opts->do_snapshot);
3786
0
      if (!id)
3787
0
        id = new_identity(ctx, doc);
3788
0
      id1 = pdf_array_get(ctx, id, 0);
3789
0
      opts->crypt = pdf_new_encrypt(ctx, opts->opwd_utf8, opts->upwd_utf8, id1, opts->permissions, opts->do_encrypt);
3790
0
      create_encryption_dictionary(ctx, doc, opts->crypt);
3791
0
    }
3792
3793
    /* Stash Encrypt entry in the writer state, in case a repair pass throws away the old trailer. */
3794
0
    opts->crypt_obj = pdf_keep_obj(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt)));
3795
3796
    /* If we're writing a snapshot, we can't be doing garbage
3797
     * collection, or linearisation, and must be writing
3798
     * incrementally. */
3799
0
    assert(!opts->do_snapshot || (opts->do_garbage == 0 && !opts->do_linear));
3800
3801
    /* Make sure any objects hidden in compressed streams have been loaded */
3802
0
    if (!opts->do_incremental)
3803
0
    {
3804
0
      pdf_ensure_solid_xref(ctx, doc, xref_len);
3805
0
      preloadobjstms(ctx, doc);
3806
0
    }
3807
3808
    /* If we're using objstms, then the version must be at least 1.5 */
3809
0
    if (opts->do_use_objstms && pdf_version(ctx, doc) < 15)
3810
0
    {
3811
0
      pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
3812
0
      pdf_obj *version = pdf_dict_get(ctx, root, PDF_NAME(Version));
3813
0
      doc->version = 15;
3814
0
      if (opts->do_incremental || version != NULL)
3815
0
      {
3816
0
        pdf_dict_put(ctx, root, PDF_NAME(Version), PDF_NAME(1_5));
3817
0
      }
3818
0
    }
3819
3820
0
    if (opts->do_preserve_metadata)
3821
0
      opts->metadata = pdf_keep_obj(ctx, pdf_metadata(ctx, doc));
3822
3823
0
    xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
3824
0
    expand_lists(ctx, opts, xref_len);
3825
3826
0
    do
3827
0
    {
3828
0
      changed = 0;
3829
      /* Sweep & mark objects from the trailer */
3830
0
      if (opts->do_garbage >= 1 || opts->do_linear)
3831
0
      {
3832
        /* Start by removing indirect /Length attributes on streams */
3833
0
        for (num = 0; num < xref_len; num++)
3834
0
          bake_stream_length(ctx, doc, num);
3835
3836
0
        (void)markobj(ctx, doc, opts, pdf_trailer(ctx, doc));
3837
0
      }
3838
0
      else
3839
0
      {
3840
0
        for (num = 0; num < xref_len; num++)
3841
0
          opts->use_list[num] = 1;
3842
0
      }
3843
3844
      /* Coalesce and renumber duplicate objects */
3845
0
      if (opts->do_garbage >= 3)
3846
0
        changed = removeduplicateobjs(ctx, doc, opts);
3847
3848
      /* Compact xref by renumbering and removing unused objects */
3849
0
      if (opts->do_garbage >= 2 || opts->do_linear)
3850
0
        compactxref(ctx, doc, opts);
3851
3852
      /* Make renumbering affect all indirect references and update xref */
3853
0
      if (opts->do_garbage >= 2 || opts->do_linear)
3854
0
        renumberobjs(ctx, doc, opts);
3855
0
    }
3856
0
    while (changed);
3857
3858
0
    opts->crypt_object_number = 0;
3859
0
    if (opts->crypt)
3860
0
    {
3861
0
      pdf_obj *crypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
3862
0
      opts->crypt_object_number = pdf_to_num(ctx, crypt);
3863
0
    }
3864
3865
0
    if (opts->do_use_objstms)
3866
0
      gather_to_objstms(ctx, doc, opts, xref_len);
3867
3868
0
    xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
3869
0
    expand_lists(ctx, opts, xref_len);
3870
3871
    /* Truncate the xref after compacting and renumbering */
3872
0
    if ((opts->do_garbage >= 2 || opts->do_linear) &&
3873
0
      !opts->do_incremental)
3874
0
    {
3875
0
      while (xref_len > 0 && !opts->use_list[xref_len-1])
3876
0
        xref_len--;
3877
0
    }
3878
3879
0
    if (opts->do_linear)
3880
0
      linearize(ctx, doc, opts);
3881
3882
0
    if (opts->do_incremental)
3883
0
    {
3884
0
      int i;
3885
3886
0
      doc->disallow_new_increments = 1;
3887
3888
0
      for (i = 0; i < doc->num_incremental_sections; i++)
3889
0
      {
3890
0
        doc->xref_base = doc->num_incremental_sections - i - 1;
3891
0
        xref_len = pdf_xref_len(ctx, doc);
3892
3893
0
        writeobjects(ctx, doc, opts, 0);
3894
3895
#ifdef DEBUG_WRITING
3896
        dump_object_details(ctx, doc, opts);
3897
#endif
3898
3899
0
        for (num = 0; num < xref_len; num++)
3900
0
        {
3901
0
          if (!opts->use_list[num] && pdf_xref_is_incremental(ctx, doc, num))
3902
0
          {
3903
            /* Make unreusable. FIXME: would be better to link to existing free list */
3904
0
            opts->gen_list[num] = 65535;
3905
0
            opts->ofs_list[num] = 0;
3906
0
          }
3907
0
        }
3908
3909
0
        opts->first_xref_offset = fz_tell_output(ctx, opts->out);
3910
0
        if (!doc->last_xref_was_old_style || opts->do_use_objstms)
3911
0
          writexrefstream(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3912
0
        else
3913
0
          writexref(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3914
3915
0
        doc->xref_sections[doc->xref_base].end_ofs = fz_tell_output(ctx, opts->out);
3916
0
      }
3917
3918
0
      doc->xref_base = 0;
3919
0
      doc->disallow_new_increments = 0;
3920
0
    }
3921
0
    else
3922
0
    {
3923
0
      writeobjects(ctx, doc, opts, 0);
3924
3925
#ifdef DEBUG_WRITING
3926
      dump_object_details(ctx, doc, opts);
3927
#endif
3928
3929
      /* Construct linked list of free object slots */
3930
0
      lastfree = 0;
3931
0
      for (num = 0; num < xref_len; num++)
3932
0
      {
3933
0
        if (!opts->use_list[num])
3934
0
        {
3935
0
          opts->gen_list[num]++;
3936
0
          opts->ofs_list[lastfree] = num;
3937
0
          lastfree = num;
3938
0
        }
3939
0
      }
3940
0
      opts->gen_list[0] = 0xffff;
3941
3942
0
      if (opts->do_linear && opts->page_count > 0)
3943
0
      {
3944
0
        opts->main_xref_offset = fz_tell_output(ctx, opts->out);
3945
0
        writexref(ctx, doc, opts, 0, opts->start, 0, 0, opts->first_xref_offset);
3946
0
        opts->file_len = fz_tell_output(ctx, opts->out);
3947
3948
0
        make_hint_stream(ctx, doc, opts);
3949
0
        if (opts->do_ascii)
3950
0
        {
3951
0
          opts->hintstream_len *= 2;
3952
0
          opts->hintstream_len += 1 + ((opts->hintstream_len+63)>>6);
3953
0
        }
3954
0
        opts->file_len += opts->hintstream_len;
3955
0
        opts->main_xref_offset += opts->hintstream_len;
3956
0
        update_linearization_params(ctx, doc, opts);
3957
0
        fz_seek_output(ctx, opts->out, 0, 0);
3958
0
        writeobjects(ctx, doc, opts, 1);
3959
3960
0
        padto(ctx, opts->out, opts->main_xref_offset);
3961
0
        if (opts->do_use_objstms)
3962
0
          writexrefstream(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3963
0
        else
3964
0
          writexref(ctx, doc, opts, 0, opts->start, 0, 0, opts->first_xref_offset);
3965
0
      }
3966
0
      else
3967
0
      {
3968
0
        opts->first_xref_offset = fz_tell_output(ctx, opts->out);
3969
0
        if (opts->do_use_objstms)
3970
0
          writexrefstream(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3971
0
        else
3972
0
          writexref(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3973
0
      }
3974
3975
0
      doc->xref_sections[0].end_ofs = fz_tell_output(ctx, opts->out);
3976
0
    }
3977
3978
0
    if (!in_opts->do_snapshot)
3979
0
    {
3980
0
      complete_signatures(ctx, doc, opts);
3981
0
    }
3982
0
    pdf_end_operation(ctx, doc);
3983
0
  }
3984
0
  fz_always(ctx)
3985
0
  {
3986
#ifdef DEBUG_LINEARIZATION
3987
    page_objects_dump(opts);
3988
    objects_dump(ctx, doc, opts);
3989
#endif
3990
0
    finalise_write_state(ctx, opts);
3991
0
    if (opts->crypt != doc->crypt)
3992
0
      pdf_drop_crypt(ctx, opts->crypt);
3993
0
    pdf_drop_obj(ctx, opts->crypt_obj);
3994
0
    pdf_drop_obj(ctx, opts->metadata);
3995
0
    doc->save_in_progress = 0;
3996
0
  }
3997
0
  fz_catch(ctx)
3998
0
  {
3999
0
    pdf_abandon_operation(ctx, doc);
4000
0
    fz_rethrow(ctx);
4001
0
  }
4002
0
}
4003
4004
/*
  Report whether any incremental xref section of the document is flagged
  as having unsaved signatures.

  Returns 1 if at least one of the first doc->num_incremental_sections
  sections has unsaved_sigs set, 0 otherwise.
*/
int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc)
{
  /* Walk from the highest incremental section index down to 0. */
  int idx = doc->num_incremental_sections;

  while (idx-- > 0)
  {
    if (doc->xref_sections[idx].unsaved_sigs)
      return 1;
  }

  return 0;
}
4016
4017
/*
  Write the document to an already opened output.

  doc, out: if either is NULL the call is a no-op.
  in_opts: write options; NULL selects a copy of pdf_default_write_options.

  Throws FZ_ERROR_ARGUMENT when the options are mutually incompatible
  (incremental together with repair/garbage collection/linearisation/
  changed encryption, snapshot together with anything but a plain
  incremental write) or when the document has unsaved signatures and the
  output cannot be read back as a stream.
*/
void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, const pdf_write_options *in_opts)
{
  pdf_write_options fallback = pdf_default_write_options;
  pdf_write_state state = { 0 };

  if (doc == NULL || out == NULL)
    return;

  if (in_opts == NULL)
    in_opts = &fallback;

  /* Incremental writing rules out several other features. */
  if (in_opts->do_incremental)
  {
    if (doc->repair_attempted)
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes on a repaired file");
    if (in_opts->do_garbage)
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes with garbage collection");
    if (in_opts->do_linear)
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes with linearisation");
    if (in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes when changing encryption");
  }

  /* A snapshot must be a bare incremental write with nothing else enabled. */
  if (in_opts->do_snapshot &&
    (in_opts->do_incremental == 0 ||
    in_opts->do_pretty ||
    in_opts->do_ascii ||
    in_opts->do_compress ||
    in_opts->do_compress_images ||
    in_opts->do_compress_fonts ||
    in_opts->do_decompress ||
    in_opts->do_garbage ||
    in_opts->do_linear ||
    in_opts->do_clean ||
    in_opts->do_sanitize ||
    in_opts->do_appearance ||
    in_opts->do_encrypt != PDF_ENCRYPT_KEEP))
  {
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't use these options when snapshotting!");
  }

  if (pdf_has_unsaved_sigs(ctx, doc) && !fz_output_supports_stream(ctx, out))
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't write pdf that has unsaved sigs to a fz_output unless it supports fz_stream_from_output!");

  prepare_for_save(ctx, doc, in_opts);

  state.out = out;

  do_pdf_save_document(ctx, doc, &state, in_opts);
}
4062
4063
/*
  Save the document to the file named by filename.

  doc: if NULL the call is a no-op.
  in_opts: write options; NULL selects a copy of pdf_default_write_options.

  The option combination is validated first (FZ_ERROR_ARGUMENT is thrown
  for incompatible settings). If do_appearance is positive, appearance
  (re)synthesis is requested for every annotation and widget on every
  page before saving; a failure on a page is downgraded to a warning.

  The output file is opened in append mode for incremental saves and is
  truncated otherwise. It is always dropped before returning, and any
  exception from the save itself is rethrown.
*/
void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, const pdf_write_options *in_opts)
{
  pdf_write_options fallback = pdf_default_write_options;
  pdf_write_state state = { 0 };

  if (doc == NULL)
    return;

  if (in_opts == NULL)
    in_opts = &fallback;

  /* Incremental writing rules out several other features. */
  if (in_opts->do_incremental)
  {
    if (!doc->file)
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes on a new document");
    if (doc->repair_attempted)
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes on a repaired file");
    if (in_opts->do_garbage)
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes with garbage collection");
    if (in_opts->do_linear)
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes with linearisation");
    if (in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
      fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes when changing encryption");
  }

  /* A snapshot must be a bare incremental write with nothing else enabled. */
  if (in_opts->do_snapshot &&
    (in_opts->do_incremental == 0 ||
    in_opts->do_pretty ||
    in_opts->do_ascii ||
    in_opts->do_compress ||
    in_opts->do_compress_images ||
    in_opts->do_compress_fonts ||
    in_opts->do_decompress ||
    in_opts->do_garbage ||
    in_opts->do_linear ||
    in_opts->do_clean ||
    in_opts->do_sanitize ||
    in_opts->do_appearance ||
    in_opts->do_encrypt != PDF_ENCRYPT_KEEP))
  {
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't use these options when snapshotting!");
  }

  if (in_opts->do_appearance > 0)
  {
    int total = pdf_count_pages(ctx, doc);
    int pageno;

    for (pageno = 0; pageno < total; ++pageno)
    {
      pdf_page *page = pdf_load_page(ctx, doc, pageno);
      fz_try(ctx)
      {
        pdf_annot *item;

        /* do_appearance > 1 asks for resynthesis rather than synthesis. */
        for (item = pdf_first_annot(ctx, page); item != NULL; item = pdf_next_annot(ctx, item))
        {
          if (in_opts->do_appearance > 1)
            pdf_annot_request_resynthesis(ctx, item);
          else
            pdf_annot_request_synthesis(ctx, item);
        }

        for (item = pdf_first_widget(ctx, page); item != NULL; item = pdf_next_widget(ctx, item))
        {
          if (in_opts->do_appearance > 1)
            pdf_annot_request_resynthesis(ctx, item);
          else
            pdf_annot_request_synthesis(ctx, item);
        }

        pdf_update_page(ctx, page);
      }
      fz_always(ctx)
      {
        fz_drop_page(ctx, &page->super);
      }
      fz_catch(ctx)
      {
        fz_warn(ctx, "could not create annotation appearances");
      }
    }
  }

  if (in_opts->do_incremental)
    state.bias = doc->bias;

  prepare_for_save(ctx, doc, in_opts);

  /* Append to the existing file when incremental, otherwise start afresh. */
  state.out = fz_new_output_with_path(ctx, filename, in_opts->do_incremental ? 1 : 0);

  fz_try(ctx)
  {
    do_pdf_save_document(ctx, doc, &state, in_opts);
    fz_close_output(ctx, state.out);
  }
  fz_always(ctx)
  {
    fz_drop_output(ctx, state.out);
    state.out = NULL;
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }
}
4158
4159
/*
  Save a snapshot of the document to the named file.

  Thin wrapper: forwards to pdf_save_document using
  pdf_snapshot_write_options as the write options.
*/
void pdf_save_snapshot(fz_context *ctx, pdf_document *doc, const char *filename)
{
  const pdf_write_options *snapshot_opts = &pdf_snapshot_write_options;

  pdf_save_document(ctx, doc, filename, snapshot_opts);
}
4163
4164
/*
  Write a snapshot of the document to an already opened output.

  Thin wrapper: forwards to pdf_write_document using
  pdf_snapshot_write_options as the write options.
*/
void pdf_write_snapshot(fz_context *ctx, pdf_document *doc, fz_output *out)
{
  const pdf_write_options *snapshot_opts = &pdf_snapshot_write_options;

  pdf_write_document(ctx, doc, out, snapshot_opts);
}
4168
4169
char *
4170
pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts)
4171
0
{
4172
0
#define ADD_OPT(S) do { if (!first) fz_strlcat(buffer, ",", buffer_len); fz_strlcat(buffer, (S), buffer_len); first = 0; } while (0)
4173
4174
0
  int first = 1;
4175
0
  *buffer = 0;
4176
0
  if (opts->do_decompress)
4177
0
    ADD_OPT("decompress=yes");
4178
0
  if (opts->do_compress)
4179
0
    ADD_OPT("compress=yes");
4180
0
  if (opts->do_compress_fonts)
4181
0
    ADD_OPT("compress-fonts=yes");
4182
0
  if (opts->do_compress_images)
4183
0
    ADD_OPT("compress-images=yes");
4184
0
  if (opts->do_ascii)
4185
0
    ADD_OPT("ascii=yes");
4186
0
  if (opts->do_pretty)
4187
0
    ADD_OPT("pretty=yes");
4188
0
  if (opts->do_linear)
4189
0
    ADD_OPT("linearize=yes");
4190
0
  if (opts->do_clean)
4191
0
    ADD_OPT("clean=yes");
4192
0
  if (opts->do_sanitize)
4193
0
    ADD_OPT("sanitize=yes");
4194
0
  if (opts->do_incremental)
4195
0
    ADD_OPT("incremental=yes");
4196
0
  if (opts->do_encrypt == PDF_ENCRYPT_NONE)
4197
0
    ADD_OPT("decrypt=yes");
4198
0
  else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
4199
0
    ADD_OPT("decrypt=no");
4200
0
  switch(opts->do_encrypt)
4201
0
  {
4202
0
  default:
4203
0
  case PDF_ENCRYPT_UNKNOWN:
4204
0
    break;
4205
0
  case PDF_ENCRYPT_NONE:
4206
0
    ADD_OPT("encrypt=no");
4207
0
    break;
4208
0
  case PDF_ENCRYPT_KEEP:
4209
0
    ADD_OPT("encrypt=keep");
4210
0
    break;
4211
0
  case PDF_ENCRYPT_RC4_40:
4212
0
    ADD_OPT("encrypt=rc4-40");
4213
0
    break;
4214
0
  case PDF_ENCRYPT_RC4_128:
4215
0
    ADD_OPT("encrypt=rc4-128");
4216
0
    break;
4217
0
  case PDF_ENCRYPT_AES_128:
4218
0
    ADD_OPT("encrypt=aes-128");
4219
0
    break;
4220
0
  case PDF_ENCRYPT_AES_256:
4221
0
    ADD_OPT("encrypt=aes-256");
4222
0
    break;
4223
0
  }
4224
0
  if (strlen(opts->opwd_utf8)) {
4225
0
    ADD_OPT("owner-password=");
4226
0
    fz_strlcat(buffer, opts->opwd_utf8, buffer_len);
4227
0
  }
4228
0
  if (strlen(opts->upwd_utf8)) {
4229
0
    ADD_OPT("user-password=");
4230
0
    fz_strlcat(buffer, opts->upwd_utf8, buffer_len);
4231
0
  }
4232
0
  {
4233
0
    char temp[32];
4234
0
    ADD_OPT("permissions=");
4235
0
    fz_snprintf(temp, sizeof(temp), "%d", opts->permissions);
4236
0
    fz_strlcat(buffer, temp, buffer_len);
4237
0
  }
4238
0
  switch(opts->do_garbage)
4239
0
  {
4240
0
  case 0:
4241
0
    break;
4242
0
  case 1:
4243
0
    ADD_OPT("garbage=yes");
4244
0
    break;
4245
0
  case 2:
4246
0
    ADD_OPT("garbage=compact");
4247
0
    break;
4248
0
  case 3:
4249
0
    ADD_OPT("garbage=deduplicate");
4250
0
    break;
4251
0
  default:
4252
0
  {
4253
0
    char temp[32];
4254
0
    fz_snprintf(temp, sizeof(temp), "%d", opts->do_garbage);
4255
0
    ADD_OPT("garbage=");
4256
0
    fz_strlcat(buffer, temp, buffer_len);
4257
0
    break;
4258
0
  }
4259
0
  }
4260
0
  switch(opts->do_appearance)
4261
0
  {
4262
0
  case 1:
4263
0
    ADD_OPT("appearance=yes");
4264
0
    break;
4265
0
  case 2:
4266
0
    ADD_OPT("appearance=all");
4267
0
    break;
4268
0
  }
4269
4270
0
#undef ADD_OPT
4271
4272
0
  return buffer;
4273
0
}
4274
4275
typedef struct
4276
{
4277
  fz_document_writer super;
4278
  pdf_document *pdf;
4279
  pdf_write_options opts;
4280
  fz_output *out;
4281
4282
  fz_rect mediabox;
4283
  pdf_obj *resources;
4284
  fz_buffer *contents;
4285
} pdf_writer;
4286
4287
static fz_device *
4288
pdf_writer_begin_page(fz_context *ctx, fz_document_writer *wri_, fz_rect mediabox)
4289
0
{
4290
0
  pdf_writer *wri = (pdf_writer*)wri_;
4291
0
  wri->mediabox = mediabox; // TODO: handle non-zero x0,y0
4292
0
  return pdf_page_write(ctx, wri->pdf, wri->mediabox, &wri->resources, &wri->contents);
4293
0
}
4294
4295
static void
4296
pdf_writer_end_page(fz_context *ctx, fz_document_writer *wri_, fz_device *dev)
4297
0
{
4298
0
  pdf_writer *wri = (pdf_writer*)wri_;
4299
0
  pdf_obj *obj = NULL;
4300
4301
0
  fz_var(obj);
4302
4303
0
  fz_try(ctx)
4304
0
  {
4305
0
    fz_close_device(ctx, dev);
4306
0
    obj = pdf_add_page(ctx, wri->pdf, wri->mediabox, 0, wri->resources, wri->contents);
4307
0
    pdf_insert_page(ctx, wri->pdf, -1, obj);
4308
0
  }
4309
0
  fz_always(ctx)
4310
0
  {
4311
0
    fz_drop_device(ctx, dev);
4312
0
    pdf_drop_obj(ctx, obj);
4313
0
    fz_drop_buffer(ctx, wri->contents);
4314
0
    wri->contents = NULL;
4315
0
    pdf_drop_obj(ctx, wri->resources);
4316
0
    wri->resources = NULL;
4317
0
  }
4318
0
  fz_catch(ctx)
4319
0
    fz_rethrow(ctx);
4320
0
}
4321
4322
static void
4323
pdf_writer_close_writer(fz_context *ctx, fz_document_writer *wri_)
4324
0
{
4325
0
  pdf_writer *wri = (pdf_writer*)wri_;
4326
0
  pdf_write_document(ctx, wri->pdf, wri->out, &wri->opts);
4327
0
  fz_close_output(ctx, wri->out);
4328
0
}
4329
4330
static void
4331
pdf_writer_drop_writer(fz_context *ctx, fz_document_writer *wri_)
4332
0
{
4333
0
  pdf_writer *wri = (pdf_writer*)wri_;
4334
0
  fz_drop_buffer(ctx, wri->contents);
4335
0
  pdf_drop_obj(ctx, wri->resources);
4336
0
  pdf_drop_document(ctx, wri->pdf);
4337
0
  fz_drop_output(ctx, wri->out);
4338
0
}
4339
4340
fz_document_writer *
4341
fz_new_pdf_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
4342
0
{
4343
0
  pdf_writer *wri;
4344
4345
0
  fz_var(wri);
4346
4347
0
  fz_try(ctx)
4348
0
  {
4349
0
    wri = fz_new_derived_document_writer(ctx, pdf_writer, pdf_writer_begin_page, pdf_writer_end_page, pdf_writer_close_writer, pdf_writer_drop_writer);
4350
0
    pdf_parse_write_options(ctx, &wri->opts, options);
4351
0
    wri->out = out;
4352
0
    wri->pdf = pdf_create_document(ctx);
4353
0
  }
4354
0
  fz_catch(ctx)
4355
0
  {
4356
0
    fz_drop_output(ctx, out);
4357
0
    pdf_drop_document(ctx, wri->pdf);
4358
0
    fz_free(ctx, wri);
4359
0
    fz_rethrow(ctx);
4360
0
  }
4361
4362
0
  return (fz_document_writer*)wri;
4363
0
}
4364
4365
fz_document_writer *
4366
fz_new_pdf_writer(fz_context *ctx, const char *path, const char *options)
4367
0
{
4368
0
  fz_output *out = fz_new_output_with_path(ctx, path ? path : "out.pdf", 0);
4369
0
  return fz_new_pdf_writer_with_output(ctx, out, options);
4370
0
}
4371
4372
/*
  Serialise the document's journal to an already opened output.

  Does nothing if doc or out is NULL. Throws FZ_ERROR_ARGUMENT if the
  document has no journal.
*/
void pdf_write_journal(fz_context *ctx, pdf_document *doc, fz_output *out)
{
  if (doc == NULL || out == NULL)
    return;

  if (doc->journal == NULL)
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't write non-existent journal");

  pdf_serialise_journal(ctx, doc, out);
}
4382
4383
/*
  Write the document's journal to the named file.

  Does nothing if doc is NULL. The file is opened without append, closed
  on success and always dropped; any error is rethrown.
*/
void pdf_save_journal(fz_context *ctx, pdf_document *doc, const char *filename)
{
  fz_output *dest;

  if (doc == NULL)
    return;

  dest = fz_new_output_with_path(ctx, filename, 0);
  fz_try(ctx)
  {
    pdf_write_journal(ctx, doc, dest);
    fz_close_output(ctx, dest);
  }
  fz_always(ctx)
  {
    fz_drop_output(ctx, dest);
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }
}
4401
4402
/*
  Read a journal for the document from an open stream.

  Thin wrapper around pdf_deserialise_journal; doc and stm are passed
  through unchecked.
*/
void pdf_read_journal(fz_context *ctx, pdf_document *doc, fz_stream *stm)
{
  pdf_deserialise_journal(ctx, doc, stm);
}
4406
4407
/*
  Load a journal for the document from the named file.

  Does nothing if doc is NULL. The file stream is always dropped; any
  error raised while reading is rethrown.
*/
void pdf_load_journal(fz_context *ctx, pdf_document *doc, const char *filename)
{
  fz_stream *src;

  if (doc == NULL)
    return;

  src = fz_open_file(ctx, filename);
  fz_try(ctx)
  {
    pdf_read_journal(ctx, doc, src);
  }
  fz_always(ctx)
  {
    fz_drop_stream(ctx, src);
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }
}