Coverage Report

Created: 2025-07-23 06:37

/src/mupdf/source/pdf/pdf-clean.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2025 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "pdf-annot-imp.h"
25
26
#include <string.h>
27
#include <assert.h>
28
29
static void
30
pdf_filter_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *xobj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up);
31
32
static void
33
pdf_filter_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up);
34
35
static void
36
pdf_filter_resources(fz_context *ctx, pdf_document *doc, pdf_obj *in_res, pdf_obj *res, pdf_filter_options *options, pdf_cycle_list *cycle_up)
37
0
{
38
0
  pdf_obj *obj;
39
0
  int i, n;
40
41
0
  if (!options->recurse)
42
0
    return;
43
44
  /* ExtGState */
45
0
  obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState));
46
0
  if (obj)
47
0
  {
48
0
    n = pdf_dict_len(ctx, obj);
49
0
    for (i = 0; i < n; i++)
50
0
    {
51
0
      pdf_obj *smask = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask));
52
0
      if (smask)
53
0
      {
54
0
        pdf_obj *g = pdf_dict_get(ctx, smask, PDF_NAME(G));
55
0
        if (g)
56
0
        {
57
          /* Transparency group XObject */
58
0
          pdf_filter_xobject(ctx, doc, g, in_res, options, cycle_up);
59
0
        }
60
0
      }
61
0
    }
62
0
  }
63
64
  /* Pattern */
65
0
  obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern));
66
0
  if (obj)
67
0
  {
68
0
    n = pdf_dict_len(ctx, obj);
69
0
    for (i = 0; i < n; i++)
70
0
    {
71
0
      pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
72
0
      if (pat && pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1)
73
0
      {
74
0
        pdf_filter_xobject(ctx, doc, pat, in_res, options, cycle_up);
75
0
      }
76
0
    }
77
0
  }
78
79
  /* XObject */
80
0
  if (!options->instance_forms)
81
0
  {
82
0
    obj = pdf_dict_get(ctx, res, PDF_NAME(XObject));
83
0
    if (obj)
84
0
    {
85
0
      n = pdf_dict_len(ctx, obj);
86
0
      for (i = 0; i < n; i++)
87
0
      {
88
0
        pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
89
0
        if (xobj && pdf_dict_get(ctx, xobj, PDF_NAME(Subtype)) == PDF_NAME(Form))
90
0
        {
91
0
          pdf_filter_xobject(ctx, doc, xobj, in_res, options, cycle_up);
92
0
        }
93
0
      }
94
0
    }
95
0
  }
96
97
  /* Font */
98
0
  obj = pdf_dict_get(ctx, res, PDF_NAME(Font));
99
0
  if (obj)
100
0
  {
101
0
    n = pdf_dict_len(ctx, obj);
102
0
    for (i = 0; i < n; i++)
103
0
    {
104
0
      pdf_obj *font = pdf_dict_get_val(ctx, obj, i);
105
0
      if (font && pdf_dict_get(ctx, font, PDF_NAME(Subtype)) == PDF_NAME(Type3))
106
0
      {
107
0
        pdf_filter_type3(ctx, doc, font, in_res, options, cycle_up);
108
0
      }
109
0
    }
110
0
  }
111
112
0
}
113
114
/*
115
  Clean a content stream's rendering operations, with an optional post
116
  processing step.
117
118
  Firstly, this filters the PDF operators used to avoid (some cases of)
119
  repetition, and leaves the content stream in a balanced state with an
120
  unchanged top level matrix etc. At the same time, the resources actually
121
  used are collected into a new resource dictionary.
122
123
  Next, the resources themselves are recursively cleaned (as appropriate)
124
  in the same way, if the 'recurse' flag is set.
125
*/
126
static void
127
pdf_filter_content_stream(
128
  fz_context *ctx,
129
  pdf_document *doc,
130
  pdf_obj *in_stm,
131
  pdf_obj *in_res,
132
  fz_matrix transform,
133
  pdf_filter_options *options,
134
  int struct_parents,
135
  fz_buffer **out_buf,
136
  pdf_obj **out_res,
137
  pdf_cycle_list *cycle_up)
138
0
{
139
0
  pdf_processor *proc_buffer = NULL;
140
0
  pdf_processor *top = NULL;
141
0
  pdf_processor **list = NULL;
142
0
  int num_filters = 0;
143
0
  int i;
144
145
0
  fz_var(proc_buffer);
146
147
0
  *out_buf = NULL;
148
0
  *out_res = NULL;
149
150
0
  if (options->filters)
151
0
    for (; options->filters[num_filters].filter != NULL; num_filters++);
152
153
0
  if (num_filters > 0)
154
0
    list = fz_calloc(ctx, num_filters, sizeof(pdf_processor *));
155
156
0
  fz_try(ctx)
157
0
  {
158
0
    *out_buf = fz_new_buffer(ctx, 1024);
159
0
    top = proc_buffer = pdf_new_buffer_processor(ctx, *out_buf, options->ascii, options->newlines);
160
0
    if (num_filters > 0)
161
0
    {
162
0
      for (i = num_filters - 1; i >= 0; i--)
163
0
        top = list[i] = options->filters[i].filter(ctx, doc, top, struct_parents, transform, options, options->filters[i].options);
164
0
    }
165
166
0
    pdf_process_contents(ctx, top, doc, in_res, in_stm, NULL, out_res);
167
0
    pdf_close_processor(ctx, top);
168
169
0
    pdf_filter_resources(ctx, doc, in_res, *out_res, options, cycle_up);
170
0
  }
171
0
  fz_always(ctx)
172
0
  {
173
0
    for (i = 0; i < num_filters; i++)
174
0
      pdf_drop_processor(ctx, list[i]);
175
0
    pdf_drop_processor(ctx, proc_buffer);
176
0
    fz_free(ctx, list);
177
0
  }
178
0
  fz_catch(ctx)
179
0
  {
180
0
    fz_drop_buffer(ctx, *out_buf);
181
0
    *out_buf = NULL;
182
0
    pdf_drop_obj(ctx, *out_res);
183
0
    *out_res = NULL;
184
0
    fz_rethrow(ctx);
185
0
  }
186
0
}
187
188
/*
189
  Clean a Type 3 font's CharProcs content streams. This works almost
190
  exactly like pdf_filter_content_stream, but the resource dictionary is
191
  shared between all off the CharProcs.
192
*/
193
static void
194
pdf_filter_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up)
195
0
{
196
0
  pdf_cycle_list cycle;
197
0
  pdf_processor *proc_buffer = NULL;
198
0
  pdf_processor *proc_filter = NULL;
199
0
  pdf_obj *in_res;
200
0
  pdf_obj *out_res = NULL;
201
0
  pdf_obj *charprocs;
202
0
  int i, n;
203
0
  int num_filters = 0;
204
0
  pdf_processor **list = NULL;
205
0
  fz_buffer *buffer = NULL;
206
0
  pdf_processor *top = NULL;
207
0
  pdf_obj *res = NULL;
208
0
  fz_buffer *new_buf = NULL;
209
210
0
  fz_var(out_res);
211
0
  fz_var(proc_buffer);
212
0
  fz_var(proc_filter);
213
0
  fz_var(buffer);
214
0
  fz_var(res);
215
0
  fz_var(new_buf);
216
217
  /* We cannot combine instancing with type3 fonts. The new names for
218
   * instanced form/image resources would clash, since they start over for
219
   * each content stream. This is not a problem for now, because we only
220
   * use instancing with redaction, and redaction doesn't clean type3
221
   * fonts.
222
   */
223
0
  assert(!options->instance_forms);
224
225
  /* Avoid recursive cycles! */
226
0
  if (pdf_cycle(ctx, &cycle, cycle_up, obj))
227
0
    return;
228
229
0
  if (options->filters)
230
0
    for (; options->filters[num_filters].filter != NULL; num_filters++);
231
232
0
  if (num_filters > 0)
233
0
    list = fz_calloc(ctx, num_filters, sizeof(pdf_processor *));
234
235
0
  fz_try(ctx)
236
0
  {
237
0
    in_res = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
238
0
    if (!in_res)
239
0
      in_res = page_res;
240
241
0
    buffer = fz_new_buffer(ctx, 1024);
242
0
    top = proc_buffer = pdf_new_buffer_processor(ctx, buffer, options->ascii, options->newlines);
243
0
    if (num_filters > 0)
244
0
    {
245
0
      for (i = num_filters - 1; i >= 0; i--)
246
0
        top = list[i] = options->filters[i].filter(ctx, doc, top, -1, fz_identity, options, options->filters[i].options);
247
0
    }
248
249
0
    pdf_processor_push_resources(ctx, top, in_res);
250
0
    charprocs = pdf_dict_get(ctx, obj, PDF_NAME(CharProcs));
251
0
    n = pdf_dict_len(ctx, charprocs);
252
0
    for (i = 0; i < n; i++)
253
0
    {
254
0
      pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i);
255
256
0
      if (i > 0)
257
0
      {
258
0
        pdf_reset_processor(ctx, top);
259
0
        fz_clear_buffer(ctx, buffer);
260
0
      }
261
0
      pdf_process_raw_contents(ctx, top, doc, in_res, val, NULL);
262
263
0
      pdf_close_processor(ctx, top);
264
265
0
      if (!options->no_update)
266
0
      {
267
0
        new_buf = fz_clone_buffer(ctx, buffer);
268
0
        pdf_update_stream(ctx, doc, val, new_buf, 0);
269
0
        fz_drop_buffer(ctx, new_buf);
270
0
        new_buf = NULL;
271
0
      }
272
0
    }
273
274
0
  }
275
0
  fz_always(ctx)
276
0
  {
277
0
    res = pdf_processor_pop_resources(ctx, top);
278
0
    for (i = 0; i < num_filters; i++)
279
0
      pdf_drop_processor(ctx, list[i]);
280
0
    pdf_drop_processor(ctx, proc_buffer);
281
0
    fz_free(ctx, list);
282
0
    fz_drop_buffer(ctx, new_buf);
283
0
    fz_drop_buffer(ctx, buffer);
284
0
  }
285
0
  fz_catch(ctx)
286
0
  {
287
0
    pdf_drop_obj(ctx, res);
288
0
    fz_rethrow(ctx);
289
0
  }
290
0
  pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), res);
291
0
}
292
293
static void
294
pdf_filter_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *stm, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up)
295
0
{
296
0
  pdf_cycle_list cycle;
297
0
  int struct_parents;
298
0
  pdf_obj *new_res = NULL;
299
0
  fz_buffer *new_buf = NULL;
300
0
  pdf_obj *old_res;
301
302
0
  fz_var(new_buf);
303
0
  fz_var(new_res);
304
305
  // TODO for RJW: XObject can also be a StructParent; how do we handle that case?
306
307
0
  struct_parents = pdf_dict_get_int_default(ctx, stm, PDF_NAME(StructParents), -1);
308
309
0
  old_res = pdf_dict_get(ctx, stm, PDF_NAME(Resources));
310
0
  if (!old_res)
311
0
    old_res = page_res;
312
313
  // TODO: don't clean objects more than once.
314
315
  /* Avoid recursive cycles! */
316
0
  if (pdf_cycle(ctx, &cycle, cycle_up, stm))
317
0
    return;
318
0
  fz_try(ctx)
319
0
  {
320
0
    pdf_filter_content_stream(ctx, doc, stm, old_res, fz_identity, options, struct_parents, &new_buf, &new_res, &cycle);
321
0
    if (!options->no_update)
322
0
    {
323
0
      pdf_update_stream(ctx, doc, stm, new_buf, 0);
324
0
      pdf_dict_put(ctx, stm, PDF_NAME(Resources), new_res);
325
0
    }
326
0
  }
327
0
  fz_always(ctx)
328
0
  {
329
0
    fz_drop_buffer(ctx, new_buf);
330
0
    pdf_drop_obj(ctx, new_res);
331
0
  }
332
0
  fz_catch(ctx)
333
0
    fz_rethrow(ctx);
334
0
}
335
336
pdf_obj *
337
pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix transform, pdf_filter_options *options, pdf_cycle_list *cycle_up)
338
0
{
339
0
  pdf_cycle_list cycle;
340
0
  pdf_document *doc = pdf_get_bound_document(ctx, old_xobj);
341
0
  pdf_obj *new_xobj;
342
0
  pdf_obj *new_res, *old_res;
343
0
  fz_buffer *new_buf;
344
0
  int struct_parents;
345
0
  fz_matrix matrix;
346
347
0
  fz_var(new_xobj);
348
0
  fz_var(new_buf);
349
0
  fz_var(new_res);
350
351
  // TODO for RJW: XObject can also be a StructParent; how do we handle that case?
352
  // TODO for RJW: will we run into trouble by duplicating StructParents stuff?
353
354
0
  struct_parents = pdf_dict_get_int_default(ctx, old_xobj, PDF_NAME(StructParents), -1);
355
356
0
  old_res = pdf_dict_get(ctx, old_xobj, PDF_NAME(Resources));
357
0
  if (!old_res)
358
0
    old_res = page_res;
359
360
0
  if (pdf_cycle(ctx, &cycle, cycle_up, old_xobj))
361
0
    return pdf_keep_obj(ctx, old_xobj);
362
363
0
  matrix = pdf_dict_get_matrix(ctx, old_xobj, PDF_NAME(Matrix));
364
0
  transform = fz_concat(matrix, transform);
365
366
0
  fz_try(ctx)
367
0
  {
368
0
    new_xobj = pdf_add_object_drop(ctx, doc, pdf_copy_dict(ctx, old_xobj));
369
0
    pdf_filter_content_stream(ctx, doc, old_xobj, old_res, transform, options, struct_parents, &new_buf, &new_res, &cycle);
370
0
    if (!options->no_update)
371
0
    {
372
0
      pdf_update_stream(ctx, doc, new_xobj, new_buf, 0);
373
0
      pdf_dict_put(ctx, new_xobj, PDF_NAME(Resources), new_res);
374
0
    }
375
0
  }
376
0
  fz_always(ctx)
377
0
  {
378
0
    fz_drop_buffer(ctx, new_buf);
379
0
    pdf_drop_obj(ctx, new_res);
380
0
  }
381
0
  fz_catch(ctx)
382
0
  {
383
0
    pdf_drop_obj(ctx, new_xobj);
384
0
    fz_rethrow(ctx);
385
0
  }
386
387
0
  return new_xobj;
388
0
}
389
390
/*
  Filter the contents of a page.

  The page's content stream is run through pdf_filter_content_stream,
  then options->complete (if set) is invoked on the resulting buffer.
  Unless options->no_update is set, the page's Contents entry is
  replaced by a newly created stream object holding the filtered data
  and its Resources entry by the filtered resource dictionary.
*/
void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_filter_options *options)
{
  pdf_obj *page_contents, *page_res;
  pdf_obj *filtered_res;
  fz_buffer *filtered_buf;
  int struct_parents = pdf_dict_get_int_default(ctx, page->obj, PDF_NAME(StructParents), -1);

  page_contents = pdf_page_contents(ctx, page);
  page_res = pdf_page_resources(ctx, page);

  pdf_filter_content_stream(ctx, doc, page_contents, page_res, fz_identity, options, struct_parents, &filtered_buf, &filtered_res, NULL);

  fz_try(ctx)
  {
    if (options->complete)
      options->complete(ctx, filtered_buf, options->opaque);

    if (!options->no_update)
    {
      /* Always create a new stream object to replace the page contents. This is useful
         both if the contents is an array of streams, is entirely missing or if the contents
         are shared between pages. */
      pdf_obj *fresh = pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, 1));
      pdf_dict_put_drop(ctx, page->obj, PDF_NAME(Contents), fresh);
      pdf_update_stream(ctx, doc, fresh, filtered_buf, 0);
      pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), filtered_res);
    }
  }
  fz_always(ctx)
  {
    fz_drop_buffer(ctx, filtered_buf);
    pdf_drop_obj(ctx, filtered_res);
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }
}
427
428
/*
  Filter the appearance streams of an annotation.

  Every direct entry of the annotation's AP dictionary that is a stream
  is filtered in place with pdf_filter_xobject; entries that are not
  streams are left alone. Nothing happens if AP is not a dictionary.
*/
void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, pdf_filter_options *options)
{
  pdf_obj *appearances = pdf_dict_get(ctx, annot->obj, PDF_NAME(AP));
  int idx, count;

  if (!pdf_is_dict(ctx, appearances))
    return;

  count = pdf_dict_len(ctx, appearances);
  for (idx = 0; idx < count; idx++)
  {
    pdf_obj *entry = pdf_dict_get_val(ctx, appearances, idx);
    if (!pdf_is_stream(ctx, entry))
      continue;
    pdf_filter_xobject(ctx, doc, entry, NULL, options, NULL);
  }
}
444
445
/* REDACTIONS */
446
447
struct redact_filter_state {
448
  pdf_filter_options filter_opts;
449
  pdf_sanitize_filter_options sanitize_opts;
450
  pdf_filter_factory filter_list[2];
451
  pdf_page *page;
452
  pdf_annot *target; // NULL if all
453
  int line_art;
454
  int text;
455
};
456
457
458
/*
  Append the content stream of 'obj' to 'buffer' by running it through
  a buffer processor. The resources returned by pdf_xobject_resources
  for obj are used, falling back to the page's resources when there are
  none.
*/
static void pdf_run_obj_to_buf(fz_context *ctx, fz_buffer *buffer, pdf_obj *obj, pdf_page *page)
{
  pdf_processor *writer = pdf_new_buffer_processor(ctx, buffer, 0, 0);
  pdf_obj *resources;

  fz_try(ctx)
  {
    resources = pdf_xobject_resources(ctx, obj);
    if (!resources)
      resources = pdf_page_resources(ctx, page);

    pdf_process_contents(ctx, writer, page->doc, resources, obj, NULL, NULL);
    pdf_close_processor(ctx, writer);
  }
  fz_always(ctx)
  {
    pdf_drop_processor(ctx, writer);
  }
  fz_catch(ctx)
  {
    fz_rethrow(ctx);
  }
}
478
479
static void
480
pdf_redact_end_page(fz_context *ctx, fz_buffer *buf, void *opaque)
481
0
{
482
0
  struct redact_filter_state *red = opaque;
483
0
  pdf_page *page = red->page;
484
0
  pdf_annot *annot;
485
0
  pdf_obj *qp;
486
0
  int i, n;
487
488
0
  fz_append_string(ctx, buf, " 0 g\n");
489
490
0
  for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
491
0
  {
492
0
    if (red->target != NULL && red->target != annot)
493
0
      continue;
494
0
    if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
495
0
    {
496
0
      pdf_obj *ro = pdf_dict_get(ctx, annot->obj, PDF_NAME(RO));
497
0
      if (ro)
498
0
      {
499
0
        pdf_run_obj_to_buf(ctx, buf, ro, page);
500
0
      }
501
0
      else
502
0
      {
503
0
        qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
504
0
        n = pdf_array_len(ctx, qp);
505
0
        if (n > 0)
506
0
        {
507
0
          for (i = 0; i < n; i += 8)
508
0
          {
509
0
            fz_quad q = pdf_to_quad(ctx, qp, i);
510
0
            fz_append_printf(ctx, buf, "%g %g m\n", q.ll.x, q.ll.y);
511
0
            fz_append_printf(ctx, buf, "%g %g l\n", q.lr.x, q.lr.y);
512
0
            fz_append_printf(ctx, buf, "%g %g l\n", q.ur.x, q.ur.y);
513
0
            fz_append_printf(ctx, buf, "%g %g l\n", q.ul.x, q.ul.y);
514
0
            fz_append_string(ctx, buf, "f\n");
515
0
          }
516
0
        }
517
0
        else
518
0
        {
519
0
          fz_rect r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
520
0
          fz_append_printf(ctx, buf, "%g %g m\n", r.x0, r.y0);
521
0
          fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y0);
522
0
          fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y1);
523
0
          fz_append_printf(ctx, buf, "%g %g l\n", r.x0, r.y1);
524
0
          fz_append_string(ctx, buf, "f\n");
525
0
        }
526
0
      }
527
0
    }
528
0
  }
529
0
}
530
531
static int
532
pdf_redact_text_filter(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox)
533
0
{
534
0
  struct redact_filter_state *red = opaque;
535
0
  pdf_page *page = red->page;
536
0
  pdf_annot *annot;
537
0
  pdf_obj *qp;
538
0
  fz_rect r;
539
0
  fz_quad q;
540
0
  int i, n;
541
0
  float w, h;
542
543
0
  trm = fz_concat(trm, ctm);
544
0
  bbox = fz_transform_rect(bbox, trm);
545
546
  /* Shrink character bbox a bit */
547
0
  w = bbox.x1 - bbox.x0;
548
0
  h = bbox.y1 - bbox.y0;
549
0
  bbox.x0 += w / 10;
550
0
  bbox.x1 -= w / 10;
551
0
  bbox.y0 += h / 10;
552
0
  bbox.y1 -= h / 10;
553
554
0
  for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
555
0
  {
556
0
    if (red->target != NULL && red->target != annot)
557
0
      continue;
558
0
    if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
559
0
    {
560
0
      qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
561
0
      n = pdf_array_len(ctx, qp);
562
      /* Note, we test for the intersection being a valid rectangle, NOT
563
       * a non-empty one. This is because we can have 'empty' character
564
       * boxes (say for diacritics), that while 0 width, do have a defined
565
       * position on the plane, and hence inclusion makes sense. */
566
0
      if (n > 0)
567
0
      {
568
0
        for (i = 0; i < n; i += 8)
569
0
        {
570
0
          q = pdf_to_quad(ctx, qp, i);
571
0
          r = fz_rect_from_quad(q);
572
0
          if (fz_is_valid_rect(fz_intersect_rect(bbox, r)))
573
0
            return 1;
574
0
        }
575
0
      }
576
0
      else
577
0
      {
578
0
        r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
579
0
        if (fz_is_valid_rect(fz_intersect_rect(bbox, r)))
580
0
          return 1;
581
0
      }
582
0
    }
583
0
  }
584
585
0
  return 0;
586
0
}
587
588
static fz_pixmap *
589
pdf_redact_image_imp(fz_context *ctx, fz_matrix ctm, fz_image *image, fz_pixmap *pixmap, fz_pixmap **pmask, fz_quad q)
590
0
{
591
0
  fz_matrix inv_ctm;
592
0
  fz_irect r;
593
0
  int x, y, k, n, bpp;
594
0
  unsigned char white;
595
0
  fz_pixmap *mask = *pmask;
596
0
  int pixmap_cloned = 0;
597
598
0
  if (!pixmap)
599
0
  {
600
0
    fz_pixmap *original = fz_get_pixmap_from_image(ctx, image, NULL, NULL, NULL, NULL);
601
0
    int imagemask = image->imagemask;
602
603
0
    fz_try(ctx)
604
0
    {
605
0
      pixmap = fz_clone_pixmap(ctx, original);
606
0
      if (imagemask)
607
0
        fz_invert_pixmap_alpha(ctx, pixmap);
608
0
    }
609
0
    fz_always(ctx)
610
0
      fz_drop_pixmap(ctx, original);
611
0
    fz_catch(ctx)
612
0
      fz_rethrow(ctx);
613
0
    pixmap_cloned = 1;
614
0
  }
615
616
0
  if (!mask && image->mask)
617
0
  {
618
0
    fz_pixmap *original = fz_get_pixmap_from_image(ctx, image->mask, NULL, NULL, NULL, NULL);
619
620
0
    fz_try(ctx)
621
0
    {
622
0
      mask = fz_clone_pixmap(ctx, original);
623
0
      *pmask = mask;
624
0
    }
625
0
    fz_always(ctx)
626
0
    {
627
0
      fz_drop_pixmap(ctx, original);
628
0
    }
629
0
    fz_catch(ctx)
630
0
    {
631
0
      if (pixmap_cloned)
632
0
        fz_drop_pixmap(ctx, pixmap);
633
0
      fz_rethrow(ctx);
634
0
    }
635
0
  }
636
637
  /* If we have a 1x1 image, to which a mask is being applied
638
   * then it's the mask we really want to change, not the
639
   * image. We might have just a small section of the image
640
   * being covered, and setting the whole thing to white
641
   * will blank stuff outside the desired area. */
642
0
  if (!mask || pixmap->w > 1 || pixmap->h > 1)
643
0
  {
644
0
    n = pixmap->n - pixmap->alpha;
645
0
    bpp = pixmap->n;
646
0
    if (fz_colorspace_is_subtractive(ctx, pixmap->colorspace))
647
0
      white = 0;
648
0
    else
649
0
      white = 255;
650
651
0
    inv_ctm = fz_post_scale(fz_invert_matrix(ctm), pixmap->w, pixmap->h);
652
0
    r = fz_round_rect(fz_transform_rect(fz_rect_from_quad(q), inv_ctm));
653
0
    r.x0 = fz_clampi(r.x0, 0, pixmap->w);
654
0
    r.x1 = fz_clampi(r.x1, 0, pixmap->w);
655
0
    r.y1 = fz_clampi(pixmap->h - r.y1, 0, pixmap->h);
656
0
    r.y0 = fz_clampi(pixmap->h - r.y0, 0, pixmap->h);
657
0
    for (y = r.y1; y < r.y0; ++y)
658
0
    {
659
0
      for (x = r.x0; x < r.x1; ++x)
660
0
      {
661
0
        unsigned char *s = &pixmap->samples[(size_t)y * pixmap->stride + (size_t)x * bpp];
662
0
        for (k = 0; k < n; ++k)
663
0
          s[k] = white;
664
0
        if (pixmap->alpha)
665
0
          s[k] = 255;
666
0
      }
667
0
    }
668
0
  }
669
670
0
  if (mask)
671
0
  {
672
0
    inv_ctm = fz_post_scale(fz_invert_matrix(ctm), mask->w, mask->h);
673
0
    r = fz_round_rect(fz_transform_rect(fz_rect_from_quad(q), inv_ctm));
674
0
    r.x0 = fz_clampi(r.x0, 0, mask->w);
675
0
    r.x1 = fz_clampi(r.x1, 0, mask->w);
676
0
    r.y1 = fz_clampi(mask->h - r.y1, 0, mask->h);
677
0
    r.y0 = fz_clampi(mask->h - r.y0, 0, mask->h);
678
0
    for (y = r.y1; y < r.y0; ++y)
679
0
    {
680
0
      unsigned char *s = &mask->samples[(size_t)y * mask->stride + (size_t)r.x0];
681
0
      memset(s, 0xff, r.x1-r.x0);
682
0
    }
683
0
  }
684
685
0
  return pixmap;
686
0
}
687
688
static fz_image *
689
pdf_redact_image_filter_remove(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip)
690
0
{
691
0
  fz_pixmap *redacted = NULL;
692
0
  struct redact_filter_state *red = opaque;
693
0
  pdf_page *page = red->page;
694
0
  pdf_annot *annot;
695
0
  pdf_obj *qp;
696
0
  fz_rect area;
697
0
  fz_rect r;
698
0
  int i, n;
699
700
0
  fz_var(redacted);
701
702
0
  area = fz_transform_rect(fz_unit_rect, ctm);
703
704
0
  for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
705
0
  {
706
0
    if (red->target != NULL && red->target != annot)
707
0
      continue;
708
0
    if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
709
0
    {
710
0
      qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
711
0
      n = pdf_array_len(ctx, qp);
712
0
      if (n > 0)
713
0
      {
714
0
        for (i = 0; i < n; i += 8)
715
0
        {
716
0
          r = fz_rect_from_quad(pdf_to_quad(ctx, qp, i));
717
0
          r = fz_intersect_rect(r, area);
718
0
          if (!fz_is_empty_rect(r))
719
0
            return NULL;
720
0
        }
721
0
      }
722
0
      else
723
0
      {
724
0
        r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
725
0
        r = fz_intersect_rect(r, area);
726
0
        if (!fz_is_empty_rect(r))
727
0
          return NULL;
728
0
      }
729
0
    }
730
0
  }
731
732
0
  return fz_keep_image(ctx, image);
733
0
}
734
735
static fz_image *
736
pdf_redact_image_filter_remove_invisible(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip)
737
0
{
738
0
  fz_pixmap *redacted = NULL;
739
0
  struct redact_filter_state *red = opaque;
740
0
  pdf_page *page = red->page;
741
0
  pdf_annot *annot;
742
0
  pdf_obj *qp;
743
0
  fz_rect area;
744
0
  fz_rect r;
745
0
  int i, n;
746
747
0
  fz_var(redacted);
748
749
0
  area = fz_transform_rect(fz_unit_rect, ctm);
750
751
  /* Restrict the are of the image to that which can actually be seen. */
752
0
  area = fz_intersect_rect(area, clip);
753
754
0
  for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
755
0
  {
756
0
    if (red->target != NULL && red->target != annot)
757
0
      continue;
758
0
    if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
759
0
    {
760
0
      qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
761
0
      n = pdf_array_len(ctx, qp);
762
0
      if (n > 0)
763
0
      {
764
0
        for (i = 0; i < n; i += 8)
765
0
        {
766
0
          r = fz_rect_from_quad(pdf_to_quad(ctx, qp, i));
767
0
          r = fz_intersect_rect(r, area);
768
0
          if (!fz_is_empty_rect(r))
769
0
            return NULL;
770
0
        }
771
0
      }
772
0
      else
773
0
      {
774
0
        r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
775
0
        r = fz_intersect_rect(r, area);
776
0
        if (!fz_is_empty_rect(r))
777
0
          return NULL;
778
0
      }
779
0
    }
780
0
  }
781
782
0
  return fz_keep_image(ctx, image);
783
0
}
784
785
static fz_image *
786
pdf_redact_image_filter_pixels(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip)
787
0
{
788
0
  fz_pixmap *redacted = NULL;
789
0
  fz_pixmap *mask = NULL;
790
0
  struct redact_filter_state *red = opaque;
791
0
  pdf_page *page = red->page;
792
0
  pdf_annot *annot;
793
0
  pdf_obj *qp;
794
0
  fz_quad area, q;
795
0
  fz_rect r;
796
0
  int i, n;
797
798
0
  fz_var(redacted);
799
0
  fz_var(mask);
800
801
0
  area = fz_transform_quad(fz_quad_from_rect(fz_unit_rect), ctm);
802
803
  /* First see if we can redact the image completely */
804
0
  for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
805
0
  {
806
0
    if (red->target != NULL && red->target != annot)
807
0
      continue;
808
0
    if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
809
0
    {
810
0
      qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
811
0
      n = pdf_array_len(ctx, qp);
812
0
      if (n > 0)
813
0
      {
814
0
        for (i = 0; i < n; i += 8)
815
0
        {
816
0
          q = pdf_to_quad(ctx, qp, i);
817
0
          if (fz_is_quad_inside_quad(area, q))
818
0
            return NULL;
819
0
        }
820
0
      }
821
0
      else
822
0
      {
823
0
        r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
824
0
        q = fz_quad_from_rect(r);
825
0
        if (fz_is_quad_inside_quad(area, q))
826
0
          return NULL;
827
0
      }
828
0
    }
829
0
  }
830
831
  /* Blank out redacted parts of the image if necessary */
832
0
  fz_try(ctx)
833
0
  {
834
0
    for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
835
0
    {
836
0
      if (red->target != NULL && red->target != annot)
837
0
        continue;
838
0
      if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
839
0
      {
840
0
        qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
841
0
        n = pdf_array_len(ctx, qp);
842
0
        if (n > 0)
843
0
        {
844
0
          for (i = 0; i < n; i += 8)
845
0
          {
846
0
            q = pdf_to_quad(ctx, qp, i);
847
0
            if (fz_is_quad_intersecting_quad(area, q))
848
0
              redacted = pdf_redact_image_imp(ctx, ctm, image, redacted, &mask, q);
849
0
          }
850
0
        }
851
0
        else
852
0
        {
853
0
          r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
854
0
          q = fz_quad_from_rect(r);
855
0
          if (fz_is_quad_intersecting_quad(area, q))
856
0
            redacted = pdf_redact_image_imp(ctx, ctm, image, redacted, &mask, q);
857
0
        }
858
0
      }
859
0
    }
860
0
  }
861
0
  fz_catch(ctx)
862
0
  {
863
0
    fz_drop_pixmap(ctx, redacted);
864
0
    fz_drop_pixmap(ctx, mask);
865
0
    fz_rethrow(ctx);
866
0
  }
867
868
0
  if (redacted)
869
0
  {
870
0
    int imagemask = image->imagemask;
871
0
    fz_image *imask = fz_keep_image(ctx, image->mask);
872
873
0
    fz_var(imask);
874
875
0
    fz_try(ctx)
876
0
    {
877
0
      if (mask)
878
0
      {
879
0
        fz_drop_image(ctx, imask);
880
0
        imask = NULL;
881
0
        imask = fz_new_image_from_pixmap(ctx, mask, NULL);
882
0
      }
883
0
      image = fz_new_image_from_pixmap(ctx, redacted, NULL);
884
0
      image->imagemask = imagemask;
885
0
      image->mask = imask;
886
0
      imask = NULL;
887
0
    }
888
0
    fz_always(ctx)
889
0
    {
890
0
      fz_drop_pixmap(ctx, redacted);
891
0
      fz_drop_pixmap(ctx, mask);
892
0
      fz_drop_image(ctx, imask);
893
0
    }
894
0
    fz_catch(ctx)
895
0
      fz_rethrow(ctx);
896
0
    return image;
897
0
  }
898
899
0
  return fz_keep_image(ctx, image);
900
0
}
901
902
/* Returns 0 if area does not intersect with any of our redactions.
903
 * Returns 2 if area is completely included within one of our redactions.
904
 * Returns 1 otherwise. */
905
static int
906
rect_touches_redactions(fz_context *ctx, fz_rect area, struct redact_filter_state *red)
907
0
{
908
0
  pdf_annot *annot;
909
0
  pdf_obj *qp;
910
0
  fz_quad q;
911
0
  fz_rect r, s;
912
0
  int i, n;
913
0
  pdf_page *page = red->page;
914
915
0
  for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
916
0
  {
917
0
    if (red->target != NULL && red->target != annot)
918
0
      continue;
919
0
    if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
920
0
    {
921
0
      qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
922
0
      n = pdf_array_len(ctx, qp);
923
0
      if (n > 0)
924
0
      {
925
0
        for (i = 0; i < n; i += 8)
926
0
        {
927
0
          q = pdf_to_quad(ctx, qp, i);
928
0
          r = fz_rect_from_quad(q);
929
0
          s = fz_intersect_rect(r, area);
930
0
          if (!fz_is_empty_rect(s))
931
0
          {
932
0
            if (fz_contains_rect(r, area))
933
0
              return 2;
934
0
            return 1;
935
0
          }
936
0
        }
937
0
      }
938
0
      else
939
0
      {
940
0
        r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
941
0
        s = fz_intersect_rect(r, area);
942
0
        if (!fz_is_empty_rect(s))
943
0
        {
944
0
          if (fz_contains_rect(r, area))
945
0
            return 2;
946
0
          return 1;
947
0
        }
948
0
      }
949
0
    }
950
0
  }
951
0
  return 0;
952
0
}
953
954
static void
955
remove_page_link(fz_context *ctx, pdf_page *page, pdf_obj *obj)
956
0
{
957
0
  pdf_link **linkp = (pdf_link **)&page->links;
958
0
  pdf_link *link;
959
960
0
  while ((link = *linkp) != NULL)
961
0
  {
962
0
    if (link->obj == obj)
963
0
    {
964
0
      *linkp = (pdf_link *)link->super.next;
965
0
      link->super.next = NULL;
966
0
      fz_drop_link(ctx, &link->super);
967
0
      break;
968
0
    }
969
0
    else
970
0
    {
971
0
      linkp = (pdf_link **)&link->super.next;
972
0
    }
973
0
  }
974
0
}
975
976
static void
977
pdf_redact_page_links(fz_context *ctx, struct redact_filter_state *red)
978
0
{
979
0
  pdf_obj *annots;
980
0
  pdf_obj *link;
981
0
  fz_rect area;
982
0
  int k;
983
984
0
  annots = pdf_dict_get(ctx, red->page->obj, PDF_NAME(Annots));
985
0
  k = 0;
986
0
  while (k < pdf_array_len(ctx, annots))
987
0
  {
988
0
    link = pdf_array_get(ctx, annots, k);
989
0
    if (pdf_dict_get(ctx, link, PDF_NAME(Subtype)) == PDF_NAME(Link))
990
0
    {
991
0
      area = pdf_dict_get_rect(ctx, link, PDF_NAME(Rect));
992
0
      if (rect_touches_redactions(ctx, area, red))
993
0
      {
994
0
        pdf_array_delete(ctx, annots, k);
995
0
        remove_page_link(ctx, red->page, link);
996
0
        continue;
997
0
      }
998
0
    }
999
0
    ++k;
1000
0
  }
1001
0
}
1002
1003
static void
1004
pdf_redact_page_annotations(fz_context *ctx, struct redact_filter_state *red)
1005
0
{
1006
0
  pdf_annot *annot;
1007
0
  fz_rect area;
1008
1009
0
restart:
1010
0
  for (annot = pdf_first_annot(ctx, red->page); annot; annot = pdf_next_annot(ctx, annot))
1011
0
  {
1012
0
    if (pdf_annot_type(ctx, annot) == PDF_ANNOT_FREE_TEXT)
1013
0
    {
1014
0
      area = pdf_dict_get_rect(ctx, pdf_annot_obj(ctx, annot), PDF_NAME(Rect));
1015
0
      if (rect_touches_redactions(ctx, area, red))
1016
0
      {
1017
0
        pdf_delete_annot(ctx, red->page, annot);
1018
0
        goto restart;
1019
0
      }
1020
0
    }
1021
0
  }
1022
0
}
1023
1024
/* Cull callback installed in the redaction sanitize options.
 *
 * 'opaque' is the struct redact_filter_state. Only path fills/strokes and
 * clip paths are ever culled here, and only according to red->line_art:
 * REMOVE_IF_COVERED culls when bbox lies completely inside a redaction,
 * REMOVE_IF_TOUCHED culls when bbox intersects one. Returns nonzero to
 * cull, 0 to keep. */
static int culler(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type)
{
  struct redact_filter_state *red = opaque;
  int is_line_art;

  switch (type)
  {
  case FZ_CULL_PATH_FILL:
  case FZ_CULL_PATH_STROKE:
  case FZ_CULL_PATH_FILL_STROKE:
  case FZ_CULL_CLIP_PATH_FILL:
  case FZ_CULL_CLIP_PATH_STROKE:
  case FZ_CULL_CLIP_PATH_FILL_STROKE:
    is_line_art = 1;
    break;
  default:
    is_line_art = 0;
    break;
  }

  if (!is_line_art)
    return 0;

  if (red->line_art == PDF_REDACT_LINE_ART_REMOVE_IF_COVERED)
    return (rect_touches_redactions(ctx, bbox, red) == 2);
  if (red->line_art == PDF_REDACT_LINE_ART_REMOVE_IF_TOUCHED)
    return (rect_touches_redactions(ctx, bbox, red) != 0);

  return 0;
}
1045
1046
static
1047
void init_redact_filter(fz_context *ctx, pdf_redact_options *redact_opts, struct redact_filter_state *red, pdf_page *page, pdf_annot *target)
1048
0
{
1049
0
  int black_boxes = redact_opts ? redact_opts->black_boxes : 0;
1050
0
  int image_method = redact_opts ? redact_opts->image_method : PDF_REDACT_IMAGE_PIXELS;
1051
0
  int line_art = redact_opts ? redact_opts->line_art : PDF_REDACT_LINE_ART_NONE;
1052
0
  int text = redact_opts ? redact_opts->text : PDF_REDACT_TEXT_REMOVE;
1053
1054
0
  memset(&red->filter_opts, 0, sizeof red->filter_opts);
1055
0
  memset(&red->sanitize_opts, 0, sizeof red->sanitize_opts);
1056
1057
0
  red->filter_opts.recurse = 0; /* don't redact patterns, softmasks, and type3 fonts */
1058
0
  red->filter_opts.instance_forms = 1; /* redact xobjects with instancing */
1059
0
  red->filter_opts.ascii = 1;
1060
0
  red->filter_opts.opaque = red;
1061
0
  red->filter_opts.filters = red->filter_list;
1062
0
  if (black_boxes)
1063
0
    red->filter_opts.complete = pdf_redact_end_page;
1064
0
  red->line_art = line_art;
1065
0
  red->text = text;
1066
1067
0
  red->sanitize_opts.opaque = red;
1068
0
  if (text == PDF_REDACT_TEXT_REMOVE)
1069
0
    red->sanitize_opts.text_filter = pdf_redact_text_filter;
1070
0
  if (image_method == PDF_REDACT_IMAGE_PIXELS)
1071
0
    red->sanitize_opts.image_filter = pdf_redact_image_filter_pixels;
1072
0
  if (image_method == PDF_REDACT_IMAGE_REMOVE)
1073
0
    red->sanitize_opts.image_filter = pdf_redact_image_filter_remove;
1074
0
  if (image_method == PDF_REDACT_IMAGE_REMOVE_UNLESS_INVISIBLE)
1075
0
    red->sanitize_opts.image_filter = pdf_redact_image_filter_remove_invisible;
1076
0
  red->sanitize_opts.culler = culler;
1077
1078
0
  red->filter_list[0].filter = pdf_new_sanitize_filter;
1079
0
  red->filter_list[0].options = &red->sanitize_opts;
1080
0
  red->filter_list[1].filter = NULL;
1081
0
  red->filter_list[1].options = NULL;
1082
1083
0
  red->page = page;
1084
0
  red->target = target;
1085
0
}
1086
1087
static int
1088
pdf_apply_redaction_imp(fz_context *ctx, pdf_page *page, pdf_annot *target, pdf_redact_options *redact_opts)
1089
0
{
1090
0
  pdf_annot *annot;
1091
0
  int has_redactions = 0;
1092
0
  struct redact_filter_state red;
1093
0
  pdf_document *doc = page->doc;
1094
1095
0
  for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) {
1096
0
    if (target != NULL && target != annot)
1097
0
      continue;
1098
0
    if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
1099
0
      has_redactions = 1;
1100
0
  }
1101
1102
0
  if (!has_redactions)
1103
0
    return 0;
1104
1105
0
  init_redact_filter(ctx, redact_opts, &red, page, target);
1106
1107
0
  if (target)
1108
0
    pdf_begin_operation(ctx, doc, "Apply redaction");
1109
0
  else
1110
0
    pdf_begin_operation(ctx, doc, "Apply redactions on page");
1111
0
  fz_try(ctx)
1112
0
  {
1113
0
    pdf_filter_page_contents(ctx, doc, page, &red.filter_opts);
1114
0
    pdf_redact_page_links(ctx, &red);
1115
0
    pdf_redact_page_annotations(ctx, &red);
1116
1117
0
    annot = pdf_first_annot(ctx, page);
1118
0
    while (annot)
1119
0
    {
1120
0
      if (target == NULL || annot == target)
1121
0
      {
1122
0
        if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
1123
0
        {
1124
0
          pdf_delete_annot(ctx, page, annot);
1125
0
          annot = pdf_first_annot(ctx, page);
1126
0
          continue;
1127
0
        }
1128
0
      }
1129
0
      annot = pdf_next_annot(ctx, annot);
1130
0
    }
1131
1132
0
    doc->redacted = 1;
1133
0
    pdf_end_operation(ctx, doc);
1134
0
  }
1135
0
  fz_catch(ctx)
1136
0
  {
1137
0
    pdf_abandon_operation(ctx, doc);
1138
0
    fz_rethrow(ctx);
1139
0
  }
1140
1141
0
  return 1;
1142
0
}
1143
1144
int
1145
pdf_redact_page(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_redact_options *redact_opts)
1146
0
{
1147
0
  if (page == NULL || page->doc != doc)
1148
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't redact a page not from the doc");
1149
0
  return pdf_apply_redaction_imp(ctx, page, NULL, redact_opts);
1150
0
}
1151
1152
int
1153
pdf_apply_redaction(fz_context *ctx, pdf_annot *annot, pdf_redact_options *redact_opts)
1154
0
{
1155
0
  return pdf_apply_redaction_imp(ctx, annot->page, annot, redact_opts);
1156
0
}
1157
1158
/* Hard clipping of pages */
1159
1160
struct clip_filter_state {
1161
  pdf_filter_options filter_opts;
1162
  pdf_sanitize_filter_options sanitize_opts;
1163
  pdf_filter_factory filter_list[2];
1164
  pdf_page *page;
1165
  fz_rect clip;
1166
};
1167
1168
/* Cull callback for hard clipping.
 *
 * 'opaque' is the struct clip_filter_state. Paths, clip paths, glyphs,
 * images and shadings are culled (nonzero return) when their bbox has an
 * empty intersection with hc->clip; every other cull type is kept. */
static int clip_culler(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type)
{
  struct clip_filter_state *hc = opaque;
  fz_rect visible;

  switch (type)
  {
  case FZ_CULL_PATH_FILL:
  case FZ_CULL_PATH_STROKE:
  case FZ_CULL_PATH_FILL_STROKE:
  case FZ_CULL_CLIP_PATH_FILL:
  case FZ_CULL_CLIP_PATH_STROKE:
  case FZ_CULL_CLIP_PATH_FILL_STROKE:
  case FZ_CULL_GLYPH:
  case FZ_CULL_IMAGE:
  case FZ_CULL_SHADING:
    break;
  default:
    return 0;
  }

  visible = fz_intersect_rect(bbox, hc->clip);
  return (fz_is_empty_rect(visible));
}
1188
1189
static
1190
void init_clip_filter(fz_context *ctx, struct clip_filter_state *hc, pdf_page *page, fz_rect *clip)
1191
0
{
1192
0
  memset(&hc->filter_opts, 0, sizeof hc->filter_opts);
1193
0
  memset(&hc->sanitize_opts, 0, sizeof hc->sanitize_opts);
1194
1195
0
  hc->filter_opts.recurse = 0; /* don't redact patterns, softmasks, and type3 fonts */
1196
0
  hc->filter_opts.instance_forms = 1; /* redact xobjects with instancing */
1197
0
  hc->filter_opts.ascii = 0;
1198
0
  hc->filter_opts.opaque = hc;
1199
0
  hc->filter_opts.filters = hc->filter_list;
1200
0
  hc->clip = *clip;
1201
1202
0
  hc->sanitize_opts.opaque = hc;
1203
0
  hc->sanitize_opts.culler = clip_culler;
1204
1205
0
  hc->filter_list[0].filter = pdf_new_sanitize_filter;
1206
0
  hc->filter_list[0].options = &hc->sanitize_opts;
1207
0
  hc->filter_list[1].filter = NULL;
1208
0
  hc->filter_list[1].options = NULL;
1209
1210
0
  hc->page = page;
1211
0
}
1212
1213
static void
1214
pdf_clip_page_links(fz_context *ctx, struct clip_filter_state *hc)
1215
0
{
1216
0
  pdf_obj *annots;
1217
0
  pdf_obj *link;
1218
0
  fz_rect area;
1219
0
  int k;
1220
1221
0
  annots = pdf_dict_get(ctx, hc->page->obj, PDF_NAME(Annots));
1222
0
  k = 0;
1223
0
  while (k < pdf_array_len(ctx, annots))
1224
0
  {
1225
0
    link = pdf_array_get(ctx, annots, k);
1226
0
    if (pdf_dict_get(ctx, link, PDF_NAME(Subtype)) == PDF_NAME(Link))
1227
0
    {
1228
0
      area = pdf_dict_get_rect(ctx, link, PDF_NAME(Rect));
1229
0
      if (fz_is_empty_rect(fz_intersect_rect(area, hc->clip)))
1230
0
      {
1231
0
        pdf_array_delete(ctx, annots, k);
1232
0
        continue;
1233
0
      }
1234
0
    }
1235
0
    ++k;
1236
0
  }
1237
0
}
1238
1239
static void
1240
pdf_clip_page_annotations(fz_context *ctx, struct clip_filter_state *hc)
1241
0
{
1242
0
  pdf_annot *annot;
1243
0
  fz_rect area;
1244
1245
0
restart:
1246
0
  for (annot = pdf_first_annot(ctx, hc->page); annot; annot = pdf_next_annot(ctx, annot))
1247
0
  {
1248
0
    if (pdf_annot_type(ctx, annot) == PDF_ANNOT_FREE_TEXT)
1249
0
    {
1250
0
      area = pdf_dict_get_rect(ctx, pdf_annot_obj(ctx, annot), PDF_NAME(Rect));
1251
0
      if (fz_is_empty_rect(fz_intersect_rect(area, hc->clip)))
1252
0
      {
1253
0
        pdf_delete_annot(ctx, hc->page, annot);
1254
0
        goto restart;
1255
0
      }
1256
0
    }
1257
0
  }
1258
0
}
1259
1260
void
1261
pdf_clip_page(fz_context *ctx, pdf_page *page, fz_rect *clip)
1262
0
{
1263
0
  pdf_document *doc;
1264
0
  struct clip_filter_state hc;
1265
1266
0
  if (page == NULL)
1267
0
    return;
1268
1269
0
  doc = page->doc;
1270
1271
0
  init_clip_filter(ctx, &hc, page, clip);
1272
1273
0
  pdf_begin_operation(ctx, doc, "Apply hard clip to page");
1274
0
  fz_try(ctx)
1275
0
  {
1276
0
    pdf_filter_page_contents(ctx, doc, page, &hc.filter_opts);
1277
0
    pdf_clip_page_links(ctx, &hc);
1278
0
    pdf_clip_page_annotations(ctx, &hc);
1279
0
    pdf_end_operation(ctx, doc);
1280
0
  }
1281
0
  fz_catch(ctx)
1282
0
  {
1283
0
    pdf_abandon_operation(ctx, doc);
1284
0
    fz_rethrow(ctx);
1285
0
  }
1286
0
}