/src/mupdf/source/pdf/pdf-clean.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2025 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "pdf-annot-imp.h" |
25 | | |
26 | | #include <string.h> |
27 | | #include <assert.h> |
28 | | |
29 | | static void |
30 | | pdf_filter_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *xobj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up); |
31 | | |
32 | | static void |
33 | | pdf_filter_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up); |
34 | | |
35 | | static void |
36 | | pdf_filter_resources(fz_context *ctx, pdf_document *doc, pdf_obj *in_res, pdf_obj *res, pdf_filter_options *options, pdf_cycle_list *cycle_up) |
37 | 0 | { |
38 | 0 | pdf_obj *obj; |
39 | 0 | int i, n; |
40 | |
|
41 | 0 | if (!options->recurse) |
42 | 0 | return; |
43 | | |
44 | | /* ExtGState */ |
45 | 0 | obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState)); |
46 | 0 | if (obj) |
47 | 0 | { |
48 | 0 | n = pdf_dict_len(ctx, obj); |
49 | 0 | for (i = 0; i < n; i++) |
50 | 0 | { |
51 | 0 | pdf_obj *smask = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask)); |
52 | 0 | if (smask) |
53 | 0 | { |
54 | 0 | pdf_obj *g = pdf_dict_get(ctx, smask, PDF_NAME(G)); |
55 | 0 | if (g) |
56 | 0 | { |
57 | | /* Transparency group XObject */ |
58 | 0 | pdf_filter_xobject(ctx, doc, g, in_res, options, cycle_up); |
59 | 0 | } |
60 | 0 | } |
61 | 0 | } |
62 | 0 | } |
63 | | |
64 | | /* Pattern */ |
65 | 0 | obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern)); |
66 | 0 | if (obj) |
67 | 0 | { |
68 | 0 | n = pdf_dict_len(ctx, obj); |
69 | 0 | for (i = 0; i < n; i++) |
70 | 0 | { |
71 | 0 | pdf_obj *pat = pdf_dict_get_val(ctx, obj, i); |
72 | 0 | if (pat && pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1) |
73 | 0 | { |
74 | 0 | pdf_filter_xobject(ctx, doc, pat, in_res, options, cycle_up); |
75 | 0 | } |
76 | 0 | } |
77 | 0 | } |
78 | | |
79 | | /* XObject */ |
80 | 0 | if (!options->instance_forms) |
81 | 0 | { |
82 | 0 | obj = pdf_dict_get(ctx, res, PDF_NAME(XObject)); |
83 | 0 | if (obj) |
84 | 0 | { |
85 | 0 | n = pdf_dict_len(ctx, obj); |
86 | 0 | for (i = 0; i < n; i++) |
87 | 0 | { |
88 | 0 | pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i); |
89 | 0 | if (xobj && pdf_dict_get(ctx, xobj, PDF_NAME(Subtype)) == PDF_NAME(Form)) |
90 | 0 | { |
91 | 0 | pdf_filter_xobject(ctx, doc, xobj, in_res, options, cycle_up); |
92 | 0 | } |
93 | 0 | } |
94 | 0 | } |
95 | 0 | } |
96 | | |
97 | | /* Font */ |
98 | 0 | obj = pdf_dict_get(ctx, res, PDF_NAME(Font)); |
99 | 0 | if (obj) |
100 | 0 | { |
101 | 0 | n = pdf_dict_len(ctx, obj); |
102 | 0 | for (i = 0; i < n; i++) |
103 | 0 | { |
104 | 0 | pdf_obj *font = pdf_dict_get_val(ctx, obj, i); |
105 | 0 | if (font && pdf_dict_get(ctx, font, PDF_NAME(Subtype)) == PDF_NAME(Type3)) |
106 | 0 | { |
107 | 0 | pdf_filter_type3(ctx, doc, font, in_res, options, cycle_up); |
108 | 0 | } |
109 | 0 | } |
110 | 0 | } |
111 | |
|
112 | 0 | } |
113 | | |
114 | | /* |
115 | | Clean a content stream's rendering operations, with an optional post |
116 | | processing step. |
117 | | |
118 | | Firstly, this filters the PDF operators used to avoid (some cases of) |
119 | | repetition, and leaves the content stream in a balanced state with an |
120 | | unchanged top level matrix etc. At the same time, the resources actually |
121 | | used are collected into a new resource dictionary. |
122 | | |
123 | | Next, the resources themselves are recursively cleaned (as appropriate) |
124 | | in the same way, if the 'recurse' flag is set. |
125 | | */ |
126 | | static void |
127 | | pdf_filter_content_stream( |
128 | | fz_context *ctx, |
129 | | pdf_document *doc, |
130 | | pdf_obj *in_stm, |
131 | | pdf_obj *in_res, |
132 | | fz_matrix transform, |
133 | | pdf_filter_options *options, |
134 | | int struct_parents, |
135 | | fz_buffer **out_buf, |
136 | | pdf_obj **out_res, |
137 | | pdf_cycle_list *cycle_up) |
138 | 0 | { |
139 | 0 | pdf_processor *proc_buffer = NULL; |
140 | 0 | pdf_processor *top = NULL; |
141 | 0 | pdf_processor **list = NULL; |
142 | 0 | int num_filters = 0; |
143 | 0 | int i; |
144 | |
|
145 | 0 | fz_var(proc_buffer); |
146 | |
|
147 | 0 | *out_buf = NULL; |
148 | 0 | *out_res = NULL; |
149 | |
|
150 | 0 | if (options->filters) |
151 | 0 | for (; options->filters[num_filters].filter != NULL; num_filters++); |
152 | |
|
153 | 0 | if (num_filters > 0) |
154 | 0 | list = fz_calloc(ctx, num_filters, sizeof(pdf_processor *)); |
155 | |
|
156 | 0 | fz_try(ctx) |
157 | 0 | { |
158 | 0 | *out_buf = fz_new_buffer(ctx, 1024); |
159 | 0 | top = proc_buffer = pdf_new_buffer_processor(ctx, *out_buf, options->ascii, options->newlines); |
160 | 0 | if (num_filters > 0) |
161 | 0 | { |
162 | 0 | for (i = num_filters - 1; i >= 0; i--) |
163 | 0 | top = list[i] = options->filters[i].filter(ctx, doc, top, struct_parents, transform, options, options->filters[i].options); |
164 | 0 | } |
165 | |
|
166 | 0 | pdf_process_contents(ctx, top, doc, in_res, in_stm, NULL, out_res); |
167 | 0 | pdf_close_processor(ctx, top); |
168 | |
|
169 | 0 | pdf_filter_resources(ctx, doc, in_res, *out_res, options, cycle_up); |
170 | 0 | } |
171 | 0 | fz_always(ctx) |
172 | 0 | { |
173 | 0 | for (i = 0; i < num_filters; i++) |
174 | 0 | pdf_drop_processor(ctx, list[i]); |
175 | 0 | pdf_drop_processor(ctx, proc_buffer); |
176 | 0 | fz_free(ctx, list); |
177 | 0 | } |
178 | 0 | fz_catch(ctx) |
179 | 0 | { |
180 | 0 | fz_drop_buffer(ctx, *out_buf); |
181 | 0 | *out_buf = NULL; |
182 | 0 | pdf_drop_obj(ctx, *out_res); |
183 | 0 | *out_res = NULL; |
184 | 0 | fz_rethrow(ctx); |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | /* |
189 | | Clean a Type 3 font's CharProcs content streams. This works almost |
190 | | exactly like pdf_filter_content_stream, but the resource dictionary is |
191 | | shared between all off the CharProcs. |
192 | | */ |
193 | | static void |
194 | | pdf_filter_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up) |
195 | 0 | { |
196 | 0 | pdf_cycle_list cycle; |
197 | 0 | pdf_processor *proc_buffer = NULL; |
198 | 0 | pdf_processor *proc_filter = NULL; |
199 | 0 | pdf_obj *in_res; |
200 | 0 | pdf_obj *out_res = NULL; |
201 | 0 | pdf_obj *charprocs; |
202 | 0 | int i, n; |
203 | 0 | int num_filters = 0; |
204 | 0 | pdf_processor **list = NULL; |
205 | 0 | fz_buffer *buffer = NULL; |
206 | 0 | pdf_processor *top = NULL; |
207 | 0 | pdf_obj *res = NULL; |
208 | 0 | fz_buffer *new_buf = NULL; |
209 | |
|
210 | 0 | fz_var(out_res); |
211 | 0 | fz_var(proc_buffer); |
212 | 0 | fz_var(proc_filter); |
213 | 0 | fz_var(buffer); |
214 | 0 | fz_var(res); |
215 | 0 | fz_var(new_buf); |
216 | | |
217 | | /* We cannot combine instancing with type3 fonts. The new names for |
218 | | * instanced form/image resources would clash, since they start over for |
219 | | * each content stream. This is not a problem for now, because we only |
220 | | * use instancing with redaction, and redaction doesn't clean type3 |
221 | | * fonts. |
222 | | */ |
223 | 0 | assert(!options->instance_forms); |
224 | | |
225 | | /* Avoid recursive cycles! */ |
226 | 0 | if (pdf_cycle(ctx, &cycle, cycle_up, obj)) |
227 | 0 | return; |
228 | | |
229 | 0 | if (options->filters) |
230 | 0 | for (; options->filters[num_filters].filter != NULL; num_filters++); |
231 | |
|
232 | 0 | if (num_filters > 0) |
233 | 0 | list = fz_calloc(ctx, num_filters, sizeof(pdf_processor *)); |
234 | |
|
235 | 0 | fz_try(ctx) |
236 | 0 | { |
237 | 0 | in_res = pdf_dict_get(ctx, obj, PDF_NAME(Resources)); |
238 | 0 | if (!in_res) |
239 | 0 | in_res = page_res; |
240 | |
|
241 | 0 | buffer = fz_new_buffer(ctx, 1024); |
242 | 0 | top = proc_buffer = pdf_new_buffer_processor(ctx, buffer, options->ascii, options->newlines); |
243 | 0 | if (num_filters > 0) |
244 | 0 | { |
245 | 0 | for (i = num_filters - 1; i >= 0; i--) |
246 | 0 | top = list[i] = options->filters[i].filter(ctx, doc, top, -1, fz_identity, options, options->filters[i].options); |
247 | 0 | } |
248 | |
|
249 | 0 | pdf_processor_push_resources(ctx, top, in_res); |
250 | 0 | charprocs = pdf_dict_get(ctx, obj, PDF_NAME(CharProcs)); |
251 | 0 | n = pdf_dict_len(ctx, charprocs); |
252 | 0 | for (i = 0; i < n; i++) |
253 | 0 | { |
254 | 0 | pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i); |
255 | |
|
256 | 0 | if (i > 0) |
257 | 0 | { |
258 | 0 | pdf_reset_processor(ctx, top); |
259 | 0 | fz_clear_buffer(ctx, buffer); |
260 | 0 | } |
261 | 0 | pdf_process_raw_contents(ctx, top, doc, in_res, val, NULL); |
262 | |
|
263 | 0 | pdf_close_processor(ctx, top); |
264 | |
|
265 | 0 | if (!options->no_update) |
266 | 0 | { |
267 | 0 | new_buf = fz_clone_buffer(ctx, buffer); |
268 | 0 | pdf_update_stream(ctx, doc, val, new_buf, 0); |
269 | 0 | fz_drop_buffer(ctx, new_buf); |
270 | 0 | new_buf = NULL; |
271 | 0 | } |
272 | 0 | } |
273 | |
|
274 | 0 | } |
275 | 0 | fz_always(ctx) |
276 | 0 | { |
277 | 0 | res = pdf_processor_pop_resources(ctx, top); |
278 | 0 | for (i = 0; i < num_filters; i++) |
279 | 0 | pdf_drop_processor(ctx, list[i]); |
280 | 0 | pdf_drop_processor(ctx, proc_buffer); |
281 | 0 | fz_free(ctx, list); |
282 | 0 | fz_drop_buffer(ctx, new_buf); |
283 | 0 | fz_drop_buffer(ctx, buffer); |
284 | 0 | } |
285 | 0 | fz_catch(ctx) |
286 | 0 | { |
287 | 0 | pdf_drop_obj(ctx, res); |
288 | 0 | fz_rethrow(ctx); |
289 | 0 | } |
290 | 0 | pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), res); |
291 | 0 | } |
292 | | |
293 | | static void |
294 | | pdf_filter_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *stm, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up) |
295 | 0 | { |
296 | 0 | pdf_cycle_list cycle; |
297 | 0 | int struct_parents; |
298 | 0 | pdf_obj *new_res = NULL; |
299 | 0 | fz_buffer *new_buf = NULL; |
300 | 0 | pdf_obj *old_res; |
301 | |
|
302 | 0 | fz_var(new_buf); |
303 | 0 | fz_var(new_res); |
304 | | |
305 | | // TODO for RJW: XObject can also be a StructParent; how do we handle that case? |
306 | |
|
307 | 0 | struct_parents = pdf_dict_get_int_default(ctx, stm, PDF_NAME(StructParents), -1); |
308 | |
|
309 | 0 | old_res = pdf_dict_get(ctx, stm, PDF_NAME(Resources)); |
310 | 0 | if (!old_res) |
311 | 0 | old_res = page_res; |
312 | | |
313 | | // TODO: don't clean objects more than once. |
314 | | |
315 | | /* Avoid recursive cycles! */ |
316 | 0 | if (pdf_cycle(ctx, &cycle, cycle_up, stm)) |
317 | 0 | return; |
318 | 0 | fz_try(ctx) |
319 | 0 | { |
320 | 0 | pdf_filter_content_stream(ctx, doc, stm, old_res, fz_identity, options, struct_parents, &new_buf, &new_res, &cycle); |
321 | 0 | if (!options->no_update) |
322 | 0 | { |
323 | 0 | pdf_update_stream(ctx, doc, stm, new_buf, 0); |
324 | 0 | pdf_dict_put(ctx, stm, PDF_NAME(Resources), new_res); |
325 | 0 | } |
326 | 0 | } |
327 | 0 | fz_always(ctx) |
328 | 0 | { |
329 | 0 | fz_drop_buffer(ctx, new_buf); |
330 | 0 | pdf_drop_obj(ctx, new_res); |
331 | 0 | } |
332 | 0 | fz_catch(ctx) |
333 | 0 | fz_rethrow(ctx); |
334 | 0 | } |
335 | | |
336 | | pdf_obj * |
337 | | pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix transform, pdf_filter_options *options, pdf_cycle_list *cycle_up) |
338 | 0 | { |
339 | 0 | pdf_cycle_list cycle; |
340 | 0 | pdf_document *doc = pdf_get_bound_document(ctx, old_xobj); |
341 | 0 | pdf_obj *new_xobj; |
342 | 0 | pdf_obj *new_res, *old_res; |
343 | 0 | fz_buffer *new_buf; |
344 | 0 | int struct_parents; |
345 | 0 | fz_matrix matrix; |
346 | |
|
347 | 0 | fz_var(new_xobj); |
348 | 0 | fz_var(new_buf); |
349 | 0 | fz_var(new_res); |
350 | | |
351 | | // TODO for RJW: XObject can also be a StructParent; how do we handle that case? |
352 | | // TODO for RJW: will we run into trouble by duplicating StructParents stuff? |
353 | |
|
354 | 0 | struct_parents = pdf_dict_get_int_default(ctx, old_xobj, PDF_NAME(StructParents), -1); |
355 | |
|
356 | 0 | old_res = pdf_dict_get(ctx, old_xobj, PDF_NAME(Resources)); |
357 | 0 | if (!old_res) |
358 | 0 | old_res = page_res; |
359 | |
|
360 | 0 | if (pdf_cycle(ctx, &cycle, cycle_up, old_xobj)) |
361 | 0 | return pdf_keep_obj(ctx, old_xobj); |
362 | | |
363 | 0 | matrix = pdf_dict_get_matrix(ctx, old_xobj, PDF_NAME(Matrix)); |
364 | 0 | transform = fz_concat(matrix, transform); |
365 | |
|
366 | 0 | fz_try(ctx) |
367 | 0 | { |
368 | 0 | new_xobj = pdf_add_object_drop(ctx, doc, pdf_copy_dict(ctx, old_xobj)); |
369 | 0 | pdf_filter_content_stream(ctx, doc, old_xobj, old_res, transform, options, struct_parents, &new_buf, &new_res, &cycle); |
370 | 0 | if (!options->no_update) |
371 | 0 | { |
372 | 0 | pdf_update_stream(ctx, doc, new_xobj, new_buf, 0); |
373 | 0 | pdf_dict_put(ctx, new_xobj, PDF_NAME(Resources), new_res); |
374 | 0 | } |
375 | 0 | } |
376 | 0 | fz_always(ctx) |
377 | 0 | { |
378 | 0 | fz_drop_buffer(ctx, new_buf); |
379 | 0 | pdf_drop_obj(ctx, new_res); |
380 | 0 | } |
381 | 0 | fz_catch(ctx) |
382 | 0 | { |
383 | 0 | pdf_drop_obj(ctx, new_xobj); |
384 | 0 | fz_rethrow(ctx); |
385 | 0 | } |
386 | | |
387 | 0 | return new_xobj; |
388 | 0 | } |
389 | | |
390 | | void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_filter_options *options) |
391 | 0 | { |
392 | 0 | pdf_obj *contents, *old_res; |
393 | 0 | pdf_obj *new_res; |
394 | 0 | fz_buffer *buffer; |
395 | 0 | int struct_parents; |
396 | |
|
397 | 0 | struct_parents = pdf_dict_get_int_default(ctx, page->obj, PDF_NAME(StructParents), -1); |
398 | |
|
399 | 0 | contents = pdf_page_contents(ctx, page); |
400 | 0 | old_res = pdf_page_resources(ctx, page); |
401 | |
|
402 | 0 | pdf_filter_content_stream(ctx, doc, contents, old_res, fz_identity, options, struct_parents, &buffer, &new_res, NULL); |
403 | |
|
404 | 0 | fz_try(ctx) |
405 | 0 | { |
406 | 0 | if (options->complete) |
407 | 0 | options->complete(ctx, buffer, options->opaque); |
408 | 0 | if (!options->no_update) |
409 | 0 | { |
410 | | /* Always create a new stream object to replace the page contents. This is useful |
411 | | both if the contents is an array of streams, is entirely missing or if the contents |
412 | | are shared between pages. */ |
413 | 0 | contents = pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, 1)); |
414 | 0 | pdf_dict_put_drop(ctx, page->obj, PDF_NAME(Contents), contents); |
415 | 0 | pdf_update_stream(ctx, doc, contents, buffer, 0); |
416 | 0 | pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), new_res); |
417 | 0 | } |
418 | 0 | } |
419 | 0 | fz_always(ctx) |
420 | 0 | { |
421 | 0 | fz_drop_buffer(ctx, buffer); |
422 | 0 | pdf_drop_obj(ctx, new_res); |
423 | 0 | } |
424 | 0 | fz_catch(ctx) |
425 | 0 | fz_rethrow(ctx); |
426 | 0 | } |
427 | | |
428 | | void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, pdf_filter_options *options) |
429 | 0 | { |
430 | 0 | pdf_obj *ap = pdf_dict_get(ctx, annot->obj, PDF_NAME(AP)); |
431 | 0 | if (pdf_is_dict(ctx, ap)) |
432 | 0 | { |
433 | 0 | int i, n = pdf_dict_len(ctx, ap); |
434 | 0 | for (i = 0; i < n; i++) |
435 | 0 | { |
436 | 0 | pdf_obj *stm = pdf_dict_get_val(ctx, ap, i); |
437 | 0 | if (pdf_is_stream(ctx, stm)) |
438 | 0 | { |
439 | 0 | pdf_filter_xobject(ctx, doc, stm, NULL, options, NULL); |
440 | 0 | } |
441 | 0 | } |
442 | 0 | } |
443 | 0 | } |
444 | | |
445 | | /* REDACTIONS */ |
446 | | |
447 | | struct redact_filter_state { |
448 | | pdf_filter_options filter_opts; |
449 | | pdf_sanitize_filter_options sanitize_opts; |
450 | | pdf_filter_factory filter_list[2]; |
451 | | pdf_page *page; |
452 | | pdf_annot *target; // NULL if all |
453 | | int line_art; |
454 | | int text; |
455 | | }; |
456 | | |
457 | | |
458 | | static void pdf_run_obj_to_buf(fz_context *ctx, fz_buffer *buffer, pdf_obj *obj, pdf_page *page) |
459 | 0 | { |
460 | 0 | pdf_processor *proc = pdf_new_buffer_processor(ctx, buffer, 0, 0); |
461 | 0 | pdf_obj *res; |
462 | | |
463 | |
|
464 | 0 | fz_try(ctx) |
465 | 0 | { |
466 | 0 | res = pdf_xobject_resources(ctx, obj); |
467 | 0 | if (res == NULL) |
468 | 0 | res = pdf_page_resources(ctx, page); |
469 | |
|
470 | 0 | pdf_process_contents(ctx, proc, page->doc, res, obj, NULL, NULL); |
471 | 0 | pdf_close_processor(ctx, proc); |
472 | 0 | } |
473 | 0 | fz_always(ctx) |
474 | 0 | pdf_drop_processor(ctx, proc); |
475 | 0 | fz_catch(ctx) |
476 | 0 | fz_rethrow(ctx); |
477 | 0 | } |
478 | | |
479 | | static void |
480 | | pdf_redact_end_page(fz_context *ctx, fz_buffer *buf, void *opaque) |
481 | 0 | { |
482 | 0 | struct redact_filter_state *red = opaque; |
483 | 0 | pdf_page *page = red->page; |
484 | 0 | pdf_annot *annot; |
485 | 0 | pdf_obj *qp; |
486 | 0 | int i, n; |
487 | |
|
488 | 0 | fz_append_string(ctx, buf, " 0 g\n"); |
489 | |
|
490 | 0 | for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) |
491 | 0 | { |
492 | 0 | if (red->target != NULL && red->target != annot) |
493 | 0 | continue; |
494 | 0 | if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) |
495 | 0 | { |
496 | 0 | pdf_obj *ro = pdf_dict_get(ctx, annot->obj, PDF_NAME(RO)); |
497 | 0 | if (ro) |
498 | 0 | { |
499 | 0 | pdf_run_obj_to_buf(ctx, buf, ro, page); |
500 | 0 | } |
501 | 0 | else |
502 | 0 | { |
503 | 0 | qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); |
504 | 0 | n = pdf_array_len(ctx, qp); |
505 | 0 | if (n > 0) |
506 | 0 | { |
507 | 0 | for (i = 0; i < n; i += 8) |
508 | 0 | { |
509 | 0 | fz_quad q = pdf_to_quad(ctx, qp, i); |
510 | 0 | fz_append_printf(ctx, buf, "%g %g m\n", q.ll.x, q.ll.y); |
511 | 0 | fz_append_printf(ctx, buf, "%g %g l\n", q.lr.x, q.lr.y); |
512 | 0 | fz_append_printf(ctx, buf, "%g %g l\n", q.ur.x, q.ur.y); |
513 | 0 | fz_append_printf(ctx, buf, "%g %g l\n", q.ul.x, q.ul.y); |
514 | 0 | fz_append_string(ctx, buf, "f\n"); |
515 | 0 | } |
516 | 0 | } |
517 | 0 | else |
518 | 0 | { |
519 | 0 | fz_rect r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); |
520 | 0 | fz_append_printf(ctx, buf, "%g %g m\n", r.x0, r.y0); |
521 | 0 | fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y0); |
522 | 0 | fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y1); |
523 | 0 | fz_append_printf(ctx, buf, "%g %g l\n", r.x0, r.y1); |
524 | 0 | fz_append_string(ctx, buf, "f\n"); |
525 | 0 | } |
526 | 0 | } |
527 | 0 | } |
528 | 0 | } |
529 | 0 | } |
530 | | |
531 | | static int |
532 | | pdf_redact_text_filter(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox) |
533 | 0 | { |
534 | 0 | struct redact_filter_state *red = opaque; |
535 | 0 | pdf_page *page = red->page; |
536 | 0 | pdf_annot *annot; |
537 | 0 | pdf_obj *qp; |
538 | 0 | fz_rect r; |
539 | 0 | fz_quad q; |
540 | 0 | int i, n; |
541 | 0 | float w, h; |
542 | |
|
543 | 0 | trm = fz_concat(trm, ctm); |
544 | 0 | bbox = fz_transform_rect(bbox, trm); |
545 | | |
546 | | /* Shrink character bbox a bit */ |
547 | 0 | w = bbox.x1 - bbox.x0; |
548 | 0 | h = bbox.y1 - bbox.y0; |
549 | 0 | bbox.x0 += w / 10; |
550 | 0 | bbox.x1 -= w / 10; |
551 | 0 | bbox.y0 += h / 10; |
552 | 0 | bbox.y1 -= h / 10; |
553 | |
|
554 | 0 | for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) |
555 | 0 | { |
556 | 0 | if (red->target != NULL && red->target != annot) |
557 | 0 | continue; |
558 | 0 | if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) |
559 | 0 | { |
560 | 0 | qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); |
561 | 0 | n = pdf_array_len(ctx, qp); |
562 | | /* Note, we test for the intersection being a valid rectangle, NOT |
563 | | * a non-empty one. This is because we can have 'empty' character |
564 | | * boxes (say for diacritics), that while 0 width, do have a defined |
565 | | * position on the plane, and hence inclusion makes sense. */ |
566 | 0 | if (n > 0) |
567 | 0 | { |
568 | 0 | for (i = 0; i < n; i += 8) |
569 | 0 | { |
570 | 0 | q = pdf_to_quad(ctx, qp, i); |
571 | 0 | r = fz_rect_from_quad(q); |
572 | 0 | if (fz_is_valid_rect(fz_intersect_rect(bbox, r))) |
573 | 0 | return 1; |
574 | 0 | } |
575 | 0 | } |
576 | 0 | else |
577 | 0 | { |
578 | 0 | r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); |
579 | 0 | if (fz_is_valid_rect(fz_intersect_rect(bbox, r))) |
580 | 0 | return 1; |
581 | 0 | } |
582 | 0 | } |
583 | 0 | } |
584 | | |
585 | 0 | return 0; |
586 | 0 | } |
587 | | |
588 | | static fz_pixmap * |
589 | | pdf_redact_image_imp(fz_context *ctx, fz_matrix ctm, fz_image *image, fz_pixmap *pixmap, fz_pixmap **pmask, fz_quad q) |
590 | 0 | { |
591 | 0 | fz_matrix inv_ctm; |
592 | 0 | fz_irect r; |
593 | 0 | int x, y, k, n, bpp; |
594 | 0 | unsigned char white; |
595 | 0 | fz_pixmap *mask = *pmask; |
596 | 0 | int pixmap_cloned = 0; |
597 | |
|
598 | 0 | if (!pixmap) |
599 | 0 | { |
600 | 0 | fz_pixmap *original = fz_get_pixmap_from_image(ctx, image, NULL, NULL, NULL, NULL); |
601 | 0 | int imagemask = image->imagemask; |
602 | |
|
603 | 0 | fz_try(ctx) |
604 | 0 | { |
605 | 0 | pixmap = fz_clone_pixmap(ctx, original); |
606 | 0 | if (imagemask) |
607 | 0 | fz_invert_pixmap_alpha(ctx, pixmap); |
608 | 0 | } |
609 | 0 | fz_always(ctx) |
610 | 0 | fz_drop_pixmap(ctx, original); |
611 | 0 | fz_catch(ctx) |
612 | 0 | fz_rethrow(ctx); |
613 | 0 | pixmap_cloned = 1; |
614 | 0 | } |
615 | | |
616 | 0 | if (!mask && image->mask) |
617 | 0 | { |
618 | 0 | fz_pixmap *original = fz_get_pixmap_from_image(ctx, image->mask, NULL, NULL, NULL, NULL); |
619 | |
|
620 | 0 | fz_try(ctx) |
621 | 0 | { |
622 | 0 | mask = fz_clone_pixmap(ctx, original); |
623 | 0 | *pmask = mask; |
624 | 0 | } |
625 | 0 | fz_always(ctx) |
626 | 0 | { |
627 | 0 | fz_drop_pixmap(ctx, original); |
628 | 0 | } |
629 | 0 | fz_catch(ctx) |
630 | 0 | { |
631 | 0 | if (pixmap_cloned) |
632 | 0 | fz_drop_pixmap(ctx, pixmap); |
633 | 0 | fz_rethrow(ctx); |
634 | 0 | } |
635 | 0 | } |
636 | | |
637 | | /* If we have a 1x1 image, to which a mask is being applied |
638 | | * then it's the mask we really want to change, not the |
639 | | * image. We might have just a small section of the image |
640 | | * being covered, and setting the whole thing to white |
641 | | * will blank stuff outside the desired area. */ |
642 | 0 | if (!mask || pixmap->w > 1 || pixmap->h > 1) |
643 | 0 | { |
644 | 0 | n = pixmap->n - pixmap->alpha; |
645 | 0 | bpp = pixmap->n; |
646 | 0 | if (fz_colorspace_is_subtractive(ctx, pixmap->colorspace)) |
647 | 0 | white = 0; |
648 | 0 | else |
649 | 0 | white = 255; |
650 | |
|
651 | 0 | inv_ctm = fz_post_scale(fz_invert_matrix(ctm), pixmap->w, pixmap->h); |
652 | 0 | r = fz_round_rect(fz_transform_rect(fz_rect_from_quad(q), inv_ctm)); |
653 | 0 | r.x0 = fz_clampi(r.x0, 0, pixmap->w); |
654 | 0 | r.x1 = fz_clampi(r.x1, 0, pixmap->w); |
655 | 0 | r.y1 = fz_clampi(pixmap->h - r.y1, 0, pixmap->h); |
656 | 0 | r.y0 = fz_clampi(pixmap->h - r.y0, 0, pixmap->h); |
657 | 0 | for (y = r.y1; y < r.y0; ++y) |
658 | 0 | { |
659 | 0 | for (x = r.x0; x < r.x1; ++x) |
660 | 0 | { |
661 | 0 | unsigned char *s = &pixmap->samples[(size_t)y * pixmap->stride + (size_t)x * bpp]; |
662 | 0 | for (k = 0; k < n; ++k) |
663 | 0 | s[k] = white; |
664 | 0 | if (pixmap->alpha) |
665 | 0 | s[k] = 255; |
666 | 0 | } |
667 | 0 | } |
668 | 0 | } |
669 | |
|
670 | 0 | if (mask) |
671 | 0 | { |
672 | 0 | inv_ctm = fz_post_scale(fz_invert_matrix(ctm), mask->w, mask->h); |
673 | 0 | r = fz_round_rect(fz_transform_rect(fz_rect_from_quad(q), inv_ctm)); |
674 | 0 | r.x0 = fz_clampi(r.x0, 0, mask->w); |
675 | 0 | r.x1 = fz_clampi(r.x1, 0, mask->w); |
676 | 0 | r.y1 = fz_clampi(mask->h - r.y1, 0, mask->h); |
677 | 0 | r.y0 = fz_clampi(mask->h - r.y0, 0, mask->h); |
678 | 0 | for (y = r.y1; y < r.y0; ++y) |
679 | 0 | { |
680 | 0 | unsigned char *s = &mask->samples[(size_t)y * mask->stride + (size_t)r.x0]; |
681 | 0 | memset(s, 0xff, r.x1-r.x0); |
682 | 0 | } |
683 | 0 | } |
684 | |
|
685 | 0 | return pixmap; |
686 | 0 | } |
687 | | |
688 | | static fz_image * |
689 | | pdf_redact_image_filter_remove(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip) |
690 | 0 | { |
691 | 0 | fz_pixmap *redacted = NULL; |
692 | 0 | struct redact_filter_state *red = opaque; |
693 | 0 | pdf_page *page = red->page; |
694 | 0 | pdf_annot *annot; |
695 | 0 | pdf_obj *qp; |
696 | 0 | fz_rect area; |
697 | 0 | fz_rect r; |
698 | 0 | int i, n; |
699 | |
|
700 | 0 | fz_var(redacted); |
701 | |
|
702 | 0 | area = fz_transform_rect(fz_unit_rect, ctm); |
703 | |
|
704 | 0 | for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) |
705 | 0 | { |
706 | 0 | if (red->target != NULL && red->target != annot) |
707 | 0 | continue; |
708 | 0 | if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) |
709 | 0 | { |
710 | 0 | qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); |
711 | 0 | n = pdf_array_len(ctx, qp); |
712 | 0 | if (n > 0) |
713 | 0 | { |
714 | 0 | for (i = 0; i < n; i += 8) |
715 | 0 | { |
716 | 0 | r = fz_rect_from_quad(pdf_to_quad(ctx, qp, i)); |
717 | 0 | r = fz_intersect_rect(r, area); |
718 | 0 | if (!fz_is_empty_rect(r)) |
719 | 0 | return NULL; |
720 | 0 | } |
721 | 0 | } |
722 | 0 | else |
723 | 0 | { |
724 | 0 | r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); |
725 | 0 | r = fz_intersect_rect(r, area); |
726 | 0 | if (!fz_is_empty_rect(r)) |
727 | 0 | return NULL; |
728 | 0 | } |
729 | 0 | } |
730 | 0 | } |
731 | | |
732 | 0 | return fz_keep_image(ctx, image); |
733 | 0 | } |
734 | | |
735 | | static fz_image * |
736 | | pdf_redact_image_filter_remove_invisible(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip) |
737 | 0 | { |
738 | 0 | fz_pixmap *redacted = NULL; |
739 | 0 | struct redact_filter_state *red = opaque; |
740 | 0 | pdf_page *page = red->page; |
741 | 0 | pdf_annot *annot; |
742 | 0 | pdf_obj *qp; |
743 | 0 | fz_rect area; |
744 | 0 | fz_rect r; |
745 | 0 | int i, n; |
746 | |
|
747 | 0 | fz_var(redacted); |
748 | |
|
749 | 0 | area = fz_transform_rect(fz_unit_rect, ctm); |
750 | | |
751 | | /* Restrict the are of the image to that which can actually be seen. */ |
752 | 0 | area = fz_intersect_rect(area, clip); |
753 | |
|
754 | 0 | for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) |
755 | 0 | { |
756 | 0 | if (red->target != NULL && red->target != annot) |
757 | 0 | continue; |
758 | 0 | if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) |
759 | 0 | { |
760 | 0 | qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); |
761 | 0 | n = pdf_array_len(ctx, qp); |
762 | 0 | if (n > 0) |
763 | 0 | { |
764 | 0 | for (i = 0; i < n; i += 8) |
765 | 0 | { |
766 | 0 | r = fz_rect_from_quad(pdf_to_quad(ctx, qp, i)); |
767 | 0 | r = fz_intersect_rect(r, area); |
768 | 0 | if (!fz_is_empty_rect(r)) |
769 | 0 | return NULL; |
770 | 0 | } |
771 | 0 | } |
772 | 0 | else |
773 | 0 | { |
774 | 0 | r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); |
775 | 0 | r = fz_intersect_rect(r, area); |
776 | 0 | if (!fz_is_empty_rect(r)) |
777 | 0 | return NULL; |
778 | 0 | } |
779 | 0 | } |
780 | 0 | } |
781 | | |
782 | 0 | return fz_keep_image(ctx, image); |
783 | 0 | } |
784 | | |
785 | | static fz_image * |
786 | | pdf_redact_image_filter_pixels(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip) |
787 | 0 | { |
788 | 0 | fz_pixmap *redacted = NULL; |
789 | 0 | fz_pixmap *mask = NULL; |
790 | 0 | struct redact_filter_state *red = opaque; |
791 | 0 | pdf_page *page = red->page; |
792 | 0 | pdf_annot *annot; |
793 | 0 | pdf_obj *qp; |
794 | 0 | fz_quad area, q; |
795 | 0 | fz_rect r; |
796 | 0 | int i, n; |
797 | |
|
798 | 0 | fz_var(redacted); |
799 | 0 | fz_var(mask); |
800 | |
|
801 | 0 | area = fz_transform_quad(fz_quad_from_rect(fz_unit_rect), ctm); |
802 | | |
803 | | /* First see if we can redact the image completely */ |
804 | 0 | for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) |
805 | 0 | { |
806 | 0 | if (red->target != NULL && red->target != annot) |
807 | 0 | continue; |
808 | 0 | if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) |
809 | 0 | { |
810 | 0 | qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); |
811 | 0 | n = pdf_array_len(ctx, qp); |
812 | 0 | if (n > 0) |
813 | 0 | { |
814 | 0 | for (i = 0; i < n; i += 8) |
815 | 0 | { |
816 | 0 | q = pdf_to_quad(ctx, qp, i); |
817 | 0 | if (fz_is_quad_inside_quad(area, q)) |
818 | 0 | return NULL; |
819 | 0 | } |
820 | 0 | } |
821 | 0 | else |
822 | 0 | { |
823 | 0 | r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); |
824 | 0 | q = fz_quad_from_rect(r); |
825 | 0 | if (fz_is_quad_inside_quad(area, q)) |
826 | 0 | return NULL; |
827 | 0 | } |
828 | 0 | } |
829 | 0 | } |
830 | | |
831 | | /* Blank out redacted parts of the image if necessary */ |
832 | 0 | fz_try(ctx) |
833 | 0 | { |
834 | 0 | for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) |
835 | 0 | { |
836 | 0 | if (red->target != NULL && red->target != annot) |
837 | 0 | continue; |
838 | 0 | if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) |
839 | 0 | { |
840 | 0 | qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); |
841 | 0 | n = pdf_array_len(ctx, qp); |
842 | 0 | if (n > 0) |
843 | 0 | { |
844 | 0 | for (i = 0; i < n; i += 8) |
845 | 0 | { |
846 | 0 | q = pdf_to_quad(ctx, qp, i); |
847 | 0 | if (fz_is_quad_intersecting_quad(area, q)) |
848 | 0 | redacted = pdf_redact_image_imp(ctx, ctm, image, redacted, &mask, q); |
849 | 0 | } |
850 | 0 | } |
851 | 0 | else |
852 | 0 | { |
853 | 0 | r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); |
854 | 0 | q = fz_quad_from_rect(r); |
855 | 0 | if (fz_is_quad_intersecting_quad(area, q)) |
856 | 0 | redacted = pdf_redact_image_imp(ctx, ctm, image, redacted, &mask, q); |
857 | 0 | } |
858 | 0 | } |
859 | 0 | } |
860 | 0 | } |
861 | 0 | fz_catch(ctx) |
862 | 0 | { |
863 | 0 | fz_drop_pixmap(ctx, redacted); |
864 | 0 | fz_drop_pixmap(ctx, mask); |
865 | 0 | fz_rethrow(ctx); |
866 | 0 | } |
867 | | |
868 | 0 | if (redacted) |
869 | 0 | { |
870 | 0 | int imagemask = image->imagemask; |
871 | 0 | fz_image *imask = fz_keep_image(ctx, image->mask); |
872 | |
|
873 | 0 | fz_var(imask); |
874 | |
|
875 | 0 | fz_try(ctx) |
876 | 0 | { |
877 | 0 | if (mask) |
878 | 0 | { |
879 | 0 | fz_drop_image(ctx, imask); |
880 | 0 | imask = NULL; |
881 | 0 | imask = fz_new_image_from_pixmap(ctx, mask, NULL); |
882 | 0 | } |
883 | 0 | image = fz_new_image_from_pixmap(ctx, redacted, NULL); |
884 | 0 | image->imagemask = imagemask; |
885 | 0 | image->mask = imask; |
886 | 0 | imask = NULL; |
887 | 0 | } |
888 | 0 | fz_always(ctx) |
889 | 0 | { |
890 | 0 | fz_drop_pixmap(ctx, redacted); |
891 | 0 | fz_drop_pixmap(ctx, mask); |
892 | 0 | fz_drop_image(ctx, imask); |
893 | 0 | } |
894 | 0 | fz_catch(ctx) |
895 | 0 | fz_rethrow(ctx); |
896 | 0 | return image; |
897 | 0 | } |
898 | | |
899 | 0 | return fz_keep_image(ctx, image); |
900 | 0 | } |
901 | | |
902 | | /* Returns 0 if area does not intersect with any of our redactions. |
903 | | * Returns 2 if area is completely included within one of our redactions. |
904 | | * Returns 1 otherwise. */ |
905 | | static int |
906 | | rect_touches_redactions(fz_context *ctx, fz_rect area, struct redact_filter_state *red) |
907 | 0 | { |
908 | 0 | pdf_annot *annot; |
909 | 0 | pdf_obj *qp; |
910 | 0 | fz_quad q; |
911 | 0 | fz_rect r, s; |
912 | 0 | int i, n; |
913 | 0 | pdf_page *page = red->page; |
914 | |
|
915 | 0 | for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) |
916 | 0 | { |
917 | 0 | if (red->target != NULL && red->target != annot) |
918 | 0 | continue; |
919 | 0 | if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) |
920 | 0 | { |
921 | 0 | qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); |
922 | 0 | n = pdf_array_len(ctx, qp); |
923 | 0 | if (n > 0) |
924 | 0 | { |
925 | 0 | for (i = 0; i < n; i += 8) |
926 | 0 | { |
927 | 0 | q = pdf_to_quad(ctx, qp, i); |
928 | 0 | r = fz_rect_from_quad(q); |
929 | 0 | s = fz_intersect_rect(r, area); |
930 | 0 | if (!fz_is_empty_rect(s)) |
931 | 0 | { |
932 | 0 | if (fz_contains_rect(r, area)) |
933 | 0 | return 2; |
934 | 0 | return 1; |
935 | 0 | } |
936 | 0 | } |
937 | 0 | } |
938 | 0 | else |
939 | 0 | { |
940 | 0 | r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); |
941 | 0 | s = fz_intersect_rect(r, area); |
942 | 0 | if (!fz_is_empty_rect(s)) |
943 | 0 | { |
944 | 0 | if (fz_contains_rect(r, area)) |
945 | 0 | return 2; |
946 | 0 | return 1; |
947 | 0 | } |
948 | 0 | } |
949 | 0 | } |
950 | 0 | } |
951 | 0 | return 0; |
952 | 0 | } |
953 | | |
954 | | static void |
955 | | remove_page_link(fz_context *ctx, pdf_page *page, pdf_obj *obj) |
956 | 0 | { |
957 | 0 | pdf_link **linkp = (pdf_link **)&page->links; |
958 | 0 | pdf_link *link; |
959 | |
|
960 | 0 | while ((link = *linkp) != NULL) |
961 | 0 | { |
962 | 0 | if (link->obj == obj) |
963 | 0 | { |
964 | 0 | *linkp = (pdf_link *)link->super.next; |
965 | 0 | link->super.next = NULL; |
966 | 0 | fz_drop_link(ctx, &link->super); |
967 | 0 | break; |
968 | 0 | } |
969 | 0 | else |
970 | 0 | { |
971 | 0 | linkp = (pdf_link **)&link->super.next; |
972 | 0 | } |
973 | 0 | } |
974 | 0 | } |
975 | | |
976 | | static void |
977 | | pdf_redact_page_links(fz_context *ctx, struct redact_filter_state *red) |
978 | 0 | { |
979 | 0 | pdf_obj *annots; |
980 | 0 | pdf_obj *link; |
981 | 0 | fz_rect area; |
982 | 0 | int k; |
983 | |
|
984 | 0 | annots = pdf_dict_get(ctx, red->page->obj, PDF_NAME(Annots)); |
985 | 0 | k = 0; |
986 | 0 | while (k < pdf_array_len(ctx, annots)) |
987 | 0 | { |
988 | 0 | link = pdf_array_get(ctx, annots, k); |
989 | 0 | if (pdf_dict_get(ctx, link, PDF_NAME(Subtype)) == PDF_NAME(Link)) |
990 | 0 | { |
991 | 0 | area = pdf_dict_get_rect(ctx, link, PDF_NAME(Rect)); |
992 | 0 | if (rect_touches_redactions(ctx, area, red)) |
993 | 0 | { |
994 | 0 | pdf_array_delete(ctx, annots, k); |
995 | 0 | remove_page_link(ctx, red->page, link); |
996 | 0 | continue; |
997 | 0 | } |
998 | 0 | } |
999 | 0 | ++k; |
1000 | 0 | } |
1001 | 0 | } |
1002 | | |
1003 | | static void |
1004 | | pdf_redact_page_annotations(fz_context *ctx, struct redact_filter_state *red) |
1005 | 0 | { |
1006 | 0 | pdf_annot *annot; |
1007 | 0 | fz_rect area; |
1008 | |
|
1009 | 0 | restart: |
1010 | 0 | for (annot = pdf_first_annot(ctx, red->page); annot; annot = pdf_next_annot(ctx, annot)) |
1011 | 0 | { |
1012 | 0 | if (pdf_annot_type(ctx, annot) == PDF_ANNOT_FREE_TEXT) |
1013 | 0 | { |
1014 | 0 | area = pdf_dict_get_rect(ctx, pdf_annot_obj(ctx, annot), PDF_NAME(Rect)); |
1015 | 0 | if (rect_touches_redactions(ctx, area, red)) |
1016 | 0 | { |
1017 | 0 | pdf_delete_annot(ctx, red->page, annot); |
1018 | 0 | goto restart; |
1019 | 0 | } |
1020 | 0 | } |
1021 | 0 | } |
1022 | 0 | } |
1023 | | |
1024 | | static int culler(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type) |
1025 | 0 | { |
1026 | 0 | struct redact_filter_state *red = opaque; |
1027 | |
|
1028 | 0 | switch (type) |
1029 | 0 | { |
1030 | 0 | case FZ_CULL_PATH_FILL: |
1031 | 0 | case FZ_CULL_PATH_STROKE: |
1032 | 0 | case FZ_CULL_PATH_FILL_STROKE: |
1033 | 0 | case FZ_CULL_CLIP_PATH_FILL: |
1034 | 0 | case FZ_CULL_CLIP_PATH_STROKE: |
1035 | 0 | case FZ_CULL_CLIP_PATH_FILL_STROKE: |
1036 | 0 | if (red->line_art == PDF_REDACT_LINE_ART_REMOVE_IF_COVERED) |
1037 | 0 | return (rect_touches_redactions(ctx, bbox, red) == 2); |
1038 | 0 | else if (red->line_art == PDF_REDACT_LINE_ART_REMOVE_IF_TOUCHED) |
1039 | 0 | return (rect_touches_redactions(ctx, bbox, red) != 0); |
1040 | 0 | return 0; |
1041 | 0 | default: |
1042 | 0 | return 0; |
1043 | 0 | } |
1044 | 0 | } |
1045 | | |
1046 | | static |
1047 | | void init_redact_filter(fz_context *ctx, pdf_redact_options *redact_opts, struct redact_filter_state *red, pdf_page *page, pdf_annot *target) |
1048 | 0 | { |
1049 | 0 | int black_boxes = redact_opts ? redact_opts->black_boxes : 0; |
1050 | 0 | int image_method = redact_opts ? redact_opts->image_method : PDF_REDACT_IMAGE_PIXELS; |
1051 | 0 | int line_art = redact_opts ? redact_opts->line_art : PDF_REDACT_LINE_ART_NONE; |
1052 | 0 | int text = redact_opts ? redact_opts->text : PDF_REDACT_TEXT_REMOVE; |
1053 | |
|
1054 | 0 | memset(&red->filter_opts, 0, sizeof red->filter_opts); |
1055 | 0 | memset(&red->sanitize_opts, 0, sizeof red->sanitize_opts); |
1056 | |
|
1057 | 0 | red->filter_opts.recurse = 0; /* don't redact patterns, softmasks, and type3 fonts */ |
1058 | 0 | red->filter_opts.instance_forms = 1; /* redact xobjects with instancing */ |
1059 | 0 | red->filter_opts.ascii = 1; |
1060 | 0 | red->filter_opts.opaque = red; |
1061 | 0 | red->filter_opts.filters = red->filter_list; |
1062 | 0 | if (black_boxes) |
1063 | 0 | red->filter_opts.complete = pdf_redact_end_page; |
1064 | 0 | red->line_art = line_art; |
1065 | 0 | red->text = text; |
1066 | |
|
1067 | 0 | red->sanitize_opts.opaque = red; |
1068 | 0 | if (text == PDF_REDACT_TEXT_REMOVE) |
1069 | 0 | red->sanitize_opts.text_filter = pdf_redact_text_filter; |
1070 | 0 | if (image_method == PDF_REDACT_IMAGE_PIXELS) |
1071 | 0 | red->sanitize_opts.image_filter = pdf_redact_image_filter_pixels; |
1072 | 0 | if (image_method == PDF_REDACT_IMAGE_REMOVE) |
1073 | 0 | red->sanitize_opts.image_filter = pdf_redact_image_filter_remove; |
1074 | 0 | if (image_method == PDF_REDACT_IMAGE_REMOVE_UNLESS_INVISIBLE) |
1075 | 0 | red->sanitize_opts.image_filter = pdf_redact_image_filter_remove_invisible; |
1076 | 0 | red->sanitize_opts.culler = culler; |
1077 | |
|
1078 | 0 | red->filter_list[0].filter = pdf_new_sanitize_filter; |
1079 | 0 | red->filter_list[0].options = &red->sanitize_opts; |
1080 | 0 | red->filter_list[1].filter = NULL; |
1081 | 0 | red->filter_list[1].options = NULL; |
1082 | |
|
1083 | 0 | red->page = page; |
1084 | 0 | red->target = target; |
1085 | 0 | } |
1086 | | |
1087 | | static int |
1088 | | pdf_apply_redaction_imp(fz_context *ctx, pdf_page *page, pdf_annot *target, pdf_redact_options *redact_opts) |
1089 | 0 | { |
1090 | 0 | pdf_annot *annot; |
1091 | 0 | int has_redactions = 0; |
1092 | 0 | struct redact_filter_state red; |
1093 | 0 | pdf_document *doc = page->doc; |
1094 | |
|
1095 | 0 | for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) { |
1096 | 0 | if (target != NULL && target != annot) |
1097 | 0 | continue; |
1098 | 0 | if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) |
1099 | 0 | has_redactions = 1; |
1100 | 0 | } |
1101 | |
|
1102 | 0 | if (!has_redactions) |
1103 | 0 | return 0; |
1104 | | |
1105 | 0 | init_redact_filter(ctx, redact_opts, &red, page, target); |
1106 | |
|
1107 | 0 | if (target) |
1108 | 0 | pdf_begin_operation(ctx, doc, "Apply redaction"); |
1109 | 0 | else |
1110 | 0 | pdf_begin_operation(ctx, doc, "Apply redactions on page"); |
1111 | 0 | fz_try(ctx) |
1112 | 0 | { |
1113 | 0 | pdf_filter_page_contents(ctx, doc, page, &red.filter_opts); |
1114 | 0 | pdf_redact_page_links(ctx, &red); |
1115 | 0 | pdf_redact_page_annotations(ctx, &red); |
1116 | |
|
1117 | 0 | annot = pdf_first_annot(ctx, page); |
1118 | 0 | while (annot) |
1119 | 0 | { |
1120 | 0 | if (target == NULL || annot == target) |
1121 | 0 | { |
1122 | 0 | if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) |
1123 | 0 | { |
1124 | 0 | pdf_delete_annot(ctx, page, annot); |
1125 | 0 | annot = pdf_first_annot(ctx, page); |
1126 | 0 | continue; |
1127 | 0 | } |
1128 | 0 | } |
1129 | 0 | annot = pdf_next_annot(ctx, annot); |
1130 | 0 | } |
1131 | |
|
1132 | 0 | doc->redacted = 1; |
1133 | 0 | pdf_end_operation(ctx, doc); |
1134 | 0 | } |
1135 | 0 | fz_catch(ctx) |
1136 | 0 | { |
1137 | 0 | pdf_abandon_operation(ctx, doc); |
1138 | 0 | fz_rethrow(ctx); |
1139 | 0 | } |
1140 | | |
1141 | 0 | return 1; |
1142 | 0 | } |
1143 | | |
1144 | | int |
1145 | | pdf_redact_page(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_redact_options *redact_opts) |
1146 | 0 | { |
1147 | 0 | if (page == NULL || page->doc != doc) |
1148 | 0 | fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't redact a page not from the doc"); |
1149 | 0 | return pdf_apply_redaction_imp(ctx, page, NULL, redact_opts); |
1150 | 0 | } |
1151 | | |
1152 | | int |
1153 | | pdf_apply_redaction(fz_context *ctx, pdf_annot *annot, pdf_redact_options *redact_opts) |
1154 | 0 | { |
1155 | 0 | return pdf_apply_redaction_imp(ctx, annot->page, annot, redact_opts); |
1156 | 0 | } |
1157 | | |
1158 | | /* Hard clipping of pages */ |
1159 | | |
1160 | | struct clip_filter_state { |
1161 | | pdf_filter_options filter_opts; |
1162 | | pdf_sanitize_filter_options sanitize_opts; |
1163 | | pdf_filter_factory filter_list[2]; |
1164 | | pdf_page *page; |
1165 | | fz_rect clip; |
1166 | | }; |
1167 | | |
1168 | | static int clip_culler(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type) |
1169 | 0 | { |
1170 | 0 | struct clip_filter_state *hc = opaque; |
1171 | |
|
1172 | 0 | switch (type) |
1173 | 0 | { |
1174 | 0 | case FZ_CULL_PATH_FILL: |
1175 | 0 | case FZ_CULL_PATH_STROKE: |
1176 | 0 | case FZ_CULL_PATH_FILL_STROKE: |
1177 | 0 | case FZ_CULL_CLIP_PATH_FILL: |
1178 | 0 | case FZ_CULL_CLIP_PATH_STROKE: |
1179 | 0 | case FZ_CULL_CLIP_PATH_FILL_STROKE: |
1180 | 0 | case FZ_CULL_GLYPH: |
1181 | 0 | case FZ_CULL_IMAGE: |
1182 | 0 | case FZ_CULL_SHADING: |
1183 | 0 | return (fz_is_empty_rect(fz_intersect_rect(bbox, hc->clip))); |
1184 | 0 | default: |
1185 | 0 | return 0; |
1186 | 0 | } |
1187 | 0 | } |
1188 | | |
1189 | | static |
1190 | | void init_clip_filter(fz_context *ctx, struct clip_filter_state *hc, pdf_page *page, fz_rect *clip) |
1191 | 0 | { |
1192 | 0 | memset(&hc->filter_opts, 0, sizeof hc->filter_opts); |
1193 | 0 | memset(&hc->sanitize_opts, 0, sizeof hc->sanitize_opts); |
1194 | |
|
1195 | 0 | hc->filter_opts.recurse = 0; /* don't redact patterns, softmasks, and type3 fonts */ |
1196 | 0 | hc->filter_opts.instance_forms = 1; /* redact xobjects with instancing */ |
1197 | 0 | hc->filter_opts.ascii = 0; |
1198 | 0 | hc->filter_opts.opaque = hc; |
1199 | 0 | hc->filter_opts.filters = hc->filter_list; |
1200 | 0 | hc->clip = *clip; |
1201 | |
|
1202 | 0 | hc->sanitize_opts.opaque = hc; |
1203 | 0 | hc->sanitize_opts.culler = clip_culler; |
1204 | |
|
1205 | 0 | hc->filter_list[0].filter = pdf_new_sanitize_filter; |
1206 | 0 | hc->filter_list[0].options = &hc->sanitize_opts; |
1207 | 0 | hc->filter_list[1].filter = NULL; |
1208 | 0 | hc->filter_list[1].options = NULL; |
1209 | |
|
1210 | 0 | hc->page = page; |
1211 | 0 | } |
1212 | | |
1213 | | static void |
1214 | | pdf_clip_page_links(fz_context *ctx, struct clip_filter_state *hc) |
1215 | 0 | { |
1216 | 0 | pdf_obj *annots; |
1217 | 0 | pdf_obj *link; |
1218 | 0 | fz_rect area; |
1219 | 0 | int k; |
1220 | |
|
1221 | 0 | annots = pdf_dict_get(ctx, hc->page->obj, PDF_NAME(Annots)); |
1222 | 0 | k = 0; |
1223 | 0 | while (k < pdf_array_len(ctx, annots)) |
1224 | 0 | { |
1225 | 0 | link = pdf_array_get(ctx, annots, k); |
1226 | 0 | if (pdf_dict_get(ctx, link, PDF_NAME(Subtype)) == PDF_NAME(Link)) |
1227 | 0 | { |
1228 | 0 | area = pdf_dict_get_rect(ctx, link, PDF_NAME(Rect)); |
1229 | 0 | if (fz_is_empty_rect(fz_intersect_rect(area, hc->clip))) |
1230 | 0 | { |
1231 | 0 | pdf_array_delete(ctx, annots, k); |
1232 | 0 | continue; |
1233 | 0 | } |
1234 | 0 | } |
1235 | 0 | ++k; |
1236 | 0 | } |
1237 | 0 | } |
1238 | | |
1239 | | static void |
1240 | | pdf_clip_page_annotations(fz_context *ctx, struct clip_filter_state *hc) |
1241 | 0 | { |
1242 | 0 | pdf_annot *annot; |
1243 | 0 | fz_rect area; |
1244 | |
|
1245 | 0 | restart: |
1246 | 0 | for (annot = pdf_first_annot(ctx, hc->page); annot; annot = pdf_next_annot(ctx, annot)) |
1247 | 0 | { |
1248 | 0 | if (pdf_annot_type(ctx, annot) == PDF_ANNOT_FREE_TEXT) |
1249 | 0 | { |
1250 | 0 | area = pdf_dict_get_rect(ctx, pdf_annot_obj(ctx, annot), PDF_NAME(Rect)); |
1251 | 0 | if (fz_is_empty_rect(fz_intersect_rect(area, hc->clip))) |
1252 | 0 | { |
1253 | 0 | pdf_delete_annot(ctx, hc->page, annot); |
1254 | 0 | goto restart; |
1255 | 0 | } |
1256 | 0 | } |
1257 | 0 | } |
1258 | 0 | } |
1259 | | |
1260 | | void |
1261 | | pdf_clip_page(fz_context *ctx, pdf_page *page, fz_rect *clip) |
1262 | 0 | { |
1263 | 0 | pdf_document *doc; |
1264 | 0 | struct clip_filter_state hc; |
1265 | |
|
1266 | 0 | if (page == NULL) |
1267 | 0 | return; |
1268 | | |
1269 | 0 | doc = page->doc; |
1270 | |
|
1271 | 0 | init_clip_filter(ctx, &hc, page, clip); |
1272 | |
|
1273 | 0 | pdf_begin_operation(ctx, doc, "Apply hard clip to page"); |
1274 | 0 | fz_try(ctx) |
1275 | 0 | { |
1276 | 0 | pdf_filter_page_contents(ctx, doc, page, &hc.filter_opts); |
1277 | 0 | pdf_clip_page_links(ctx, &hc); |
1278 | 0 | pdf_clip_page_annotations(ctx, &hc); |
1279 | 0 | pdf_end_operation(ctx, doc); |
1280 | 0 | } |
1281 | 0 | fz_catch(ctx) |
1282 | 0 | { |
1283 | 0 | pdf_abandon_operation(ctx, doc); |
1284 | 0 | fz_rethrow(ctx); |
1285 | 0 | } |
1286 | 0 | } |