/src/mupdf/source/pdf/pdf-nametree.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2021 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "mupdf/pdf.h" |
25 | | |
26 | | #include <string.h> |
27 | | |
28 | | static pdf_obj * |
29 | | pdf_lookup_name_imp(fz_context *ctx, pdf_obj *node, const char *needle, pdf_cycle_list *cycle_up) |
30 | 0 | { |
31 | 0 | pdf_cycle_list cycle; |
32 | 0 | pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids)); |
33 | 0 | pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names)); |
34 | |
|
35 | 0 | if (pdf_cycle(ctx, &cycle, cycle_up, node)) |
36 | 0 | return NULL; |
37 | | |
38 | 0 | if (pdf_is_array(ctx, kids)) |
39 | 0 | { |
40 | 0 | int l = 0; |
41 | 0 | int r = pdf_array_len(ctx, kids) - 1; |
42 | |
|
43 | 0 | while (l <= r) |
44 | 0 | { |
45 | 0 | int m = (l + r) >> 1; |
46 | 0 | pdf_obj *kid = pdf_array_get(ctx, kids, m); |
47 | 0 | pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits)); |
48 | 0 | const char *first = pdf_array_get_text_string(ctx, limits, 0); |
49 | 0 | const char *last = pdf_array_get_text_string(ctx, limits, 1); |
50 | |
|
51 | 0 | if (!pdf_is_indirect(ctx, kid)) |
52 | 0 | { |
53 | 0 | fz_warn(ctx, "non-indirect internal node found in name tree"); |
54 | 0 | break; |
55 | 0 | } |
56 | | |
57 | 0 | if (strcmp(needle, first) < 0) |
58 | 0 | r = m - 1; |
59 | 0 | else if (strcmp(needle, last) > 0) |
60 | 0 | l = m + 1; |
61 | 0 | else |
62 | 0 | { |
63 | 0 | pdf_obj *obj = pdf_lookup_name_imp(ctx, kid, needle, &cycle); |
64 | 0 | if (obj) |
65 | 0 | return obj; |
66 | 0 | else |
67 | 0 | break; |
68 | 0 | } |
69 | 0 | } |
70 | | |
71 | | /* Spec says names should be sorted (hence the binary search, |
72 | | * above), but Acrobat copes with non-sorted. Drop back to a |
73 | | * simple search if the binary search fails. */ |
74 | 0 | r = pdf_array_len(ctx, kids); |
75 | 0 | for (l = 0; l < r; l++) |
76 | 0 | { |
77 | 0 | pdf_obj *obj, *kid = pdf_array_get(ctx, kids, l); |
78 | 0 | if (!pdf_is_indirect(ctx, kid)) |
79 | 0 | { |
80 | 0 | fz_warn(ctx, "non-indirect internal node found in name tree"); |
81 | 0 | continue; |
82 | 0 | } |
83 | 0 | obj = pdf_lookup_name_imp(ctx, kid, needle, &cycle); |
84 | 0 | if (obj) |
85 | 0 | return obj; |
86 | 0 | } |
87 | 0 | } |
88 | | |
89 | 0 | if (pdf_is_array(ctx, names)) |
90 | 0 | { |
91 | 0 | int l = 0; |
92 | 0 | int r = (pdf_array_len(ctx, names) / 2) - 1; |
93 | |
|
94 | 0 | while (l <= r) |
95 | 0 | { |
96 | 0 | int m = (l + r) >> 1; |
97 | 0 | int c; |
98 | 0 | const char *key = pdf_array_get_text_string(ctx, names, m * 2); |
99 | 0 | pdf_obj *val = pdf_array_get(ctx, names, m * 2 + 1); |
100 | |
|
101 | 0 | c = strcmp(needle, key); |
102 | 0 | if (c < 0) |
103 | 0 | r = m - 1; |
104 | 0 | else if (c > 0) |
105 | 0 | l = m + 1; |
106 | 0 | else |
107 | 0 | return val; |
108 | 0 | } |
109 | | |
110 | | /* Spec says names should be sorted (hence the binary search, |
111 | | * above), but Acrobat copes with non-sorted. Drop back to a |
112 | | * simple search if the binary search fails. */ |
113 | 0 | r = pdf_array_len(ctx, names)/2; |
114 | 0 | for (l = 0; l < r; l++) |
115 | 0 | if (!strcmp(needle, pdf_array_get_text_string(ctx, names, l * 2))) |
116 | 0 | return pdf_array_get(ctx, names, l * 2 + 1); |
117 | 0 | } |
118 | | |
119 | 0 | return NULL; |
120 | 0 | } |
121 | | |
122 | | pdf_obj * |
123 | | pdf_lookup_name(fz_context *ctx, pdf_document *doc, pdf_obj *which, pdf_obj *needle) |
124 | 0 | { |
125 | 0 | pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)); |
126 | 0 | pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names)); |
127 | 0 | pdf_obj *tree = pdf_dict_get(ctx, names, which); |
128 | 0 | return pdf_lookup_name_imp(ctx, tree, pdf_to_text_string(ctx, needle), NULL); |
129 | 0 | } |
130 | | |
131 | | pdf_obj * |
132 | | pdf_lookup_dest(fz_context *ctx, pdf_document *doc, pdf_obj *needle) |
133 | 0 | { |
134 | 0 | pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)); |
135 | 0 | pdf_obj *dests = pdf_dict_get(ctx, root, PDF_NAME(Dests)); |
136 | 0 | pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names)); |
137 | | |
138 | | /* PDF 1.1 has destinations in a dictionary */ |
139 | 0 | if (dests) |
140 | 0 | { |
141 | 0 | if (pdf_is_name(ctx, needle)) |
142 | 0 | return pdf_dict_get(ctx, dests, needle); |
143 | 0 | else |
144 | 0 | return pdf_dict_gets(ctx, dests, pdf_to_str_buf(ctx, needle)); |
145 | 0 | } |
146 | | |
147 | | /* PDF 1.2 has destinations in a name tree */ |
148 | 0 | if (names) |
149 | 0 | { |
150 | 0 | pdf_obj *tree = pdf_dict_get(ctx, names, PDF_NAME(Dests)); |
151 | 0 | return pdf_lookup_name_imp(ctx, tree, pdf_to_text_string(ctx, needle), NULL); |
152 | 0 | } |
153 | | |
154 | 0 | return NULL; |
155 | 0 | } |
156 | | |
157 | | static void |
158 | | pdf_load_name_tree_imp(fz_context *ctx, pdf_obj *dict, pdf_document *doc, pdf_obj *node, pdf_cycle_list *cycle_up) |
159 | 0 | { |
160 | 0 | pdf_cycle_list cycle; |
161 | 0 | pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids)); |
162 | 0 | pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names)); |
163 | 0 | int i; |
164 | |
|
165 | 0 | if (kids && !pdf_cycle(ctx, &cycle, cycle_up, node)) |
166 | 0 | { |
167 | 0 | int len = pdf_array_len(ctx, kids); |
168 | 0 | for (i = 0; i < len; i++) |
169 | 0 | pdf_load_name_tree_imp(ctx, dict, doc, pdf_array_get(ctx, kids, i), &cycle); |
170 | 0 | } |
171 | |
|
172 | 0 | if (names) |
173 | 0 | { |
174 | 0 | int len = pdf_array_len(ctx, names); |
175 | 0 | for (i = 0; i + 1 < len; i += 2) |
176 | 0 | { |
177 | 0 | pdf_obj *key = pdf_array_get(ctx, names, i); |
178 | 0 | pdf_obj *val = pdf_array_get(ctx, names, i + 1); |
179 | 0 | if (pdf_is_string(ctx, key)) |
180 | 0 | { |
181 | 0 | key = pdf_new_name(ctx, pdf_to_text_string(ctx, key)); |
182 | 0 | fz_try(ctx) |
183 | 0 | pdf_dict_put(ctx, dict, key, val); |
184 | 0 | fz_always(ctx) |
185 | 0 | pdf_drop_obj(ctx, key); |
186 | 0 | fz_catch(ctx) |
187 | 0 | fz_rethrow(ctx); |
188 | 0 | } |
189 | 0 | else if (pdf_is_name(ctx, key)) |
190 | 0 | { |
191 | 0 | pdf_dict_put(ctx, dict, key, val); |
192 | 0 | } |
193 | 0 | } |
194 | 0 | } |
195 | 0 | } |
196 | | |
197 | | /* FIXME: fz_try/fz_catch needed here */ |
198 | | pdf_obj * |
199 | | pdf_load_name_tree(fz_context *ctx, pdf_document *doc, pdf_obj *which) |
200 | 0 | { |
201 | 0 | pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)); |
202 | 0 | pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names)); |
203 | 0 | pdf_obj *tree = pdf_dict_get(ctx, names, which); |
204 | 0 | if (pdf_is_dict(ctx, tree)) |
205 | 0 | { |
206 | 0 | pdf_obj *dict = pdf_new_dict(ctx, doc, 100); |
207 | 0 | pdf_load_name_tree_imp(ctx, dict, doc, tree, NULL); |
208 | 0 | return dict; |
209 | 0 | } |
210 | 0 | return NULL; |
211 | 0 | } |
212 | | |
213 | | pdf_obj * |
214 | | pdf_lookup_number_imp(fz_context *ctx, pdf_obj *node, int needle, pdf_cycle_list *cycle_up) |
215 | 29.9k | { |
216 | 29.9k | pdf_cycle_list cycle; |
217 | 29.9k | pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids)); |
218 | 29.9k | pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums)); |
219 | | |
220 | 29.9k | if (pdf_is_array(ctx, kids)) |
221 | 7.45k | { |
222 | 7.45k | int l = 0; |
223 | 7.45k | int r = pdf_array_len(ctx, kids) - 1; |
224 | | |
225 | 13.2k | while (l <= r) |
226 | 13.2k | { |
227 | 13.2k | int m = (l + r) >> 1; |
228 | 13.2k | pdf_obj *kid = pdf_array_get(ctx, kids, m); |
229 | 13.2k | pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits)); |
230 | 13.2k | int first = pdf_array_get_int(ctx, limits, 0); |
231 | 13.2k | int last = pdf_array_get_int(ctx, limits, 1); |
232 | | |
233 | 13.2k | if (needle < first) |
234 | 5.76k | r = m - 1; |
235 | 7.45k | else if (needle > last) |
236 | 0 | l = m + 1; |
237 | 7.45k | else |
238 | 7.45k | { |
239 | 7.45k | if (pdf_cycle(ctx, &cycle, cycle_up, node)) |
240 | 0 | break; |
241 | 7.45k | return pdf_lookup_number_imp(ctx, kid, needle, &cycle); |
242 | 7.45k | } |
243 | 13.2k | } |
244 | 7.45k | } |
245 | | |
246 | 22.5k | if (pdf_is_array(ctx, nums)) |
247 | 16.2k | { |
248 | 16.2k | pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums)); |
249 | 16.2k | int l = 0; |
250 | 16.2k | int r = (pdf_array_len(ctx, nums) / 2) - 1; |
251 | | |
252 | 43.5k | while (l <= r) |
253 | 43.5k | { |
254 | 43.5k | int m = (l + r) >> 1; |
255 | 43.5k | int key = pdf_array_get_int(ctx, nums, m * 2); |
256 | 43.5k | pdf_obj *val = pdf_array_get(ctx, nums, m * 2 + 1); |
257 | | |
258 | 43.5k | if (needle < key) |
259 | 16.8k | r = m - 1; |
260 | 26.7k | else if (needle > key) |
261 | 10.5k | l = m + 1; |
262 | 16.2k | else |
263 | 16.2k | return val; |
264 | 43.5k | } |
265 | | |
266 | | /* Parallel the nametree lookup above by allowing for non-sorted lists. */ |
267 | 0 | r = pdf_array_len(ctx, nums)/2; |
268 | 0 | for (l = 0; l < r; l++) |
269 | 0 | if (needle == pdf_array_get_int(ctx, nums, l * 2)) |
270 | 0 | return pdf_array_get(ctx, nums, l * 2 + 1); |
271 | 0 | } |
272 | | |
273 | 6.25k | return NULL; |
274 | 22.5k | } |
275 | | |
276 | | pdf_obj * |
277 | | pdf_lookup_number(fz_context *ctx, pdf_obj *node, int needle) |
278 | 22.5k | { |
279 | 22.5k | return pdf_lookup_number_imp(ctx, node, needle, NULL); |
280 | 22.5k | } |
281 | | |
282 | | static void pdf_walk_tree_imp(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name, |
283 | | void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **), |
284 | | void (*leave)(fz_context *, pdf_obj *, void *), |
285 | | void *arg, |
286 | | pdf_obj **inherit_names, |
287 | | pdf_obj **inherit_vals, |
288 | | pdf_cycle_list *cycle_up); |
289 | | |
290 | | static void |
291 | | pdf_walk_tree_kid(fz_context *ctx, |
292 | | pdf_obj *obj, |
293 | | pdf_obj *kid_name, |
294 | | void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **), |
295 | | void (*leave)(fz_context *, pdf_obj *, void *), |
296 | | void *arg, |
297 | | pdf_obj **inherit_names, |
298 | | pdf_obj **inherit_vals, |
299 | | pdf_cycle_list *cycle_up) |
300 | 0 | { |
301 | 0 | pdf_cycle_list cycle; |
302 | 0 | pdf_obj **new_vals = NULL; |
303 | |
|
304 | 0 | if (obj == NULL || pdf_cycle(ctx, &cycle, cycle_up, obj)) |
305 | 0 | return; |
306 | | |
307 | 0 | fz_var(new_vals); |
308 | |
|
309 | 0 | fz_try(ctx) |
310 | 0 | { |
311 | | /* First we run through the names we've been asked to collect |
312 | | * inherited values for updating the values. */ |
313 | 0 | if (inherit_names != NULL) |
314 | 0 | { |
315 | 0 | int i, n; |
316 | |
|
317 | 0 | for (n = 0; inherit_names[n] != NULL; n++); |
318 | |
|
319 | 0 | for (i = 0; i < n; i++) |
320 | 0 | { |
321 | 0 | pdf_obj *v = pdf_dict_get(ctx, obj, inherit_names[i]); |
322 | 0 | if (v != NULL) |
323 | 0 | { |
324 | 0 | if (new_vals == NULL) |
325 | 0 | { |
326 | 0 | new_vals = fz_malloc_array(ctx, n, pdf_obj *); |
327 | 0 | memcpy(new_vals, inherit_vals, n*sizeof(pdf_obj *)); |
328 | 0 | inherit_vals = new_vals; |
329 | 0 | } |
330 | 0 | inherit_vals[i] = v; |
331 | 0 | } |
332 | 0 | } |
333 | 0 | } |
334 | |
|
335 | 0 | if (arrive) |
336 | 0 | arrive(ctx, obj, arg, inherit_vals); |
337 | 0 | pdf_walk_tree_imp(ctx, pdf_dict_get(ctx, obj, kid_name), kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle); |
338 | 0 | if (leave) |
339 | 0 | leave(ctx, obj, arg); |
340 | 0 | } |
341 | 0 | fz_always(ctx) |
342 | 0 | fz_free(ctx, new_vals); |
343 | 0 | fz_catch(ctx) |
344 | 0 | fz_rethrow(ctx); |
345 | 0 | } |
346 | | |
347 | | static void pdf_walk_tree_imp(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name, |
348 | | void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **), |
349 | | void (*leave)(fz_context *, pdf_obj *, void *), |
350 | | void *arg, |
351 | | pdf_obj **inherit_names, |
352 | | pdf_obj **inherit_vals, |
353 | | pdf_cycle_list *cycle_up) |
354 | 0 | { |
355 | 0 | pdf_cycle_list cycle; |
356 | |
|
357 | 0 | if (obj == NULL || pdf_cycle(ctx, &cycle, cycle_up, obj)) |
358 | 0 | return; |
359 | | |
360 | 0 | if (pdf_is_array(ctx, obj)) |
361 | 0 | { |
362 | 0 | int i, n = pdf_array_len(ctx, obj); |
363 | 0 | for (i = 0; i < n; i++) |
364 | 0 | pdf_walk_tree_kid(ctx, pdf_array_get(ctx, obj, i), kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle); |
365 | 0 | } |
366 | 0 | else |
367 | 0 | { |
368 | 0 | pdf_walk_tree_kid(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle); |
369 | 0 | } |
370 | 0 | } |
371 | | |
372 | | void pdf_walk_tree(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name, |
373 | | void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **), |
374 | | void (*leave)(fz_context *, pdf_obj *, void *), |
375 | | void *arg, |
376 | | pdf_obj **inherit_names, |
377 | | pdf_obj **inherit_vals) |
378 | 0 | { |
379 | 0 | pdf_walk_tree_imp(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals, NULL); |
380 | 0 | } |