/src/mupdf/source/pdf/pdf-cmap.c
Line | Count | Source |
1 | | // Copyright (C) 2004-2021 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "mupdf/pdf.h" |
25 | | |
26 | | #include <assert.h> |
27 | | #include <string.h> |
28 | | |
29 | | #undef CHECK_SPLAY |
30 | | #undef DUMP_SPLAY |
31 | | |
32 | 6.14k | #define CMAP_TABLE_LIMIT 65536 |
33 | | |
34 | | /* |
35 | | * Allocate, destroy and simple parameters. |
36 | | */ |
37 | | |
38 | | void |
39 | | pdf_drop_cmap_imp(fz_context *ctx, fz_storable *cmap_) |
40 | 955 | { |
41 | 955 | pdf_cmap *cmap = (pdf_cmap *)cmap_; |
42 | 955 | pdf_drop_cmap(ctx, cmap->usecmap); |
43 | 955 | fz_free(ctx, cmap->ranges); |
44 | 955 | fz_free(ctx, cmap->xranges); |
45 | 955 | fz_free(ctx, cmap->mranges); |
46 | 955 | fz_free(ctx, cmap->dict); |
47 | 955 | fz_free(ctx, cmap->tree); |
48 | 955 | fz_free(ctx, cmap); |
49 | 955 | } |
50 | | |
51 | | pdf_cmap * |
52 | | pdf_new_cmap(fz_context *ctx) |
53 | 955 | { |
54 | 955 | pdf_cmap *cmap = fz_malloc_struct(ctx, pdf_cmap); |
55 | 955 | FZ_INIT_STORABLE(cmap, 1, pdf_drop_cmap_imp); |
56 | 955 | return cmap; |
57 | 955 | } |
58 | | |
59 | | /* Could be a macro for speed */ |
60 | | pdf_cmap * |
61 | | pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap) |
62 | 14 | { |
63 | 14 | if (cmap) |
64 | 14 | return fz_keep_storable(ctx, &cmap->storable); |
65 | 0 | return NULL; |
66 | 14 | } |
67 | | |
68 | | /* Could be a macro for speed */ |
69 | | void |
70 | | pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap) |
71 | 3.23k | { |
72 | 3.23k | if (cmap) |
73 | 1.06k | fz_drop_storable(ctx, &cmap->storable); |
74 | 3.23k | } |
75 | | |
76 | | void |
77 | | pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap) |
78 | 0 | { |
79 | 0 | int i; |
80 | |
|
81 | 0 | pdf_drop_cmap(ctx, cmap->usecmap); |
82 | 0 | cmap->usecmap = pdf_keep_cmap(ctx, usecmap); |
83 | |
|
84 | 0 | if (cmap->codespace_len == 0) |
85 | 0 | { |
86 | 0 | cmap->codespace_len = usecmap->codespace_len; |
87 | 0 | for (i = 0; i < usecmap->codespace_len; i++) |
88 | 0 | cmap->codespace[i] = usecmap->codespace[i]; |
89 | 0 | } |
90 | 0 | } |
91 | | |
92 | | int |
93 | | pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap) |
94 | 158 | { |
95 | 158 | return cmap->wmode; |
96 | 158 | } |
97 | | |
98 | | void |
99 | | pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode) |
100 | 545 | { |
101 | 545 | cmap->wmode = wmode; |
102 | 545 | } |
103 | | |
104 | | void |
105 | | pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, size_t n) |
106 | 955 | { |
107 | 955 | if (cmap->codespace_len + 1 == nelem(cmap->codespace)) |
108 | 0 | { |
109 | 0 | fz_warn(ctx, "assert: too many code space ranges"); |
110 | 0 | return; |
111 | 0 | } |
112 | | |
113 | 955 | if ((uint32_t)n != n) |
114 | 0 | { |
115 | 0 | fz_warn(ctx, "assert: code space range too large"); |
116 | 0 | return; |
117 | 0 | } |
118 | | |
119 | 955 | cmap->codespace[cmap->codespace_len].n = (int)n; |
120 | 955 | cmap->codespace[cmap->codespace_len].low = low; |
121 | 955 | cmap->codespace[cmap->codespace_len].high = high; |
122 | 955 | cmap->codespace_len ++; |
123 | 955 | } |
124 | | |
125 | | struct cmap_splay { |
126 | | unsigned int low; |
127 | | unsigned int high; |
128 | | unsigned int out; |
129 | | unsigned int left; |
130 | | unsigned int right; |
131 | | unsigned int parent : 31; |
132 | | unsigned int many : 1; |
133 | | }; |
134 | | |
135 | 1.31M | #define EMPTY ((unsigned int)0x40000000) |
136 | | |
137 | | /* |
138 | | The splaying steps used: |
139 | | |
140 | | Case 1: | z x |
141 | | | y D => A y |
142 | | | x C B z |
143 | | | A B C D |
144 | | |
145 | | Case 2: | z x |
146 | | | y D => y z |
147 | | | A x A B C D |
148 | | | B C |
149 | | |
150 | | Case 3: | y x |
151 | | | x C => A y |
152 | | | A B B C |
153 | | */ |
154 | | |
155 | | static void |
156 | | move_to_root(cmap_splay *tree, unsigned int x) |
157 | 50.4k | { |
158 | 50.4k | if (x == EMPTY) |
159 | 0 | return; |
160 | 50.4k | do |
161 | 50.4k | { |
162 | 50.4k | unsigned int z, zp; |
163 | 50.4k | unsigned int y = tree[x].parent; |
164 | 50.4k | if (y == EMPTY) |
165 | 845 | break; |
166 | 49.5k | z = tree[y].parent; |
167 | 49.5k | if (z == EMPTY) |
168 | 49.5k | { |
169 | | /* Case 3 */ |
170 | 49.5k | tree[x].parent = EMPTY; |
171 | 49.5k | tree[y].parent = x; |
172 | 49.5k | if (tree[y].left == x) |
173 | 0 | { |
174 | | /* Case 3 */ |
175 | 0 | tree[y].left = tree[x].right; |
176 | 0 | if (tree[y].left != EMPTY) |
177 | 0 | tree[tree[y].left].parent = y; |
178 | 0 | tree[x].right = y; |
179 | 0 | } |
180 | 49.5k | else |
181 | 49.5k | { |
182 | | /* Case 3 - reflected */ |
183 | 49.5k | assert(tree[y].right == x); |
184 | 49.5k | tree[y].right = tree[x].left; |
185 | 49.5k | if (tree[y].right != EMPTY) |
186 | 0 | tree[tree[y].right].parent = y; |
187 | 49.5k | tree[x].left = y; |
188 | 49.5k | } |
189 | 49.5k | break; |
190 | 49.5k | } |
191 | | |
192 | 0 | zp = tree[z].parent; |
193 | 0 | tree[x].parent = zp; |
194 | 0 | if (zp != EMPTY) { |
195 | 0 | if (tree[zp].left == z) |
196 | 0 | tree[zp].left = x; |
197 | 0 | else |
198 | 0 | { |
199 | 0 | assert(tree[zp].right == z); |
200 | 0 | tree[zp].right = x; |
201 | 0 | } |
202 | 0 | } |
203 | 0 | tree[y].parent = x; |
204 | 0 | if (tree[y].left == x) |
205 | 0 | { |
206 | 0 | tree[y].left = tree[x].right; |
207 | 0 | if (tree[y].left != EMPTY) |
208 | 0 | tree[tree[y].left].parent = y; |
209 | 0 | tree[x].right = y; |
210 | 0 | if (tree[z].left == y) |
211 | 0 | { |
212 | | /* Case 1 */ |
213 | 0 | tree[z].parent = y; |
214 | 0 | tree[z].left = tree[y].right; |
215 | 0 | if (tree[z].left != EMPTY) |
216 | 0 | tree[tree[z].left].parent = z; |
217 | 0 | tree[y].right = z; |
218 | 0 | } |
219 | 0 | else |
220 | 0 | { |
221 | | /* Case 2 - reflected */ |
222 | 0 | assert(tree[z].right == y); |
223 | 0 | tree[z].parent = x; |
224 | 0 | tree[z].right = tree[x].left; |
225 | 0 | if (tree[z].right != EMPTY) |
226 | 0 | tree[tree[z].right].parent = z; |
227 | 0 | tree[x].left = z; |
228 | 0 | } |
229 | 0 | } |
230 | 0 | else |
231 | 0 | { |
232 | 0 | assert(tree[y].right == x); |
233 | 0 | tree[y].right = tree[x].left; |
234 | 0 | if (tree[y].right != EMPTY) |
235 | 0 | tree[tree[y].right].parent = y; |
236 | 0 | tree[x].left = y; |
237 | 0 | if (tree[z].left == y) |
238 | 0 | { |
239 | | /* Case 2 */ |
240 | 0 | tree[z].parent = x; |
241 | 0 | tree[z].left = tree[x].right; |
242 | 0 | if (tree[z].left != EMPTY) |
243 | 0 | tree[tree[z].left].parent = z; |
244 | 0 | tree[x].right = z; |
245 | 0 | } |
246 | 0 | else |
247 | 0 | { |
248 | | /* Case 1 - reflected */ |
249 | 0 | assert(tree[z].right == y); |
250 | 0 | tree[z].parent = y; |
251 | 0 | tree[z].right = tree[y].left; |
252 | 0 | if (tree[z].right != EMPTY) |
253 | 0 | tree[tree[z].right].parent = z; |
254 | 0 | tree[y].left = z; |
255 | 0 | } |
256 | 0 | } |
257 | 0 | } while (1); |
258 | 50.4k | } |
259 | | |
260 | | static unsigned int delete_node(pdf_cmap *cmap, unsigned int current) |
261 | 0 | { |
262 | 0 | cmap_splay *tree = cmap->tree; |
263 | 0 | unsigned int parent; |
264 | 0 | unsigned int replacement; |
265 | |
|
266 | 0 | assert(current != EMPTY); |
267 | |
|
268 | 0 | parent = tree[current].parent; |
269 | 0 | if (tree[current].right == EMPTY) |
270 | 0 | { |
271 | 0 | if (parent == EMPTY) |
272 | 0 | { |
273 | 0 | replacement = cmap->ttop = tree[current].left; |
274 | 0 | } |
275 | 0 | else if (tree[parent].left == current) |
276 | 0 | { |
277 | 0 | replacement = tree[parent].left = tree[current].left; |
278 | 0 | } |
279 | 0 | else |
280 | 0 | { |
281 | 0 | assert(tree[parent].right == current); |
282 | 0 | replacement = tree[parent].right = tree[current].left; |
283 | 0 | } |
284 | 0 | if (replacement != EMPTY) |
285 | 0 | tree[replacement].parent = parent; |
286 | 0 | else |
287 | 0 | replacement = parent; |
288 | 0 | } |
289 | 0 | else if (tree[current].left == EMPTY) |
290 | 0 | { |
291 | 0 | if (parent == EMPTY) |
292 | 0 | { |
293 | 0 | replacement = cmap->ttop = tree[current].right; |
294 | 0 | } |
295 | 0 | else if (tree[parent].left == current) |
296 | 0 | { |
297 | 0 | replacement = tree[parent].left = tree[current].right; |
298 | 0 | } |
299 | 0 | else |
300 | 0 | { |
301 | 0 | assert(tree[parent].right == current); |
302 | 0 | replacement = tree[parent].right = tree[current].right; |
303 | 0 | } |
304 | 0 | if (replacement != EMPTY) |
305 | 0 | tree[replacement].parent = parent; |
306 | 0 | else |
307 | 0 | replacement = parent; |
308 | 0 | } |
309 | 0 | else |
310 | 0 | { |
311 | | /* Hard case, find the in-order predecessor of current */ |
312 | 0 | unsigned int amputee = current; |
313 | 0 | replacement = tree[current].left; |
314 | 0 | while (tree[replacement].right != EMPTY) { |
315 | 0 | amputee = replacement; |
316 | 0 | replacement = tree[replacement].right; |
317 | 0 | } |
318 | | /* Remove replacement from the tree */ |
319 | 0 | if (amputee == current) |
320 | 0 | { |
321 | 0 | tree[amputee].left = tree[replacement].left; |
322 | 0 | if (tree[amputee].left != EMPTY) |
323 | 0 | tree[tree[amputee].left].parent = amputee; |
324 | 0 | } |
325 | 0 | else |
326 | 0 | { |
327 | 0 | tree[amputee].right = tree[replacement].left; |
328 | 0 | if (tree[amputee].right != EMPTY) |
329 | 0 | tree[tree[amputee].right].parent = amputee; |
330 | 0 | } |
331 | | /* Insert replacement in place of current */ |
332 | 0 | tree[replacement].parent = parent; |
333 | 0 | if (parent == EMPTY) |
334 | 0 | { |
335 | 0 | tree[replacement].parent = EMPTY; |
336 | 0 | cmap->ttop = replacement; |
337 | 0 | } |
338 | 0 | else if (tree[parent].left == current) |
339 | 0 | tree[parent].left = replacement; |
340 | 0 | else |
341 | 0 | { |
342 | 0 | assert(tree[parent].right == current); |
343 | 0 | tree[parent].right = replacement; |
344 | 0 | } |
345 | 0 | tree[replacement].left = tree[current].left; |
346 | 0 | if (tree[replacement].left != EMPTY) |
347 | 0 | tree[tree[replacement].left].parent = replacement; |
348 | 0 | tree[replacement].right = tree[current].right; |
349 | 0 | if (tree[replacement].right != EMPTY) |
350 | 0 | tree[tree[replacement].right].parent = replacement; |
351 | 0 | } |
352 | | |
353 | | /* current is now unlinked. We need to remove it from our array. */ |
354 | 0 | cmap->tlen--; |
355 | 0 | if (current != (unsigned int) cmap->tlen) |
356 | 0 | { |
357 | 0 | if (replacement == (unsigned int) cmap->tlen) |
358 | 0 | replacement = current; |
359 | 0 | tree[current] = tree[cmap->tlen]; |
360 | 0 | parent = tree[current].parent; |
361 | 0 | if (parent == EMPTY) |
362 | 0 | cmap->ttop = current; |
363 | 0 | else if (tree[parent].left == (unsigned int) cmap->tlen) |
364 | 0 | tree[parent].left = current; |
365 | 0 | else |
366 | 0 | { |
367 | 0 | assert(tree[parent].right == (unsigned int) cmap->tlen); |
368 | 0 | tree[parent].right = current; |
369 | 0 | } |
370 | 0 | if (tree[current].left != EMPTY) |
371 | 0 | { |
372 | 0 | assert(tree[tree[current].left].parent == (unsigned int) cmap->tlen); |
373 | 0 | tree[tree[current].left].parent = current; |
374 | 0 | } |
375 | 0 | if (tree[current].right != EMPTY) |
376 | 0 | { |
377 | 0 | assert(tree[tree[current].right].parent == (unsigned int) cmap->tlen); |
378 | 0 | tree[tree[current].right].parent = current; |
379 | 0 | } |
380 | 0 | } |
381 | | |
382 | | /* Return the node that we should continue searching from */ |
383 | 0 | return replacement; |
384 | 0 | } |
385 | | |
386 | | #ifdef DUMP_SPLAY |
387 | | static void |
388 | | dump_splay(cmap_splay *tree, unsigned int node, int depth, const char *pre) |
389 | | { |
390 | | int i; |
391 | | |
392 | | if (tree == NULL || node == EMPTY) |
393 | | return; |
394 | | |
395 | | for (i = 0; i < depth; i++) |
396 | | fprintf(stderr, " "); |
397 | | fprintf(stderr, "%s%d:", pre, node); |
398 | | if (tree[node].parent == EMPTY) |
399 | | fprintf(stderr, "^EMPTY"); |
400 | | else |
401 | | fprintf(stderr, "^%d", tree[node].parent); |
402 | | if (tree[node].left == EMPTY) |
403 | | fprintf(stderr, "<EMPTY"); |
404 | | else |
405 | | fprintf(stderr, "<%d", tree[node].left); |
406 | | if (tree[node].right == EMPTY) |
407 | | fprintf(stderr, ">EMPTY"); |
408 | | else |
409 | | fprintf(stderr, ">%d", tree[node].right); |
410 | | fprintf(stderr, "(%x,%x,%x,%d)\n", tree[node].low, tree[node].high, tree[node].out, tree[node].many); |
411 | | assert(tree[node].parent == EMPTY || depth); |
412 | | assert(tree[node].left == EMPTY || tree[tree[node].left].parent == node); |
413 | | assert(tree[node].right == EMPTY || tree[tree[node].right].parent == node); |
414 | | dump_splay(tree, tree[node].left, depth+1, "L"); |
415 | | dump_splay(tree, tree[node].right, depth+1, "R"); |
416 | | } |
417 | | #endif |
418 | | |
419 | | enum |
420 | | { |
421 | | TOP = 0, |
422 | | LEFT = 1, |
423 | | RIGHT = 2 |
424 | | }; |
425 | | |
426 | | static void walk_splay(cmap_splay *tree, unsigned int node, void (*fn)(cmap_splay *, void *), void *arg) |
427 | 1.69k | { |
428 | 1.69k | int from = TOP; |
429 | | |
430 | 199k | while (node != EMPTY) |
431 | 199k | { |
432 | 199k | switch (from) |
433 | 199k | { |
434 | 100k | case TOP: |
435 | 100k | if (tree[node].left != EMPTY) |
436 | 99.1k | { |
437 | 99.1k | node = tree[node].left; |
438 | 99.1k | from = TOP; |
439 | 99.1k | break; |
440 | 99.1k | } |
441 | | /* fallthrough */ |
442 | 100k | case LEFT: |
443 | 100k | fn(&tree[node], arg); |
444 | 100k | if (tree[node].right != EMPTY) |
445 | 0 | { |
446 | 0 | node = tree[node].right; |
447 | 0 | from = TOP; |
448 | 0 | break; |
449 | 0 | } |
450 | | /* fallthrough */ |
451 | 100k | case RIGHT: |
452 | 100k | { |
453 | 100k | unsigned int parent = tree[node].parent; |
454 | 100k | if (parent == EMPTY) |
455 | 1.69k | return; |
456 | 99.1k | if (tree[parent].left == node) |
457 | 99.1k | from = LEFT; |
458 | 0 | else |
459 | 0 | { |
460 | 0 | assert(tree[parent].right == node); |
461 | 0 | from = RIGHT; |
462 | 0 | } |
463 | 99.1k | node = parent; |
464 | 99.1k | } |
465 | 199k | } |
466 | 199k | } |
467 | 1.69k | } |
468 | | |
469 | | #ifdef CHECK_SPLAY |
470 | | |
471 | | static int |
472 | | tree_has_overlap(cmap_splay *tree, int node, int low, int high) |
473 | | { |
474 | | if (tree[node].left != EMPTY) |
475 | | if (tree_has_overlap(tree, tree[node].left, low, high)) |
476 | | return 1; |
477 | | if (tree[node].right != EMPTY) |
478 | | if (tree_has_overlap(tree, tree[node].right, low, high)) |
479 | | return 1; |
480 | | return (tree[node].low < low && low < tree[node].high) || (tree[node].low < high && high < tree[node].high); |
481 | | } |
482 | | |
483 | | static void |
484 | | do_check(cmap_splay *node, void *arg) |
485 | | { |
486 | | cmap_splay *tree = arg; |
487 | | unsigned int num = node - tree; |
488 | | assert(!node->many || node->low == node->high); |
489 | | assert(node->low <= node->high); |
490 | | assert((node->left == EMPTY) || (tree[node->left].parent == num && |
491 | | tree[node->left].high < node->low)); |
492 | | assert(node->right == EMPTY || (tree[node->right].parent == num && |
493 | | node->high < tree[node->right].low)); |
494 | | assert(!tree_has_overlap(tree, num, node->low, node->high)); |
495 | | } |
496 | | |
497 | | static void |
498 | | check_splay(cmap_splay *tree, unsigned int node, int depth) |
499 | | { |
500 | | if (node == EMPTY) |
501 | | return; |
502 | | assert(tree[node].parent == EMPTY); |
503 | | walk_splay(tree, node, do_check, tree); |
504 | | } |
505 | | #endif |
506 | | |
507 | | /* |
508 | | * Add a range. |
509 | | */ |
510 | | static void |
511 | | add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out, int check_for_overlap, int many) |
512 | 62.0k | { |
513 | 62.0k | int current; |
514 | 62.0k | cmap_splay *tree; |
515 | | |
516 | 62.0k | if (low > high) |
517 | 0 | { |
518 | 0 | fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name); |
519 | 0 | return; |
520 | 0 | } |
521 | | |
522 | 62.0k | if (cmap->codespace_len == 0) |
523 | 0 | { |
524 | 0 | fz_warn(ctx, "CMap is missing codespace range"); |
525 | 0 | pdf_add_codespace(ctx, cmap, 0, 65535, 2); |
526 | 0 | } |
527 | | |
528 | 62.0k | tree = cmap->tree; |
529 | | |
530 | 62.0k | if (cmap->tlen) |
531 | 61.1k | { |
532 | 61.1k | unsigned int move = cmap->ttop; |
533 | 61.1k | unsigned int gt = EMPTY; |
534 | 61.1k | unsigned int lt = EMPTY; |
535 | 61.1k | if (check_for_overlap) |
536 | 61.1k | { |
537 | | /* Check for collision with the current node */ |
538 | 61.1k | do |
539 | 61.1k | { |
540 | 61.1k | current = move; |
541 | | /* Cases we might meet: |
542 | | * tree[i]: <-----> |
543 | | * case 0: <-> |
544 | | * case 1: <-------> |
545 | | * case 2: <-------------> |
546 | | * case 3: <-> |
547 | | * case 4: <-------> |
548 | | * case 5: <-> |
549 | | */ |
550 | 61.1k | if (low <= tree[current].low && tree[current].low <= high) |
551 | 0 | { |
552 | | /* case 1, reduces to case 0 */ |
553 | | /* or case 2, deleting the node */ |
554 | 0 | if (!tree[current].many) |
555 | 0 | { |
556 | 0 | tree[current].out += high + 1 - tree[current].low; |
557 | 0 | tree[current].low = high + 1; |
558 | 0 | } |
559 | 0 | if (tree[current].low > tree[current].high || tree[current].many) |
560 | 0 | { |
561 | | /* update lt/gt references that will be moved/stale after deleting current */ |
562 | 0 | if (gt == (unsigned int) cmap->tlen - 1) |
563 | 0 | gt = current; |
564 | 0 | if (lt == (unsigned int) cmap->tlen - 1) |
565 | 0 | lt = current; |
566 | | /* delete_node() moves the element at cmap->tlen-1 into current */ |
567 | 0 | move = delete_node(cmap, current); |
568 | 0 | current = EMPTY; |
569 | 0 | continue; |
570 | 0 | } |
571 | 0 | } |
572 | 61.1k | else if (low <= tree[current].high && tree[current].high <= high) |
573 | 0 | { |
574 | | /* case 4, reduces to case 5 */ |
575 | 0 | tree[current].high = low - 1; |
576 | 0 | assert(tree[current].low <= tree[current].high); |
577 | 0 | } |
578 | 61.1k | else if (tree[current].low < low && high < tree[current].high) |
579 | 0 | { |
580 | | /* case 3, reduces to case 5 */ |
581 | 0 | int new_high = tree[current].high; |
582 | 0 | tree[current].high = low-1; |
583 | 0 | assert(!tree[current].many); |
584 | 0 | add_range(ctx, cmap, high+1, new_high, tree[current].out + high + 1 - tree[current].low, 0, 0); |
585 | 0 | tree = cmap->tree; |
586 | 0 | } |
587 | | /* Now look for where to move to next (left for case 0, right for case 5) */ |
588 | 61.1k | if (tree[current].low > high) { |
589 | 0 | move = tree[current].left; |
590 | 0 | gt = current; |
591 | 0 | } |
592 | 61.1k | else |
593 | 61.1k | { |
594 | 61.1k | move = tree[current].right; |
595 | 61.1k | lt = current; |
596 | 61.1k | } |
597 | 61.1k | } |
598 | 61.1k | while (move != EMPTY); |
599 | 61.1k | } |
600 | 0 | else |
601 | 0 | { |
602 | 0 | do |
603 | 0 | { |
604 | 0 | current = move; |
605 | 0 | if (tree[current].low > high) |
606 | 0 | { |
607 | 0 | move = tree[current].left; |
608 | 0 | gt = current; |
609 | 0 | } |
610 | 0 | else |
611 | 0 | { |
612 | 0 | move = tree[current].right; |
613 | 0 | lt = current; |
614 | 0 | } |
615 | 0 | } while (move != EMPTY); |
616 | 0 | } |
617 | | /* current is now the node to which we would be adding the new node */ |
618 | | /* lt is the last node we traversed which is lt the new node. */ |
619 | | /* gt is the last node we traversed which is gt the new node. */ |
620 | | |
621 | 61.1k | if (!many) |
622 | 55.9k | { |
623 | | /* Check for the 'merge' cases. */ |
624 | 55.9k | if (lt != EMPTY && !tree[lt].many && tree[lt].high == low-1 && tree[lt].out - tree[lt].low == out - low) |
625 | 11.6k | { |
626 | 11.6k | tree[lt].high = high; |
627 | 11.6k | if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low) |
628 | 0 | { |
629 | 0 | tree[lt].high = tree[gt].high; |
630 | 0 | delete_node(cmap, gt); |
631 | 0 | } |
632 | 11.6k | goto exit; |
633 | 11.6k | } |
634 | 44.2k | if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low) |
635 | 0 | { |
636 | 0 | tree[gt].low = low; |
637 | 0 | tree[gt].out = out; |
638 | 0 | goto exit; |
639 | 0 | } |
640 | 44.2k | } |
641 | 61.1k | } |
642 | 845 | else |
643 | 845 | current = EMPTY; |
644 | | |
645 | 50.4k | if (cmap->tlen == cmap->tcap) |
646 | 873 | { |
647 | 873 | int new_cap = cmap->tcap ? cmap->tcap * 2 : 256; |
648 | 873 | if (new_cap > CMAP_TABLE_LIMIT) |
649 | 0 | fz_throw(ctx, FZ_ERROR_LIMIT, "too many entries in CMap table"); |
650 | 873 | tree = cmap->tree = fz_realloc_array(ctx, cmap->tree, new_cap, cmap_splay); |
651 | 873 | cmap->tcap = new_cap; |
652 | 873 | } |
653 | 50.4k | tree[cmap->tlen].low = low; |
654 | 50.4k | tree[cmap->tlen].high = high; |
655 | 50.4k | tree[cmap->tlen].out = out; |
656 | 50.4k | tree[cmap->tlen].parent = current; |
657 | 50.4k | tree[cmap->tlen].left = EMPTY; |
658 | 50.4k | tree[cmap->tlen].right = EMPTY; |
659 | 50.4k | tree[cmap->tlen].many = many; |
660 | 50.4k | cmap->tlen++; |
661 | 50.4k | if (current == EMPTY) |
662 | 845 | cmap->ttop = 0; |
663 | 49.5k | else if (tree[current].low > high) |
664 | 0 | tree[current].left = cmap->tlen-1; |
665 | 49.5k | else |
666 | 49.5k | { |
667 | 49.5k | assert(tree[current].high < low); |
668 | 49.5k | tree[current].right = cmap->tlen-1; |
669 | 49.5k | } |
670 | 50.4k | move_to_root(tree, cmap->tlen-1); |
671 | 50.4k | cmap->ttop = cmap->tlen-1; |
672 | 62.0k | exit: |
673 | 62.0k | {} |
674 | | #ifdef CHECK_SPLAY |
675 | | check_splay(cmap->tree, cmap->ttop, 0); |
676 | | #endif |
677 | | #ifdef DUMP_SPLAY |
678 | | dump_splay(cmap->tree, cmap->ttop, 0, ""); |
679 | | #endif |
680 | 62.0k | } |
681 | | |
682 | | /* |
683 | | * Add a one-to-many mapping. |
684 | | */ |
685 | | static void |
686 | | add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len) |
687 | 5.26k | { |
688 | 5.26k | int out_pos; |
689 | 5.26k | int new_len = cmap->dlen + len + 1; |
690 | 5.26k | int new_cap; |
691 | | |
692 | 5.26k | if (new_len > CMAP_TABLE_LIMIT) |
693 | 0 | fz_throw(ctx, FZ_ERROR_LIMIT, "too many entries in CMap table"); |
694 | | |
695 | 5.26k | if (new_len > cmap->dcap) |
696 | 14 | { |
697 | 14 | new_cap = cmap->dcap > 0 ? cmap->dcap : 256; |
698 | 26 | while (new_len > new_cap) |
699 | 12 | new_cap *= 2; |
700 | 14 | cmap->dict = fz_realloc_array(ctx, cmap->dict, new_cap, int); |
701 | 14 | cmap->dcap = new_cap; |
702 | 14 | } |
703 | 5.26k | out_pos = cmap->dlen; |
704 | 5.26k | cmap->dict[out_pos] = len; |
705 | 5.26k | memcpy(&cmap->dict[out_pos+1], out, sizeof(int)*len); |
706 | 5.26k | cmap->dlen = new_len; |
707 | | |
708 | 5.26k | add_range(ctx, cmap, low, low, out_pos, 1, 1); |
709 | 5.26k | } |
710 | | |
711 | | void |
712 | | pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out) |
713 | 30.3k | { |
714 | 30.3k | add_range(ctx, cmap, low, high, out, 1, 0); |
715 | 30.3k | } |
716 | | |
717 | | void |
718 | | pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *values, size_t len) |
719 | 31.7k | { |
720 | 31.7k | int *ovalues = values; |
721 | | /* len is always restricted to <= 256 by the callers. */ |
722 | 31.7k | int local[PDF_MRANGE_CAP]; |
723 | | |
724 | 31.7k | assert(len <= PDF_MRANGE_CAP); |
725 | | |
726 | | /* Decode unicode surrogate pairs. */ |
727 | | /* Only the *-UCS2 CMaps use one-to-many mappings, so assuming unicode should be safe. */ |
728 | 31.7k | if (len >= 2) |
729 | 5.26k | { |
730 | 5.26k | size_t i, j; |
731 | | /* Look for mranges with either multiple surrogate pairs in, or surrogate pairs |
732 | | * with other chars. See bug 706131. */ |
733 | 17.8k | for (i = 0, j = 0; i < len; i++, j++) |
734 | 12.5k | { |
735 | 12.5k | int hi = ovalues[i]; |
736 | 12.5k | if (hi >= 0xd800 && hi < 0xdc00 && i < len-1) |
737 | 0 | { |
738 | 0 | int lo = ovalues[i+1]; |
739 | 0 | if (lo >= 0xdc00 && lo < 0xe000) |
740 | 0 | { |
741 | 0 | hi = ((hi - 0xD800) << 10) + (lo - 0xDC00) + 0x10000; |
742 | 0 | i++; |
743 | 0 | } |
744 | 0 | } |
745 | 12.5k | if (values != local) |
746 | 5.26k | { |
747 | | /* We can't change the callers data, so copy stuff in. */ |
748 | 5.26k | if (j) |
749 | 0 | memcpy(local, values, sizeof(local[0]) * (j-1)); |
750 | 5.26k | values = local; |
751 | 5.26k | } |
752 | 12.5k | values[j] = hi; |
753 | 12.5k | } |
754 | 5.26k | len = j; |
755 | 5.26k | } |
756 | | |
757 | 31.7k | if (len == 1) |
758 | 26.4k | { |
759 | 26.4k | add_range(ctx, cmap, low, low, values[0], 1, 0); |
760 | 26.4k | return; |
761 | 26.4k | } |
762 | | |
763 | 5.26k | if (len > PDF_MRANGE_CAP) |
764 | 0 | { |
765 | 0 | fz_warn(ctx, "ignoring one to many mapping in cmap %s", cmap->cmap_name); |
766 | 0 | return; |
767 | 0 | } |
768 | | |
769 | 5.26k | add_mrange(ctx, cmap, low, values, (int)len); |
770 | 5.26k | } |
771 | | |
772 | | static void |
773 | | count_node_types(cmap_splay *node, void *arg) |
774 | 50.4k | { |
775 | 50.4k | int *counts = (int *)arg; |
776 | | |
777 | 50.4k | if (node->many) |
778 | 5.26k | counts[2]++; |
779 | 45.1k | else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF) |
780 | 45.1k | counts[0]++; |
781 | 0 | else |
782 | 0 | counts[1]++; |
783 | 50.4k | } |
784 | | |
785 | | static void |
786 | | copy_node_types(cmap_splay *node, void *arg) |
787 | 50.4k | { |
788 | 50.4k | pdf_cmap *cmap = (pdf_cmap *)arg; |
789 | | |
790 | 50.4k | if (node->many) |
791 | 5.26k | { |
792 | 5.26k | assert(node->low == node->high); |
793 | 5.26k | cmap->mranges[cmap->mlen].low = node->low; |
794 | 5.26k | cmap->mranges[cmap->mlen].out = node->out; |
795 | 5.26k | cmap->mlen++; |
796 | 5.26k | } |
797 | 45.1k | else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF) |
798 | 45.1k | { |
799 | 45.1k | cmap->ranges[cmap->rlen].low = node->low; |
800 | 45.1k | cmap->ranges[cmap->rlen].high = node->high; |
801 | 45.1k | cmap->ranges[cmap->rlen].out = node->out; |
802 | 45.1k | cmap->rlen++; |
803 | 45.1k | } |
804 | 0 | else |
805 | 0 | { |
806 | 0 | cmap->xranges[cmap->xlen].low = node->low; |
807 | 0 | cmap->xranges[cmap->xlen].high = node->high; |
808 | 0 | cmap->xranges[cmap->xlen].out = node->out; |
809 | 0 | cmap->xlen++; |
810 | 0 | } |
811 | 50.4k | } |
812 | | |
813 | | void |
814 | | pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap) |
815 | 955 | { |
816 | 955 | int counts[3]; |
817 | | |
818 | 955 | if (cmap->tree == NULL) |
819 | 110 | return; |
820 | | |
821 | 845 | counts[0] = 0; |
822 | 845 | counts[1] = 0; |
823 | 845 | counts[2] = 0; |
824 | 845 | walk_splay(cmap->tree, cmap->ttop, count_node_types, &counts); |
825 | | |
826 | 845 | cmap->ranges = Memento_label(fz_malloc_array(ctx, counts[0], pdf_range), "cmap_range"); |
827 | 845 | cmap->rcap = counts[0]; |
828 | 845 | cmap->xranges = Memento_label(fz_malloc_array(ctx, counts[1], pdf_xrange), "cmap_xrange"); |
829 | 845 | cmap->xcap = counts[1]; |
830 | 845 | cmap->mranges = Memento_label(fz_malloc_array(ctx, counts[2], pdf_mrange), "cmap_mrange"); |
831 | 845 | cmap->mcap = counts[2]; |
832 | | |
833 | 845 | walk_splay(cmap->tree, cmap->ttop, copy_node_types, cmap); |
834 | | |
835 | 845 | fz_free(ctx, cmap->tree); |
836 | 845 | cmap->tree = NULL; |
837 | 845 | } |
838 | | |
839 | | int |
840 | | pdf_lookup_cmap(pdf_cmap *cmap, unsigned int cpt) |
841 | 1.84M | { |
842 | 1.84M | pdf_range *ranges = cmap->ranges; |
843 | 1.84M | pdf_xrange *xranges = cmap->xranges; |
844 | 1.84M | int l, r, m; |
845 | | |
846 | 1.84M | l = 0; |
847 | 1.84M | r = cmap->rlen - 1; |
848 | 2.62M | while (l <= r) |
849 | 2.59M | { |
850 | 2.59M | m = (l + r) >> 1; |
851 | 2.59M | if (cpt < ranges[m].low) |
852 | 277k | r = m - 1; |
853 | 2.31M | else if (cpt > ranges[m].high) |
854 | 508k | l = m + 1; |
855 | 1.80M | else |
856 | 1.80M | return cpt - ranges[m].low + ranges[m].out; |
857 | 2.59M | } |
858 | | |
859 | 36.0k | l = 0; |
860 | 36.0k | r = cmap->xlen - 1; |
861 | 36.0k | while (l <= r) |
862 | 0 | { |
863 | 0 | m = (l + r) >> 1; |
864 | 0 | if (cpt < xranges[m].low) |
865 | 0 | r = m - 1; |
866 | 0 | else if (cpt > xranges[m].high) |
867 | 0 | l = m + 1; |
868 | 0 | else |
869 | 0 | return cpt - xranges[m].low + xranges[m].out; |
870 | 0 | } |
871 | | |
872 | 36.0k | if (cmap->usecmap) |
873 | 0 | return pdf_lookup_cmap(cmap->usecmap, cpt); |
874 | | |
875 | 36.0k | return -1; |
876 | 36.0k | } |
877 | | |
878 | | int |
879 | | pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out) |
880 | 4.42M | { |
881 | 4.42M | pdf_range *ranges = cmap->ranges; |
882 | 4.42M | pdf_xrange *xranges = cmap->xranges; |
883 | 4.42M | pdf_mrange *mranges = cmap->mranges; |
884 | 4.42M | unsigned int i; |
885 | 4.42M | int l, r, m; |
886 | | |
887 | 4.42M | l = 0; |
888 | 4.42M | r = cmap->rlen - 1; |
889 | 24.8M | while (l <= r) |
890 | 20.4M | { |
891 | 20.4M | m = (l + r) >> 1; |
892 | 20.4M | if (cpt < ranges[m].low) |
893 | 249k | r = m - 1; |
894 | 20.2M | else if (cpt > ranges[m].high) |
895 | 20.1M | l = m + 1; |
896 | 68.2k | else |
897 | 68.2k | { |
898 | 68.2k | out[0] = cpt - ranges[m].low + ranges[m].out; |
899 | 68.2k | return 1; |
900 | 68.2k | } |
901 | 20.4M | } |
902 | | |
903 | 4.35M | l = 0; |
904 | 4.35M | r = cmap->xlen - 1; |
905 | 4.35M | while (l <= r) |
906 | 0 | { |
907 | 0 | m = (l + r) >> 1; |
908 | 0 | if (cpt < xranges[m].low) |
909 | 0 | r = m - 1; |
910 | 0 | else if (cpt > xranges[m].high) |
911 | 0 | l = m + 1; |
912 | 0 | else |
913 | 0 | { |
914 | 0 | out[0] = cpt - xranges[m].low + xranges[m].out; |
915 | 0 | return 1; |
916 | 0 | } |
917 | 0 | } |
918 | | |
919 | 4.35M | l = 0; |
920 | 4.35M | r = cmap->mlen - 1; |
921 | 4.92M | while (l <= r) |
922 | 574k | { |
923 | 574k | m = (l + r) >> 1; |
924 | 574k | if (cpt < mranges[m].low) |
925 | 11.8k | r = m - 1; |
926 | 563k | else if (cpt > mranges[m].low) |
927 | 560k | l = m + 1; |
928 | 2.63k | else |
929 | 2.63k | { |
930 | 2.63k | int *ptr = &cmap->dict[cmap->mranges[m].out]; |
931 | 2.63k | unsigned int len = (unsigned int)*ptr++; |
932 | 8.91k | for (i = 0; i < len; ++i) |
933 | 6.28k | out[i] = *ptr++; |
934 | 2.63k | return len; |
935 | 2.63k | } |
936 | 574k | } |
937 | | |
938 | 4.35M | if (cmap->usecmap) |
939 | 0 | return pdf_lookup_cmap_full(cmap->usecmap, cpt, out); |
940 | | |
941 | 4.35M | return 0; |
942 | 4.35M | } |
943 | | |
944 | | int |
945 | | pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, unsigned char *end, unsigned int *cpt) |
946 | 1.38M | { |
947 | 1.38M | unsigned int c; |
948 | 1.38M | int k, n; |
949 | 1.38M | int len = end - buf; |
950 | | |
951 | 1.38M | if (len > 4) |
952 | 415k | len = 4; |
953 | | |
954 | 1.38M | c = 0; |
955 | 1.41M | for (n = 0; n < len; n++) |
956 | 1.41M | { |
957 | 1.41M | c = (c << 8) | buf[n]; |
958 | 1.44M | for (k = 0; k < cmap->codespace_len; k++) |
959 | 1.41M | { |
960 | 1.41M | if (cmap->codespace[k].n == n + 1) |
961 | 1.38M | { |
962 | 1.38M | if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high) |
963 | 1.38M | { |
964 | 1.38M | *cpt = c; |
965 | 1.38M | return n + 1; |
966 | 1.38M | } |
967 | 1.38M | } |
968 | 1.41M | } |
969 | 1.41M | } |
970 | | |
971 | 0 | *cpt = 0; |
972 | 0 | return 1; |
973 | 1.38M | } |
974 | | |
975 | | size_t |
976 | | pdf_cmap_size(fz_context *ctx, pdf_cmap *cmap) |
977 | 1.98k | { |
978 | 1.98k | if (cmap == NULL) |
979 | 955 | return 0; |
980 | 1.03k | if (cmap->storable.refs < 0) |
981 | 79 | return 0; |
982 | | |
983 | 955 | return pdf_cmap_size(ctx, cmap->usecmap) + |
984 | 955 | cmap->rcap * sizeof *cmap->ranges + |
985 | 955 | cmap->xcap * sizeof *cmap->xranges + |
986 | 955 | cmap->mcap * sizeof *cmap->mranges + |
987 | 955 | cmap->tcap * sizeof *cmap->tree + |
988 | 955 | sizeof(*cmap); |
989 | 1.03k | } |