/src/mupdf/source/pdf/pdf-cmap.c
Line | Count | Source |
1 | | // Copyright (C) 2004-2021 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "mupdf/pdf.h" |
25 | | |
26 | | #include <assert.h> |
27 | | #include <string.h> |
28 | | |
29 | | #undef CHECK_SPLAY |
30 | | #undef DUMP_SPLAY |
31 | | |
32 | | /* |
33 | | * Allocate, destroy and simple parameters. |
34 | | */ |
35 | | |
36 | | void |
37 | | pdf_drop_cmap_imp(fz_context *ctx, fz_storable *cmap_) |
38 | 6 | { |
39 | 6 | pdf_cmap *cmap = (pdf_cmap *)cmap_; |
40 | 6 | pdf_drop_cmap(ctx, cmap->usecmap); |
41 | 6 | fz_free(ctx, cmap->ranges); |
42 | 6 | fz_free(ctx, cmap->xranges); |
43 | 6 | fz_free(ctx, cmap->mranges); |
44 | 6 | fz_free(ctx, cmap->dict); |
45 | 6 | fz_free(ctx, cmap->tree); |
46 | 6 | fz_free(ctx, cmap); |
47 | 6 | } |
48 | | |
49 | | pdf_cmap * |
50 | | pdf_new_cmap(fz_context *ctx) |
51 | 6 | { |
52 | 6 | pdf_cmap *cmap = fz_malloc_struct(ctx, pdf_cmap); |
53 | 6 | FZ_INIT_STORABLE(cmap, 1, pdf_drop_cmap_imp); |
54 | 6 | return cmap; |
55 | 6 | } |
56 | | |
57 | | /* Could be a macro for speed */ |
58 | | pdf_cmap * |
59 | | pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap) |
60 | 2 | { |
61 | 2 | return fz_keep_storable(ctx, &cmap->storable); |
62 | 2 | } |
63 | | |
64 | | /* Could be a macro for speed */ |
65 | | void |
66 | | pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap) |
67 | 22 | { |
68 | 22 | fz_drop_storable(ctx, &cmap->storable); |
69 | 22 | } |
70 | | |
71 | | void |
72 | | pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap) |
73 | 0 | { |
74 | 0 | int i; |
75 | |
|
76 | 0 | pdf_drop_cmap(ctx, cmap->usecmap); |
77 | 0 | cmap->usecmap = pdf_keep_cmap(ctx, usecmap); |
78 | |
|
79 | 0 | if (cmap->codespace_len == 0) |
80 | 0 | { |
81 | 0 | cmap->codespace_len = usecmap->codespace_len; |
82 | 0 | for (i = 0; i < usecmap->codespace_len; i++) |
83 | 0 | cmap->codespace[i] = usecmap->codespace[i]; |
84 | 0 | } |
85 | 0 | } |
86 | | |
87 | | int |
88 | | pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap) |
89 | 4 | { |
90 | 4 | return cmap->wmode; |
91 | 4 | } |
92 | | |
93 | | void |
94 | | pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode) |
95 | 2 | { |
96 | 2 | cmap->wmode = wmode; |
97 | 2 | } |
98 | | |
99 | | void |
100 | | pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, size_t n) |
101 | 6 | { |
102 | 6 | if (cmap->codespace_len + 1 == nelem(cmap->codespace)) |
103 | 0 | { |
104 | 0 | fz_warn(ctx, "assert: too many code space ranges"); |
105 | 0 | return; |
106 | 0 | } |
107 | | |
108 | 6 | if ((uint32_t)n != n) |
109 | 0 | { |
110 | 0 | fz_warn(ctx, "assert: code space range too large"); |
111 | 0 | return; |
112 | 0 | } |
113 | | |
114 | 6 | cmap->codespace[cmap->codespace_len].n = (int)n; |
115 | 6 | cmap->codespace[cmap->codespace_len].low = low; |
116 | 6 | cmap->codespace[cmap->codespace_len].high = high; |
117 | 6 | cmap->codespace_len ++; |
118 | 6 | } |
119 | | |
120 | | struct cmap_splay { |
121 | | unsigned int low; |
122 | | unsigned int high; |
123 | | unsigned int out; |
124 | | unsigned int left; |
125 | | unsigned int right; |
126 | | unsigned int parent : 31; |
127 | | unsigned int many : 1; |
128 | | }; |
129 | | |
130 | 1.67M | #define EMPTY ((unsigned int)0x40000000) |
131 | | |
132 | | /* |
133 | | The splaying steps used: |
134 | | |
135 | | Case 1: | z x |
136 | | | y D => A y |
137 | | | x C B z |
138 | | | A B C D |
139 | | |
140 | | Case 2: | z x |
141 | | | y D => y z |
142 | | | A x A B C D |
143 | | | B C |
144 | | |
145 | | Case 3: | y x |
146 | | | x C => A y |
147 | | | A B B C |
148 | | */ |
149 | | |
150 | | static void |
151 | | move_to_root(cmap_splay *tree, unsigned int x) |
152 | 65.1k | { |
153 | 65.1k | if (x == EMPTY) |
154 | 0 | return; |
155 | 65.1k | do |
156 | 79.1k | { |
157 | 79.1k | unsigned int z, zp; |
158 | 79.1k | unsigned int y = tree[x].parent; |
159 | 79.1k | if (y == EMPTY) |
160 | 2.62k | break; |
161 | 76.4k | z = tree[y].parent; |
162 | 76.4k | if (z == EMPTY) |
163 | 62.5k | { |
164 | | /* Case 3 */ |
165 | 62.5k | tree[x].parent = EMPTY; |
166 | 62.5k | tree[y].parent = x; |
167 | 62.5k | if (tree[y].left == x) |
168 | 7 | { |
169 | | /* Case 3 */ |
170 | 7 | tree[y].left = tree[x].right; |
171 | 7 | if (tree[y].left != EMPTY) |
172 | 7 | tree[tree[y].left].parent = y; |
173 | 7 | tree[x].right = y; |
174 | 7 | } |
175 | 62.5k | else |
176 | 62.5k | { |
177 | | /* Case 3 - reflected */ |
178 | 62.5k | assert(tree[y].right == x); |
179 | 62.5k | tree[y].right = tree[x].left; |
180 | 62.5k | if (tree[y].right != EMPTY) |
181 | 1 | tree[tree[y].right].parent = y; |
182 | 62.5k | tree[x].left = y; |
183 | 62.5k | } |
184 | 62.5k | break; |
185 | 62.5k | } |
186 | | |
187 | 13.9k | zp = tree[z].parent; |
188 | 13.9k | tree[x].parent = zp; |
189 | 13.9k | if (zp != EMPTY) { |
190 | 11.3k | if (tree[zp].left == z) |
191 | 11.2k | tree[zp].left = x; |
192 | 55 | else |
193 | 55 | { |
194 | 55 | assert(tree[zp].right == z); |
195 | 55 | tree[zp].right = x; |
196 | 55 | } |
197 | 11.3k | } |
198 | 13.9k | tree[y].parent = x; |
199 | 13.9k | if (tree[y].left == x) |
200 | 13.9k | { |
201 | 13.9k | tree[y].left = tree[x].right; |
202 | 13.9k | if (tree[y].left != EMPTY) |
203 | 11.2k | tree[tree[y].left].parent = y; |
204 | 13.9k | tree[x].right = y; |
205 | 13.9k | if (tree[z].left == y) |
206 | 11.3k | { |
207 | | /* Case 1 */ |
208 | 11.3k | tree[z].parent = y; |
209 | 11.3k | tree[z].left = tree[y].right; |
210 | 11.3k | if (tree[z].left != EMPTY) |
211 | 5.15k | tree[tree[z].left].parent = z; |
212 | 11.3k | tree[y].right = z; |
213 | 11.3k | } |
214 | 2.61k | else |
215 | 2.61k | { |
216 | | /* Case 2 - reflected */ |
217 | 2.61k | assert(tree[z].right == y); |
218 | 2.61k | tree[z].parent = x; |
219 | 2.61k | tree[z].right = tree[x].left; |
220 | 2.61k | if (tree[z].right != EMPTY) |
221 | 6 | tree[tree[z].right].parent = z; |
222 | 2.61k | tree[x].left = z; |
223 | 2.61k | } |
224 | 13.9k | } |
225 | 50 | else |
226 | 50 | { |
227 | 50 | assert(tree[y].right == x); |
228 | 50 | tree[y].right = tree[x].left; |
229 | 50 | if (tree[y].right != EMPTY) |
230 | 28 | tree[tree[y].right].parent = y; |
231 | 50 | tree[x].left = y; |
232 | 50 | if (tree[z].left == y) |
233 | 14 | { |
234 | | /* Case 2 */ |
235 | 14 | tree[z].parent = x; |
236 | 14 | tree[z].left = tree[x].right; |
237 | 14 | if (tree[z].left != EMPTY) |
238 | 4 | tree[tree[z].left].parent = z; |
239 | 14 | tree[x].right = z; |
240 | 14 | } |
241 | 36 | else |
242 | 36 | { |
243 | | /* Case 1 - reflected */ |
244 | 36 | assert(tree[z].right == y); |
245 | 36 | tree[z].parent = y; |
246 | 36 | tree[z].right = tree[y].left; |
247 | 36 | if (tree[z].right != EMPTY) |
248 | 25 | tree[tree[z].right].parent = z; |
249 | 36 | tree[y].left = z; |
250 | 36 | } |
251 | 50 | } |
252 | 13.9k | } while (1); |
253 | 65.1k | } |
254 | | |
255 | | static unsigned int delete_node(pdf_cmap *cmap, unsigned int current) |
256 | 226 | { |
257 | 226 | cmap_splay *tree = cmap->tree; |
258 | 226 | unsigned int parent; |
259 | 226 | unsigned int replacement; |
260 | | |
261 | 226 | assert(current != EMPTY); |
262 | | |
263 | 226 | parent = tree[current].parent; |
264 | 226 | if (tree[current].right == EMPTY) |
265 | 9 | { |
266 | 9 | if (parent == EMPTY) |
267 | 7 | { |
268 | 7 | replacement = cmap->ttop = tree[current].left; |
269 | 7 | } |
270 | 2 | else if (tree[parent].left == current) |
271 | 2 | { |
272 | 2 | replacement = tree[parent].left = tree[current].left; |
273 | 2 | } |
274 | 0 | else |
275 | 0 | { |
276 | 0 | assert(tree[parent].right == current); |
277 | 0 | replacement = tree[parent].right = tree[current].left; |
278 | 0 | } |
279 | 9 | if (replacement != EMPTY) |
280 | 9 | tree[replacement].parent = parent; |
281 | 0 | else |
282 | 0 | replacement = parent; |
283 | 9 | } |
284 | 217 | else if (tree[current].left == EMPTY) |
285 | 2 | { |
286 | 2 | if (parent == EMPTY) |
287 | 0 | { |
288 | 0 | replacement = cmap->ttop = tree[current].right; |
289 | 0 | } |
290 | 2 | else if (tree[parent].left == current) |
291 | 1 | { |
292 | 1 | replacement = tree[parent].left = tree[current].right; |
293 | 1 | } |
294 | 1 | else |
295 | 1 | { |
296 | 1 | assert(tree[parent].right == current); |
297 | 1 | replacement = tree[parent].right = tree[current].right; |
298 | 1 | } |
299 | 2 | if (replacement != EMPTY) |
300 | 2 | tree[replacement].parent = parent; |
301 | 0 | else |
302 | 0 | replacement = parent; |
303 | 2 | } |
304 | 215 | else |
305 | 215 | { |
306 | | /* Hard case, find the in-order predecessor of current */ |
307 | 215 | unsigned int amputee = current; |
308 | 215 | replacement = tree[current].left; |
309 | 224 | while (tree[replacement].right != EMPTY) { |
310 | 9 | amputee = replacement; |
311 | 9 | replacement = tree[replacement].right; |
312 | 9 | } |
313 | | /* Remove replacement from the tree */ |
314 | 215 | if (amputee == current) |
315 | 207 | { |
316 | 207 | tree[amputee].left = tree[replacement].left; |
317 | 207 | if (tree[amputee].left != EMPTY) |
318 | 206 | tree[tree[amputee].left].parent = amputee; |
319 | 207 | } |
320 | 8 | else |
321 | 8 | { |
322 | 8 | tree[amputee].right = tree[replacement].left; |
323 | 8 | if (tree[amputee].right != EMPTY) |
324 | 3 | tree[tree[amputee].right].parent = amputee; |
325 | 8 | } |
326 | | /* Insert replacement in place of current */ |
327 | 215 | tree[replacement].parent = parent; |
328 | 215 | if (parent == EMPTY) |
329 | 208 | { |
330 | 208 | tree[replacement].parent = EMPTY; |
331 | 208 | cmap->ttop = replacement; |
332 | 208 | } |
333 | 7 | else if (tree[parent].left == current) |
334 | 4 | tree[parent].left = replacement; |
335 | 3 | else |
336 | 3 | { |
337 | 3 | assert(tree[parent].right == current); |
338 | 3 | tree[parent].right = replacement; |
339 | 3 | } |
340 | 215 | tree[replacement].left = tree[current].left; |
341 | 215 | if (tree[replacement].left != EMPTY) |
342 | 214 | tree[tree[replacement].left].parent = replacement; |
343 | 215 | tree[replacement].right = tree[current].right; |
344 | 215 | if (tree[replacement].right != EMPTY) |
345 | 215 | tree[tree[replacement].right].parent = replacement; |
346 | 215 | } |
347 | | |
348 | | /* current is now unlinked. We need to remove it from our array. */ |
349 | 226 | cmap->tlen--; |
350 | 226 | if (current != (unsigned int) cmap->tlen) |
351 | 11 | { |
352 | 11 | if (replacement == (unsigned int) cmap->tlen) |
353 | 0 | replacement = current; |
354 | 11 | tree[current] = tree[cmap->tlen]; |
355 | 11 | parent = tree[current].parent; |
356 | 11 | if (parent == EMPTY) |
357 | 11 | cmap->ttop = current; |
358 | 0 | else if (tree[parent].left == (unsigned int) cmap->tlen) |
359 | 0 | tree[parent].left = current; |
360 | 0 | else |
361 | 0 | { |
362 | 0 | assert(tree[parent].right == (unsigned int) cmap->tlen); |
363 | 0 | tree[parent].right = current; |
364 | 0 | } |
365 | 11 | if (tree[current].left != EMPTY) |
366 | 11 | { |
367 | 11 | assert(tree[tree[current].left].parent == (unsigned int) cmap->tlen); |
368 | 11 | tree[tree[current].left].parent = current; |
369 | 11 | } |
370 | 11 | if (tree[current].right != EMPTY) |
371 | 7 | { |
372 | 7 | assert(tree[tree[current].right].parent == (unsigned int) cmap->tlen); |
373 | 7 | tree[tree[current].right].parent = current; |
374 | 7 | } |
375 | 11 | } |
376 | | |
377 | | /* Return the node that we should continue searching from */ |
378 | 226 | return replacement; |
379 | 226 | } |
380 | | |
381 | | #ifdef DUMP_SPLAY |
382 | | static void |
383 | | dump_splay(cmap_splay *tree, unsigned int node, int depth, const char *pre) |
384 | | { |
385 | | int i; |
386 | | |
387 | | if (tree == NULL || node == EMPTY) |
388 | | return; |
389 | | |
390 | | for (i = 0; i < depth; i++) |
391 | | fprintf(stderr, " "); |
392 | | fprintf(stderr, "%s%d:", pre, node); |
393 | | if (tree[node].parent == EMPTY) |
394 | | fprintf(stderr, "^EMPTY"); |
395 | | else |
396 | | fprintf(stderr, "^%d", tree[node].parent); |
397 | | if (tree[node].left == EMPTY) |
398 | | fprintf(stderr, "<EMPTY"); |
399 | | else |
400 | | fprintf(stderr, "<%d", tree[node].left); |
401 | | if (tree[node].right == EMPTY) |
402 | | fprintf(stderr, ">EMPTY"); |
403 | | else |
404 | | fprintf(stderr, ">%d", tree[node].right); |
405 | | fprintf(stderr, "(%x,%x,%x,%d)\n", tree[node].low, tree[node].high, tree[node].out, tree[node].many); |
406 | | assert(tree[node].parent == EMPTY || depth); |
407 | | assert(tree[node].left == EMPTY || tree[tree[node].left].parent == node); |
408 | | assert(tree[node].right == EMPTY || tree[tree[node].right].parent == node); |
409 | | dump_splay(tree, tree[node].left, depth+1, "L"); |
410 | | dump_splay(tree, tree[node].right, depth+1, "R"); |
411 | | } |
412 | | #endif |
413 | | |
414 | | enum |
415 | | { |
416 | | TOP = 0, |
417 | | LEFT = 1, |
418 | | RIGHT = 2 |
419 | | }; |
420 | | |
421 | | static void walk_splay(cmap_splay *tree, unsigned int node, void (*fn)(cmap_splay *, void *), void *arg) |
422 | 12 | { |
423 | 12 | int from = TOP; |
424 | | |
425 | 259k | while (node != EMPTY) |
426 | 259k | { |
427 | 259k | switch (from) |
428 | 259k | { |
429 | 129k | case TOP: |
430 | 129k | if (tree[node].left != EMPTY) |
431 | 117k | { |
432 | 117k | node = tree[node].left; |
433 | 117k | from = TOP; |
434 | 117k | break; |
435 | 117k | } |
436 | | /* fallthrough */ |
437 | 129k | case LEFT: |
438 | 129k | fn(&tree[node], arg); |
439 | 129k | if (tree[node].right != EMPTY) |
440 | 12.2k | { |
441 | 12.2k | node = tree[node].right; |
442 | 12.2k | from = TOP; |
443 | 12.2k | break; |
444 | 12.2k | } |
445 | | /* fallthrough */ |
446 | 129k | case RIGHT: |
447 | 129k | { |
448 | 129k | unsigned int parent = tree[node].parent; |
449 | 129k | if (parent == EMPTY) |
450 | 12 | return; |
451 | 129k | if (tree[parent].left == node) |
452 | 117k | from = LEFT; |
453 | 12.2k | else |
454 | 12.2k | { |
455 | 12.2k | assert(tree[parent].right == node); |
456 | 12.2k | from = RIGHT; |
457 | 12.2k | } |
458 | 129k | node = parent; |
459 | 129k | } |
460 | 259k | } |
461 | 259k | } |
462 | 12 | } |
463 | | |
464 | | #ifdef CHECK_SPLAY |
465 | | |
466 | | static int |
467 | | tree_has_overlap(cmap_splay *tree, int node, int low, int high) |
468 | | { |
469 | | if (tree[node].left != EMPTY) |
470 | | if (tree_has_overlap(tree, tree[node].left, low, high)) |
471 | | return 1; |
472 | | if (tree[node].right != EMPTY) |
473 | | if (tree_has_overlap(tree, tree[node].right, low, high)) |
474 | | return 1; |
475 | | return (tree[node].low < low && low < tree[node].high) || (tree[node].low < high && high < tree[node].high); |
476 | | } |
477 | | |
478 | | static void |
479 | | do_check(cmap_splay *node, void *arg) |
480 | | { |
481 | | cmap_splay *tree = arg; |
482 | | unsigned int num = node - tree; |
483 | | assert(!node->many || node->low == node->high); |
484 | | assert(node->low <= node->high); |
485 | | assert((node->left == EMPTY) || (tree[node->left].parent == num && |
486 | | tree[node->left].high < node->low)); |
487 | | assert(node->right == EMPTY || (tree[node->right].parent == num && |
488 | | node->high < tree[node->right].low)); |
489 | | assert(!tree_has_overlap(tree, num, node->low, node->high)); |
490 | | } |
491 | | |
492 | | static void |
493 | | check_splay(cmap_splay *tree, unsigned int node, int depth) |
494 | | { |
495 | | if (node == EMPTY) |
496 | | return; |
497 | | assert(tree[node].parent == EMPTY); |
498 | | walk_splay(tree, node, do_check, tree); |
499 | | } |
500 | | #endif |
501 | | |
502 | | /* |
503 | | * Add a range. |
504 | | */ |
505 | | static void |
506 | | add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out, int check_for_overlap, int many) |
507 | 65.1k | { |
508 | 65.1k | int current; |
509 | 65.1k | cmap_splay *tree; |
510 | | |
511 | 65.1k | if (low > high) |
512 | 0 | { |
513 | 0 | fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name); |
514 | 0 | return; |
515 | 0 | } |
516 | | |
517 | 65.1k | if (cmap->codespace_len == 0) |
518 | 0 | { |
519 | 0 | fz_warn(ctx, "CMap is missing codespace range"); |
520 | 0 | pdf_add_codespace(ctx, cmap, 0, 65535, 2); |
521 | 0 | } |
522 | | |
523 | 65.1k | tree = cmap->tree; |
524 | | |
525 | 65.1k | if (cmap->tlen) |
526 | 65.1k | { |
527 | 65.1k | unsigned int move = cmap->ttop; |
528 | 65.1k | unsigned int gt = EMPTY; |
529 | 65.1k | unsigned int lt = EMPTY; |
530 | 65.1k | if (check_for_overlap) |
531 | 65.1k | { |
532 | | /* Check for collision with the current node */ |
533 | 65.1k | do |
534 | 90.6k | { |
535 | 90.6k | current = move; |
536 | | /* Cases we might meet: |
537 | | * tree[i]: <-----> |
538 | | * case 0: <-> |
539 | | * case 1: <-------> |
540 | | * case 2: <-------------> |
541 | | * case 3: <-> |
542 | | * case 4: <-------> |
543 | | * case 5: <-> |
544 | | */ |
545 | 90.6k | if (low <= tree[current].low && tree[current].low <= high) |
546 | 226 | { |
547 | | /* case 1, reduces to case 0 */ |
548 | | /* or case 2, deleting the node */ |
549 | 226 | if (!tree[current].many) |
550 | 226 | { |
551 | 226 | tree[current].out += high + 1 - tree[current].low; |
552 | 226 | tree[current].low = high + 1; |
553 | 226 | } |
554 | 226 | if (tree[current].low > tree[current].high || tree[current].many) |
555 | 226 | { |
556 | | /* update lt/gt references that will be moved/stale after deleting current */ |
557 | 226 | if (gt == (unsigned int) cmap->tlen - 1) |
558 | 1 | gt = current; |
559 | 226 | if (lt == (unsigned int) cmap->tlen - 1) |
560 | 0 | lt = current; |
561 | | /* delete_node() moves the element at cmap->tlen-1 into current */ |
562 | 226 | move = delete_node(cmap, current); |
563 | 226 | current = EMPTY; |
564 | 226 | continue; |
565 | 226 | } |
566 | 226 | } |
567 | 90.4k | else if (low <= tree[current].high && tree[current].high <= high) |
568 | 0 | { |
569 | | /* case 4, reduces to case 5 */ |
570 | 0 | tree[current].high = low - 1; |
571 | 0 | assert(tree[current].low <= tree[current].high); |
572 | 0 | } |
573 | 90.4k | else if (tree[current].low < low && high < tree[current].high) |
574 | 0 | { |
575 | | /* case 3, reduces to case 5 */ |
576 | 0 | int new_high = tree[current].high; |
577 | 0 | tree[current].high = low-1; |
578 | 0 | assert(!tree[current].many); |
579 | 0 | add_range(ctx, cmap, high+1, new_high, tree[current].out + high + 1 - tree[current].low, 0, 0); |
580 | 0 | tree = cmap->tree; |
581 | 0 | } |
582 | | /* Now look for where to move to next (left for case 0, right for case 5) */ |
583 | 90.4k | if (tree[current].low > high) { |
584 | 25.2k | move = tree[current].left; |
585 | 25.2k | gt = current; |
586 | 25.2k | } |
587 | 65.2k | else |
588 | 65.2k | { |
589 | 65.2k | move = tree[current].right; |
590 | 65.2k | lt = current; |
591 | 65.2k | } |
592 | 90.4k | } |
593 | 90.6k | while (move != EMPTY); |
594 | 65.1k | } |
595 | 0 | else |
596 | 0 | { |
597 | 0 | do |
598 | 0 | { |
599 | 0 | current = move; |
600 | 0 | if (tree[current].low > high) |
601 | 0 | { |
602 | 0 | move = tree[current].left; |
603 | 0 | gt = current; |
604 | 0 | } |
605 | 0 | else |
606 | 0 | { |
607 | 0 | move = tree[current].right; |
608 | 0 | lt = current; |
609 | 0 | } |
610 | 0 | } while (move != EMPTY); |
611 | 0 | } |
612 | | /* current is now the node to which we would be adding the new node */ |
613 | | /* lt is the last node we traversed which is lt the new node. */ |
614 | | /* gt is the last node we traversed which is gt the new node. */ |
615 | | |
616 | 65.1k | if (!many) |
617 | 55.7k | { |
618 | | /* Check for the 'merge' cases. */ |
619 | 55.7k | if (lt != EMPTY && !tree[lt].many && tree[lt].high == low-1 && tree[lt].out - tree[lt].low == out - low) |
620 | 24 | { |
621 | 24 | tree[lt].high = high; |
622 | 24 | if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low) |
623 | 0 | { |
624 | 0 | tree[lt].high = tree[gt].high; |
625 | 0 | delete_node(cmap, gt); |
626 | 0 | } |
627 | 24 | goto exit; |
628 | 24 | } |
629 | 55.7k | if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low) |
630 | 0 | { |
631 | 0 | tree[gt].low = low; |
632 | 0 | tree[gt].out = out; |
633 | 0 | goto exit; |
634 | 0 | } |
635 | 55.7k | } |
636 | 65.1k | } |
637 | 6 | else |
638 | 6 | current = EMPTY; |
639 | | |
640 | 65.1k | if (cmap->tlen == cmap->tcap) |
641 | 32 | { |
642 | 32 | int new_cap = cmap->tcap ? cmap->tcap * 2 : 256; |
643 | 32 | tree = cmap->tree = fz_realloc_array(ctx, cmap->tree, new_cap, cmap_splay); |
644 | 32 | cmap->tcap = new_cap; |
645 | 32 | } |
646 | 65.1k | tree[cmap->tlen].low = low; |
647 | 65.1k | tree[cmap->tlen].high = high; |
648 | 65.1k | tree[cmap->tlen].out = out; |
649 | 65.1k | tree[cmap->tlen].parent = current; |
650 | 65.1k | tree[cmap->tlen].left = EMPTY; |
651 | 65.1k | tree[cmap->tlen].right = EMPTY; |
652 | 65.1k | tree[cmap->tlen].many = many; |
653 | 65.1k | cmap->tlen++; |
654 | 65.1k | if (current == EMPTY) |
655 | 6 | cmap->ttop = 0; |
656 | 65.1k | else if (tree[current].low > high) |
657 | 2.63k | tree[current].left = cmap->tlen-1; |
658 | 62.4k | else |
659 | 62.4k | { |
660 | 62.4k | assert(tree[current].high < low); |
661 | 62.4k | tree[current].right = cmap->tlen-1; |
662 | 62.4k | } |
663 | 65.1k | move_to_root(tree, cmap->tlen-1); |
664 | 65.1k | cmap->ttop = cmap->tlen-1; |
665 | 65.1k | exit: |
666 | 65.1k | {} |
667 | | #ifdef CHECK_SPLAY |
668 | | check_splay(cmap->tree, cmap->ttop, 0); |
669 | | #endif |
670 | | #ifdef DUMP_SPLAY |
671 | | dump_splay(cmap->tree, cmap->ttop, 0, ""); |
672 | | #endif |
673 | 65.1k | } |
674 | | |
675 | | /* |
676 | | * Add a one-to-many mapping. |
677 | | */ |
678 | | static void |
679 | | add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len) |
680 | 9.38k | { |
681 | 9.38k | int out_pos; |
682 | 9.38k | int new_len = cmap->dlen + len + 1; |
683 | | |
684 | 9.38k | if (new_len > cmap->dcap) |
685 | 26 | { |
686 | 26 | int new_cap = cmap->dcap > 0 ? cmap->dcap : 256; |
687 | 48 | while (new_len > new_cap) |
688 | 22 | new_cap *= 2; |
689 | 26 | cmap->dict = fz_realloc_array(ctx, cmap->dict, new_cap, int); |
690 | 26 | cmap->dcap = new_cap; |
691 | 26 | } |
692 | 9.38k | out_pos = cmap->dlen; |
693 | 9.38k | cmap->dict[out_pos] = len; |
694 | 9.38k | memcpy(&cmap->dict[out_pos+1], out, sizeof(int)*len); |
695 | 9.38k | cmap->dlen = new_len; |
696 | | |
697 | 9.38k | add_range(ctx, cmap, low, low, out_pos, 1, 1); |
698 | 9.38k | } |
699 | | |
700 | | void |
701 | | pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out) |
702 | 27.7k | { |
703 | 27.7k | add_range(ctx, cmap, low, high, out, 1, 0); |
704 | 27.7k | } |
705 | | |
706 | | void |
707 | | pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *values, size_t len) |
708 | 37.3k | { |
709 | 37.3k | int *ovalues = values; |
710 | | /* len is always restricted to <= 256 by the callers. */ |
711 | 37.3k | int local[PDF_MRANGE_CAP]; |
712 | | |
713 | 37.3k | assert(len <= PDF_MRANGE_CAP); |
714 | | |
715 | | /* Decode unicode surrogate pairs. */ |
716 | | /* Only the *-UCS2 CMaps use one-to-many mappings, so assuming unicode should be safe. */ |
717 | 37.3k | if (len >= 2) |
718 | 9.38k | { |
719 | 9.38k | size_t i, j; |
720 | | /* Look for mranges with either multiple surrogate pairs in, or surrogate pairs |
721 | | * with other chars. See bug 706131. */ |
722 | 31.2k | for (i = 0, j = 0; i < len; i++, j++) |
723 | 21.8k | { |
724 | 21.8k | int hi = ovalues[i]; |
725 | 21.8k | if (hi >= 0xd800 && hi < 0xdc00 && i < len-1) |
726 | 0 | { |
727 | 0 | int lo = ovalues[i+1]; |
728 | 0 | if (lo >= 0xdc00 && lo < 0xe000) |
729 | 0 | { |
730 | 0 | hi = ((hi - 0xD800) << 10) + (lo - 0xDC00) + 0x10000; |
731 | 0 | i++; |
732 | 0 | } |
733 | 0 | } |
734 | 21.8k | if (values != local) |
735 | 9.38k | { |
736 | | /* We can't change the callers data, so copy stuff in. */ |
737 | 9.38k | if (j) |
738 | 0 | memcpy(local, values, sizeof(local[0]) * (j-1)); |
739 | 9.38k | values = local; |
740 | 9.38k | } |
741 | 21.8k | values[j] = hi; |
742 | 21.8k | } |
743 | 9.38k | len = j; |
744 | 9.38k | } |
745 | | |
746 | 37.3k | if (len == 1) |
747 | 28.0k | { |
748 | 28.0k | add_range(ctx, cmap, low, low, values[0], 1, 0); |
749 | 28.0k | return; |
750 | 28.0k | } |
751 | | |
752 | 9.38k | if (len > PDF_MRANGE_CAP) |
753 | 0 | { |
754 | 0 | fz_warn(ctx, "ignoring one to many mapping in cmap %s", cmap->cmap_name); |
755 | 0 | return; |
756 | 0 | } |
757 | | |
758 | 9.38k | add_mrange(ctx, cmap, low, values, (int)len); |
759 | 9.38k | } |
760 | | |
761 | | static void |
762 | | count_node_types(cmap_splay *node, void *arg) |
763 | 64.9k | { |
764 | 64.9k | int *counts = (int *)arg; |
765 | | |
766 | 64.9k | if (node->many) |
767 | 9.38k | counts[2]++; |
768 | 55.5k | else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF) |
769 | 55.5k | counts[0]++; |
770 | 6 | else |
771 | 6 | counts[1]++; |
772 | 64.9k | } |
773 | | |
774 | | static void |
775 | | copy_node_types(cmap_splay *node, void *arg) |
776 | 64.9k | { |
777 | 64.9k | pdf_cmap *cmap = (pdf_cmap *)arg; |
778 | | |
779 | 64.9k | if (node->many) |
780 | 9.38k | { |
781 | 9.38k | assert(node->low == node->high); |
782 | 9.38k | cmap->mranges[cmap->mlen].low = node->low; |
783 | 9.38k | cmap->mranges[cmap->mlen].out = node->out; |
784 | 9.38k | cmap->mlen++; |
785 | 9.38k | } |
786 | 55.5k | else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF) |
787 | 55.5k | { |
788 | 55.5k | cmap->ranges[cmap->rlen].low = node->low; |
789 | 55.5k | cmap->ranges[cmap->rlen].high = node->high; |
790 | 55.5k | cmap->ranges[cmap->rlen].out = node->out; |
791 | 55.5k | cmap->rlen++; |
792 | 55.5k | } |
793 | 6 | else |
794 | 6 | { |
795 | 6 | cmap->xranges[cmap->xlen].low = node->low; |
796 | 6 | cmap->xranges[cmap->xlen].high = node->high; |
797 | 6 | cmap->xranges[cmap->xlen].out = node->out; |
798 | 6 | cmap->xlen++; |
799 | 6 | } |
800 | 64.9k | } |
801 | | |
802 | | void |
803 | | pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap) |
804 | 6 | { |
805 | 6 | int counts[3]; |
806 | | |
807 | 6 | if (cmap->tree == NULL) |
808 | 0 | return; |
809 | | |
810 | 6 | counts[0] = 0; |
811 | 6 | counts[1] = 0; |
812 | 6 | counts[2] = 0; |
813 | 6 | walk_splay(cmap->tree, cmap->ttop, count_node_types, &counts); |
814 | | |
815 | 6 | cmap->ranges = Memento_label(fz_malloc_array(ctx, counts[0], pdf_range), "cmap_range"); |
816 | 6 | cmap->rcap = counts[0]; |
817 | 6 | cmap->xranges = Memento_label(fz_malloc_array(ctx, counts[1], pdf_xrange), "cmap_xrange"); |
818 | 6 | cmap->xcap = counts[1]; |
819 | 6 | cmap->mranges = Memento_label(fz_malloc_array(ctx, counts[2], pdf_mrange), "cmap_mrange"); |
820 | 6 | cmap->mcap = counts[2]; |
821 | | |
822 | 6 | walk_splay(cmap->tree, cmap->ttop, copy_node_types, cmap); |
823 | | |
824 | 6 | fz_free(ctx, cmap->tree); |
825 | 6 | cmap->tree = NULL; |
826 | 6 | } |
827 | | |
828 | | int |
829 | | pdf_lookup_cmap(pdf_cmap *cmap, unsigned int cpt) |
830 | 131k | { |
831 | 131k | pdf_range *ranges = cmap->ranges; |
832 | 131k | pdf_xrange *xranges = cmap->xranges; |
833 | 131k | int l, r, m; |
834 | | |
835 | 131k | l = 0; |
836 | 131k | r = cmap->rlen - 1; |
837 | 929k | while (l <= r) |
838 | 917k | { |
839 | 917k | m = (l + r) >> 1; |
840 | 917k | if (cpt < ranges[m].low) |
841 | 393k | r = m - 1; |
842 | 524k | else if (cpt > ranges[m].high) |
843 | 403k | l = m + 1; |
844 | 120k | else |
845 | 120k | return cpt - ranges[m].low + ranges[m].out; |
846 | 917k | } |
847 | | |
848 | 11.2k | l = 0; |
849 | 11.2k | r = cmap->xlen - 1; |
850 | 11.2k | while (l <= r) |
851 | 0 | { |
852 | 0 | m = (l + r) >> 1; |
853 | 0 | if (cpt < xranges[m].low) |
854 | 0 | r = m - 1; |
855 | 0 | else if (cpt > xranges[m].high) |
856 | 0 | l = m + 1; |
857 | 0 | else |
858 | 0 | return cpt - xranges[m].low + xranges[m].out; |
859 | 0 | } |
860 | | |
861 | 11.2k | if (cmap->usecmap) |
862 | 0 | return pdf_lookup_cmap(cmap->usecmap, cpt); |
863 | | |
864 | 11.2k | return -1; |
865 | 11.2k | } |
866 | | |
867 | | int |
868 | | pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out) |
869 | 131k | { |
870 | 131k | pdf_range *ranges = cmap->ranges; |
871 | 131k | pdf_xrange *xranges = cmap->xranges; |
872 | 131k | pdf_mrange *mranges = cmap->mranges; |
873 | 131k | unsigned int i; |
874 | 131k | int l, r, m; |
875 | | |
876 | 131k | l = 0; |
877 | 131k | r = cmap->rlen - 1; |
878 | 1.95M | while (l <= r) |
879 | 1.84M | { |
880 | 1.84M | m = (l + r) >> 1; |
881 | 1.84M | if (cpt < ranges[m].low) |
882 | 251k | r = m - 1; |
883 | 1.59M | else if (cpt > ranges[m].high) |
884 | 1.56M | l = m + 1; |
885 | 27.7k | else |
886 | 27.7k | { |
887 | 27.7k | out[0] = cpt - ranges[m].low + ranges[m].out; |
888 | 27.7k | return 1; |
889 | 27.7k | } |
890 | 1.84M | } |
891 | | |
892 | 103k | l = 0; |
893 | 103k | r = cmap->xlen - 1; |
894 | 213k | while (l <= r) |
895 | 110k | { |
896 | 110k | m = (l + r) >> 1; |
897 | 110k | if (cpt < xranges[m].low) |
898 | 110k | r = m - 1; |
899 | 0 | else if (cpt > xranges[m].high) |
900 | 0 | l = m + 1; |
901 | 0 | else |
902 | 0 | { |
903 | 0 | out[0] = cpt - xranges[m].low + xranges[m].out; |
904 | 0 | return 1; |
905 | 0 | } |
906 | 110k | } |
907 | | |
908 | 103k | l = 0; |
909 | 103k | r = cmap->mlen - 1; |
910 | 1.32M | while (l <= r) |
911 | 1.23M | { |
912 | 1.23M | m = (l + r) >> 1; |
913 | 1.23M | if (cpt < mranges[m].low) |
914 | 24.2k | r = m - 1; |
915 | 1.20M | else if (cpt > mranges[m].low) |
916 | 1.20M | l = m + 1; |
917 | 4.69k | else |
918 | 4.69k | { |
919 | 4.69k | int *ptr = &cmap->dict[cmap->mranges[m].out]; |
920 | 4.69k | unsigned int len = (unsigned int)*ptr++; |
921 | 15.6k | for (i = 0; i < len; ++i) |
922 | 10.9k | out[i] = *ptr++; |
923 | 4.69k | return len; |
924 | 4.69k | } |
925 | 1.23M | } |
926 | | |
927 | 98.6k | if (cmap->usecmap) |
928 | 0 | return pdf_lookup_cmap_full(cmap->usecmap, cpt, out); |
929 | | |
930 | 98.6k | return 0; |
931 | 98.6k | } |
932 | | |
933 | | int |
934 | | pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, unsigned char *end, unsigned int *cpt) |
935 | 219 | { |
936 | 219 | unsigned int c; |
937 | 219 | int k, n; |
938 | 219 | int len = end - buf; |
939 | | |
940 | 219 | if (len > 4) |
941 | 18 | len = 4; |
942 | | |
943 | 219 | c = 0; |
944 | 227 | for (n = 0; n < len; n++) |
945 | 227 | { |
946 | 227 | c = (c << 8) | buf[n]; |
947 | 235 | for (k = 0; k < cmap->codespace_len; k++) |
948 | 227 | { |
949 | 227 | if (cmap->codespace[k].n == n + 1) |
950 | 219 | { |
951 | 219 | if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high) |
952 | 219 | { |
953 | 219 | *cpt = c; |
954 | 219 | return n + 1; |
955 | 219 | } |
956 | 219 | } |
957 | 227 | } |
958 | 227 | } |
959 | | |
960 | 0 | *cpt = 0; |
961 | 0 | return 1; |
962 | 219 | } |
963 | | |
964 | | size_t |
965 | | pdf_cmap_size(fz_context *ctx, pdf_cmap *cmap) |
966 | 14 | { |
967 | 14 | if (cmap == NULL) |
968 | 6 | return 0; |
969 | 8 | if (cmap->storable.refs < 0) |
970 | 2 | return 0; |
971 | | |
972 | 6 | return pdf_cmap_size(ctx, cmap->usecmap) + |
973 | 6 | cmap->rcap * sizeof *cmap->ranges + |
974 | 6 | cmap->xcap * sizeof *cmap->xranges + |
975 | 6 | cmap->mcap * sizeof *cmap->mranges + |
976 | 6 | cmap->tcap * sizeof *cmap->tree + |
977 | 6 | sizeof(*cmap); |
978 | 8 | } |