Coverage Report

Created: 2026-06-30 07:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/pdf/pdf-cmap.c
Line
Count
Source
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <assert.h>
27
#include <string.h>
28
29
#undef CHECK_SPLAY
30
#undef DUMP_SPLAY
31
32
6.14k
#define CMAP_TABLE_LIMIT 65536
33
34
/*
35
 * Allocate, destroy and simple parameters.
36
 */
37
38
void
39
pdf_drop_cmap_imp(fz_context *ctx, fz_storable *cmap_)
40
955
{
41
955
  pdf_cmap *cmap = (pdf_cmap *)cmap_;
42
955
  pdf_drop_cmap(ctx, cmap->usecmap);
43
955
  fz_free(ctx, cmap->ranges);
44
955
  fz_free(ctx, cmap->xranges);
45
955
  fz_free(ctx, cmap->mranges);
46
955
  fz_free(ctx, cmap->dict);
47
955
  fz_free(ctx, cmap->tree);
48
955
  fz_free(ctx, cmap);
49
955
}
50
51
pdf_cmap *
52
pdf_new_cmap(fz_context *ctx)
53
955
{
54
955
  pdf_cmap *cmap = fz_malloc_struct(ctx, pdf_cmap);
55
955
  FZ_INIT_STORABLE(cmap, 1, pdf_drop_cmap_imp);
56
955
  return cmap;
57
955
}
58
59
/* Could be a macro for speed */
60
pdf_cmap *
61
pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap)
62
14
{
63
14
  if (cmap)
64
14
    return fz_keep_storable(ctx, &cmap->storable);
65
0
  return NULL;
66
14
}
67
68
/* Could be a macro for speed */
69
void
70
pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap)
71
3.23k
{
72
3.23k
  if (cmap)
73
1.06k
    fz_drop_storable(ctx, &cmap->storable);
74
3.23k
}
75
76
void
77
pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap)
78
0
{
79
0
  int i;
80
81
0
  pdf_drop_cmap(ctx, cmap->usecmap);
82
0
  cmap->usecmap = pdf_keep_cmap(ctx, usecmap);
83
84
0
  if (cmap->codespace_len == 0)
85
0
  {
86
0
    cmap->codespace_len = usecmap->codespace_len;
87
0
    for (i = 0; i < usecmap->codespace_len; i++)
88
0
      cmap->codespace[i] = usecmap->codespace[i];
89
0
  }
90
0
}
91
92
int
93
pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap)
94
158
{
95
158
  return cmap->wmode;
96
158
}
97
98
void
99
pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode)
100
545
{
101
545
  cmap->wmode = wmode;
102
545
}
103
104
void
105
pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, size_t n)
106
955
{
107
955
  if (cmap->codespace_len + 1 == nelem(cmap->codespace))
108
0
  {
109
0
    fz_warn(ctx, "assert: too many code space ranges");
110
0
    return;
111
0
  }
112
113
955
  if ((uint32_t)n != n)
114
0
  {
115
0
    fz_warn(ctx, "assert: code space range too large");
116
0
    return;
117
0
  }
118
119
955
  cmap->codespace[cmap->codespace_len].n = (int)n;
120
955
  cmap->codespace[cmap->codespace_len].low = low;
121
955
  cmap->codespace[cmap->codespace_len].high = high;
122
955
  cmap->codespace_len ++;
123
955
}
124
125
struct cmap_splay {
126
  unsigned int low;
127
  unsigned int high;
128
  unsigned int out;
129
  unsigned int left;
130
  unsigned int right;
131
  unsigned int parent : 31;
132
  unsigned int many : 1;
133
};
134
135
1.31M
#define EMPTY ((unsigned int)0x40000000)
136
137
/*
138
  The splaying steps used:
139
140
  Case 1: |              z              x
141
    |          y       D  =>  A       y
142
    |        x   C                  B   z
143
    |       A B                        C D
144
145
  Case 2: |         z              x
146
    |     y       D  =>   y     z
147
    |   A   x            A B   C D
148
    |  B C
149
150
  Case 3: |     y                 x
151
    |  x     C       =>   A     y
152
    | A B                      B C
153
*/
154
155
static void
156
move_to_root(cmap_splay *tree, unsigned int x)
157
50.4k
{
158
50.4k
  if (x == EMPTY)
159
0
    return;
160
50.4k
  do
161
50.4k
  {
162
50.4k
    unsigned int z, zp;
163
50.4k
    unsigned int y = tree[x].parent;
164
50.4k
    if (y == EMPTY)
165
845
      break;
166
49.5k
    z = tree[y].parent;
167
49.5k
    if (z == EMPTY)
168
49.5k
    {
169
      /* Case 3 */
170
49.5k
      tree[x].parent = EMPTY;
171
49.5k
      tree[y].parent = x;
172
49.5k
      if (tree[y].left == x)
173
0
      {
174
        /* Case 3 */
175
0
        tree[y].left = tree[x].right;
176
0
        if (tree[y].left != EMPTY)
177
0
          tree[tree[y].left].parent = y;
178
0
        tree[x].right = y;
179
0
      }
180
49.5k
      else
181
49.5k
      {
182
        /* Case 3 - reflected */
183
49.5k
        assert(tree[y].right == x);
184
49.5k
        tree[y].right = tree[x].left;
185
49.5k
        if (tree[y].right != EMPTY)
186
0
          tree[tree[y].right].parent = y;
187
49.5k
        tree[x].left = y;
188
49.5k
      }
189
49.5k
      break;
190
49.5k
    }
191
192
0
    zp = tree[z].parent;
193
0
    tree[x].parent = zp;
194
0
    if (zp != EMPTY) {
195
0
      if (tree[zp].left == z)
196
0
        tree[zp].left = x;
197
0
      else
198
0
      {
199
0
        assert(tree[zp].right == z);
200
0
        tree[zp].right = x;
201
0
      }
202
0
    }
203
0
    tree[y].parent = x;
204
0
    if (tree[y].left == x)
205
0
    {
206
0
      tree[y].left = tree[x].right;
207
0
      if (tree[y].left != EMPTY)
208
0
        tree[tree[y].left].parent = y;
209
0
      tree[x].right = y;
210
0
      if (tree[z].left == y)
211
0
      {
212
        /* Case 1 */
213
0
        tree[z].parent = y;
214
0
        tree[z].left = tree[y].right;
215
0
        if (tree[z].left != EMPTY)
216
0
          tree[tree[z].left].parent = z;
217
0
        tree[y].right = z;
218
0
      }
219
0
      else
220
0
      {
221
        /* Case 2 - reflected */
222
0
        assert(tree[z].right == y);
223
0
        tree[z].parent = x;
224
0
        tree[z].right = tree[x].left;
225
0
        if (tree[z].right != EMPTY)
226
0
          tree[tree[z].right].parent = z;
227
0
        tree[x].left = z;
228
0
      }
229
0
    }
230
0
    else
231
0
    {
232
0
      assert(tree[y].right == x);
233
0
      tree[y].right = tree[x].left;
234
0
      if (tree[y].right != EMPTY)
235
0
        tree[tree[y].right].parent = y;
236
0
      tree[x].left = y;
237
0
      if (tree[z].left == y)
238
0
      {
239
        /* Case 2 */
240
0
        tree[z].parent = x;
241
0
        tree[z].left = tree[x].right;
242
0
        if (tree[z].left != EMPTY)
243
0
          tree[tree[z].left].parent = z;
244
0
        tree[x].right = z;
245
0
      }
246
0
      else
247
0
      {
248
        /* Case 1 - reflected */
249
0
        assert(tree[z].right == y);
250
0
        tree[z].parent = y;
251
0
        tree[z].right = tree[y].left;
252
0
        if (tree[z].right != EMPTY)
253
0
          tree[tree[z].right].parent = z;
254
0
        tree[y].left = z;
255
0
      }
256
0
    }
257
0
  } while (1);
258
50.4k
}
259
260
static unsigned int delete_node(pdf_cmap *cmap, unsigned int current)
261
0
{
262
0
  cmap_splay *tree = cmap->tree;
263
0
  unsigned int parent;
264
0
  unsigned int replacement;
265
266
0
  assert(current != EMPTY);
267
268
0
  parent = tree[current].parent;
269
0
  if (tree[current].right == EMPTY)
270
0
  {
271
0
    if (parent == EMPTY)
272
0
    {
273
0
      replacement = cmap->ttop = tree[current].left;
274
0
    }
275
0
    else if (tree[parent].left == current)
276
0
    {
277
0
      replacement = tree[parent].left = tree[current].left;
278
0
    }
279
0
    else
280
0
    {
281
0
      assert(tree[parent].right == current);
282
0
      replacement = tree[parent].right = tree[current].left;
283
0
    }
284
0
    if (replacement != EMPTY)
285
0
      tree[replacement].parent = parent;
286
0
    else
287
0
      replacement = parent;
288
0
  }
289
0
  else if (tree[current].left == EMPTY)
290
0
  {
291
0
    if (parent == EMPTY)
292
0
    {
293
0
      replacement = cmap->ttop = tree[current].right;
294
0
    }
295
0
    else if (tree[parent].left == current)
296
0
    {
297
0
      replacement = tree[parent].left = tree[current].right;
298
0
    }
299
0
    else
300
0
    {
301
0
      assert(tree[parent].right == current);
302
0
      replacement = tree[parent].right = tree[current].right;
303
0
    }
304
0
    if (replacement != EMPTY)
305
0
      tree[replacement].parent = parent;
306
0
    else
307
0
      replacement = parent;
308
0
  }
309
0
  else
310
0
  {
311
    /* Hard case, find the in-order predecessor of current */
312
0
    unsigned int amputee = current;
313
0
    replacement = tree[current].left;
314
0
    while (tree[replacement].right != EMPTY) {
315
0
      amputee = replacement;
316
0
      replacement = tree[replacement].right;
317
0
    }
318
    /* Remove replacement from the tree */
319
0
    if (amputee == current)
320
0
    {
321
0
      tree[amputee].left = tree[replacement].left;
322
0
      if (tree[amputee].left != EMPTY)
323
0
        tree[tree[amputee].left].parent = amputee;
324
0
    }
325
0
    else
326
0
    {
327
0
      tree[amputee].right = tree[replacement].left;
328
0
      if (tree[amputee].right != EMPTY)
329
0
        tree[tree[amputee].right].parent = amputee;
330
0
    }
331
    /* Insert replacement in place of current */
332
0
    tree[replacement].parent = parent;
333
0
    if (parent == EMPTY)
334
0
    {
335
0
      tree[replacement].parent = EMPTY;
336
0
      cmap->ttop = replacement;
337
0
    }
338
0
    else if (tree[parent].left == current)
339
0
      tree[parent].left = replacement;
340
0
    else
341
0
    {
342
0
      assert(tree[parent].right == current);
343
0
      tree[parent].right = replacement;
344
0
    }
345
0
    tree[replacement].left = tree[current].left;
346
0
    if (tree[replacement].left != EMPTY)
347
0
      tree[tree[replacement].left].parent = replacement;
348
0
    tree[replacement].right = tree[current].right;
349
0
    if (tree[replacement].right != EMPTY)
350
0
      tree[tree[replacement].right].parent = replacement;
351
0
  }
352
353
  /* current is now unlinked. We need to remove it from our array. */
354
0
  cmap->tlen--;
355
0
  if (current != (unsigned int) cmap->tlen)
356
0
  {
357
0
    if (replacement == (unsigned int) cmap->tlen)
358
0
      replacement = current;
359
0
    tree[current] = tree[cmap->tlen];
360
0
    parent = tree[current].parent;
361
0
    if (parent == EMPTY)
362
0
      cmap->ttop = current;
363
0
    else if (tree[parent].left == (unsigned int) cmap->tlen)
364
0
      tree[parent].left = current;
365
0
    else
366
0
    {
367
0
      assert(tree[parent].right == (unsigned int) cmap->tlen);
368
0
      tree[parent].right = current;
369
0
    }
370
0
    if (tree[current].left != EMPTY)
371
0
    {
372
0
      assert(tree[tree[current].left].parent == (unsigned int) cmap->tlen);
373
0
      tree[tree[current].left].parent = current;
374
0
    }
375
0
    if (tree[current].right != EMPTY)
376
0
    {
377
0
      assert(tree[tree[current].right].parent == (unsigned int) cmap->tlen);
378
0
      tree[tree[current].right].parent = current;
379
0
    }
380
0
  }
381
382
  /* Return the node that we should continue searching from */
383
0
  return replacement;
384
0
}
385
386
#ifdef DUMP_SPLAY
387
static void
388
dump_splay(cmap_splay *tree, unsigned int node, int depth, const char *pre)
389
{
390
  int i;
391
392
  if (tree == NULL || node == EMPTY)
393
    return;
394
395
  for (i = 0; i < depth; i++)
396
    fprintf(stderr, " ");
397
  fprintf(stderr, "%s%d:", pre, node);
398
  if (tree[node].parent == EMPTY)
399
    fprintf(stderr, "^EMPTY");
400
  else
401
    fprintf(stderr, "^%d", tree[node].parent);
402
  if (tree[node].left == EMPTY)
403
    fprintf(stderr, "<EMPTY");
404
  else
405
    fprintf(stderr, "<%d", tree[node].left);
406
  if (tree[node].right == EMPTY)
407
    fprintf(stderr, ">EMPTY");
408
  else
409
    fprintf(stderr, ">%d", tree[node].right);
410
  fprintf(stderr, "(%x,%x,%x,%d)\n", tree[node].low, tree[node].high, tree[node].out, tree[node].many);
411
  assert(tree[node].parent == EMPTY || depth);
412
  assert(tree[node].left == EMPTY || tree[tree[node].left].parent == node);
413
  assert(tree[node].right == EMPTY || tree[tree[node].right].parent == node);
414
  dump_splay(tree, tree[node].left, depth+1, "L");
415
  dump_splay(tree, tree[node].right, depth+1, "R");
416
}
417
#endif
418
419
enum
420
{
421
  TOP = 0,
422
  LEFT = 1,
423
  RIGHT = 2
424
};
425
426
static void walk_splay(cmap_splay *tree, unsigned int node, void (*fn)(cmap_splay *, void *), void *arg)
427
1.69k
{
428
1.69k
  int from = TOP;
429
430
199k
  while (node != EMPTY)
431
199k
  {
432
199k
    switch (from)
433
199k
    {
434
100k
    case TOP:
435
100k
      if (tree[node].left != EMPTY)
436
99.1k
      {
437
99.1k
        node = tree[node].left;
438
99.1k
        from = TOP;
439
99.1k
        break;
440
99.1k
      }
441
      /* fallthrough */
442
100k
    case LEFT:
443
100k
      fn(&tree[node], arg);
444
100k
      if (tree[node].right != EMPTY)
445
0
      {
446
0
        node = tree[node].right;
447
0
        from = TOP;
448
0
        break;
449
0
      }
450
      /* fallthrough */
451
100k
    case RIGHT:
452
100k
      {
453
100k
        unsigned int parent = tree[node].parent;
454
100k
        if (parent == EMPTY)
455
1.69k
          return;
456
99.1k
        if (tree[parent].left == node)
457
99.1k
          from = LEFT;
458
0
        else
459
0
        {
460
0
          assert(tree[parent].right == node);
461
0
          from = RIGHT;
462
0
        }
463
99.1k
        node = parent;
464
99.1k
      }
465
199k
    }
466
199k
  }
467
1.69k
}
468
469
#ifdef CHECK_SPLAY
470
471
static int
472
tree_has_overlap(cmap_splay *tree, int node, int low, int high)
473
{
474
  if (tree[node].left != EMPTY)
475
    if (tree_has_overlap(tree, tree[node].left, low, high))
476
      return 1;
477
  if (tree[node].right != EMPTY)
478
    if (tree_has_overlap(tree, tree[node].right, low, high))
479
      return 1;
480
  return (tree[node].low < low && low < tree[node].high) || (tree[node].low < high && high < tree[node].high);
481
}
482
483
static void
484
do_check(cmap_splay *node, void *arg)
485
{
486
  cmap_splay *tree = arg;
487
  unsigned int num = node - tree;
488
  assert(!node->many || node->low == node->high);
489
  assert(node->low <= node->high);
490
  assert((node->left == EMPTY) || (tree[node->left].parent == num &&
491
    tree[node->left].high < node->low));
492
  assert(node->right == EMPTY || (tree[node->right].parent == num &&
493
    node->high < tree[node->right].low));
494
  assert(!tree_has_overlap(tree, num, node->low, node->high));
495
}
496
497
static void
498
check_splay(cmap_splay *tree, unsigned int node, int depth)
499
{
500
  if (node == EMPTY)
501
    return;
502
  assert(tree[node].parent == EMPTY);
503
  walk_splay(tree, node, do_check, tree);
504
}
505
#endif
506
507
/*
508
 * Add a range.
509
 */
510
static void
511
add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out, int check_for_overlap, int many)
512
62.0k
{
513
62.0k
  int current;
514
62.0k
  cmap_splay *tree;
515
516
62.0k
  if (low > high)
517
0
  {
518
0
    fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name);
519
0
    return;
520
0
  }
521
522
62.0k
  if (cmap->codespace_len == 0)
523
0
  {
524
0
    fz_warn(ctx, "CMap is missing codespace range");
525
0
    pdf_add_codespace(ctx, cmap, 0, 65535, 2);
526
0
  }
527
528
62.0k
  tree = cmap->tree;
529
530
62.0k
  if (cmap->tlen)
531
61.1k
  {
532
61.1k
    unsigned int move = cmap->ttop;
533
61.1k
    unsigned int gt = EMPTY;
534
61.1k
    unsigned int lt = EMPTY;
535
61.1k
    if (check_for_overlap)
536
61.1k
    {
537
      /* Check for collision with the current node */
538
61.1k
      do
539
61.1k
      {
540
61.1k
        current = move;
541
        /* Cases we might meet:
542
         * tree[i]:        <----->
543
         * case 0:     <->
544
         * case 1:     <------->
545
         * case 2:     <------------->
546
         * case 3:           <->
547
         * case 4:           <------->
548
         * case 5:                 <->
549
         */
550
61.1k
        if (low <= tree[current].low && tree[current].low <= high)
551
0
        {
552
          /* case 1, reduces to case 0 */
553
          /* or case 2, deleting the node */
554
0
          if (!tree[current].many)
555
0
          {
556
0
            tree[current].out += high + 1 - tree[current].low;
557
0
            tree[current].low = high + 1;
558
0
          }
559
0
          if (tree[current].low > tree[current].high || tree[current].many)
560
0
          {
561
            /* update lt/gt references that will be moved/stale after deleting current */
562
0
            if (gt == (unsigned int) cmap->tlen - 1)
563
0
              gt = current;
564
0
            if (lt == (unsigned int) cmap->tlen - 1)
565
0
              lt = current;
566
            /* delete_node() moves the element at cmap->tlen-1 into current */
567
0
            move = delete_node(cmap, current);
568
0
            current = EMPTY;
569
0
            continue;
570
0
          }
571
0
        }
572
61.1k
        else if (low <= tree[current].high && tree[current].high <= high)
573
0
        {
574
          /* case 4, reduces to case 5 */
575
0
          tree[current].high = low - 1;
576
0
          assert(tree[current].low <= tree[current].high);
577
0
        }
578
61.1k
        else if (tree[current].low < low && high < tree[current].high)
579
0
        {
580
          /* case 3, reduces to case 5 */
581
0
          int new_high = tree[current].high;
582
0
          tree[current].high = low-1;
583
0
          assert(!tree[current].many);
584
0
          add_range(ctx, cmap, high+1, new_high, tree[current].out + high + 1 - tree[current].low, 0, 0);
585
0
          tree = cmap->tree;
586
0
        }
587
        /* Now look for where to move to next (left for case 0, right for case 5) */
588
61.1k
        if (tree[current].low > high) {
589
0
          move = tree[current].left;
590
0
          gt = current;
591
0
        }
592
61.1k
        else
593
61.1k
        {
594
61.1k
          move = tree[current].right;
595
61.1k
          lt = current;
596
61.1k
        }
597
61.1k
      }
598
61.1k
      while (move != EMPTY);
599
61.1k
    }
600
0
    else
601
0
    {
602
0
      do
603
0
      {
604
0
        current = move;
605
0
        if (tree[current].low > high)
606
0
        {
607
0
          move = tree[current].left;
608
0
          gt = current;
609
0
        }
610
0
        else
611
0
        {
612
0
          move = tree[current].right;
613
0
          lt = current;
614
0
        }
615
0
      } while (move != EMPTY);
616
0
    }
617
    /* current is now the node to which we would be adding the new node */
618
    /* lt is the last node we traversed which is lt the new node. */
619
    /* gt is the last node we traversed which is gt the new node. */
620
621
61.1k
    if (!many)
622
55.9k
    {
623
      /* Check for the 'merge' cases. */
624
55.9k
      if (lt != EMPTY && !tree[lt].many && tree[lt].high == low-1 && tree[lt].out - tree[lt].low == out - low)
625
11.6k
      {
626
11.6k
        tree[lt].high = high;
627
11.6k
        if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
628
0
        {
629
0
          tree[lt].high = tree[gt].high;
630
0
          delete_node(cmap, gt);
631
0
        }
632
11.6k
        goto exit;
633
11.6k
      }
634
44.2k
      if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
635
0
      {
636
0
        tree[gt].low = low;
637
0
        tree[gt].out = out;
638
0
        goto exit;
639
0
      }
640
44.2k
    }
641
61.1k
  }
642
845
  else
643
845
    current = EMPTY;
644
645
50.4k
  if (cmap->tlen == cmap->tcap)
646
873
  {
647
873
    int new_cap = cmap->tcap ? cmap->tcap * 2 : 256;
648
873
    if (new_cap > CMAP_TABLE_LIMIT)
649
0
      fz_throw(ctx, FZ_ERROR_LIMIT, "too many entries in CMap table");
650
873
    tree = cmap->tree = fz_realloc_array(ctx, cmap->tree, new_cap, cmap_splay);
651
873
    cmap->tcap = new_cap;
652
873
  }
653
50.4k
  tree[cmap->tlen].low = low;
654
50.4k
  tree[cmap->tlen].high = high;
655
50.4k
  tree[cmap->tlen].out = out;
656
50.4k
  tree[cmap->tlen].parent = current;
657
50.4k
  tree[cmap->tlen].left = EMPTY;
658
50.4k
  tree[cmap->tlen].right = EMPTY;
659
50.4k
  tree[cmap->tlen].many = many;
660
50.4k
  cmap->tlen++;
661
50.4k
  if (current == EMPTY)
662
845
    cmap->ttop = 0;
663
49.5k
  else if (tree[current].low > high)
664
0
    tree[current].left = cmap->tlen-1;
665
49.5k
  else
666
49.5k
  {
667
49.5k
    assert(tree[current].high < low);
668
49.5k
    tree[current].right = cmap->tlen-1;
669
49.5k
  }
670
50.4k
  move_to_root(tree, cmap->tlen-1);
671
50.4k
  cmap->ttop = cmap->tlen-1;
672
62.0k
exit:
673
62.0k
  {}
674
#ifdef CHECK_SPLAY
675
  check_splay(cmap->tree, cmap->ttop, 0);
676
#endif
677
#ifdef DUMP_SPLAY
678
  dump_splay(cmap->tree, cmap->ttop, 0, "");
679
#endif
680
62.0k
}
681
682
/*
683
 * Add a one-to-many mapping.
684
 */
685
static void
686
add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len)
687
5.26k
{
688
5.26k
  int out_pos;
689
5.26k
  int new_len = cmap->dlen + len + 1;
690
5.26k
  int new_cap;
691
692
5.26k
  if (new_len > CMAP_TABLE_LIMIT)
693
0
    fz_throw(ctx, FZ_ERROR_LIMIT, "too many entries in CMap table");
694
695
5.26k
  if (new_len > cmap->dcap)
696
14
  {
697
14
    new_cap = cmap->dcap > 0 ? cmap->dcap : 256;
698
26
    while (new_len > new_cap)
699
12
      new_cap *= 2;
700
14
    cmap->dict = fz_realloc_array(ctx, cmap->dict, new_cap, int);
701
14
    cmap->dcap = new_cap;
702
14
  }
703
5.26k
  out_pos = cmap->dlen;
704
5.26k
  cmap->dict[out_pos] = len;
705
5.26k
  memcpy(&cmap->dict[out_pos+1], out, sizeof(int)*len);
706
5.26k
  cmap->dlen = new_len;
707
708
5.26k
  add_range(ctx, cmap, low, low, out_pos, 1, 1);
709
5.26k
}
710
711
void
712
pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out)
713
30.3k
{
714
30.3k
  add_range(ctx, cmap, low, high, out, 1, 0);
715
30.3k
}
716
717
void
718
pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *values, size_t len)
719
31.7k
{
720
31.7k
  int *ovalues = values;
721
  /* len is always restricted to <= 256 by the callers. */
722
31.7k
  int local[PDF_MRANGE_CAP];
723
724
31.7k
  assert(len <= PDF_MRANGE_CAP);
725
726
  /* Decode unicode surrogate pairs. */
727
  /* Only the *-UCS2 CMaps use one-to-many mappings, so assuming unicode should be safe. */
728
31.7k
  if (len >= 2)
729
5.26k
  {
730
5.26k
    size_t i, j;
731
    /* Look for mranges with either multiple surrogate pairs in, or surrogate pairs
732
     * with other chars. See bug 706131. */
733
17.8k
    for (i = 0, j = 0; i < len; i++, j++)
734
12.5k
    {
735
12.5k
      int hi = ovalues[i];
736
12.5k
      if (hi >= 0xd800 && hi < 0xdc00 && i < len-1)
737
0
      {
738
0
        int lo = ovalues[i+1];
739
0
        if (lo >= 0xdc00 && lo < 0xe000)
740
0
        {
741
0
          hi = ((hi - 0xD800) << 10) + (lo - 0xDC00) + 0x10000;
742
0
          i++;
743
0
        }
744
0
      }
745
12.5k
      if (values != local)
746
5.26k
      {
747
        /* We can't change the callers data, so copy stuff in. */
748
5.26k
        if (j)
749
0
          memcpy(local, values, sizeof(local[0]) * (j-1));
750
5.26k
        values = local;
751
5.26k
      }
752
12.5k
      values[j] = hi;
753
12.5k
    }
754
5.26k
    len = j;
755
5.26k
  }
756
757
31.7k
  if (len == 1)
758
26.4k
  {
759
26.4k
    add_range(ctx, cmap, low, low, values[0], 1, 0);
760
26.4k
    return;
761
26.4k
  }
762
763
5.26k
  if (len > PDF_MRANGE_CAP)
764
0
  {
765
0
    fz_warn(ctx, "ignoring one to many mapping in cmap %s", cmap->cmap_name);
766
0
    return;
767
0
  }
768
769
5.26k
  add_mrange(ctx, cmap, low, values, (int)len);
770
5.26k
}
771
772
static void
773
count_node_types(cmap_splay *node, void *arg)
774
50.4k
{
775
50.4k
  int *counts = (int *)arg;
776
777
50.4k
  if (node->many)
778
5.26k
    counts[2]++;
779
45.1k
  else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
780
45.1k
    counts[0]++;
781
0
  else
782
0
    counts[1]++;
783
50.4k
}
784
785
static void
786
copy_node_types(cmap_splay *node, void *arg)
787
50.4k
{
788
50.4k
  pdf_cmap *cmap = (pdf_cmap *)arg;
789
790
50.4k
  if (node->many)
791
5.26k
  {
792
5.26k
    assert(node->low == node->high);
793
5.26k
    cmap->mranges[cmap->mlen].low = node->low;
794
5.26k
    cmap->mranges[cmap->mlen].out = node->out;
795
5.26k
    cmap->mlen++;
796
5.26k
  }
797
45.1k
  else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
798
45.1k
  {
799
45.1k
    cmap->ranges[cmap->rlen].low = node->low;
800
45.1k
    cmap->ranges[cmap->rlen].high = node->high;
801
45.1k
    cmap->ranges[cmap->rlen].out = node->out;
802
45.1k
    cmap->rlen++;
803
45.1k
  }
804
0
  else
805
0
  {
806
0
    cmap->xranges[cmap->xlen].low = node->low;
807
0
    cmap->xranges[cmap->xlen].high = node->high;
808
0
    cmap->xranges[cmap->xlen].out = node->out;
809
0
    cmap->xlen++;
810
0
  }
811
50.4k
}
812
813
void
814
pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap)
815
955
{
816
955
  int counts[3];
817
818
955
  if (cmap->tree == NULL)
819
110
    return;
820
821
845
  counts[0] = 0;
822
845
  counts[1] = 0;
823
845
  counts[2] = 0;
824
845
  walk_splay(cmap->tree, cmap->ttop, count_node_types, &counts);
825
826
845
  cmap->ranges = Memento_label(fz_malloc_array(ctx, counts[0], pdf_range), "cmap_range");
827
845
  cmap->rcap = counts[0];
828
845
  cmap->xranges = Memento_label(fz_malloc_array(ctx, counts[1], pdf_xrange), "cmap_xrange");
829
845
  cmap->xcap = counts[1];
830
845
  cmap->mranges = Memento_label(fz_malloc_array(ctx, counts[2], pdf_mrange), "cmap_mrange");
831
845
  cmap->mcap = counts[2];
832
833
845
  walk_splay(cmap->tree, cmap->ttop, copy_node_types, cmap);
834
835
845
  fz_free(ctx, cmap->tree);
836
845
  cmap->tree = NULL;
837
845
}
838
839
int
840
pdf_lookup_cmap(pdf_cmap *cmap, unsigned int cpt)
841
1.84M
{
842
1.84M
  pdf_range *ranges = cmap->ranges;
843
1.84M
  pdf_xrange *xranges = cmap->xranges;
844
1.84M
  int l, r, m;
845
846
1.84M
  l = 0;
847
1.84M
  r = cmap->rlen - 1;
848
2.62M
  while (l <= r)
849
2.59M
  {
850
2.59M
    m = (l + r) >> 1;
851
2.59M
    if (cpt < ranges[m].low)
852
277k
      r = m - 1;
853
2.31M
    else if (cpt > ranges[m].high)
854
508k
      l = m + 1;
855
1.80M
    else
856
1.80M
      return cpt - ranges[m].low + ranges[m].out;
857
2.59M
  }
858
859
36.0k
  l = 0;
860
36.0k
  r = cmap->xlen - 1;
861
36.0k
  while (l <= r)
862
0
  {
863
0
    m = (l + r) >> 1;
864
0
    if (cpt < xranges[m].low)
865
0
      r = m - 1;
866
0
    else if (cpt > xranges[m].high)
867
0
      l = m + 1;
868
0
    else
869
0
      return cpt - xranges[m].low + xranges[m].out;
870
0
  }
871
872
36.0k
  if (cmap->usecmap)
873
0
    return pdf_lookup_cmap(cmap->usecmap, cpt);
874
875
36.0k
  return -1;
876
36.0k
}
877
878
int
879
pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out)
880
4.42M
{
881
4.42M
  pdf_range *ranges = cmap->ranges;
882
4.42M
  pdf_xrange *xranges = cmap->xranges;
883
4.42M
  pdf_mrange *mranges = cmap->mranges;
884
4.42M
  unsigned int i;
885
4.42M
  int l, r, m;
886
887
4.42M
  l = 0;
888
4.42M
  r = cmap->rlen - 1;
889
24.8M
  while (l <= r)
890
20.4M
  {
891
20.4M
    m = (l + r) >> 1;
892
20.4M
    if (cpt < ranges[m].low)
893
249k
      r = m - 1;
894
20.2M
    else if (cpt > ranges[m].high)
895
20.1M
      l = m + 1;
896
68.2k
    else
897
68.2k
    {
898
68.2k
      out[0] = cpt - ranges[m].low + ranges[m].out;
899
68.2k
      return 1;
900
68.2k
    }
901
20.4M
  }
902
903
4.35M
  l = 0;
904
4.35M
  r = cmap->xlen - 1;
905
4.35M
  while (l <= r)
906
0
  {
907
0
    m = (l + r) >> 1;
908
0
    if (cpt < xranges[m].low)
909
0
      r = m - 1;
910
0
    else if (cpt > xranges[m].high)
911
0
      l = m + 1;
912
0
    else
913
0
    {
914
0
      out[0] = cpt - xranges[m].low + xranges[m].out;
915
0
      return 1;
916
0
    }
917
0
  }
918
919
4.35M
  l = 0;
920
4.35M
  r = cmap->mlen - 1;
921
4.92M
  while (l <= r)
922
574k
  {
923
574k
    m = (l + r) >> 1;
924
574k
    if (cpt < mranges[m].low)
925
11.8k
      r = m - 1;
926
563k
    else if (cpt > mranges[m].low)
927
560k
      l = m + 1;
928
2.63k
    else
929
2.63k
    {
930
2.63k
      int *ptr = &cmap->dict[cmap->mranges[m].out];
931
2.63k
      unsigned int len = (unsigned int)*ptr++;
932
8.91k
      for (i = 0; i < len; ++i)
933
6.28k
        out[i] = *ptr++;
934
2.63k
      return len;
935
2.63k
    }
936
574k
  }
937
938
4.35M
  if (cmap->usecmap)
939
0
    return pdf_lookup_cmap_full(cmap->usecmap, cpt, out);
940
941
4.35M
  return 0;
942
4.35M
}
943
944
int
945
pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, unsigned char *end, unsigned int *cpt)
946
1.38M
{
947
1.38M
  unsigned int c;
948
1.38M
  int k, n;
949
1.38M
  int len = end - buf;
950
951
1.38M
  if (len > 4)
952
415k
    len = 4;
953
954
1.38M
  c = 0;
955
1.41M
  for (n = 0; n < len; n++)
956
1.41M
  {
957
1.41M
    c = (c << 8) | buf[n];
958
1.44M
    for (k = 0; k < cmap->codespace_len; k++)
959
1.41M
    {
960
1.41M
      if (cmap->codespace[k].n == n + 1)
961
1.38M
      {
962
1.38M
        if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high)
963
1.38M
        {
964
1.38M
          *cpt = c;
965
1.38M
          return n + 1;
966
1.38M
        }
967
1.38M
      }
968
1.41M
    }
969
1.41M
  }
970
971
0
  *cpt = 0;
972
0
  return 1;
973
1.38M
}
974
975
size_t
976
pdf_cmap_size(fz_context *ctx, pdf_cmap *cmap)
977
1.98k
{
978
1.98k
  if (cmap == NULL)
979
955
    return 0;
980
1.03k
  if (cmap->storable.refs < 0)
981
79
    return 0;
982
983
955
  return pdf_cmap_size(ctx, cmap->usecmap) +
984
955
    cmap->rcap * sizeof *cmap->ranges +
985
955
    cmap->xcap * sizeof *cmap->xranges +
986
955
    cmap->mcap * sizeof *cmap->mranges +
987
955
    cmap->tcap * sizeof *cmap->tree +
988
955
    sizeof(*cmap);
989
1.03k
}