Coverage Report

Created: 2025-12-03 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/pdf/pdf-cmap.c
Line
Count
Source
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <assert.h>
27
#include <string.h>
28
29
#undef CHECK_SPLAY
30
#undef DUMP_SPLAY
31
32
/*
33
 * Allocate, destroy and simple parameters.
34
 */
35
36
void
37
pdf_drop_cmap_imp(fz_context *ctx, fz_storable *cmap_)
38
6
{
39
6
  pdf_cmap *cmap = (pdf_cmap *)cmap_;
40
6
  pdf_drop_cmap(ctx, cmap->usecmap);
41
6
  fz_free(ctx, cmap->ranges);
42
6
  fz_free(ctx, cmap->xranges);
43
6
  fz_free(ctx, cmap->mranges);
44
6
  fz_free(ctx, cmap->dict);
45
6
  fz_free(ctx, cmap->tree);
46
6
  fz_free(ctx, cmap);
47
6
}
48
49
pdf_cmap *
50
pdf_new_cmap(fz_context *ctx)
51
6
{
52
6
  pdf_cmap *cmap = fz_malloc_struct(ctx, pdf_cmap);
53
6
  FZ_INIT_STORABLE(cmap, 1, pdf_drop_cmap_imp);
54
6
  return cmap;
55
6
}
56
57
/* Could be a macro for speed */
58
pdf_cmap *
59
pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap)
60
2
{
61
2
  return fz_keep_storable(ctx, &cmap->storable);
62
2
}
63
64
/* Could be a macro for speed */
65
void
66
pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap)
67
22
{
68
22
  fz_drop_storable(ctx, &cmap->storable);
69
22
}
70
71
void
72
pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap)
73
0
{
74
0
  int i;
75
76
0
  pdf_drop_cmap(ctx, cmap->usecmap);
77
0
  cmap->usecmap = pdf_keep_cmap(ctx, usecmap);
78
79
0
  if (cmap->codespace_len == 0)
80
0
  {
81
0
    cmap->codespace_len = usecmap->codespace_len;
82
0
    for (i = 0; i < usecmap->codespace_len; i++)
83
0
      cmap->codespace[i] = usecmap->codespace[i];
84
0
  }
85
0
}
86
87
int
88
pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap)
89
4
{
90
4
  return cmap->wmode;
91
4
}
92
93
void
94
pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode)
95
2
{
96
2
  cmap->wmode = wmode;
97
2
}
98
99
void
100
pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, size_t n)
101
6
{
102
6
  if (cmap->codespace_len + 1 == nelem(cmap->codespace))
103
0
  {
104
0
    fz_warn(ctx, "assert: too many code space ranges");
105
0
    return;
106
0
  }
107
108
6
  if ((uint32_t)n != n)
109
0
  {
110
0
    fz_warn(ctx, "assert: code space range too large");
111
0
    return;
112
0
  }
113
114
6
  cmap->codespace[cmap->codespace_len].n = (int)n;
115
6
  cmap->codespace[cmap->codespace_len].low = low;
116
6
  cmap->codespace[cmap->codespace_len].high = high;
117
6
  cmap->codespace_len ++;
118
6
}
119
120
struct cmap_splay {
121
  unsigned int low;
122
  unsigned int high;
123
  unsigned int out;
124
  unsigned int left;
125
  unsigned int right;
126
  unsigned int parent : 31;
127
  unsigned int many : 1;
128
};
129
130
1.67M
#define EMPTY ((unsigned int)0x40000000)
131
132
/*
133
  The splaying steps used:
134
135
  Case 1: |              z              x
136
    |          y       D  =>  A       y
137
    |        x   C                  B   z
138
    |       A B                        C D
139
140
  Case 2: |         z              x
141
    |     y       D  =>   y     z
142
    |   A   x            A B   C D
143
    |  B C
144
145
  Case 3: |     y                 x
146
    |  x     C       =>   A     y
147
    | A B                      B C
148
*/
149
150
static void
151
move_to_root(cmap_splay *tree, unsigned int x)
152
65.1k
{
153
65.1k
  if (x == EMPTY)
154
0
    return;
155
65.1k
  do
156
79.1k
  {
157
79.1k
    unsigned int z, zp;
158
79.1k
    unsigned int y = tree[x].parent;
159
79.1k
    if (y == EMPTY)
160
2.62k
      break;
161
76.4k
    z = tree[y].parent;
162
76.4k
    if (z == EMPTY)
163
62.5k
    {
164
      /* Case 3 */
165
62.5k
      tree[x].parent = EMPTY;
166
62.5k
      tree[y].parent = x;
167
62.5k
      if (tree[y].left == x)
168
7
      {
169
        /* Case 3 */
170
7
        tree[y].left = tree[x].right;
171
7
        if (tree[y].left != EMPTY)
172
7
          tree[tree[y].left].parent = y;
173
7
        tree[x].right = y;
174
7
      }
175
62.5k
      else
176
62.5k
      {
177
        /* Case 3 - reflected */
178
62.5k
        assert(tree[y].right == x);
179
62.5k
        tree[y].right = tree[x].left;
180
62.5k
        if (tree[y].right != EMPTY)
181
1
          tree[tree[y].right].parent = y;
182
62.5k
        tree[x].left = y;
183
62.5k
      }
184
62.5k
      break;
185
62.5k
    }
186
187
13.9k
    zp = tree[z].parent;
188
13.9k
    tree[x].parent = zp;
189
13.9k
    if (zp != EMPTY) {
190
11.3k
      if (tree[zp].left == z)
191
11.2k
        tree[zp].left = x;
192
55
      else
193
55
      {
194
55
        assert(tree[zp].right == z);
195
55
        tree[zp].right = x;
196
55
      }
197
11.3k
    }
198
13.9k
    tree[y].parent = x;
199
13.9k
    if (tree[y].left == x)
200
13.9k
    {
201
13.9k
      tree[y].left = tree[x].right;
202
13.9k
      if (tree[y].left != EMPTY)
203
11.2k
        tree[tree[y].left].parent = y;
204
13.9k
      tree[x].right = y;
205
13.9k
      if (tree[z].left == y)
206
11.3k
      {
207
        /* Case 1 */
208
11.3k
        tree[z].parent = y;
209
11.3k
        tree[z].left = tree[y].right;
210
11.3k
        if (tree[z].left != EMPTY)
211
5.15k
          tree[tree[z].left].parent = z;
212
11.3k
        tree[y].right = z;
213
11.3k
      }
214
2.61k
      else
215
2.61k
      {
216
        /* Case 2 - reflected */
217
2.61k
        assert(tree[z].right == y);
218
2.61k
        tree[z].parent = x;
219
2.61k
        tree[z].right = tree[x].left;
220
2.61k
        if (tree[z].right != EMPTY)
221
6
          tree[tree[z].right].parent = z;
222
2.61k
        tree[x].left = z;
223
2.61k
      }
224
13.9k
    }
225
50
    else
226
50
    {
227
50
      assert(tree[y].right == x);
228
50
      tree[y].right = tree[x].left;
229
50
      if (tree[y].right != EMPTY)
230
28
        tree[tree[y].right].parent = y;
231
50
      tree[x].left = y;
232
50
      if (tree[z].left == y)
233
14
      {
234
        /* Case 2 */
235
14
        tree[z].parent = x;
236
14
        tree[z].left = tree[x].right;
237
14
        if (tree[z].left != EMPTY)
238
4
          tree[tree[z].left].parent = z;
239
14
        tree[x].right = z;
240
14
      }
241
36
      else
242
36
      {
243
        /* Case 1 - reflected */
244
36
        assert(tree[z].right == y);
245
36
        tree[z].parent = y;
246
36
        tree[z].right = tree[y].left;
247
36
        if (tree[z].right != EMPTY)
248
25
          tree[tree[z].right].parent = z;
249
36
        tree[y].left = z;
250
36
      }
251
50
    }
252
13.9k
  } while (1);
253
65.1k
}
254
255
static unsigned int delete_node(pdf_cmap *cmap, unsigned int current)
256
226
{
257
226
  cmap_splay *tree = cmap->tree;
258
226
  unsigned int parent;
259
226
  unsigned int replacement;
260
261
226
  assert(current != EMPTY);
262
263
226
  parent = tree[current].parent;
264
226
  if (tree[current].right == EMPTY)
265
9
  {
266
9
    if (parent == EMPTY)
267
7
    {
268
7
      replacement = cmap->ttop = tree[current].left;
269
7
    }
270
2
    else if (tree[parent].left == current)
271
2
    {
272
2
      replacement = tree[parent].left = tree[current].left;
273
2
    }
274
0
    else
275
0
    {
276
0
      assert(tree[parent].right == current);
277
0
      replacement = tree[parent].right = tree[current].left;
278
0
    }
279
9
    if (replacement != EMPTY)
280
9
      tree[replacement].parent = parent;
281
0
    else
282
0
      replacement = parent;
283
9
  }
284
217
  else if (tree[current].left == EMPTY)
285
2
  {
286
2
    if (parent == EMPTY)
287
0
    {
288
0
      replacement = cmap->ttop = tree[current].right;
289
0
    }
290
2
    else if (tree[parent].left == current)
291
1
    {
292
1
      replacement = tree[parent].left = tree[current].right;
293
1
    }
294
1
    else
295
1
    {
296
1
      assert(tree[parent].right == current);
297
1
      replacement = tree[parent].right = tree[current].right;
298
1
    }
299
2
    if (replacement != EMPTY)
300
2
      tree[replacement].parent = parent;
301
0
    else
302
0
      replacement = parent;
303
2
  }
304
215
  else
305
215
  {
306
    /* Hard case, find the in-order predecessor of current */
307
215
    unsigned int amputee = current;
308
215
    replacement = tree[current].left;
309
224
    while (tree[replacement].right != EMPTY) {
310
9
      amputee = replacement;
311
9
      replacement = tree[replacement].right;
312
9
    }
313
    /* Remove replacement from the tree */
314
215
    if (amputee == current)
315
207
    {
316
207
      tree[amputee].left = tree[replacement].left;
317
207
      if (tree[amputee].left != EMPTY)
318
206
        tree[tree[amputee].left].parent = amputee;
319
207
    }
320
8
    else
321
8
    {
322
8
      tree[amputee].right = tree[replacement].left;
323
8
      if (tree[amputee].right != EMPTY)
324
3
        tree[tree[amputee].right].parent = amputee;
325
8
    }
326
    /* Insert replacement in place of current */
327
215
    tree[replacement].parent = parent;
328
215
    if (parent == EMPTY)
329
208
    {
330
208
      tree[replacement].parent = EMPTY;
331
208
      cmap->ttop = replacement;
332
208
    }
333
7
    else if (tree[parent].left == current)
334
4
      tree[parent].left = replacement;
335
3
    else
336
3
    {
337
3
      assert(tree[parent].right == current);
338
3
      tree[parent].right = replacement;
339
3
    }
340
215
    tree[replacement].left = tree[current].left;
341
215
    if (tree[replacement].left != EMPTY)
342
214
      tree[tree[replacement].left].parent = replacement;
343
215
    tree[replacement].right = tree[current].right;
344
215
    if (tree[replacement].right != EMPTY)
345
215
      tree[tree[replacement].right].parent = replacement;
346
215
  }
347
348
  /* current is now unlinked. We need to remove it from our array. */
349
226
  cmap->tlen--;
350
226
  if (current != (unsigned int) cmap->tlen)
351
11
  {
352
11
    if (replacement == (unsigned int) cmap->tlen)
353
0
      replacement = current;
354
11
    tree[current] = tree[cmap->tlen];
355
11
    parent = tree[current].parent;
356
11
    if (parent == EMPTY)
357
11
      cmap->ttop = current;
358
0
    else if (tree[parent].left == (unsigned int) cmap->tlen)
359
0
      tree[parent].left = current;
360
0
    else
361
0
    {
362
0
      assert(tree[parent].right == (unsigned int) cmap->tlen);
363
0
      tree[parent].right = current;
364
0
    }
365
11
    if (tree[current].left != EMPTY)
366
11
    {
367
11
      assert(tree[tree[current].left].parent == (unsigned int) cmap->tlen);
368
11
      tree[tree[current].left].parent = current;
369
11
    }
370
11
    if (tree[current].right != EMPTY)
371
7
    {
372
7
      assert(tree[tree[current].right].parent == (unsigned int) cmap->tlen);
373
7
      tree[tree[current].right].parent = current;
374
7
    }
375
11
  }
376
377
  /* Return the node that we should continue searching from */
378
226
  return replacement;
379
226
}
380
381
#ifdef DUMP_SPLAY
382
static void
383
dump_splay(cmap_splay *tree, unsigned int node, int depth, const char *pre)
384
{
385
  int i;
386
387
  if (tree == NULL || node == EMPTY)
388
    return;
389
390
  for (i = 0; i < depth; i++)
391
    fprintf(stderr, " ");
392
  fprintf(stderr, "%s%d:", pre, node);
393
  if (tree[node].parent == EMPTY)
394
    fprintf(stderr, "^EMPTY");
395
  else
396
    fprintf(stderr, "^%d", tree[node].parent);
397
  if (tree[node].left == EMPTY)
398
    fprintf(stderr, "<EMPTY");
399
  else
400
    fprintf(stderr, "<%d", tree[node].left);
401
  if (tree[node].right == EMPTY)
402
    fprintf(stderr, ">EMPTY");
403
  else
404
    fprintf(stderr, ">%d", tree[node].right);
405
  fprintf(stderr, "(%x,%x,%x,%d)\n", tree[node].low, tree[node].high, tree[node].out, tree[node].many);
406
  assert(tree[node].parent == EMPTY || depth);
407
  assert(tree[node].left == EMPTY || tree[tree[node].left].parent == node);
408
  assert(tree[node].right == EMPTY || tree[tree[node].right].parent == node);
409
  dump_splay(tree, tree[node].left, depth+1, "L");
410
  dump_splay(tree, tree[node].right, depth+1, "R");
411
}
412
#endif
413
414
enum
415
{
416
  TOP = 0,
417
  LEFT = 1,
418
  RIGHT = 2
419
};
420
421
static void walk_splay(cmap_splay *tree, unsigned int node, void (*fn)(cmap_splay *, void *), void *arg)
422
12
{
423
12
  int from = TOP;
424
425
259k
  while (node != EMPTY)
426
259k
  {
427
259k
    switch (from)
428
259k
    {
429
129k
    case TOP:
430
129k
      if (tree[node].left != EMPTY)
431
117k
      {
432
117k
        node = tree[node].left;
433
117k
        from = TOP;
434
117k
        break;
435
117k
      }
436
      /* fallthrough */
437
129k
    case LEFT:
438
129k
      fn(&tree[node], arg);
439
129k
      if (tree[node].right != EMPTY)
440
12.2k
      {
441
12.2k
        node = tree[node].right;
442
12.2k
        from = TOP;
443
12.2k
        break;
444
12.2k
      }
445
      /* fallthrough */
446
129k
    case RIGHT:
447
129k
      {
448
129k
        unsigned int parent = tree[node].parent;
449
129k
        if (parent == EMPTY)
450
12
          return;
451
129k
        if (tree[parent].left == node)
452
117k
          from = LEFT;
453
12.2k
        else
454
12.2k
        {
455
12.2k
          assert(tree[parent].right == node);
456
12.2k
          from = RIGHT;
457
12.2k
        }
458
129k
        node = parent;
459
129k
      }
460
259k
    }
461
259k
  }
462
12
}
463
464
#ifdef CHECK_SPLAY
465
466
static int
467
tree_has_overlap(cmap_splay *tree, int node, int low, int high)
468
{
469
  if (tree[node].left != EMPTY)
470
    if (tree_has_overlap(tree, tree[node].left, low, high))
471
      return 1;
472
  if (tree[node].right != EMPTY)
473
    if (tree_has_overlap(tree, tree[node].right, low, high))
474
      return 1;
475
  return (tree[node].low < low && low < tree[node].high) || (tree[node].low < high && high < tree[node].high);
476
}
477
478
static void
479
do_check(cmap_splay *node, void *arg)
480
{
481
  cmap_splay *tree = arg;
482
  unsigned int num = node - tree;
483
  assert(!node->many || node->low == node->high);
484
  assert(node->low <= node->high);
485
  assert((node->left == EMPTY) || (tree[node->left].parent == num &&
486
    tree[node->left].high < node->low));
487
  assert(node->right == EMPTY || (tree[node->right].parent == num &&
488
    node->high < tree[node->right].low));
489
  assert(!tree_has_overlap(tree, num, node->low, node->high));
490
}
491
492
static void
493
check_splay(cmap_splay *tree, unsigned int node, int depth)
494
{
495
  if (node == EMPTY)
496
    return;
497
  assert(tree[node].parent == EMPTY);
498
  walk_splay(tree, node, do_check, tree);
499
}
500
#endif
501
502
/*
503
 * Add a range.
504
 */
505
static void
506
add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out, int check_for_overlap, int many)
507
65.1k
{
508
65.1k
  int current;
509
65.1k
  cmap_splay *tree;
510
511
65.1k
  if (low > high)
512
0
  {
513
0
    fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name);
514
0
    return;
515
0
  }
516
517
65.1k
  if (cmap->codespace_len == 0)
518
0
  {
519
0
    fz_warn(ctx, "CMap is missing codespace range");
520
0
    pdf_add_codespace(ctx, cmap, 0, 65535, 2);
521
0
  }
522
523
65.1k
  tree = cmap->tree;
524
525
65.1k
  if (cmap->tlen)
526
65.1k
  {
527
65.1k
    unsigned int move = cmap->ttop;
528
65.1k
    unsigned int gt = EMPTY;
529
65.1k
    unsigned int lt = EMPTY;
530
65.1k
    if (check_for_overlap)
531
65.1k
    {
532
      /* Check for collision with the current node */
533
65.1k
      do
534
90.6k
      {
535
90.6k
        current = move;
536
        /* Cases we might meet:
537
         * tree[i]:        <----->
538
         * case 0:     <->
539
         * case 1:     <------->
540
         * case 2:     <------------->
541
         * case 3:           <->
542
         * case 4:           <------->
543
         * case 5:                 <->
544
         */
545
90.6k
        if (low <= tree[current].low && tree[current].low <= high)
546
226
        {
547
          /* case 1, reduces to case 0 */
548
          /* or case 2, deleting the node */
549
226
          if (!tree[current].many)
550
226
          {
551
226
            tree[current].out += high + 1 - tree[current].low;
552
226
            tree[current].low = high + 1;
553
226
          }
554
226
          if (tree[current].low > tree[current].high || tree[current].many)
555
226
          {
556
            /* update lt/gt references that will be moved/stale after deleting current */
557
226
            if (gt == (unsigned int) cmap->tlen - 1)
558
1
              gt = current;
559
226
            if (lt == (unsigned int) cmap->tlen - 1)
560
0
              lt = current;
561
            /* delete_node() moves the element at cmap->tlen-1 into current */
562
226
            move = delete_node(cmap, current);
563
226
            current = EMPTY;
564
226
            continue;
565
226
          }
566
226
        }
567
90.4k
        else if (low <= tree[current].high && tree[current].high <= high)
568
0
        {
569
          /* case 4, reduces to case 5 */
570
0
          tree[current].high = low - 1;
571
0
          assert(tree[current].low <= tree[current].high);
572
0
        }
573
90.4k
        else if (tree[current].low < low && high < tree[current].high)
574
0
        {
575
          /* case 3, reduces to case 5 */
576
0
          int new_high = tree[current].high;
577
0
          tree[current].high = low-1;
578
0
          assert(!tree[current].many);
579
0
          add_range(ctx, cmap, high+1, new_high, tree[current].out + high + 1 - tree[current].low, 0, 0);
580
0
          tree = cmap->tree;
581
0
        }
582
        /* Now look for where to move to next (left for case 0, right for case 5) */
583
90.4k
        if (tree[current].low > high) {
584
25.2k
          move = tree[current].left;
585
25.2k
          gt = current;
586
25.2k
        }
587
65.2k
        else
588
65.2k
        {
589
65.2k
          move = tree[current].right;
590
65.2k
          lt = current;
591
65.2k
        }
592
90.4k
      }
593
90.6k
      while (move != EMPTY);
594
65.1k
    }
595
0
    else
596
0
    {
597
0
      do
598
0
      {
599
0
        current = move;
600
0
        if (tree[current].low > high)
601
0
        {
602
0
          move = tree[current].left;
603
0
          gt = current;
604
0
        }
605
0
        else
606
0
        {
607
0
          move = tree[current].right;
608
0
          lt = current;
609
0
        }
610
0
      } while (move != EMPTY);
611
0
    }
612
    /* current is now the node to which we would be adding the new node */
613
    /* lt is the last node we traversed which is lt the new node. */
614
    /* gt is the last node we traversed which is gt the new node. */
615
616
65.1k
    if (!many)
617
55.7k
    {
618
      /* Check for the 'merge' cases. */
619
55.7k
      if (lt != EMPTY && !tree[lt].many && tree[lt].high == low-1 && tree[lt].out - tree[lt].low == out - low)
620
24
      {
621
24
        tree[lt].high = high;
622
24
        if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
623
0
        {
624
0
          tree[lt].high = tree[gt].high;
625
0
          delete_node(cmap, gt);
626
0
        }
627
24
        goto exit;
628
24
      }
629
55.7k
      if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
630
0
      {
631
0
        tree[gt].low = low;
632
0
        tree[gt].out = out;
633
0
        goto exit;
634
0
      }
635
55.7k
    }
636
65.1k
  }
637
6
  else
638
6
    current = EMPTY;
639
640
65.1k
  if (cmap->tlen == cmap->tcap)
641
32
  {
642
32
    int new_cap = cmap->tcap ? cmap->tcap * 2 : 256;
643
32
    tree = cmap->tree = fz_realloc_array(ctx, cmap->tree, new_cap, cmap_splay);
644
32
    cmap->tcap = new_cap;
645
32
  }
646
65.1k
  tree[cmap->tlen].low = low;
647
65.1k
  tree[cmap->tlen].high = high;
648
65.1k
  tree[cmap->tlen].out = out;
649
65.1k
  tree[cmap->tlen].parent = current;
650
65.1k
  tree[cmap->tlen].left = EMPTY;
651
65.1k
  tree[cmap->tlen].right = EMPTY;
652
65.1k
  tree[cmap->tlen].many = many;
653
65.1k
  cmap->tlen++;
654
65.1k
  if (current == EMPTY)
655
6
    cmap->ttop = 0;
656
65.1k
  else if (tree[current].low > high)
657
2.63k
    tree[current].left = cmap->tlen-1;
658
62.4k
  else
659
62.4k
  {
660
62.4k
    assert(tree[current].high < low);
661
62.4k
    tree[current].right = cmap->tlen-1;
662
62.4k
  }
663
65.1k
  move_to_root(tree, cmap->tlen-1);
664
65.1k
  cmap->ttop = cmap->tlen-1;
665
65.1k
exit:
666
65.1k
  {}
667
#ifdef CHECK_SPLAY
668
  check_splay(cmap->tree, cmap->ttop, 0);
669
#endif
670
#ifdef DUMP_SPLAY
671
  dump_splay(cmap->tree, cmap->ttop, 0, "");
672
#endif
673
65.1k
}
674
675
/*
676
 * Add a one-to-many mapping.
677
 */
678
static void
679
add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len)
680
9.38k
{
681
9.38k
  int out_pos;
682
9.38k
  int new_len = cmap->dlen + len + 1;
683
684
9.38k
  if (new_len > cmap->dcap)
685
26
  {
686
26
    int new_cap = cmap->dcap > 0 ? cmap->dcap : 256;
687
48
    while (new_len > new_cap)
688
22
      new_cap *= 2;
689
26
    cmap->dict = fz_realloc_array(ctx, cmap->dict, new_cap, int);
690
26
    cmap->dcap = new_cap;
691
26
  }
692
9.38k
  out_pos = cmap->dlen;
693
9.38k
  cmap->dict[out_pos] = len;
694
9.38k
  memcpy(&cmap->dict[out_pos+1], out, sizeof(int)*len);
695
9.38k
  cmap->dlen = new_len;
696
697
9.38k
  add_range(ctx, cmap, low, low, out_pos, 1, 1);
698
9.38k
}
699
700
void
701
pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out)
702
27.7k
{
703
27.7k
  add_range(ctx, cmap, low, high, out, 1, 0);
704
27.7k
}
705
706
void
707
pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *values, size_t len)
708
37.3k
{
709
37.3k
  int *ovalues = values;
710
  /* len is always restricted to <= 256 by the callers. */
711
37.3k
  int local[PDF_MRANGE_CAP];
712
713
37.3k
  assert(len <= PDF_MRANGE_CAP);
714
715
  /* Decode unicode surrogate pairs. */
716
  /* Only the *-UCS2 CMaps use one-to-many mappings, so assuming unicode should be safe. */
717
37.3k
  if (len >= 2)
718
9.38k
  {
719
9.38k
    size_t i, j;
720
    /* Look for mranges with either multiple surrogate pairs in, or surrogate pairs
721
     * with other chars. See bug 706131. */
722
31.2k
    for (i = 0, j = 0; i < len; i++, j++)
723
21.8k
    {
724
21.8k
      int hi = ovalues[i];
725
21.8k
      if (hi >= 0xd800 && hi < 0xdc00 && i < len-1)
726
0
      {
727
0
        int lo = ovalues[i+1];
728
0
        if (lo >= 0xdc00 && lo < 0xe000)
729
0
        {
730
0
          hi = ((hi - 0xD800) << 10) + (lo - 0xDC00) + 0x10000;
731
0
          i++;
732
0
        }
733
0
      }
734
21.8k
      if (values != local)
735
9.38k
      {
736
        /* We can't change the callers data, so copy stuff in. */
737
9.38k
        if (j)
738
0
          memcpy(local, values, sizeof(local[0]) * (j-1));
739
9.38k
        values = local;
740
9.38k
      }
741
21.8k
      values[j] = hi;
742
21.8k
    }
743
9.38k
    len = j;
744
9.38k
  }
745
746
37.3k
  if (len == 1)
747
28.0k
  {
748
28.0k
    add_range(ctx, cmap, low, low, values[0], 1, 0);
749
28.0k
    return;
750
28.0k
  }
751
752
9.38k
  if (len > PDF_MRANGE_CAP)
753
0
  {
754
0
    fz_warn(ctx, "ignoring one to many mapping in cmap %s", cmap->cmap_name);
755
0
    return;
756
0
  }
757
758
9.38k
  add_mrange(ctx, cmap, low, values, (int)len);
759
9.38k
}
760
761
static void
762
count_node_types(cmap_splay *node, void *arg)
763
64.9k
{
764
64.9k
  int *counts = (int *)arg;
765
766
64.9k
  if (node->many)
767
9.38k
    counts[2]++;
768
55.5k
  else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
769
55.5k
    counts[0]++;
770
6
  else
771
6
    counts[1]++;
772
64.9k
}
773
774
static void
775
copy_node_types(cmap_splay *node, void *arg)
776
64.9k
{
777
64.9k
  pdf_cmap *cmap = (pdf_cmap *)arg;
778
779
64.9k
  if (node->many)
780
9.38k
  {
781
9.38k
    assert(node->low == node->high);
782
9.38k
    cmap->mranges[cmap->mlen].low = node->low;
783
9.38k
    cmap->mranges[cmap->mlen].out = node->out;
784
9.38k
    cmap->mlen++;
785
9.38k
  }
786
55.5k
  else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
787
55.5k
  {
788
55.5k
    cmap->ranges[cmap->rlen].low = node->low;
789
55.5k
    cmap->ranges[cmap->rlen].high = node->high;
790
55.5k
    cmap->ranges[cmap->rlen].out = node->out;
791
55.5k
    cmap->rlen++;
792
55.5k
  }
793
6
  else
794
6
  {
795
6
    cmap->xranges[cmap->xlen].low = node->low;
796
6
    cmap->xranges[cmap->xlen].high = node->high;
797
6
    cmap->xranges[cmap->xlen].out = node->out;
798
6
    cmap->xlen++;
799
6
  }
800
64.9k
}
801
802
void
803
pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap)
804
6
{
805
6
  int counts[3];
806
807
6
  if (cmap->tree == NULL)
808
0
    return;
809
810
6
  counts[0] = 0;
811
6
  counts[1] = 0;
812
6
  counts[2] = 0;
813
6
  walk_splay(cmap->tree, cmap->ttop, count_node_types, &counts);
814
815
6
  cmap->ranges = Memento_label(fz_malloc_array(ctx, counts[0], pdf_range), "cmap_range");
816
6
  cmap->rcap = counts[0];
817
6
  cmap->xranges = Memento_label(fz_malloc_array(ctx, counts[1], pdf_xrange), "cmap_xrange");
818
6
  cmap->xcap = counts[1];
819
6
  cmap->mranges = Memento_label(fz_malloc_array(ctx, counts[2], pdf_mrange), "cmap_mrange");
820
6
  cmap->mcap = counts[2];
821
822
6
  walk_splay(cmap->tree, cmap->ttop, copy_node_types, cmap);
823
824
6
  fz_free(ctx, cmap->tree);
825
6
  cmap->tree = NULL;
826
6
}
827
828
int
829
pdf_lookup_cmap(pdf_cmap *cmap, unsigned int cpt)
830
131k
{
831
131k
  pdf_range *ranges = cmap->ranges;
832
131k
  pdf_xrange *xranges = cmap->xranges;
833
131k
  int l, r, m;
834
835
131k
  l = 0;
836
131k
  r = cmap->rlen - 1;
837
929k
  while (l <= r)
838
917k
  {
839
917k
    m = (l + r) >> 1;
840
917k
    if (cpt < ranges[m].low)
841
393k
      r = m - 1;
842
524k
    else if (cpt > ranges[m].high)
843
403k
      l = m + 1;
844
120k
    else
845
120k
      return cpt - ranges[m].low + ranges[m].out;
846
917k
  }
847
848
11.2k
  l = 0;
849
11.2k
  r = cmap->xlen - 1;
850
11.2k
  while (l <= r)
851
0
  {
852
0
    m = (l + r) >> 1;
853
0
    if (cpt < xranges[m].low)
854
0
      r = m - 1;
855
0
    else if (cpt > xranges[m].high)
856
0
      l = m + 1;
857
0
    else
858
0
      return cpt - xranges[m].low + xranges[m].out;
859
0
  }
860
861
11.2k
  if (cmap->usecmap)
862
0
    return pdf_lookup_cmap(cmap->usecmap, cpt);
863
864
11.2k
  return -1;
865
11.2k
}
866
867
int
868
pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out)
869
131k
{
870
131k
  pdf_range *ranges = cmap->ranges;
871
131k
  pdf_xrange *xranges = cmap->xranges;
872
131k
  pdf_mrange *mranges = cmap->mranges;
873
131k
  unsigned int i;
874
131k
  int l, r, m;
875
876
131k
  l = 0;
877
131k
  r = cmap->rlen - 1;
878
1.95M
  while (l <= r)
879
1.84M
  {
880
1.84M
    m = (l + r) >> 1;
881
1.84M
    if (cpt < ranges[m].low)
882
251k
      r = m - 1;
883
1.59M
    else if (cpt > ranges[m].high)
884
1.56M
      l = m + 1;
885
27.7k
    else
886
27.7k
    {
887
27.7k
      out[0] = cpt - ranges[m].low + ranges[m].out;
888
27.7k
      return 1;
889
27.7k
    }
890
1.84M
  }
891
892
103k
  l = 0;
893
103k
  r = cmap->xlen - 1;
894
213k
  while (l <= r)
895
110k
  {
896
110k
    m = (l + r) >> 1;
897
110k
    if (cpt < xranges[m].low)
898
110k
      r = m - 1;
899
0
    else if (cpt > xranges[m].high)
900
0
      l = m + 1;
901
0
    else
902
0
    {
903
0
      out[0] = cpt - xranges[m].low + xranges[m].out;
904
0
      return 1;
905
0
    }
906
110k
  }
907
908
103k
  l = 0;
909
103k
  r = cmap->mlen - 1;
910
1.32M
  while (l <= r)
911
1.23M
  {
912
1.23M
    m = (l + r) >> 1;
913
1.23M
    if (cpt < mranges[m].low)
914
24.2k
      r = m - 1;
915
1.20M
    else if (cpt > mranges[m].low)
916
1.20M
      l = m + 1;
917
4.69k
    else
918
4.69k
    {
919
4.69k
      int *ptr = &cmap->dict[cmap->mranges[m].out];
920
4.69k
      unsigned int len = (unsigned int)*ptr++;
921
15.6k
      for (i = 0; i < len; ++i)
922
10.9k
        out[i] = *ptr++;
923
4.69k
      return len;
924
4.69k
    }
925
1.23M
  }
926
927
98.6k
  if (cmap->usecmap)
928
0
    return pdf_lookup_cmap_full(cmap->usecmap, cpt, out);
929
930
98.6k
  return 0;
931
98.6k
}
932
933
int
934
pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, unsigned char *end, unsigned int *cpt)
935
219
{
936
219
  unsigned int c;
937
219
  int k, n;
938
219
  int len = end - buf;
939
940
219
  if (len > 4)
941
18
    len = 4;
942
943
219
  c = 0;
944
227
  for (n = 0; n < len; n++)
945
227
  {
946
227
    c = (c << 8) | buf[n];
947
235
    for (k = 0; k < cmap->codespace_len; k++)
948
227
    {
949
227
      if (cmap->codespace[k].n == n + 1)
950
219
      {
951
219
        if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high)
952
219
        {
953
219
          *cpt = c;
954
219
          return n + 1;
955
219
        }
956
219
      }
957
227
    }
958
227
  }
959
960
0
  *cpt = 0;
961
0
  return 1;
962
219
}
963
964
size_t
965
pdf_cmap_size(fz_context *ctx, pdf_cmap *cmap)
966
14
{
967
14
  if (cmap == NULL)
968
6
    return 0;
969
8
  if (cmap->storable.refs < 0)
970
2
    return 0;
971
972
6
  return pdf_cmap_size(ctx, cmap->usecmap) +
973
6
    cmap->rcap * sizeof *cmap->ranges +
974
6
    cmap->xcap * sizeof *cmap->xranges +
975
6
    cmap->mcap * sizeof *cmap->mranges +
976
6
    cmap->tcap * sizeof *cmap->tree +
977
6
    sizeof(*cmap);
978
8
}