Coverage Report

Created: 2026-03-31 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/xpdf-4.06/xpdf/CharCodeToUnicode.cc
Line
Count
Source
1
//========================================================================
2
//
3
// CharCodeToUnicode.cc
4
//
5
// Copyright 2001-2003 Glyph & Cog, LLC
6
//
7
//========================================================================
8
9
#include <aconf.h>
10
11
#include <stdio.h>
12
#include <string.h>
13
#include "gmem.h"
14
#include "gmempp.h"
15
#include "gfile.h"
16
#include "GString.h"
17
#include "Error.h"
18
#include "GlobalParams.h"
19
#include "PSTokenizer.h"
20
#include "CharCodeToUnicode.h"
21
22
//------------------------------------------------------------------------
23
24
683k
#define maxUnicodeString 8
25
26
struct CharCodeToUnicodeString {
27
  CharCode c;
28
  Unicode u[maxUnicodeString];
29
  int len;
30
};
31
32
//------------------------------------------------------------------------
33
34
struct GStringIndex {
35
  GString *s;
36
  int i;
37
};
38
39
17.4M
static int getCharFromGString(void *data) {
40
17.4M
  GStringIndex *idx = (GStringIndex *)data;
41
17.4M
  if (idx->i >= idx->s->getLength()) {
42
29.9k
    return EOF;
43
29.9k
  }
44
17.3M
  return idx->s->getChar(idx->i++) & 0xff;
45
17.4M
}
46
47
0
// Character source reading from a stdio stream: <data> is a FILE *.
// Returns whatever fgetc() returns for it (the next byte, or EOF).
static int getCharFromFile(void *data) {
  FILE *stream = (FILE *)data;
  return fgetc(stream);
}
50
51
//------------------------------------------------------------------------
52
53
// Value of each byte when read as a hexadecimal digit ('0'-'9',
// 'A'-'F', 'a'-'f'), or -1 if the byte is not a hex digit.  The
// table is only ever read, so it is declared const.
static const int hexCharVals[256] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 3x
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 5x
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  // Fx
};
71
72
// Parse a <len>-byte hex string <s> into *<val>.  Returns false on
73
// error.
74
749k
static GBool parseHex(char *s, int len, Guint *val) {
75
749k
  int i, x;
76
77
749k
  *val = 0;
78
3.31M
  for (i = 0; i < len; ++i) {
79
2.57M
    x = hexCharVals[s[i] & 0xff];
80
2.57M
    if (x < 0) {
81
7.87k
      return gFalse;
82
7.87k
    }
83
2.56M
    *val = (*val << 4) + x;
84
2.56M
  }
85
741k
  return gTrue;
86
749k
}
87
88
//------------------------------------------------------------------------
89
90
8.02k
// Create a mapping with no table (default constructor); for such an
// object mapToUnicode returns every char code unchanged.
CharCodeToUnicode *CharCodeToUnicode::makeIdentityMapping() {
  CharCodeToUnicode *identity = new CharCodeToUnicode();
  return identity;
}
93
94
// Load a cidToUnicode file.  Each line holds one hex Unicode value;
// the (zero-based) line position is the index it is stored at in the
// map.  Lines that can't be parsed are reported and stored as 0.
// The new object is tagged with a copy of <collection>.  Returns
// NULL if the file can't be opened.
CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName,
                                                        GString *collection) {
  FILE *in = openFile(fileName->getCString(), "r");
  if (!in) {
    error(errSyntaxError, -1, "Couldn't open cidToUnicode file '{0:t}'",
          fileName);
    return NULL;
  }

  CharCode capacity = 32768;
  CharCode count = 0;
  Unicode *table = (Unicode *)gmallocn(capacity, sizeof(Unicode));
  char lineBuf[64];

  while (getLine(lineBuf, sizeof(lineBuf), in)) {
    // double the table whenever it is full
    if (count == capacity) {
      capacity *= 2;
      table = (Unicode *)greallocn(table, capacity, sizeof(Unicode));
    }
    Unicode value;
    if (sscanf(lineBuf, "%x", &value) != 1) {
      error(errSyntaxWarning, -1,
            "Bad line ({0:d}) in cidToUnicode file '{1:t}'",
            (int)(count + 1), fileName);
      value = 0;
    }
    table[count] = value;
    ++count;
  }
  fclose(in);

  // the constructor copies the table (copyMap = gTrue), so the local
  // buffer is released here
  CharCodeToUnicode *result =
      new CharCodeToUnicode(collection->copy(), table, count, gTrue,
                            NULL, 0, 0);
  gfree(table);
  return result;
}
135
136
// Load a unicodeToUnicode file.  Each line holds whitespace-separated
// hex values: a source value followed by one to maxUnicodeString
// destination values.  Single-value mappings go into the main map;
// longer ones go into the string map.  Malformed lines are reported
// and skipped.  The new object is tagged with a copy of <fileName>.
// Returns NULL if the file can't be opened.
CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
                                                    GString *fileName) {
  FILE *f;
  Unicode *mapA;
  CharCodeToUnicodeString *sMapA;
  CharCode size, oldSize, len, sMapSizeA, sMapLenA;
  char buf[256];
  char *tok;
  Unicode u0;
  Unicode uBuf[maxUnicodeString];
  CharCodeToUnicode *ctu;
  int line, n, i;

  if (!(f = openFile(fileName->getCString(), "r"))) {
    error(errSyntaxError, -1, "Couldn't open unicodeToUnicode file '{0:t}'",
          fileName);
    return NULL;
  }

  size = 4096;
  mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
  memset(mapA, 0, size * sizeof(Unicode));
  len = 0;
  sMapA = NULL;
  sMapSizeA = sMapLenA = 0;

  line = 0;
  while (getLine(buf, sizeof(buf), f)) {
    ++line;
    // The source value is limited to 0xffffff (the same arbitrary
    // limit addMapping uses): with a larger value the size-doubling
    // loop below could wrap <size> around to 0 and never terminate.
    if (!(tok = strtok(buf, " \t\r\n")) ||
        !parseHex(tok, (int)strlen(tok), &u0) ||
        u0 > 0xffffff) {
      error(errSyntaxWarning, -1,
            "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
            line, fileName);
      continue;
    }
    // collect the destination values
    n = 0;
    while (n < maxUnicodeString) {
      if (!(tok = strtok(NULL, " \t\r\n"))) {
        break;
      }
      if (!parseHex(tok, (int)strlen(tok), &uBuf[n])) {
        error(errSyntaxWarning, -1,
              "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
              line, fileName);
        break;
      }
      ++n;
    }
    if (n < 1) {
      error(errSyntaxWarning, -1,
            "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
            line, fileName);
      continue;
    }
    // grow the main map (zero-filled) until it can hold index u0
    if (u0 >= size) {
      oldSize = size;
      while (u0 >= size) {
        size *= 2;
      }
      mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
      memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
    }
    if (n == 1) {
      mapA[u0] = uBuf[0];
    } else {
      // multi-value mapping: main map entry is 0, the sequence is
      // appended to the string map
      mapA[u0] = 0;
      if (sMapLenA == sMapSizeA) {
        sMapSizeA += 16;
        sMapA = (CharCodeToUnicodeString *)
                  greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString));
      }
      sMapA[sMapLenA].c = u0;
      for (i = 0; i < n; ++i) {
        sMapA[sMapLenA].u[i] = uBuf[i];
      }
      sMapA[sMapLenA].len = n;
      ++sMapLenA;
    }
    if (u0 >= len) {
      len = u0 + 1;
    }
  }
  fclose(f);

  // the constructor copies mapA (copyMap = gTrue) but takes the sMapA
  // pointer as-is, so only mapA is freed here
  ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
                              sMapA, sMapLenA, sMapSizeA);
  gfree(mapA);
  return ctu;
}
226
227
23.3k
// Build an untagged mapping from a copy of the first 256 entries of
// <toUnicode>.
CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
  const CharCode tableLen = 256;
  return new CharCodeToUnicode(NULL, toUnicode, tableLen, gTrue, NULL, 0, 0);
}
230
231
0
// Build an untagged mapping from a copy of the first 65536 entries
// of <toUnicode>.
CharCodeToUnicode *CharCodeToUnicode::make16BitToUnicode(Unicode *toUnicode) {
  const CharCode tableLen = 65536;
  return new CharCodeToUnicode(NULL, toUnicode, tableLen, gTrue, NULL, 0, 0);
}
234
235
10.0k
// Parse the ToUnicode CMap text held in <buf> into a new, untagged
// mapping.  Returns NULL (after deleting the new object) if
// parseCMap1 reports failure.
CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
  GStringIndex cursor;
  cursor.s = buf;
  cursor.i = 0;

  CharCodeToUnicode *result = new CharCodeToUnicode(NULL);
  if (result->parseCMap1(&getCharFromGString, &cursor, nBits)) {
    return result;
  }
  delete result;
  return NULL;
}
248
249
4.97k
// Parse the ToUnicode CMap text held in <buf> and add its mappings to
// this object.  The success flag returned by parseCMap1 is ignored.
void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) {
  GStringIndex cursor;

  cursor.i = 0;
  cursor.s = buf;
  parseCMap1(&getCharFromGString, &cursor, nBits);
}
256
257
// Parse a ToUnicode CMap, read one character at a time through
// <getCharFunc>(<data>), and add the mappings found to this object.
// <nBits> selects the largest valid char code: 8 -> 0xff, 16 ->
// 0xffff, anything else -> 0xffffffff.  Handles codespacerange
// (skipped, apart from an optional character-size check), usecmap,
// bfchar and bfrange, plus the cidchar/cidrange blocks some buggy
// generators emit.  Returns true if at least one entry was handed to
// addMapping/addMappingInt (directly or through usecmap); returns
// false otherwise, including when the character-size check fails.
GBool CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
                                    int nBits) {
  // addMapping and addMappingInt drop every code above this value, so
  // range ends are clamped to it; this also keeps the range loops
  // from running forever when the end is the largest CharCode value.
  const CharCode maxMapCode = 0xffffff;
  PSTokenizer *pst;
  char tok1[256], tok2[256], tok3[256];
  int n1, n2, n3;
  CharCode i;
  CharCode maxCode, code1, code2;
  GString *name;
  FILE *f;
  GBool ok;

  ok = gFalse;
  maxCode = (nBits == 8) ? 0xff : (nBits == 16) ? 0xffff : 0xffffffff;
  pst = new PSTokenizer(getCharFunc, data);
  // tok1 trails tok2 by one token, so an operator in tok2 can be
  // matched together with its preceding operand in tok1
  pst->getToken(tok1, sizeof(tok1), &n1);
  while (pst->getToken(tok2, sizeof(tok2), &n2)) {
    if (!strcmp(tok1, "begincodespacerange")) {
      // tok2 is the first range entry: <xx> has n2 - 2 hex digits
      if (globalParams->getIgnoreWrongSizeToUnicode() &&
          tok2[0] == '<' && tok2[n2 - 1] == '>' &&
          n2 - 2 != nBits / 4) {
        error(errSyntaxWarning, -1,
              "Incorrect character size in ToUnicode CMap");
        ok = gFalse;
        break;
      }
      // skip the rest of the block
      while (pst->getToken(tok1, sizeof(tok1), &n1) &&
             strcmp(tok1, "endcodespacerange")) ;
    } else if (!strcmp(tok2, "usecmap")) {
      if (tok1[0] == '/') {
        name = new GString(tok1 + 1);
        if ((f = globalParams->findToUnicodeFile(name))) {
          if (parseCMap1(&getCharFromFile, f, nBits)) {
            ok = gTrue;
          }
          fclose(f);
        } else {
          // <name> is the only argument, i.e. argument 0
          error(errSyntaxError, -1,
                "Couldn't find ToUnicode CMap file for '{0:t}'",
                name);
        }
        delete name;
      }
      pst->getToken(tok1, sizeof(tok1), &n1);
    } else if (!strcmp(tok2, "beginbfchar")) {
      // entries: <code> <unicode>
      while (pst->getToken(tok1, sizeof(tok1), &n1)) {
        if (!strcmp(tok1, "endbfchar")) {
          break;
        }
        if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
            !strcmp(tok2, "endbfchar")) {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfchar block in ToUnicode CMap");
          break;
        }
        if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
              tok2[0] == '<' && tok2[n2 - 1] == '>')) {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfchar block in ToUnicode CMap");
          continue;
        }
        // strip the closing '>' of both tokens
        tok1[n1 - 1] = tok2[n2 - 1] = '\0';
        if (!parseHex(tok1 + 1, n1 - 2, &code1)) {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfchar block in ToUnicode CMap");
          continue;
        }
        // out-of-range codes are reported but still added
        if (code1 > maxCode) {
          error(errSyntaxWarning, -1,
                "Invalid entry in bfchar block in ToUnicode CMap");
        }
        addMapping(code1, tok2 + 1, n2 - 2, 0);
        ok = gTrue;
      }
      pst->getToken(tok1, sizeof(tok1), &n1);
    } else if (!strcmp(tok2, "beginbfrange")) {
      // entries: <first> <last> <unicode>  or  <first> <last> [ ... ]
      while (pst->getToken(tok1, sizeof(tok1), &n1)) {
        if (!strcmp(tok1, "endbfrange")) {
          break;
        }
        if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
            !strcmp(tok2, "endbfrange") ||
            !pst->getToken(tok3, sizeof(tok3), &n3) ||
            !strcmp(tok3, "endbfrange")) {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfrange block in ToUnicode CMap");
          break;
        }
        if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
              tok2[0] == '<' && tok2[n2 - 1] == '>')) {
          error(errSyntaxWarning,
                -1, "Illegal entry in bfrange block in ToUnicode CMap");
          continue;
        }
        tok1[n1 - 1] = tok2[n2 - 1] = '\0';
        if (!parseHex(tok1 + 1, n1 - 2, &code1) ||
            !parseHex(tok2 + 1, n2 - 2, &code2)) {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfrange block in ToUnicode CMap");
          continue;
        }
        if (code1 > maxCode || code2 > maxCode) {
          error(errSyntaxWarning, -1,
                "Invalid entry in bfrange block in ToUnicode CMap");
          if (code2 > maxCode) {
            code2 = maxCode;
          }
        }
        if (code2 > maxMapCode) {
          code2 = maxMapCode;
        }
        if (!strcmp(tok3, "[")) {
          // array form: the i-th array element maps code1 + i
          i = 0;
          while (pst->getToken(tok1, sizeof(tok1), &n1)) {
            if (!strcmp(tok1, "]")) {
              break;
            }
            if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
              // same as code1 + i <= code2, but can't wrap around
              if (code1 <= code2 && i <= code2 - code1) {
                tok1[n1 - 1] = '\0';
                addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
                ok = gTrue;
              }
            } else {
              error(errSyntaxWarning, -1,
                    "Illegal entry in bfrange block in ToUnicode CMap");
            }
            ++i;
          }
        } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
          // single-string form: code1 + i maps to the string with i
          // added (addMapping applies the offset)
          tok3[n3 - 1] = '\0';
          for (i = 0; code1 <= code2; ++code1, ++i) {
            addMapping(code1, tok3 + 1, n3 - 2, i);
            ok = gTrue;
          }
        } else {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfrange block in ToUnicode CMap");
        }
      }
      pst->getToken(tok1, sizeof(tok1), &n1);
    } else if (!strcmp(tok2, "begincidchar")) {
      // the begincidchar operator is not allowed in ToUnicode CMaps,
      // but some buggy PDF generators incorrectly use
      // code-to-CID-type CMaps here
      error(errSyntaxWarning, -1,
            "Invalid 'begincidchar' operator in ToUnicode CMap");
      // entries: <code> decimal-value
      while (pst->getToken(tok1, sizeof(tok1), &n1)) {
        if (!strcmp(tok1, "endcidchar")) {
          break;
        }
        if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
            !strcmp(tok2, "endcidchar")) {
          error(errSyntaxWarning, -1,
                "Illegal entry in cidchar block in ToUnicode CMap");
          break;
        }
        if (!(tok1[0] == '<' && tok1[n1 - 1] == '>')) {
          error(errSyntaxWarning, -1,
                "Illegal entry in cidchar block in ToUnicode CMap");
          continue;
        }
        tok1[n1 - 1] = '\0';
        if (!parseHex(tok1 + 1, n1 - 2, &code1)) {
          error(errSyntaxWarning, -1,
                "Illegal entry in cidchar block in ToUnicode CMap");
          continue;
        }
        if (code1 > maxCode) {
          error(errSyntaxWarning, -1,
                "Invalid entry in cidchar block in ToUnicode CMap");
        }
        addMappingInt(code1, atoi(tok2));
        ok = gTrue;
      }
      pst->getToken(tok1, sizeof(tok1), &n1);
    } else if (!strcmp(tok2, "begincidrange")) {
      // the begincidrange operator is not allowed in ToUnicode CMaps,
      // but some buggy PDF generators incorrectly use
      // code-to-CID-type CMaps here
      error(errSyntaxWarning, -1,
            "Invalid 'begincidrange' operator in ToUnicode CMap");
      // entries: <first> <last> decimal-start-value
      while (pst->getToken(tok1, sizeof(tok1), &n1)) {
        if (!strcmp(tok1, "endcidrange")) {
          break;
        }
        if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
            !strcmp(tok2, "endcidrange") ||
            !pst->getToken(tok3, sizeof(tok3), &n3) ||
            !strcmp(tok3, "endcidrange")) {
          error(errSyntaxWarning, -1,
                "Illegal entry in cidrange block in ToUnicode CMap");
          break;
        }
        if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
              tok2[0] == '<' && tok2[n2 - 1] == '>')) {
          error(errSyntaxWarning,
                -1, "Illegal entry in cidrange block in ToUnicode CMap");
          continue;
        }
        tok1[n1 - 1] = tok2[n2 - 1] = '\0';
        if (!parseHex(tok1 + 1, n1 - 2, &code1) ||
            !parseHex(tok2 + 1, n2 - 2, &code2)) {
          error(errSyntaxWarning, -1,
                "Illegal entry in cidrange block in ToUnicode CMap");
          continue;
        }
        if (code1 > maxCode || code2 > maxCode) {
          error(errSyntaxWarning, -1,
                "Invalid entry in cidrange block in ToUnicode CMap");
          if (code2 > maxCode) {
            code2 = maxCode;
          }
        }
        if (code2 > maxMapCode) {
          code2 = maxMapCode;
        }
        for (i = atoi(tok3); code1 <= code2; ++code1, ++i) {
          addMappingInt(code1, i);
          ok = gTrue;
        }
      }
      pst->getToken(tok1, sizeof(tok1), &n1);
    } else {
      // nothing recognized: shift tok2 into tok1 and read on
      strcpy(tok1, tok2);
      n1 = n2;
    }
  }
  delete pst;
  return ok;
}
481
482
// Map <code> to the Unicode sequence given by the <n>-byte UTF-16BE
// hex string <uStr>, with <offset> added to the last value of the
// sequence.  Codes above 0xffffff and strings that fail to parse (or
// are empty) are ignored.  A single value goes into the main map; a
// longer sequence is appended to the string map.
void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
                                   int offset) {
  Unicode seq[maxUnicodeString];
  int seqLen;

  if (code > 0xffffff) {
    // This is an arbitrary limit to avoid integer overflow issues.
    // (I've seen CMaps with mappings for <ffffffff>.)
    return;
  }
  seqLen = parseUTF16String(uStr, n, seq);
  if (seqLen == 0) {
    return;
  }

  // grow the main map: double it (or start at 256), or jump straight
  // to the next multiple of 256 above <code> if that isn't enough;
  // new entries are zeroed
  if (code >= mapLen) {
    CharCode prevLen = mapLen;
    mapLen = mapLen ? 2 * mapLen : 256;
    if (code >= mapLen) {
      mapLen = (code + 256) & ~255;
    }
    map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
    for (CharCode k = prevLen; k < mapLen; ++k) {
      map[k] = 0;
    }
  }

  if (seqLen == 1) {
    map[code] = seq[0] + offset;
    return;
  }

  // multi-value mapping: main map entry is 0, sequence goes into sMap
  if (sMapLen >= sMapSize) {
    sMapSize = sMapSize + 16;
    sMap = (CharCodeToUnicodeString *)
             greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
  }
  map[code] = 0;
  CharCodeToUnicodeString *entry = &sMap[sMapLen];
  entry->c = code;
  for (int k = 0; k < seqLen; ++k) {
    entry->u[k] = seq[k];
  }
  entry->u[seqLen - 1] += offset;
  entry->len = seqLen;
  ++sMapLen;
}
525
526
// Convert a UTF-16BE hex string into a sequence of up to
527
// maxUnicodeString Unicode chars.
528
628k
int CharCodeToUnicode::parseUTF16String(char *uStr, int n, Unicode *uOut) {
529
628k
  int i = 0;
530
628k
  int uLen = 0;
531
1.31M
  while (i < n) {
532
686k
    Unicode u;
533
686k
    int j = n;
534
686k
    if (j - i > 4) {
535
63.3k
      j = i + 4;
536
63.3k
    }
537
686k
    if (!parseHex(uStr + i, j - i, &u)) {
538
3.76k
      error(errSyntaxWarning, -1, "Illegal entry in ToUnicode CMap");
539
3.76k
      return 0;
540
3.76k
    }
541
    // look for a UTF-16 pair
542
683k
    if (uLen > 0 && uOut[uLen-1] >= 0xd800 && uOut[uLen-1] <= 0xdbff &&
543
804
  u >= 0xdc00 && u <= 0xdfff) {
544
1
      uOut[uLen-1] = 0x10000 + ((uOut[uLen-1] & 0x03ff) << 10) + (u & 0x03ff);
545
683k
    } else {
546
683k
      if (uLen < maxUnicodeString) {
547
682k
  uOut[uLen++] = u;
548
682k
      }
549
683k
    }
550
683k
    i = j;
551
683k
  }
552
625k
  return uLen;
553
628k
}
554
555
683k
// Map <code> directly to the single Unicode value <u>, growing the
// main map (zero-filled) as needed.  Codes above 0xffffff are
// ignored.
void CharCodeToUnicode::addMappingInt(CharCode code, Unicode u) {
  if (code > 0xffffff) {
    // This is an arbitrary limit to avoid integer overflow issues.
    // (I've seen CMaps with mappings for <ffffffff>.)
    return;
  }
  if (code >= mapLen) {
    CharCode prevLen = mapLen;
    // double the map (or start at 256); if still too small, jump to
    // the next multiple of 256 above <code>
    mapLen = mapLen ? 2 * mapLen : 256;
    if (code >= mapLen) {
      mapLen = (code + 256) & ~255;
    }
    map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
    for (CharCode k = prevLen; k < mapLen; ++k) {
      map[k] = 0;
    }
  }
  map[code] = u;
}
576
577
8.02k
// Construct an untagged object with no main map and no string map,
// holding one reference.
CharCodeToUnicode::CharCodeToUnicode() {
  refCnt = 1;
  tag = NULL;
  mapLen = 0;
  map = NULL;
  sMapLen = 0;
  sMapSize = 0;
  sMap = NULL;
}
585
586
10.0k
// Construct an object tagged with <tagA> (pointer stored as-is; the
// destructor deletes it) with a zero-filled 256-entry main map and
// an empty string map, holding one reference.
CharCodeToUnicode::CharCodeToUnicode(GString *tagA) {
  tag = tagA;
  refCnt = 1;

  mapLen = 256;
  map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
  CharCode k = 0;
  while (k < mapLen) {
    map[k++] = 0;
  }

  sMap = NULL;
  sMapSize = 0;
  sMapLen = 0;
}
599
600
// Construct an object from existing tables, holding one reference.
// <tagA> and <sMapA> are stored as-is (the destructor releases
// them).  If <copyMap> is true the first <mapLenA> entries of <mapA>
// are copied into a new buffer; otherwise <mapA> itself is stored.
CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA,
                                     CharCode mapLenA, GBool copyMap,
                                     CharCodeToUnicodeString *sMapA,
                                     int sMapLenA, int sMapSizeA) {
  refCnt = 1;
  tag = tagA;

  mapLen = mapLenA;
  if (!copyMap) {
    map = mapA;
  } else {
    map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
    memcpy(map, mapA, mapLen * sizeof(Unicode));
  }

  sMap = sMapA;
  sMapSize = sMapSizeA;
  sMapLen = sMapLenA;
}
617
618
41.4k
// Release the tag, the main map and the string map.
CharCodeToUnicode::~CharCodeToUnicode() {
  // deleting a null pointer is a no-op, so no test is needed
  delete tag;
  gfree(sMap);
  gfree(map);
}
625
626
0
void CharCodeToUnicode::incRefCnt() {
627
0
#if MULTITHREADED
628
0
  gAtomicIncrement(&refCnt);
629
#else
630
  ++refCnt;
631
#endif
632
0
}
633
634
33.9k
void CharCodeToUnicode::decRefCnt() {
635
33.9k
  GBool done;
636
637
33.9k
#if MULTITHREADED
638
33.9k
  done = gAtomicDecrement(&refCnt) == 0;
639
#else
640
  done = --refCnt == 0;
641
#endif
642
33.9k
  if (done) {
643
33.9k
    delete this;
644
33.9k
  }
645
33.9k
}
646
647
0
// Return true if this object has a tag and tag->cmp(<tagA>) returns
// zero; an untagged object never matches.
GBool CharCodeToUnicode::match(GString *tagA) {
  if (!tag) {
    return gFalse;
  }
  return tag->cmp(tagA) == 0;
}
650
651
0
// Set the mapping for char code <c> to the <len> Unicode values in
// <u>.  Does nothing for an object without a main map (identity
// mapping) or for codes above 0xffffff (the limit addMapping uses).
// The main map is grown (zero-filled) if <c> lies beyond its end.
// At most maxUnicodeString values are stored; a longer sequence is
// truncated.  A one-value mapping is written to the main map only
// (an existing string-map entry for <c> is left in place); otherwise
// the string-map entry for <c> is replaced or appended.
void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
  CharCode oldLen, k;
  int i, j;

  if (!map) {
    return;
  }
  // never record more values than an entry can hold, so readers
  // that trust sMap[].len stay inside sMap[].u
  if (len > maxUnicodeString) {
    len = maxUnicodeString;
  }
  // make sure map[c] exists before writing to it
  if (c >= mapLen) {
    if (c > 0xffffff) {
      return;
    }
    oldLen = mapLen;
    mapLen = (c + 256) & ~255;
    map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
    for (k = oldLen; k < mapLen; ++k) {
      map[k] = 0;
    }
  }
  if (len == 1) {
    map[c] = u[0];
  } else {
    // look for an existing string-map entry for <c>
    for (i = 0; i < sMapLen; ++i) {
      if (sMap[i].c == c) {
        break;
      }
    }
    // none found: append a new entry, growing the array by 8
    if (i == sMapLen) {
      if (sMapLen == sMapSize) {
        sMapSize += 8;
        sMap = (CharCodeToUnicodeString *)
                 greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
      }
      ++sMapLen;
    }
    map[c] = 0;
    sMap[i].c = c;
    sMap[i].len = len;
    for (j = 0; j < len; ++j) {
      sMap[i].u[j] = u[j];
    }
  }
}
681
682
5.84M
// Look up char code <c> and write its Unicode value(s) to <u>, which
// has room for <size> values.  Returns the number of values written.
// Without a main map (identity mapping) <c> itself is returned.
// Codes beyond the map, or with no mapping, yield 0.  A multi-value
// mapping is truncated to <size> values.
int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
  if (!map) {
    u[0] = (Unicode)c;
    return 1;
  }
  if (c >= mapLen) {
    return 0;
  }
  if (map[c]) {
    u[0] = map[c];
    return 1;
  }
  // a zero main-map entry means "no mapping" or "see the string map"
  for (int k = 0; k < sMapLen; ++k) {
    if (sMap[k].c != c) {
      continue;
    }
    int copied = 0;
    while (copied < sMap[k].len && copied < size) {
      u[copied] = sMap[k].u[copied];
      ++copied;
    }
    return copied;
  }
  return 0;
}
706
707
//------------------------------------------------------------------------
708
709
46.8k
// Create a cache with <sizeA> slots, all initially empty (NULL).
CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
  size = sizeA;
  cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *));
  int slot = 0;
  while (slot < size) {
    cache[slot] = NULL;
    ++slot;
  }
}
718
719
46.8k
// Drop the cache's reference to every stored object, then free the
// slot array.
CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
  for (int slot = 0; slot < size; ++slot) {
    CharCodeToUnicode *entry = cache[slot];
    if (!entry) {
      continue;
    }
    entry->decRefCnt();
  }
  gfree(cache);
}
729
730
6.03k
// Find the cached object whose tag matches <tag>.  On a hit the
// object is moved to slot 0 (entries before it shift down by one),
// its reference count is incremented for the caller, and it is
// returned.  Returns NULL if nothing matches.
CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) {
  // fast path: already at the front
  if (cache[0] && cache[0]->match(tag)) {
    cache[0]->incRefCnt();
    return cache[0];
  }
  for (int hit = 1; hit < size; ++hit) {
    if (!cache[hit] || !cache[hit]->match(tag)) {
      continue;
    }
    CharCodeToUnicode *found = cache[hit];
    // shift slots 0..hit-1 to 1..hit, then put the hit in front
    int k = hit;
    while (k >= 1) {
      cache[k] = cache[k - 1];
      --k;
    }
    cache[0] = found;
    found->incRefCnt();
    return found;
  }
  return NULL;
}
751
752
0
// Insert <ctu> at the front of the cache, taking a reference to it.
// All entries shift down one slot; the entry in the last slot, if
// any, is released and dropped.
void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
  int last = size - 1;

  CharCodeToUnicode *evicted = cache[last];
  if (evicted) {
    evicted->decRefCnt();
  }
  for (int k = last; k >= 1; --k) {
    cache[k] = cache[k - 1];
  }
  ctu->incRefCnt();
  cache[0] = ctu;
}