Coverage Report

Created: 2025-08-26 06:28

/src/xpdf-4.05/xpdf/CharCodeToUnicode.cc
Line
Count
Source (jump to first uncovered line)
1
//========================================================================
2
//
3
// CharCodeToUnicode.cc
4
//
5
// Copyright 2001-2003 Glyph & Cog, LLC
6
//
7
//========================================================================
8
9
#include <aconf.h>
10
11
#include <stdio.h>
12
#include <string.h>
13
#include "gmem.h"
14
#include "gmempp.h"
15
#include "gfile.h"
16
#include "GString.h"
17
#include "Error.h"
18
#include "GlobalParams.h"
19
#include "PSTokenizer.h"
20
#include "CharCodeToUnicode.h"
21
22
//------------------------------------------------------------------------
23
24
0
#define maxUnicodeString 8
25
26
// One multi-character mapping: character code <c> maps to the first
// <len> entries of <u>.  <u> has room for maxUnicodeString values.
struct CharCodeToUnicodeString {
27
  CharCode c;
28
  Unicode u[maxUnicodeString];
29
  int len;
30
};
31
32
//------------------------------------------------------------------------
33
34
// Read cursor over a GString, used as the <data> argument of
// getCharFromGString: <s> is the string being read and <i> is the
// index of the next character to return.
struct GStringIndex {
35
  GString *s;
36
  int i;
37
};
38
39
0
static int getCharFromGString(void *data) {
40
0
  GStringIndex *idx = (GStringIndex *)data;
41
0
  if (idx->i >= idx->s->getLength()) {
42
0
    return EOF;
43
0
  }
44
0
  return idx->s->getChar(idx->i++) & 0xff;
45
0
}
46
47
0
// Character source backed by a stdio stream: <data> is the FILE
// pointer.  Returns whatever fgetc returns for that stream.
static int getCharFromFile(void *data) {
  FILE *stream = (FILE *)data;

  return fgetc(stream);
}
50
51
//------------------------------------------------------------------------
52
53
// Lookup table indexed by byte value: the numeric value (0..15) of
// the hex digits '0'-'9', 'A'-'F' and 'a'-'f', and -1 for every other
// byte.  The table is never modified, so it is declared const.
static const int hexCharVals[256] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 3x
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 5x
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  // Fx
};
71
72
// Parse a <len>-byte hex string <s> into *<val>.  Returns false on
73
// error.
74
0
static GBool parseHex(char *s, int len, Guint *val) {
75
0
  int i, x;
76
77
0
  *val = 0;
78
0
  for (i = 0; i < len; ++i) {
79
0
    x = hexCharVals[s[i] & 0xff];
80
0
    if (x < 0) {
81
0
      return gFalse;
82
0
    }
83
0
    *val = (*val << 4) + x;
84
0
  }
85
0
  return gTrue;
86
0
}
87
88
//------------------------------------------------------------------------
89
90
0
// Create a mapping with no table (the default constructor leaves the
// map NULL); mapToUnicode then returns each character code unchanged.
CharCodeToUnicode *CharCodeToUnicode::makeIdentityMapping() {
  CharCodeToUnicode *identity = new CharCodeToUnicode();

  return identity;
}
93
94
// Load a cidToUnicode file: each line holds one hex Unicode value,
// and the value on the k-th line (counting from zero) becomes the
// mapping for code k.  Lines that do not start with a hex number are
// reported and mapped to 0.  The returned object is tagged with a
// copy of <collection>.  Returns NULL if the file cannot be opened.
CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName,
                                                        GString *collection) {
  FILE *in = openFile(fileName->getCString(), "r");
  if (!in) {
    error(errSyntaxError, -1, "Couldn't open cidToUnicode file '{0:t}'",
          fileName);
    return NULL;
  }

  CharCode capacity = 32768;
  CharCode count = 0;
  Unicode *table = (Unicode *)gmallocn(capacity, sizeof(Unicode));
  char lineBuf[64];

  while (getLine(lineBuf, sizeof(lineBuf), in)) {
    // double the table whenever it is full
    if (count == capacity) {
      capacity *= 2;
      table = (Unicode *)greallocn(table, capacity, sizeof(Unicode));
    }
    Unicode value;
    if (sscanf(lineBuf, "%x", &value) != 1) {
      error(errSyntaxWarning, -1,
            "Bad line ({0:d}) in cidToUnicode file '{1:t}'",
            (int)(count + 1), fileName);
      value = 0;
    }
    table[count] = value;
    ++count;
  }
  fclose(in);

  // the constructor copies the table (copyMap = gTrue), so the local
  // buffer is released here
  CharCodeToUnicode *result =
      new CharCodeToUnicode(collection->copy(), table, count, gTrue,
                            NULL, 0, 0);
  gfree(table);
  return result;
}
135
136
// Load a unicodeToUnicode file.  Each line is a whitespace-separated
// list of hex numbers: the first is the source code, and the
// following one to maxUnicodeString numbers are the Unicode values it
// maps to.  Malformed lines are reported and skipped.  The returned
// object is tagged with a copy of <fileName>.  Returns NULL if the
// file cannot be opened.
CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
                                                    GString *fileName) {
  // Largest source code accepted -- the same arbitrary limit that
  // addMapping uses.  Without it, a code >= 0x80000000 makes the
  // "size *= 2" loop below wrap <size> to zero and spin forever, and
  // slightly smaller codes request an enormous table.
  const Unicode maxSrcCode = 0xffffff;
  FILE *f;
  Unicode *mapA;
  CharCodeToUnicodeString *sMapA;
  CharCode size, oldSize, len, sMapSizeA, sMapLenA;
  char buf[256];
  char *tok;
  Unicode u0;
  Unicode uBuf[maxUnicodeString];
  CharCodeToUnicode *ctu;
  int line, n, i;

  if (!(f = openFile(fileName->getCString(), "r"))) {
    error(errSyntaxError, -1, "Couldn't open unicodeToUnicode file '{0:t}'",
          fileName);
    return NULL;
  }

  size = 4096;
  mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
  memset(mapA, 0, size * sizeof(Unicode));
  len = 0;
  sMapA = NULL;
  sMapSizeA = sMapLenA = 0;

  line = 0;
  while (getLine(buf, sizeof(buf), f)) {
    ++line;

    // first token: the source code
    if (!(tok = strtok(buf, " \t\r\n")) ||
        !parseHex(tok, (int)strlen(tok), &u0) ||
        u0 > maxSrcCode) {
      error(errSyntaxWarning, -1,
            "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
            line, fileName);
      continue;
    }

    // remaining tokens: up to maxUnicodeString target values
    n = 0;
    while (n < maxUnicodeString) {
      if (!(tok = strtok(NULL, " \t\r\n"))) {
        break;
      }
      if (!parseHex(tok, (int)strlen(tok), &uBuf[n])) {
        error(errSyntaxWarning, -1,
              "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
              line, fileName);
        break;
      }
      ++n;
    }
    if (n < 1) {
      error(errSyntaxWarning, -1,
            "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
            line, fileName);
      continue;
    }

    // grow the single-value table (zero-filled) so that u0 fits
    if (u0 >= size) {
      oldSize = size;
      while (u0 >= size) {
        size *= 2;
      }
      mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
      memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
    }

    if (n == 1) {
      mapA[u0] = uBuf[0];
    } else {
      // multi-value mapping: the table entry stays 0 and the values
      // go into the string map
      mapA[u0] = 0;
      if (sMapLenA == sMapSizeA) {
        sMapSizeA += 16;
        sMapA = (CharCodeToUnicodeString *)
                  greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString));
      }
      sMapA[sMapLenA].c = u0;
      for (i = 0; i < n; ++i) {
        sMapA[sMapLenA].u[i] = uBuf[i];
      }
      sMapA[sMapLenA].len = n;
      ++sMapLenA;
    }
    if (u0 >= len) {
      len = u0 + 1;
    }
  }
  fclose(f);

  // the constructor copies mapA (copyMap = gTrue) but stores the
  // sMapA pointer itself, so only mapA is freed here
  ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
                              sMapA, sMapLenA, sMapSizeA);
  gfree(mapA);
  return ctu;
}
226
227
0
// Build an untagged mapping from a caller-supplied table, of which
// the first 256 entries are copied (copyMap = gTrue).
CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
  const CharCode tableLen = 256;

  return new CharCodeToUnicode(NULL, toUnicode, tableLen, gTrue, NULL, 0, 0);
}
230
231
0
// Build an untagged mapping from a caller-supplied table, of which
// the first 65536 entries are copied (copyMap = gTrue).
CharCodeToUnicode *CharCodeToUnicode::make16BitToUnicode(Unicode *toUnicode) {
  const CharCode tableLen = 65536;

  return new CharCodeToUnicode(NULL, toUnicode, tableLen, gTrue, NULL, 0, 0);
}
234
235
0
// Parse the ToUnicode CMap text held in <buf> into a new, untagged
// mapping.  Returns NULL (and frees the partial object) if
// parseCMap1 reports failure.
CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
  GStringIndex cursor;
  cursor.s = buf;
  cursor.i = 0;

  CharCodeToUnicode *result = new CharCodeToUnicode(NULL);
  if (result->parseCMap1(&getCharFromGString, &cursor, nBits)) {
    return result;
  }
  delete result;
  return NULL;
}
248
249
0
// Parse the CMap text in <buf> and add its mappings to this object.
// The success/failure result of parseCMap1 is ignored.
void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) {
  GStringIndex cursor;

  cursor.i = 0;
  cursor.s = buf;
  parseCMap1(&getCharFromGString, &cursor, nBits);
}
256
257
// Tokenize a ToUnicode CMap read through <getCharFunc>/<data> and add
// every mapping found to this object.  <nBits> (8, 16, or anything
// else for "no limit") selects the largest expected character code.
// Handles the codespacerange, usecmap, bfchar and bfrange operators,
// plus cidchar/cidrange, which are not legal in ToUnicode CMaps but
// are accepted with a warning.  Returns gTrue if at least one mapping
// entry was processed, gFalse otherwise (including when the code
// size check in the codespacerange block fails).
GBool CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
                                    int nBits) {
  // addMapping and addMappingInt silently drop every code above this
  // value, so range loops are clamped to it.  That keeps a range
  // ending at <ffffffff> from looping forever (code1 <= code2 can
  // never become false once code1 wraps) without changing which
  // mappings get stored.
  const CharCode maxMapCode = 0xffffff;
  PSTokenizer *pst;
  char tok1[256], tok2[256], tok3[256];
  int n1, n2, n3;
  CharCode i;
  CharCode maxCode, code1, code2;
  GString *name;
  FILE *f;
  GBool ok;

  ok = gFalse;
  maxCode = (nBits == 8) ? 0xff : (nBits == 16) ? 0xffff : 0xffffffff;
  pst = new PSTokenizer(getCharFunc, data);

  // tok1 always holds the token that precedes tok2
  pst->getToken(tok1, sizeof(tok1), &n1);
  while (pst->getToken(tok2, sizeof(tok2), &n2)) {
    if (!strcmp(tok1, "begincodespacerange")) {
      // tok2 is the first code of the range; its hex digit count must
      // match nBits when the size check is enabled
      if (globalParams->getIgnoreWrongSizeToUnicode() &&
          tok2[0] == '<' && tok2[n2 - 1] == '>' &&
          n2 - 2 != nBits / 4) {
        error(errSyntaxWarning, -1,
              "Incorrect character size in ToUnicode CMap");
        ok = gFalse;
        break;
      }
      while (pst->getToken(tok1, sizeof(tok1), &n1) &&
             strcmp(tok1, "endcodespacerange")) ;
    } else if (!strcmp(tok2, "usecmap")) {
      if (tok1[0] == '/') {
        name = new GString(tok1 + 1);
        if ((f = globalParams->findToUnicodeFile(name))) {
          if (parseCMap1(&getCharFromFile, f, nBits)) {
            ok = gTrue;
          }
          fclose(f);
        } else {
          // <name> is the only argument, so it is argument 0
          error(errSyntaxError, -1,
                "Couldn't find ToUnicode CMap file for '{0:t}'",
                name);
        }
        delete name;
      }
      pst->getToken(tok1, sizeof(tok1), &n1);
    } else if (!strcmp(tok2, "beginbfchar")) {
      while (pst->getToken(tok1, sizeof(tok1), &n1)) {
        if (!strcmp(tok1, "endbfchar")) {
          break;
        }
        if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
            !strcmp(tok2, "endbfchar")) {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfchar block in ToUnicode CMap");
          break;
        }
        if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
              tok2[0] == '<' && tok2[n2 - 1] == '>')) {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfchar block in ToUnicode CMap");
          continue;
        }
        tok1[n1 - 1] = tok2[n2 - 1] = '\0';
        if (!parseHex(tok1 + 1, n1 - 2, &code1)) {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfchar block in ToUnicode CMap");
          continue;
        }
        if (code1 > maxCode) {
          error(errSyntaxWarning, -1,
                "Invalid entry in bfchar block in ToUnicode CMap");
        }
        addMapping(code1, tok2 + 1, n2 - 2, 0);
        ok = gTrue;
      }
      pst->getToken(tok1, sizeof(tok1), &n1);
    } else if (!strcmp(tok2, "beginbfrange")) {
      while (pst->getToken(tok1, sizeof(tok1), &n1)) {
        if (!strcmp(tok1, "endbfrange")) {
          break;
        }
        if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
            !strcmp(tok2, "endbfrange") ||
            !pst->getToken(tok3, sizeof(tok3), &n3) ||
            !strcmp(tok3, "endbfrange")) {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfrange block in ToUnicode CMap");
          break;
        }
        if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
              tok2[0] == '<' && tok2[n2 - 1] == '>')) {
          error(errSyntaxWarning,
                -1, "Illegal entry in bfrange block in ToUnicode CMap");
          continue;
        }
        tok1[n1 - 1] = tok2[n2 - 1] = '\0';
        if (!parseHex(tok1 + 1, n1 - 2, &code1) ||
            !parseHex(tok2 + 1, n2 - 2, &code2)) {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfrange block in ToUnicode CMap");
          continue;
        }
        if (code1 > maxCode || code2 > maxCode) {
          error(errSyntaxWarning, -1,
                "Invalid entry in bfrange block in ToUnicode CMap");
          if (code2 > maxCode) {
            code2 = maxCode;
          }
        }
        if (!strcmp(tok3, "[")) {
          // array form: the i-th array element maps code1 + i
          i = 0;
          while (pst->getToken(tok1, sizeof(tok1), &n1)) {
            if (!strcmp(tok1, "]")) {
              break;
            }
            if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
              // written as a subtraction so that code1 + i cannot
              // wrap around and pass the range test by accident
              if (code1 <= code2 && i <= code2 - code1) {
                tok1[n1 - 1] = '\0';
                addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
                ok = gTrue;
              }
            } else {
              error(errSyntaxWarning, -1,
                    "Illegal entry in bfrange block in ToUnicode CMap");
            }
            ++i;
          }
        } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
          // single-string form: code1 + i maps to the string with i
          // added to its last value
          tok3[n3 - 1] = '\0';
          if (code1 <= code2) {
            ok = gTrue;
            if (code2 > maxMapCode) {
              code2 = maxMapCode;
            }
            for (i = 0; code1 <= code2; ++code1, ++i) {
              addMapping(code1, tok3 + 1, n3 - 2, i);
            }
          }
        } else {
          error(errSyntaxWarning, -1,
                "Illegal entry in bfrange block in ToUnicode CMap");
        }
      }
      pst->getToken(tok1, sizeof(tok1), &n1);
    } else if (!strcmp(tok2, "begincidchar")) {
      // the begincidchar operator is not allowed in ToUnicode CMaps,
      // but some buggy PDF generators incorrectly use
      // code-to-CID-type CMaps here
      error(errSyntaxWarning, -1,
            "Invalid 'begincidchar' operator in ToUnicode CMap");
      while (pst->getToken(tok1, sizeof(tok1), &n1)) {
        if (!strcmp(tok1, "endcidchar")) {
          break;
        }
        if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
            !strcmp(tok2, "endcidchar")) {
          error(errSyntaxWarning, -1,
                "Illegal entry in cidchar block in ToUnicode CMap");
          break;
        }
        if (!(tok1[0] == '<' && tok1[n1 - 1] == '>')) {
          error(errSyntaxWarning, -1,
                "Illegal entry in cidchar block in ToUnicode CMap");
          continue;
        }
        tok1[n1 - 1] = '\0';
        if (!parseHex(tok1 + 1, n1 - 2, &code1)) {
          error(errSyntaxWarning, -1,
                "Illegal entry in cidchar block in ToUnicode CMap");
          continue;
        }
        if (code1 > maxCode) {
          error(errSyntaxWarning, -1,
                "Invalid entry in cidchar block in ToUnicode CMap");
        }
        addMappingInt(code1, atoi(tok2));
        ok = gTrue;
      }
      pst->getToken(tok1, sizeof(tok1), &n1);
    } else if (!strcmp(tok2, "begincidrange")) {
      // the begincidrange operator is not allowed in ToUnicode CMaps,
      // but some buggy PDF generators incorrectly use
      // code-to-CID-type CMaps here
      error(errSyntaxWarning, -1,
            "Invalid 'begincidrange' operator in ToUnicode CMap");
      while (pst->getToken(tok1, sizeof(tok1), &n1)) {
        if (!strcmp(tok1, "endcidrange")) {
          break;
        }
        if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
            !strcmp(tok2, "endcidrange") ||
            !pst->getToken(tok3, sizeof(tok3), &n3) ||
            !strcmp(tok3, "endcidrange")) {
          error(errSyntaxWarning, -1,
                "Illegal entry in cidrange block in ToUnicode CMap");
          break;
        }
        if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
              tok2[0] == '<' && tok2[n2 - 1] == '>')) {
          error(errSyntaxWarning,
                -1, "Illegal entry in cidrange block in ToUnicode CMap");
          continue;
        }
        tok1[n1 - 1] = tok2[n2 - 1] = '\0';
        if (!parseHex(tok1 + 1, n1 - 2, &code1) ||
            !parseHex(tok2 + 1, n2 - 2, &code2)) {
          error(errSyntaxWarning, -1,
                "Illegal entry in cidrange block in ToUnicode CMap");
          continue;
        }
        if (code1 > maxCode || code2 > maxCode) {
          error(errSyntaxWarning, -1,
                "Invalid entry in cidrange block in ToUnicode CMap");
          if (code2 > maxCode) {
            code2 = maxCode;
          }
        }
        if (code1 <= code2) {
          ok = gTrue;
          if (code2 > maxMapCode) {
            code2 = maxMapCode;
          }
          for (i = atoi(tok3); code1 <= code2; ++code1, ++i) {
            addMappingInt(code1, i);
          }
        }
      }
      pst->getToken(tok1, sizeof(tok1), &n1);
    } else {
      // not an operator we handle: slide the window forward one token
      strcpy(tok1, tok2);
      n1 = n2;
    }
  }
  delete pst;
  return ok;
}
481
482
// Add a mapping for <code>.  <uStr> holds <n> hex digits of UTF-16BE
// text; <offset> is added to the last decoded Unicode value.  Codes
// above 0xffffff and strings that decode to nothing are ignored.  A
// single decoded value is stored in the map table; longer sequences
// are appended to the string map and the table entry is set to 0.
void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
                                   int offset) {
  Unicode decoded[maxUnicodeString];
  int nDecoded, k;
  CharCode prevLen;

  if (code > 0xffffff) {
    // This is an arbitrary limit to avoid integer overflow issues.
    // (I've seen CMaps with mappings for <ffffffff>.)
    return;
  }
  nDecoded = parseUTF16String(uStr, n, decoded);
  if (nDecoded == 0) {
    return;
  }

  // grow the table (new entries zeroed) until <code> fits: double it,
  // or jump to the next multiple of 256 above <code> if that is not
  // enough
  if (code >= mapLen) {
    prevLen = mapLen;
    mapLen = mapLen ? 2 * mapLen : 256;
    if (code >= mapLen) {
      mapLen = (code + 256) & ~255;
    }
    map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
    memset(map + prevLen, 0, (mapLen - prevLen) * sizeof(Unicode));
  }

  if (nDecoded == 1) {
    map[code] = decoded[0] + offset;
    return;
  }

  if (sMapLen >= sMapSize) {
    sMapSize = sMapSize + 16;
    sMap = (CharCodeToUnicodeString *)
             greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
  }
  map[code] = 0;
  CharCodeToUnicodeString *entry = &sMap[sMapLen];
  entry->c = code;
  for (k = 0; k < nDecoded; ++k) {
    entry->u[k] = decoded[k];
  }
  entry->u[nDecoded - 1] += offset;
  entry->len = nDecoded;
  ++sMapLen;
}
525
526
// Convert a UTF-16BE hex string into a sequence of up to
527
// maxUnicodeString Unicode chars.
528
0
int CharCodeToUnicode::parseUTF16String(char *uStr, int n, Unicode *uOut) {
529
0
  int i = 0;
530
0
  int uLen = 0;
531
0
  while (i < n) {
532
0
    Unicode u;
533
0
    int j = n;
534
0
    if (j - i > 4) {
535
0
      j = i + 4;
536
0
    }
537
0
    if (!parseHex(uStr + i, j - i, &u)) {
538
0
      error(errSyntaxWarning, -1, "Illegal entry in ToUnicode CMap");
539
0
      return 0;
540
0
    }
541
    // look for a UTF-16 pair
542
0
    if (uLen > 0 && uOut[uLen-1] >= 0xd800 && uOut[uLen-1] <= 0xdbff &&
543
0
  u >= 0xdc00 && u <= 0xdfff) {
544
0
      uOut[uLen-1] = 0x10000 + ((uOut[uLen-1] & 0x03ff) << 10) + (u & 0x03ff);
545
0
    } else {
546
0
      if (uLen < maxUnicodeString) {
547
0
  uOut[uLen++] = u;
548
0
      }
549
0
    }
550
0
    i = j;
551
0
  }
552
0
  return uLen;
553
0
}
554
555
0
// Store the single-value mapping <code> -> <u> in the map table,
// growing the table (new entries zeroed) if needed.  Codes above
// 0xffffff are ignored.
void CharCodeToUnicode::addMappingInt(CharCode code, Unicode u) {
  if (code > 0xffffff) {
    // This is an arbitrary limit to avoid integer overflow issues.
    // (I've seen CMaps with mappings for <ffffffff>.)
    return;
  }

  if (code >= mapLen) {
    const CharCode prevLen = mapLen;

    // double the table, or jump to the next multiple of 256 above
    // <code> if doubling is not enough
    mapLen = mapLen ? 2 * mapLen : 256;
    if (code >= mapLen) {
      mapLen = (code + 256) & ~255;
    }
    map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
    memset(map + prevLen, 0, (mapLen - prevLen) * sizeof(Unicode));
  }
  map[code] = u;
}
576
577
0
// Construct an empty object: no tag, no map table, no string map,
// and a reference count of one.
CharCodeToUnicode::CharCodeToUnicode() {
  refCnt = 1;
  tag = NULL;

  mapLen = 0;
  map = NULL;

  sMapSize = 0;
  sMapLen = 0;
  sMap = NULL;
}
585
586
0
// Construct an object that stores <tagA> (the pointer itself, not a
// copy) and starts with a 256-entry map table of zeros, an empty
// string map, and a reference count of one.
CharCodeToUnicode::CharCodeToUnicode(GString *tagA) {
  refCnt = 1;
  tag = tagA;

  mapLen = 256;
  map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
  memset(map, 0, mapLen * sizeof(Unicode));

  sMap = NULL;
  sMapSize = 0;
  sMapLen = 0;
}
599
600
// Construct an object from ready-made tables.  <tagA> and <sMapA> are
// stored as given (not copied).  If <copyMap> is true the first
// <mapLenA> entries of <mapA> are copied into a new buffer; otherwise
// the <mapA> pointer itself is stored.
CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA,
                                     CharCode mapLenA, GBool copyMap,
                                     CharCodeToUnicodeString *sMapA,
                                     int sMapLenA, int sMapSizeA) {
  refCnt = 1;
  tag = tagA;

  mapLen = mapLenA;
  if (!copyMap) {
    map = mapA;
  } else {
    map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
    memcpy(map, mapA, mapLen * sizeof(Unicode));
  }

  sMap = sMapA;
  sMapSize = sMapSizeA;
  sMapLen = sMapLenA;
}
617
618
0
// Release the tag, the map table and the string map.
CharCodeToUnicode::~CharCodeToUnicode() {
  gfree(map);
  gfree(sMap);
  // deleting a null pointer is a no-op, so no check is needed
  delete tag;
}
625
626
0
void CharCodeToUnicode::incRefCnt() {
627
0
#if MULTITHREADED
628
0
  gAtomicIncrement(&refCnt);
629
#else
630
  ++refCnt;
631
#endif
632
0
}
633
634
0
void CharCodeToUnicode::decRefCnt() {
635
0
  GBool done;
636
637
0
#if MULTITHREADED
638
0
  done = gAtomicDecrement(&refCnt) == 0;
639
#else
640
  done = --refCnt == 0;
641
#endif
642
0
  if (done) {
643
0
    delete this;
644
0
  }
645
0
}
646
647
0
// Return true if this object has a tag and tag->cmp(<tagA>) reports
// equality (returns 0).
GBool CharCodeToUnicode::match(GString *tagA) {
  if (!tag) {
    return gFalse;
  }
  return tag->cmp(tagA) == 0;
}
650
651
0
// Set the mapping for character code <c> to the <len> Unicode values
// in <u>.  Does nothing if there is no map table or if <c> lies
// outside it (the table is not grown here).  A single value goes into
// the map table.  Any other length is stored in the string map,
// reusing the existing entry for <c> if there is one; at most
// maxUnicodeString values are kept.
void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
  int i, j;

  // writing map[c] beyond mapLen would corrupt memory
  if (!map || c >= mapLen) {
    return;
  }
  if (len == 1) {
    map[c] = u[0];
  } else {
    // the stored length must never exceed the number of values that
    // are actually copied, because mapToUnicode reads <len> entries
    if (len > maxUnicodeString) {
      len = maxUnicodeString;
    }
    for (i = 0; i < sMapLen; ++i) {
      if (sMap[i].c == c) {
        break;
      }
    }
    if (i == sMapLen) {
      if (sMapLen == sMapSize) {
        sMapSize += 8;
        sMap = (CharCodeToUnicodeString *)
                 greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
      }
      ++sMapLen;
    }
    // a zero table entry tells mapToUnicode to search the string map
    map[c] = 0;
    sMap[i].c = c;
    sMap[i].len = len;
    for (j = 0; j < len; ++j) {
      sMap[i].u[j] = u[j];
    }
  }
}
681
682
0
// Look up character code <c> and write its Unicode value(s) to <u>.
// Returns the number of values written, or 0 if there is no mapping.
// With no map table at all, <c> itself is returned as the single
// value.  For multi-value mappings at most <size> values are written.
int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
  if (!map) {
    u[0] = (Unicode)c;
    return 1;
  }
  if (c >= mapLen) {
    return 0;
  }

  const Unicode single = map[c];
  if (single) {
    u[0] = single;
    return 1;
  }

  // a zero table entry: the code may have a multi-value mapping
  for (int k = 0; k < sMapLen; ++k) {
    CharCodeToUnicodeString *entry = &sMap[k];
    if (entry->c != c) {
      continue;
    }
    int nOut = 0;
    while (nOut < entry->len && nOut < size) {
      u[nOut] = entry->u[nOut];
      ++nOut;
    }
    return nOut;
  }
  return 0;
}
706
707
//------------------------------------------------------------------------
708
709
4.81k
// Create a cache with <sizeA> slots, all initially empty (NULL).
CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
  size = sizeA;
  cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *));
  for (int slot = size - 1; slot >= 0; --slot) {
    cache[slot] = NULL;
  }
}
718
719
4.81k
// Drop the cache's reference to every stored object, then free the
// slot array.
CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
  for (int slot = 0; slot < size; ++slot) {
    CharCodeToUnicode *held = cache[slot];
    if (!held) {
      continue;
    }
    held->decRefCnt();
  }
  gfree(cache);
}
729
730
0
// Find the cached object whose tag matches <tag>.  On a hit the
// object is moved to slot 0 (slots before it shift down by one), its
// reference count is incremented, and it is returned.  Returns NULL
// if nothing matches.
CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) {
  // fast path: already at the front, nothing to move
  if (cache[0] && cache[0]->match(tag)) {
    cache[0]->incRefCnt();
    return cache[0];
  }

  for (int slot = 1; slot < size; ++slot) {
    CharCodeToUnicode *hit = cache[slot];
    if (!hit || !hit->match(tag)) {
      continue;
    }
    // shift slots 0..slot-1 to 1..slot, then put the hit in front
    memmove(cache + 1, cache, slot * sizeof(CharCodeToUnicode *));
    cache[0] = hit;
    hit->incRefCnt();
    return hit;
  }
  return NULL;
}
751
752
0
// Insert <ctu> at the front of the cache and take a reference to it.
// The object in the last slot, if any, is released, and every other
// entry moves one slot toward the back.
void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
  CharCodeToUnicode *evicted = cache[size - 1];

  if (evicted) {
    evicted->decRefCnt();
  }
  // shift slots 0..size-2 to 1..size-1
  memmove(cache + 1, cache, (size - 1) * sizeof(CharCodeToUnicode *));
  cache[0] = ctu;
  ctu->incRefCnt();
}