/src/xpdf-4.05/xpdf/CharCodeToUnicode.cc
Line | Count | Source (jump to first uncovered line) |
1 | | //======================================================================== |
2 | | // |
3 | | // CharCodeToUnicode.cc |
4 | | // |
5 | | // Copyright 2001-2003 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #include <stdio.h> |
12 | | #include <string.h> |
13 | | #include "gmem.h" |
14 | | #include "gmempp.h" |
15 | | #include "gfile.h" |
16 | | #include "GString.h" |
17 | | #include "Error.h" |
18 | | #include "GlobalParams.h" |
19 | | #include "PSTokenizer.h" |
20 | | #include "CharCodeToUnicode.h" |
21 | | |
22 | | //------------------------------------------------------------------------ |
23 | | |
24 | 0 | #define maxUnicodeString 8 |
25 | | |
26 | | struct CharCodeToUnicodeString { |
27 | | CharCode c; |
28 | | Unicode u[maxUnicodeString]; |
29 | | int len; |
30 | | }; |
31 | | |
32 | | //------------------------------------------------------------------------ |
33 | | |
34 | | struct GStringIndex { |
35 | | GString *s; |
36 | | int i; |
37 | | }; |
38 | | |
39 | 0 | static int getCharFromGString(void *data) { |
40 | 0 | GStringIndex *idx = (GStringIndex *)data; |
41 | 0 | if (idx->i >= idx->s->getLength()) { |
42 | 0 | return EOF; |
43 | 0 | } |
44 | 0 | return idx->s->getChar(idx->i++) & 0xff; |
45 | 0 | } |
46 | | |
// Character source backed by a stdio stream: <data> is a FILE *, and
// the result is whatever fgetc() returns (the next byte, or EOF).
static int getCharFromFile(void *data) {
  FILE *stream = (FILE *)data;
  return fgetc(stream);
}
50 | | |
51 | | //------------------------------------------------------------------------ |
52 | | |
// Lookup table indexed by byte value: the numeric value (0..15) of an
// ASCII hex digit ('0'-'9', 'A'-'F', 'a'-'f'), or -1 for any byte that
// is not a hex digit.
static int hexCharVals[256] = {
  // 0x00 - 0x0f
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x10 - 0x1f
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x20 - 0x2f
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x30 - 0x3f: '0' .. '9'
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
  // 0x40 - 0x4f: 'A' .. 'F'
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x50 - 0x5f
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x60 - 0x6f: 'a' .. 'f'
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x70 - 0x7f
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x80 - 0x8f
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x90 - 0x9f
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xa0 - 0xaf
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xb0 - 0xbf
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xc0 - 0xcf
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xd0 - 0xdf
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xe0 - 0xef
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xf0 - 0xff
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
71 | | |
72 | | // Parse a <len>-byte hex string <s> into *<val>. Returns false on |
73 | | // error. |
74 | 0 | static GBool parseHex(char *s, int len, Guint *val) { |
75 | 0 | int i, x; |
76 | |
|
77 | 0 | *val = 0; |
78 | 0 | for (i = 0; i < len; ++i) { |
79 | 0 | x = hexCharVals[s[i] & 0xff]; |
80 | 0 | if (x < 0) { |
81 | 0 | return gFalse; |
82 | 0 | } |
83 | 0 | *val = (*val << 4) + x; |
84 | 0 | } |
85 | 0 | return gTrue; |
86 | 0 | } |
87 | | |
88 | | //------------------------------------------------------------------------ |
89 | | |
90 | 0 | CharCodeToUnicode *CharCodeToUnicode::makeIdentityMapping() { |
91 | 0 | return new CharCodeToUnicode(); |
92 | 0 | } |
93 | | |
94 | | CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName, |
95 | 0 | GString *collection) { |
96 | 0 | FILE *f; |
97 | 0 | Unicode *mapA; |
98 | 0 | CharCode size, mapLenA; |
99 | 0 | char buf[64]; |
100 | 0 | Unicode u; |
101 | 0 | CharCodeToUnicode *ctu; |
102 | |
|
103 | 0 | if (!(f = openFile(fileName->getCString(), "r"))) { |
104 | 0 | error(errSyntaxError, -1, "Couldn't open cidToUnicode file '{0:t}'", |
105 | 0 | fileName); |
106 | 0 | return NULL; |
107 | 0 | } |
108 | | |
109 | 0 | size = 32768; |
110 | 0 | mapA = (Unicode *)gmallocn(size, sizeof(Unicode)); |
111 | 0 | mapLenA = 0; |
112 | |
|
113 | 0 | while (getLine(buf, sizeof(buf), f)) { |
114 | 0 | if (mapLenA == size) { |
115 | 0 | size *= 2; |
116 | 0 | mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode)); |
117 | 0 | } |
118 | 0 | if (sscanf(buf, "%x", &u) == 1) { |
119 | 0 | mapA[mapLenA] = u; |
120 | 0 | } else { |
121 | 0 | error(errSyntaxWarning, -1, |
122 | 0 | "Bad line ({0:d}) in cidToUnicode file '{1:t}'", |
123 | 0 | (int)(mapLenA + 1), fileName); |
124 | 0 | mapA[mapLenA] = 0; |
125 | 0 | } |
126 | 0 | ++mapLenA; |
127 | 0 | } |
128 | 0 | fclose(f); |
129 | |
|
130 | 0 | ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue, |
131 | 0 | NULL, 0, 0); |
132 | 0 | gfree(mapA); |
133 | 0 | return ctu; |
134 | 0 | } |
135 | | |
136 | | CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode( |
137 | 0 | GString *fileName) { |
138 | 0 | FILE *f; |
139 | 0 | Unicode *mapA; |
140 | 0 | CharCodeToUnicodeString *sMapA; |
141 | 0 | CharCode size, oldSize, len, sMapSizeA, sMapLenA; |
142 | 0 | char buf[256]; |
143 | 0 | char *tok; |
144 | 0 | Unicode u0; |
145 | 0 | Unicode uBuf[maxUnicodeString]; |
146 | 0 | CharCodeToUnicode *ctu; |
147 | 0 | int line, n, i; |
148 | |
|
149 | 0 | if (!(f = openFile(fileName->getCString(), "r"))) { |
150 | 0 | error(errSyntaxError, -1, "Couldn't open unicodeToUnicode file '{0:t}'", |
151 | 0 | fileName); |
152 | 0 | return NULL; |
153 | 0 | } |
154 | | |
155 | 0 | size = 4096; |
156 | 0 | mapA = (Unicode *)gmallocn(size, sizeof(Unicode)); |
157 | 0 | memset(mapA, 0, size * sizeof(Unicode)); |
158 | 0 | len = 0; |
159 | 0 | sMapA = NULL; |
160 | 0 | sMapSizeA = sMapLenA = 0; |
161 | |
|
162 | 0 | line = 0; |
163 | 0 | while (getLine(buf, sizeof(buf), f)) { |
164 | 0 | ++line; |
165 | 0 | if (!(tok = strtok(buf, " \t\r\n")) || |
166 | 0 | !parseHex(tok, (int)strlen(tok), &u0)) { |
167 | 0 | error(errSyntaxWarning, -1, |
168 | 0 | "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'", |
169 | 0 | line, fileName); |
170 | 0 | continue; |
171 | 0 | } |
172 | 0 | n = 0; |
173 | 0 | while (n < maxUnicodeString) { |
174 | 0 | if (!(tok = strtok(NULL, " \t\r\n"))) { |
175 | 0 | break; |
176 | 0 | } |
177 | 0 | if (!parseHex(tok, (int)strlen(tok), &uBuf[n])) { |
178 | 0 | error(errSyntaxWarning, -1, |
179 | 0 | "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'", |
180 | 0 | line, fileName); |
181 | 0 | break; |
182 | 0 | } |
183 | 0 | ++n; |
184 | 0 | } |
185 | 0 | if (n < 1) { |
186 | 0 | error(errSyntaxWarning, -1, |
187 | 0 | "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'", |
188 | 0 | line, fileName); |
189 | 0 | continue; |
190 | 0 | } |
191 | 0 | if (u0 >= size) { |
192 | 0 | oldSize = size; |
193 | 0 | while (u0 >= size) { |
194 | 0 | size *= 2; |
195 | 0 | } |
196 | 0 | mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode)); |
197 | 0 | memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode)); |
198 | 0 | } |
199 | 0 | if (n == 1) { |
200 | 0 | mapA[u0] = uBuf[0]; |
201 | 0 | } else { |
202 | 0 | mapA[u0] = 0; |
203 | 0 | if (sMapLenA == sMapSizeA) { |
204 | 0 | sMapSizeA += 16; |
205 | 0 | sMapA = (CharCodeToUnicodeString *) |
206 | 0 | greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString)); |
207 | 0 | } |
208 | 0 | sMapA[sMapLenA].c = u0; |
209 | 0 | for (i = 0; i < n; ++i) { |
210 | 0 | sMapA[sMapLenA].u[i] = uBuf[i]; |
211 | 0 | } |
212 | 0 | sMapA[sMapLenA].len = n; |
213 | 0 | ++sMapLenA; |
214 | 0 | } |
215 | 0 | if (u0 >= len) { |
216 | 0 | len = u0 + 1; |
217 | 0 | } |
218 | 0 | } |
219 | 0 | fclose(f); |
220 | |
|
221 | 0 | ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue, |
222 | 0 | sMapA, sMapLenA, sMapSizeA); |
223 | 0 | gfree(mapA); |
224 | 0 | return ctu; |
225 | 0 | } |
226 | | |
227 | 0 | CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) { |
228 | 0 | return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0); |
229 | 0 | } |
230 | | |
231 | 0 | CharCodeToUnicode *CharCodeToUnicode::make16BitToUnicode(Unicode *toUnicode) { |
232 | 0 | return new CharCodeToUnicode(NULL, toUnicode, 65536, gTrue, NULL, 0, 0); |
233 | 0 | } |
234 | | |
235 | 0 | CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) { |
236 | 0 | CharCodeToUnicode *ctu; |
237 | 0 | GStringIndex idx; |
238 | |
|
239 | 0 | ctu = new CharCodeToUnicode(NULL); |
240 | 0 | idx.s = buf; |
241 | 0 | idx.i = 0; |
242 | 0 | if (!ctu->parseCMap1(&getCharFromGString, &idx, nBits)) { |
243 | 0 | delete ctu; |
244 | 0 | return NULL; |
245 | 0 | } |
246 | 0 | return ctu; |
247 | 0 | } |
248 | | |
249 | 0 | void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) { |
250 | 0 | GStringIndex idx; |
251 | |
|
252 | 0 | idx.s = buf; |
253 | 0 | idx.i = 0; |
254 | 0 | parseCMap1(&getCharFromGString, &idx, nBits); |
255 | 0 | } |
256 | | |
257 | | GBool CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data, |
258 | 0 | int nBits) { |
259 | 0 | PSTokenizer *pst; |
260 | 0 | char tok1[256], tok2[256], tok3[256]; |
261 | 0 | int n1, n2, n3; |
262 | 0 | CharCode i; |
263 | 0 | CharCode maxCode, code1, code2; |
264 | 0 | GString *name; |
265 | 0 | FILE *f; |
266 | 0 | GBool ok; |
267 | |
|
268 | 0 | ok = gFalse; |
269 | 0 | maxCode = (nBits == 8) ? 0xff : (nBits == 16) ? 0xffff : 0xffffffff; |
270 | 0 | pst = new PSTokenizer(getCharFunc, data); |
271 | 0 | pst->getToken(tok1, sizeof(tok1), &n1); |
272 | 0 | while (pst->getToken(tok2, sizeof(tok2), &n2)) { |
273 | 0 | if (!strcmp(tok1, "begincodespacerange")) { |
274 | 0 | if (globalParams->getIgnoreWrongSizeToUnicode() && |
275 | 0 | tok2[0] == '<' && tok2[n2 - 1] == '>' && |
276 | 0 | n2 - 2 != nBits / 4) { |
277 | 0 | error(errSyntaxWarning, -1, |
278 | 0 | "Incorrect character size in ToUnicode CMap"); |
279 | 0 | ok = gFalse; |
280 | 0 | break; |
281 | 0 | } |
282 | 0 | while (pst->getToken(tok1, sizeof(tok1), &n1) && |
283 | 0 | strcmp(tok1, "endcodespacerange")) ; |
284 | 0 | } else if (!strcmp(tok2, "usecmap")) { |
285 | 0 | if (tok1[0] == '/') { |
286 | 0 | name = new GString(tok1 + 1); |
287 | 0 | if ((f = globalParams->findToUnicodeFile(name))) { |
288 | 0 | if (parseCMap1(&getCharFromFile, f, nBits)) { |
289 | 0 | ok = gTrue; |
290 | 0 | } |
291 | 0 | fclose(f); |
292 | 0 | } else { |
293 | 0 | error(errSyntaxError, -1, |
294 | 0 | "Couldn't find ToUnicode CMap file for '{1:t}'", |
295 | 0 | name); |
296 | 0 | } |
297 | 0 | delete name; |
298 | 0 | } |
299 | 0 | pst->getToken(tok1, sizeof(tok1), &n1); |
300 | 0 | } else if (!strcmp(tok2, "beginbfchar")) { |
301 | 0 | while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
302 | 0 | if (!strcmp(tok1, "endbfchar")) { |
303 | 0 | break; |
304 | 0 | } |
305 | 0 | if (!pst->getToken(tok2, sizeof(tok2), &n2) || |
306 | 0 | !strcmp(tok2, "endbfchar")) { |
307 | 0 | error(errSyntaxWarning, -1, |
308 | 0 | "Illegal entry in bfchar block in ToUnicode CMap"); |
309 | 0 | break; |
310 | 0 | } |
311 | 0 | if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' && |
312 | 0 | tok2[0] == '<' && tok2[n2 - 1] == '>')) { |
313 | 0 | error(errSyntaxWarning, -1, |
314 | 0 | "Illegal entry in bfchar block in ToUnicode CMap"); |
315 | 0 | continue; |
316 | 0 | } |
317 | 0 | tok1[n1 - 1] = tok2[n2 - 1] = '\0'; |
318 | 0 | if (!parseHex(tok1 + 1, n1 - 2, &code1)) { |
319 | 0 | error(errSyntaxWarning, -1, |
320 | 0 | "Illegal entry in bfchar block in ToUnicode CMap"); |
321 | 0 | continue; |
322 | 0 | } |
323 | 0 | if (code1 > maxCode) { |
324 | 0 | error(errSyntaxWarning, -1, |
325 | 0 | "Invalid entry in bfchar block in ToUnicode CMap"); |
326 | 0 | } |
327 | 0 | addMapping(code1, tok2 + 1, n2 - 2, 0); |
328 | 0 | ok = gTrue; |
329 | 0 | } |
330 | 0 | pst->getToken(tok1, sizeof(tok1), &n1); |
331 | 0 | } else if (!strcmp(tok2, "beginbfrange")) { |
332 | 0 | while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
333 | 0 | if (!strcmp(tok1, "endbfrange")) { |
334 | 0 | break; |
335 | 0 | } |
336 | 0 | if (!pst->getToken(tok2, sizeof(tok2), &n2) || |
337 | 0 | !strcmp(tok2, "endbfrange") || |
338 | 0 | !pst->getToken(tok3, sizeof(tok3), &n3) || |
339 | 0 | !strcmp(tok3, "endbfrange")) { |
340 | 0 | error(errSyntaxWarning, -1, |
341 | 0 | "Illegal entry in bfrange block in ToUnicode CMap"); |
342 | 0 | break; |
343 | 0 | } |
344 | 0 | if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' && |
345 | 0 | tok2[0] == '<' && tok2[n2 - 1] == '>')) { |
346 | 0 | error(errSyntaxWarning, |
347 | 0 | -1, "Illegal entry in bfrange block in ToUnicode CMap"); |
348 | 0 | continue; |
349 | 0 | } |
350 | 0 | tok1[n1 - 1] = tok2[n2 - 1] = '\0'; |
351 | 0 | if (!parseHex(tok1 + 1, n1 - 2, &code1) || |
352 | 0 | !parseHex(tok2 + 1, n2 - 2, &code2)) { |
353 | 0 | error(errSyntaxWarning, -1, |
354 | 0 | "Illegal entry in bfrange block in ToUnicode CMap"); |
355 | 0 | continue; |
356 | 0 | } |
357 | 0 | if (code1 > maxCode || code2 > maxCode) { |
358 | 0 | error(errSyntaxWarning, -1, |
359 | 0 | "Invalid entry in bfrange block in ToUnicode CMap"); |
360 | 0 | if (code2 > maxCode) { |
361 | 0 | code2 = maxCode; |
362 | 0 | } |
363 | 0 | } |
364 | 0 | if (!strcmp(tok3, "[")) { |
365 | 0 | i = 0; |
366 | 0 | while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
367 | 0 | if (!strcmp(tok1, "]")) { |
368 | 0 | break; |
369 | 0 | } |
370 | 0 | if (tok1[0] == '<' && tok1[n1 - 1] == '>') { |
371 | 0 | if (code1 + i <= code2) { |
372 | 0 | tok1[n1 - 1] = '\0'; |
373 | 0 | addMapping(code1 + i, tok1 + 1, n1 - 2, 0); |
374 | 0 | ok = gTrue; |
375 | 0 | } |
376 | 0 | } else { |
377 | 0 | error(errSyntaxWarning, -1, |
378 | 0 | "Illegal entry in bfrange block in ToUnicode CMap"); |
379 | 0 | } |
380 | 0 | ++i; |
381 | 0 | } |
382 | 0 | } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') { |
383 | 0 | tok3[n3 - 1] = '\0'; |
384 | 0 | for (i = 0; code1 <= code2; ++code1, ++i) { |
385 | 0 | addMapping(code1, tok3 + 1, n3 - 2, i); |
386 | 0 | ok = gTrue; |
387 | 0 | } |
388 | 0 | } else { |
389 | 0 | error(errSyntaxWarning, -1, |
390 | 0 | "Illegal entry in bfrange block in ToUnicode CMap"); |
391 | 0 | } |
392 | 0 | } |
393 | 0 | pst->getToken(tok1, sizeof(tok1), &n1); |
394 | 0 | } else if (!strcmp(tok2, "begincidchar")) { |
395 | | // the begincidchar operator is not allowed in ToUnicode CMaps, |
396 | | // but some buggy PDF generators incorrectly use |
397 | | // code-to-CID-type CMaps here |
398 | 0 | error(errSyntaxWarning, -1, |
399 | 0 | "Invalid 'begincidchar' operator in ToUnicode CMap"); |
400 | 0 | while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
401 | 0 | if (!strcmp(tok1, "endcidchar")) { |
402 | 0 | break; |
403 | 0 | } |
404 | 0 | if (!pst->getToken(tok2, sizeof(tok2), &n2) || |
405 | 0 | !strcmp(tok2, "endcidchar")) { |
406 | 0 | error(errSyntaxWarning, -1, |
407 | 0 | "Illegal entry in cidchar block in ToUnicode CMap"); |
408 | 0 | break; |
409 | 0 | } |
410 | 0 | if (!(tok1[0] == '<' && tok1[n1 - 1] == '>')) { |
411 | 0 | error(errSyntaxWarning, -1, |
412 | 0 | "Illegal entry in cidchar block in ToUnicode CMap"); |
413 | 0 | continue; |
414 | 0 | } |
415 | 0 | tok1[n1 - 1] = '\0'; |
416 | 0 | if (!parseHex(tok1 + 1, n1 - 2, &code1)) { |
417 | 0 | error(errSyntaxWarning, -1, |
418 | 0 | "Illegal entry in cidchar block in ToUnicode CMap"); |
419 | 0 | continue; |
420 | 0 | } |
421 | 0 | if (code1 > maxCode) { |
422 | 0 | error(errSyntaxWarning, -1, |
423 | 0 | "Invalid entry in cidchar block in ToUnicode CMap"); |
424 | 0 | } |
425 | 0 | addMappingInt(code1, atoi(tok2)); |
426 | 0 | ok = gTrue; |
427 | 0 | } |
428 | 0 | pst->getToken(tok1, sizeof(tok1), &n1); |
429 | 0 | } else if (!strcmp(tok2, "begincidrange")) { |
430 | | // the begincidrange operator is not allowed in ToUnicode CMaps, |
431 | | // but some buggy PDF generators incorrectly use |
432 | | // code-to-CID-type CMaps here |
433 | 0 | error(errSyntaxWarning, -1, |
434 | 0 | "Invalid 'begincidrange' operator in ToUnicode CMap"); |
435 | 0 | while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
436 | 0 | if (!strcmp(tok1, "endcidrange")) { |
437 | 0 | break; |
438 | 0 | } |
439 | 0 | if (!pst->getToken(tok2, sizeof(tok2), &n2) || |
440 | 0 | !strcmp(tok2, "endcidrange") || |
441 | 0 | !pst->getToken(tok3, sizeof(tok3), &n3) || |
442 | 0 | !strcmp(tok3, "endcidrange")) { |
443 | 0 | error(errSyntaxWarning, -1, |
444 | 0 | "Illegal entry in cidrange block in ToUnicode CMap"); |
445 | 0 | break; |
446 | 0 | } |
447 | 0 | if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' && |
448 | 0 | tok2[0] == '<' && tok2[n2 - 1] == '>')) { |
449 | 0 | error(errSyntaxWarning, |
450 | 0 | -1, "Illegal entry in cidrange block in ToUnicode CMap"); |
451 | 0 | continue; |
452 | 0 | } |
453 | 0 | tok1[n1 - 1] = tok2[n2 - 1] = '\0'; |
454 | 0 | if (!parseHex(tok1 + 1, n1 - 2, &code1) || |
455 | 0 | !parseHex(tok2 + 1, n2 - 2, &code2)) { |
456 | 0 | error(errSyntaxWarning, -1, |
457 | 0 | "Illegal entry in cidrange block in ToUnicode CMap"); |
458 | 0 | continue; |
459 | 0 | } |
460 | 0 | if (code1 > maxCode || code2 > maxCode) { |
461 | 0 | error(errSyntaxWarning, -1, |
462 | 0 | "Invalid entry in cidrange block in ToUnicode CMap"); |
463 | 0 | if (code2 > maxCode) { |
464 | 0 | code2 = maxCode; |
465 | 0 | } |
466 | 0 | } |
467 | 0 | for (i = atoi(tok3); code1 <= code2; ++code1, ++i) { |
468 | 0 | addMappingInt(code1, i); |
469 | 0 | ok = gTrue; |
470 | 0 | } |
471 | 0 | } |
472 | 0 | pst->getToken(tok1, sizeof(tok1), &n1); |
473 | 0 | } else { |
474 | 0 | strcpy(tok1, tok2); |
475 | 0 | n1 = n2; |
476 | 0 | } |
477 | 0 | } |
478 | 0 | delete pst; |
479 | 0 | return ok; |
480 | 0 | } |
481 | | |
482 | | void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n, |
483 | 0 | int offset) { |
484 | 0 | CharCode oldLen, i; |
485 | 0 | Unicode u[maxUnicodeString]; |
486 | 0 | int uLen, j; |
487 | |
|
488 | 0 | if (code > 0xffffff) { |
489 | | // This is an arbitrary limit to avoid integer overflow issues. |
490 | | // (I've seen CMaps with mappings for <ffffffff>.) |
491 | 0 | return; |
492 | 0 | } |
493 | 0 | if ((uLen = parseUTF16String(uStr, n, u)) == 0) { |
494 | 0 | return; |
495 | 0 | } |
496 | 0 | if (code >= mapLen) { |
497 | 0 | oldLen = mapLen; |
498 | 0 | mapLen = mapLen ? 2 * mapLen : 256; |
499 | 0 | if (code >= mapLen) { |
500 | 0 | mapLen = (code + 256) & ~255; |
501 | 0 | } |
502 | 0 | map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode)); |
503 | 0 | for (i = oldLen; i < mapLen; ++i) { |
504 | 0 | map[i] = 0; |
505 | 0 | } |
506 | 0 | } |
507 | 0 | if (uLen == 1) { |
508 | 0 | map[code] = u[0] + offset; |
509 | 0 | } else { |
510 | 0 | if (sMapLen >= sMapSize) { |
511 | 0 | sMapSize = sMapSize + 16; |
512 | 0 | sMap = (CharCodeToUnicodeString *) |
513 | 0 | greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString)); |
514 | 0 | } |
515 | 0 | map[code] = 0; |
516 | 0 | sMap[sMapLen].c = code; |
517 | 0 | for (j = 0; j < uLen; ++j) { |
518 | 0 | sMap[sMapLen].u[j] = u[j]; |
519 | 0 | } |
520 | 0 | sMap[sMapLen].u[uLen - 1] += offset; |
521 | 0 | sMap[sMapLen].len = uLen; |
522 | 0 | ++sMapLen; |
523 | 0 | } |
524 | 0 | } |
525 | | |
526 | | // Convert a UTF-16BE hex string into a sequence of up to |
527 | | // maxUnicodeString Unicode chars. |
528 | 0 | int CharCodeToUnicode::parseUTF16String(char *uStr, int n, Unicode *uOut) { |
529 | 0 | int i = 0; |
530 | 0 | int uLen = 0; |
531 | 0 | while (i < n) { |
532 | 0 | Unicode u; |
533 | 0 | int j = n; |
534 | 0 | if (j - i > 4) { |
535 | 0 | j = i + 4; |
536 | 0 | } |
537 | 0 | if (!parseHex(uStr + i, j - i, &u)) { |
538 | 0 | error(errSyntaxWarning, -1, "Illegal entry in ToUnicode CMap"); |
539 | 0 | return 0; |
540 | 0 | } |
541 | | // look for a UTF-16 pair |
542 | 0 | if (uLen > 0 && uOut[uLen-1] >= 0xd800 && uOut[uLen-1] <= 0xdbff && |
543 | 0 | u >= 0xdc00 && u <= 0xdfff) { |
544 | 0 | uOut[uLen-1] = 0x10000 + ((uOut[uLen-1] & 0x03ff) << 10) + (u & 0x03ff); |
545 | 0 | } else { |
546 | 0 | if (uLen < maxUnicodeString) { |
547 | 0 | uOut[uLen++] = u; |
548 | 0 | } |
549 | 0 | } |
550 | 0 | i = j; |
551 | 0 | } |
552 | 0 | return uLen; |
553 | 0 | } |
554 | | |
555 | 0 | void CharCodeToUnicode::addMappingInt(CharCode code, Unicode u) { |
556 | 0 | CharCode oldLen, i; |
557 | |
|
558 | 0 | if (code > 0xffffff) { |
559 | | // This is an arbitrary limit to avoid integer overflow issues. |
560 | | // (I've seen CMaps with mappings for <ffffffff>.) |
561 | 0 | return; |
562 | 0 | } |
563 | 0 | if (code >= mapLen) { |
564 | 0 | oldLen = mapLen; |
565 | 0 | mapLen = mapLen ? 2 * mapLen : 256; |
566 | 0 | if (code >= mapLen) { |
567 | 0 | mapLen = (code + 256) & ~255; |
568 | 0 | } |
569 | 0 | map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode)); |
570 | 0 | for (i = oldLen; i < mapLen; ++i) { |
571 | 0 | map[i] = 0; |
572 | 0 | } |
573 | 0 | } |
574 | 0 | map[code] = u; |
575 | 0 | } |
576 | | |
577 | 0 | CharCodeToUnicode::CharCodeToUnicode() { |
578 | 0 | tag = NULL; |
579 | 0 | map = NULL; |
580 | 0 | mapLen = 0; |
581 | 0 | sMap = NULL; |
582 | 0 | sMapLen = sMapSize = 0; |
583 | 0 | refCnt = 1; |
584 | 0 | } |
585 | | |
586 | 0 | CharCodeToUnicode::CharCodeToUnicode(GString *tagA) { |
587 | 0 | CharCode i; |
588 | |
|
589 | 0 | tag = tagA; |
590 | 0 | mapLen = 256; |
591 | 0 | map = (Unicode *)gmallocn(mapLen, sizeof(Unicode)); |
592 | 0 | for (i = 0; i < mapLen; ++i) { |
593 | 0 | map[i] = 0; |
594 | 0 | } |
595 | 0 | sMap = NULL; |
596 | 0 | sMapLen = sMapSize = 0; |
597 | 0 | refCnt = 1; |
598 | 0 | } |
599 | | |
600 | | CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA, |
601 | | CharCode mapLenA, GBool copyMap, |
602 | | CharCodeToUnicodeString *sMapA, |
603 | 0 | int sMapLenA, int sMapSizeA) { |
604 | 0 | tag = tagA; |
605 | 0 | mapLen = mapLenA; |
606 | 0 | if (copyMap) { |
607 | 0 | map = (Unicode *)gmallocn(mapLen, sizeof(Unicode)); |
608 | 0 | memcpy(map, mapA, mapLen * sizeof(Unicode)); |
609 | 0 | } else { |
610 | 0 | map = mapA; |
611 | 0 | } |
612 | 0 | sMap = sMapA; |
613 | 0 | sMapLen = sMapLenA; |
614 | 0 | sMapSize = sMapSizeA; |
615 | 0 | refCnt = 1; |
616 | 0 | } |
617 | | |
618 | 0 | CharCodeToUnicode::~CharCodeToUnicode() { |
619 | 0 | if (tag) { |
620 | 0 | delete tag; |
621 | 0 | } |
622 | 0 | gfree(map); |
623 | 0 | gfree(sMap); |
624 | 0 | } |
625 | | |
626 | 0 | void CharCodeToUnicode::incRefCnt() { |
627 | 0 | #if MULTITHREADED |
628 | 0 | gAtomicIncrement(&refCnt); |
629 | | #else |
630 | | ++refCnt; |
631 | | #endif |
632 | 0 | } |
633 | | |
634 | 0 | void CharCodeToUnicode::decRefCnt() { |
635 | 0 | GBool done; |
636 | |
|
637 | 0 | #if MULTITHREADED |
638 | 0 | done = gAtomicDecrement(&refCnt) == 0; |
639 | | #else |
640 | | done = --refCnt == 0; |
641 | | #endif |
642 | 0 | if (done) { |
643 | 0 | delete this; |
644 | 0 | } |
645 | 0 | } |
646 | | |
647 | 0 | GBool CharCodeToUnicode::match(GString *tagA) { |
648 | 0 | return tag && !tag->cmp(tagA); |
649 | 0 | } |
650 | | |
651 | 0 | void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) { |
652 | 0 | int i, j; |
653 | |
|
654 | 0 | if (!map) { |
655 | 0 | return; |
656 | 0 | } |
657 | 0 | if (len == 1) { |
658 | 0 | map[c] = u[0]; |
659 | 0 | } else { |
660 | 0 | for (i = 0; i < sMapLen; ++i) { |
661 | 0 | if (sMap[i].c == c) { |
662 | 0 | break; |
663 | 0 | } |
664 | 0 | } |
665 | 0 | if (i == sMapLen) { |
666 | 0 | if (sMapLen == sMapSize) { |
667 | 0 | sMapSize += 8; |
668 | 0 | sMap = (CharCodeToUnicodeString *) |
669 | 0 | greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString)); |
670 | 0 | } |
671 | 0 | ++sMapLen; |
672 | 0 | } |
673 | 0 | map[c] = 0; |
674 | 0 | sMap[i].c = c; |
675 | 0 | sMap[i].len = len; |
676 | 0 | for (j = 0; j < len && j < maxUnicodeString; ++j) { |
677 | 0 | sMap[i].u[j] = u[j]; |
678 | 0 | } |
679 | 0 | } |
680 | 0 | } |
681 | | |
682 | 0 | int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) { |
683 | 0 | int i, j; |
684 | |
|
685 | 0 | if (!map) { |
686 | 0 | u[0] = (Unicode)c; |
687 | 0 | return 1; |
688 | 0 | } |
689 | 0 | if (c >= mapLen) { |
690 | 0 | return 0; |
691 | 0 | } |
692 | 0 | if (map[c]) { |
693 | 0 | u[0] = map[c]; |
694 | 0 | return 1; |
695 | 0 | } |
696 | 0 | for (i = 0; i < sMapLen; ++i) { |
697 | 0 | if (sMap[i].c == c) { |
698 | 0 | for (j = 0; j < sMap[i].len && j < size; ++j) { |
699 | 0 | u[j] = sMap[i].u[j]; |
700 | 0 | } |
701 | 0 | return j; |
702 | 0 | } |
703 | 0 | } |
704 | 0 | return 0; |
705 | 0 | } |
706 | | |
707 | | //------------------------------------------------------------------------ |
708 | | |
709 | 4.81k | CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) { |
710 | 4.81k | int i; |
711 | | |
712 | 4.81k | size = sizeA; |
713 | 4.81k | cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *)); |
714 | 24.0k | for (i = 0; i < size; ++i) { |
715 | 19.2k | cache[i] = NULL; |
716 | 19.2k | } |
717 | 4.81k | } |
718 | | |
719 | 4.81k | CharCodeToUnicodeCache::~CharCodeToUnicodeCache() { |
720 | 4.81k | int i; |
721 | | |
722 | 24.0k | for (i = 0; i < size; ++i) { |
723 | 19.2k | if (cache[i]) { |
724 | 0 | cache[i]->decRefCnt(); |
725 | 0 | } |
726 | 19.2k | } |
727 | 4.81k | gfree(cache); |
728 | 4.81k | } |
729 | | |
730 | 0 | CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) { |
731 | 0 | CharCodeToUnicode *ctu; |
732 | 0 | int i, j; |
733 | |
|
734 | 0 | if (cache[0] && cache[0]->match(tag)) { |
735 | 0 | cache[0]->incRefCnt(); |
736 | 0 | return cache[0]; |
737 | 0 | } |
738 | 0 | for (i = 1; i < size; ++i) { |
739 | 0 | if (cache[i] && cache[i]->match(tag)) { |
740 | 0 | ctu = cache[i]; |
741 | 0 | for (j = i; j >= 1; --j) { |
742 | 0 | cache[j] = cache[j - 1]; |
743 | 0 | } |
744 | 0 | cache[0] = ctu; |
745 | 0 | ctu->incRefCnt(); |
746 | 0 | return ctu; |
747 | 0 | } |
748 | 0 | } |
749 | 0 | return NULL; |
750 | 0 | } |
751 | | |
752 | 0 | void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) { |
753 | 0 | int i; |
754 | |
|
755 | 0 | if (cache[size - 1]) { |
756 | 0 | cache[size - 1]->decRefCnt(); |
757 | 0 | } |
758 | 0 | for (i = size - 1; i >= 1; --i) { |
759 | 0 | cache[i] = cache[i - 1]; |
760 | 0 | } |
761 | 0 | cache[0] = ctu; |
762 | 0 | ctu->incRefCnt(); |
763 | 0 | } |