/src/poppler/poppler/CMap.cc
Line | Count | Source |
1 | | //======================================================================== |
2 | | // |
3 | | // CMap.cc |
4 | | // |
5 | | // Copyright 2001-2003 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | //======================================================================== |
10 | | // |
11 | | // Modified under the Poppler project - http://poppler.freedesktop.org |
12 | | // |
13 | | // All changes made under the Poppler project to this file are licensed |
14 | | // under GPL version 2 or later |
15 | | // |
16 | | // Copyright (C) 2008 Koji Otani <sho@bbr.jp> |
17 | | // Copyright (C) 2008, 2009, 2017-2021, 2024, 2025 Albert Astals Cid <aacid@kde.org> |
18 | | // Copyright (C) 2013 Fabio D'Urso <fabiodurso@hotmail.it> |
19 | | // Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com> |
20 | | // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> |
21 | | // Copyright (C) 2019 LE GARREC Vincent <legarrec.vincent@gmail.com> |
22 | | // Copyright (C) 2025, 2026 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> |
23 | | // Copyright (C) 2025 Arnav V <arnav0872@gmail.com> |
24 | | // |
25 | | // To see a description of the changes please see the Changelog file that |
26 | | // came with your tarball or type make ChangeLog if you are building from git |
27 | | // |
28 | | //======================================================================== |
29 | | |
30 | | #include <config.h> |
31 | | |
32 | | #include <cstdio> |
33 | | #include <cstdlib> |
34 | | #include <cstring> |
35 | | #include "goo/gmem.h" |
36 | | #include "goo/gfile.h" |
37 | | #include "goo/GooString.h" |
38 | | #include "Error.h" |
39 | | #include "GlobalParams.h" |
40 | | #include "PSTokenizer.h" |
41 | | #include "CMap.h" |
42 | | #include "Object.h" |
43 | | |
44 | | //------------------------------------------------------------------------ |
45 | | |
46 | | struct CMapVectorEntry |
47 | | { |
48 | | bool isVector; |
49 | | union { |
50 | | CMapVectorEntry *vector; |
51 | | CID cid; |
52 | | }; |
53 | | }; |
54 | | |
55 | | //------------------------------------------------------------------------ |
56 | | |
// Character source for PSTokenizer backed by a C stdio stream.
// 'data' is the FILE * to read from; returns the next byte, or EOF.
static int getCharFromFile(void *data)
{
    FILE *const stream = static_cast<FILE *>(data);
    return fgetc(stream);
}
61 | | |
62 | | static int getCharFromStream(void *data) |
63 | 30.7M | { |
64 | 30.7M | return ((Stream *)data)->getChar(); |
65 | 30.7M | } |
66 | | |
67 | | //------------------------------------------------------------------------ |
68 | | |
69 | | std::shared_ptr<CMap> CMap::parse(const std::string &collectionA, Object *obj) |
70 | 195k | { |
71 | 195k | RefRecursionChecker recursion; |
72 | 195k | return parse(collectionA, obj, recursion); |
73 | 195k | } |
74 | | |
75 | | std::shared_ptr<CMap> CMap::parse(const std::string &collectionA, Object *obj, RefRecursionChecker &recursion) |
76 | 195k | { |
77 | 195k | std::shared_ptr<CMap> cMap; |
78 | | |
79 | 195k | if (obj->isName()) { |
80 | 189k | const GooString cMapNameA(obj->getNameString()); |
81 | 189k | if (!(cMap = globalParams->getCMap(collectionA, cMapNameA.toStr()))) { |
82 | 6.21k | error(errSyntaxError, -1, "Unknown CMap '{0:t}' for character collection '{1:s}'", &cMapNameA, collectionA.c_str()); |
83 | 6.21k | } |
84 | 189k | } else if (obj->isStream()) { |
85 | 4.91k | if (!(cMap = CMap::parse(nullptr, collectionA, obj->getStream(), recursion))) { |
86 | 0 | error(errSyntaxError, -1, "Invalid CMap in Type 0 font"); |
87 | 0 | } |
88 | 4.91k | } else { |
89 | 916 | error(errSyntaxError, -1, "Invalid Encoding in Type 0 font"); |
90 | 916 | return {}; |
91 | 916 | } |
92 | 194k | return cMap; |
93 | 195k | } |
94 | | |
95 | | std::shared_ptr<CMap> CMap::parse(CMapCache *cache, const std::string &collectionA, const std::string &cMapNameA) |
96 | 20.1k | { |
97 | 20.1k | FILE *f; |
98 | | |
99 | 20.1k | if (!(f = globalParams->findCMapFile(collectionA, cMapNameA))) { |
100 | | |
101 | | // Check for an identity CMap. |
102 | 19.7k | if (cMapNameA == "Identity" || cMapNameA == "Identity-H") { |
103 | 13.0k | return std::shared_ptr<CMap>(new CMap(std::make_unique<GooString>(collectionA), std::make_unique<GooString>(cMapNameA), GfxFont::WritingMode::Horizontal)); |
104 | 13.0k | } |
105 | 6.69k | if (cMapNameA == "Identity-V") { |
106 | 459 | return std::shared_ptr<CMap>(new CMap(std::make_unique<GooString>(collectionA), std::make_unique<GooString>(cMapNameA), GfxFont::WritingMode::Vertical)); |
107 | 459 | } |
108 | | |
109 | 6.23k | error(errSyntaxError, -1, "Couldn't find '{0:s}' CMap file for '{1:s}' collection", cMapNameA.c_str(), collectionA.c_str()); |
110 | 6.23k | return {}; |
111 | 6.69k | } |
112 | | |
113 | 375 | auto cMap = std::shared_ptr<CMap>(new CMap(std::make_unique<GooString>(collectionA), std::make_unique<GooString>(cMapNameA))); |
114 | 375 | cMap->parse2(cache, &getCharFromFile, f); |
115 | | |
116 | 375 | fclose(f); |
117 | | |
118 | 375 | return cMap; |
119 | 20.1k | } |
120 | | |
121 | | std::shared_ptr<CMap> CMap::parse(CMapCache *cache, const std::string &collectionA, Stream *str, RefRecursionChecker &recursion) |
122 | 4.91k | { |
123 | 4.91k | auto cMap = std::shared_ptr<CMap>(new CMap(std::make_unique<GooString>(collectionA), nullptr)); |
124 | 4.91k | Ref ref; |
125 | 4.91k | Object obj1 = str->getDict()->lookup("UseCMap", &ref); |
126 | 4.91k | if (!recursion.insert(ref)) { |
127 | 0 | return {}; |
128 | 0 | } |
129 | 4.91k | if (!obj1.isNull()) { |
130 | 0 | cMap->useCMap(&obj1, recursion); |
131 | 0 | } |
132 | | |
133 | 4.91k | if (str->rewind()) { |
134 | 4.88k | cMap->parse2(cache, &getCharFromStream, str); |
135 | 4.88k | } |
136 | 4.91k | str->close(); |
137 | 4.91k | return cMap; |
138 | 4.91k | } |
139 | | |
140 | | void CMap::parse2(CMapCache *cache, int (*getCharFunc)(void *), void *data) |
141 | 5.26k | { |
142 | 5.26k | PSTokenizer *pst; |
143 | 5.26k | char tok1[256], tok2[256], tok3[256]; |
144 | 5.26k | int n1, n2, n3; |
145 | 5.26k | unsigned int start = 0, end = 0, code; |
146 | | |
147 | 5.26k | pst = new PSTokenizer(getCharFunc, data); |
148 | 5.26k | pst->getToken(tok1, sizeof(tok1), &n1); |
149 | 654k | while (pst->getToken(tok2, sizeof(tok2), &n2)) { |
150 | 648k | if (!strcmp(tok2, "usecmap")) { |
151 | 102 | if (tok1[0] == '/') { |
152 | 102 | useCMap(cache, tok1 + 1); |
153 | 102 | } |
154 | 102 | pst->getToken(tok1, sizeof(tok1), &n1); |
155 | 648k | } else if (!strcmp(tok1, "/WMode")) { |
156 | 3.12k | const int wModeI = atoi(tok2); |
157 | 3.12k | wMode = wModeI == 1 ? GfxFont::WritingMode::Vertical : GfxFont::WritingMode::Horizontal; |
158 | 3.12k | pst->getToken(tok1, sizeof(tok1), &n1); |
159 | 645k | } else if (!strcmp(tok2, "begincidchar")) { |
160 | 2.26M | while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
161 | 2.26M | if (!strcmp(tok1, "endcidchar")) { |
162 | 21.9k | break; |
163 | 21.9k | } |
164 | 2.24M | if (!pst->getToken(tok2, sizeof(tok2), &n2) || !strcmp(tok2, "endcidchar")) { |
165 | 550 | error(errSyntaxError, -1, "Illegal entry in cidchar block in CMap"); |
166 | 550 | break; |
167 | 550 | } |
168 | 2.24M | if (tok1[0] != '<' || tok1[n1 - 1] != '>' || n1 < 4 || (n1 & 1) != 0) { |
169 | 237k | error(errSyntaxError, -1, "Illegal entry in cidchar block in CMap"); |
170 | 237k | continue; |
171 | 237k | } |
172 | 2.00M | tok1[n1 - 1] = '\0'; |
173 | 2.00M | if (sscanf(tok1 + 1, "%x", &code) != 1) { |
174 | 2.45k | error(errSyntaxError, -1, "Illegal entry in cidchar block in CMap"); |
175 | 2.45k | continue; |
176 | 2.45k | } |
177 | 2.00M | n1 = (n1 - 2) / 2; |
178 | 2.00M | addCIDs(code, code, n1, (CID)atoi(tok2)); |
179 | 2.00M | } |
180 | 23.0k | pst->getToken(tok1, sizeof(tok1), &n1); |
181 | 622k | } else if (!strcmp(tok2, "begincidrange")) { |
182 | 1.28M | while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
183 | 1.28M | if (!strcmp(tok1, "endcidrange")) { |
184 | 14.8k | break; |
185 | 14.8k | } |
186 | 1.27M | if (!pst->getToken(tok2, sizeof(tok2), &n2) || !strcmp(tok2, "endcidrange") || !pst->getToken(tok3, sizeof(tok3), &n3) || !strcmp(tok3, "endcidrange")) { |
187 | 1.36k | error(errSyntaxError, -1, "Illegal entry in cidrange block in CMap"); |
188 | 1.36k | break; |
189 | 1.36k | } |
190 | 1.26M | if (tok1[0] == '<' && tok2[0] == '<' && n1 == n2 && n1 >= 4 && (n1 & 1) == 0) { |
191 | 1.07M | tok1[n1 - 1] = tok2[n1 - 1] = '\0'; |
192 | 1.07M | sscanf(tok1 + 1, "%x", &start); |
193 | 1.07M | sscanf(tok2 + 1, "%x", &end); |
194 | 1.07M | n1 = (n1 - 2) / 2; |
195 | 1.07M | addCIDs(start, end, n1, (CID)atoi(tok3)); |
196 | 1.07M | } |
197 | 1.26M | } |
198 | 16.5k | pst->getToken(tok1, sizeof(tok1), &n1); |
199 | 606k | } else { |
200 | 606k | strcpy(tok1, tok2); |
201 | 606k | } |
202 | 648k | } |
203 | 5.26k | delete pst; |
204 | 5.26k | } |
205 | | |
206 | 5.28k | CMap::CMap(std::unique_ptr<GooString> &&collectionA, std::unique_ptr<GooString> &&cMapNameA) : collection(std::move(collectionA)), cMapName(std::move(cMapNameA)) |
207 | 5.28k | { |
208 | 5.28k | isIdent = false; |
209 | 5.28k | wMode = GfxFont::WritingMode::Horizontal; |
210 | 5.28k | vector = (CMapVectorEntry *)gmallocn(256, sizeof(CMapVectorEntry)); |
211 | 1.35M | for (int i = 0; i < 256; ++i) { |
212 | 1.35M | vector[i].isVector = false; |
213 | 1.35M | vector[i].cid = 0; |
214 | 1.35M | } |
215 | 5.28k | } |
216 | | |
217 | 13.4k | CMap::CMap(std::unique_ptr<GooString> &&collectionA, std::unique_ptr<GooString> &&cMapNameA, GfxFont::WritingMode wModeA) : collection(std::move(collectionA)), cMapName(std::move(cMapNameA)) |
218 | 13.4k | { |
219 | 13.4k | isIdent = true; |
220 | 13.4k | wMode = wModeA; |
221 | 13.4k | vector = nullptr; |
222 | 13.4k | } |
223 | | |
224 | | void CMap::useCMap(CMapCache *cache, const char *useName) |
225 | 102 | { |
226 | 102 | std::shared_ptr<CMap> subCMap; |
227 | | |
228 | 102 | const GooString useNameStr(useName); |
229 | | // if cache is non-NULL, we already have a lock, and we can use |
230 | | // CMapCache::getCMap() directly; otherwise, we need to use |
231 | | // GlobalParams::getCMap() in order to acqure the lock need to use |
232 | | // GlobalParams::getCMap |
233 | 102 | if (cache) { |
234 | 102 | subCMap = cache->getCMap(collection->toStr(), useNameStr.toStr()); |
235 | 102 | } else { |
236 | 0 | subCMap = globalParams->getCMap(collection->toStr(), useNameStr.toStr()); |
237 | 0 | } |
238 | 102 | if (!subCMap) { |
239 | 0 | return; |
240 | 0 | } |
241 | 102 | isIdent = subCMap->isIdent; |
242 | 102 | if (subCMap->vector) { |
243 | 102 | copyVector(vector, subCMap->vector); |
244 | 102 | } |
245 | 102 | } |
246 | | |
247 | | void CMap::useCMap(Object *obj, RefRecursionChecker &recursion) |
248 | 0 | { |
249 | 0 | std::shared_ptr<CMap> subCMap = CMap::parse(collection->toStr(), obj, recursion); |
250 | 0 | if (!subCMap) { |
251 | 0 | return; |
252 | 0 | } |
253 | 0 | isIdent = subCMap->isIdent; |
254 | 0 | if (subCMap->vector) { |
255 | 0 | copyVector(vector, subCMap->vector); |
256 | 0 | } |
257 | 0 | } |
258 | | |
259 | | void CMap::copyVector(CMapVectorEntry *dest, CMapVectorEntry *src) |
260 | 20.8k | { |
261 | 20.8k | int i, j; |
262 | | |
263 | 5.34M | for (i = 0; i < 256; ++i) { |
264 | 5.32M | if (src[i].isVector) { |
265 | 20.7k | if (!dest[i].isVector) { |
266 | 20.7k | dest[i].isVector = true; |
267 | 20.7k | dest[i].vector = (CMapVectorEntry *)gmallocn(256, sizeof(CMapVectorEntry)); |
268 | 5.32M | for (j = 0; j < 256; ++j) { |
269 | 5.30M | dest[i].vector[j].isVector = false; |
270 | 5.30M | dest[i].vector[j].cid = 0; |
271 | 5.30M | } |
272 | 20.7k | } |
273 | 20.7k | copyVector(dest[i].vector, src[i].vector); |
274 | 5.30M | } else { |
275 | 5.30M | if (dest[i].isVector) { |
276 | 0 | error(errSyntaxError, -1, "Collision in usecmap"); |
277 | 5.30M | } else { |
278 | 5.30M | dest[i].cid = src[i].cid; |
279 | 5.30M | } |
280 | 5.30M | } |
281 | 5.32M | } |
282 | 20.8k | } |
283 | | |
284 | | void CMap::addCIDs(unsigned int start, unsigned int end, unsigned int nBytes, CID firstCID) |
285 | 3.07M | { |
286 | 3.07M | if (nBytes > 4) { |
287 | 11.1k | error(errSyntaxError, -1, "Illegal entry in cidchar block in CMap"); |
288 | 11.1k | return; |
289 | 11.1k | } |
290 | | |
291 | 3.06M | const unsigned int start1 = start & 0xffffff00; |
292 | 3.06M | const unsigned int end1 = end & 0xffffff00; |
293 | 8.36M | for (unsigned int i = start1; i <= end1; i += 0x100) { |
294 | 5.29M | CMapVectorEntry *vec = vector; |
295 | 16.0M | for (unsigned int j = nBytes - 1; j >= 1; --j) { |
296 | 10.7M | const int byte = (i >> (8 * j)) & 0xff; |
297 | 10.7M | if (!vec[byte].isVector) { |
298 | 2.16M | vec[byte].isVector = true; |
299 | 2.16M | vec[byte].vector = (CMapVectorEntry *)gmallocn(256, sizeof(CMapVectorEntry)); |
300 | 555M | for (unsigned int k = 0; k < 256; ++k) { |
301 | 553M | vec[byte].vector[k].isVector = false; |
302 | 553M | vec[byte].vector[k].cid = 0; |
303 | 553M | } |
304 | 2.16M | } |
305 | 10.7M | vec = vec[byte].vector; |
306 | 10.7M | } |
307 | 5.29M | const int byte0 = (i < start) ? (start & 0xff) : 0; |
308 | 5.29M | const int byte1 = (i + 0xff > end) ? (end & 0xff) : 0xff; |
309 | 582M | for (int byte = byte0; byte <= byte1; ++byte) { |
310 | 576M | if (vec[byte].isVector) { |
311 | 3.19k | error(errSyntaxError, -1, "Invalid CID ({0:ux} [{1:ud} bytes]) in CMap", i, nBytes); |
312 | 576M | } else { |
313 | 576M | vec[byte].cid = firstCID + ((i + byte) - start); |
314 | 576M | } |
315 | 576M | } |
316 | 5.29M | } |
317 | 3.06M | } |
318 | | |
319 | | CMap::~CMap() |
320 | 18.7k | { |
321 | 18.7k | if (vector) { |
322 | 5.28k | freeCMapVector(vector); |
323 | 5.28k | } |
324 | 18.7k | } |
325 | | |
326 | | void CMap::freeCMapVector(CMapVectorEntry *vec) |
327 | 2.18M | { |
328 | 2.18M | int i; |
329 | | |
330 | 562M | for (i = 0; i < 256; ++i) { |
331 | 559M | if (vec[i].isVector) { |
332 | 2.18M | freeCMapVector(vec[i].vector); |
333 | 2.18M | } |
334 | 559M | } |
335 | 2.18M | gfree(vec); |
336 | 2.18M | } |
337 | | |
338 | | bool CMap::match(const std::string &collectionA, const std::string &cMapNameA) |
339 | 193k | { |
340 | 193k | return !collection->compare(collectionA) && !cMapName->compare(cMapNameA); |
341 | 193k | } |
342 | | |
343 | | CID CMap::getCID(const char *s, int len, CharCode *c, int *nUsed) |
344 | 6.97M | { |
345 | 6.97M | CMapVectorEntry *vec; |
346 | 6.97M | CharCode cc; |
347 | 6.97M | int n, i; |
348 | | |
349 | 6.97M | vec = vector; |
350 | 6.97M | cc = 0; |
351 | 6.97M | n = 0; |
352 | 7.00M | while (vec && n < len) { |
353 | 412k | i = s[n++] & 0xff; |
354 | 412k | cc = (cc << 8) | i; |
355 | 412k | if (!vec[i].isVector) { |
356 | 383k | *c = cc; |
357 | 383k | *nUsed = n; |
358 | 383k | return vec[i].cid; |
359 | 383k | } |
360 | 28.7k | vec = vec[i].vector; |
361 | 28.7k | } |
362 | 6.58M | if (isIdent && len >= 2) { |
363 | | // identity CMap |
364 | 6.51M | *nUsed = 2; |
365 | 6.51M | *c = cc = ((s[0] & 0xff) << 8) + (s[1] & 0xff); |
366 | 6.51M | return cc; |
367 | 6.51M | } |
368 | 73.9k | *nUsed = 1; |
369 | 73.9k | *c = s[0] & 0xff; |
370 | 73.9k | return 0; |
371 | 6.58M | } |
372 | | |
373 | | void CMap::setReverseMapVector(unsigned int startCode, CMapVectorEntry *vec, unsigned int *rmap, unsigned int rmapSize, unsigned int ncand) |
374 | 41.4k | { |
375 | 41.4k | int i; |
376 | | |
377 | 41.4k | if (vec == nullptr) { |
378 | 0 | return; |
379 | 0 | } |
380 | 10.6M | for (i = 0; i < 256; i++) { |
381 | 10.6M | if (vec[i].isVector) { |
382 | 41.2k | setReverseMapVector((startCode + i) << 8, vec[i].vector, rmap, rmapSize, ncand); |
383 | 10.5M | } else { |
384 | 10.5M | unsigned int cid = vec[i].cid; |
385 | | |
386 | 10.5M | if (cid < rmapSize) { |
387 | 10.5M | unsigned int cand; |
388 | | |
389 | 21.6M | for (cand = 0; cand < ncand; cand++) { |
390 | 14.2M | unsigned int code = startCode + i; |
391 | 14.2M | unsigned int idx = cid * ncand + cand; |
392 | 14.2M | if (rmap[idx] == 0) { |
393 | 1.84M | rmap[idx] = code; |
394 | 1.84M | break; |
395 | 1.84M | } |
396 | 12.4M | if (rmap[idx] == code) { |
397 | 1.35M | break; |
398 | 1.35M | } |
399 | 12.4M | } |
400 | 10.5M | } |
401 | 10.5M | } |
402 | 10.6M | } |
403 | 41.4k | } |
404 | | |
405 | | void CMap::setReverseMap(unsigned int *rmap, unsigned int rmapSize, unsigned int ncand) |
406 | 200 | { |
407 | 200 | setReverseMapVector(0, vector, rmap, rmapSize, ncand); |
408 | 200 | } |
409 | | |
410 | | //------------------------------------------------------------------------ |
411 | | |
412 | 131k | CMapCache::CMapCache() = default; |
413 | | |
414 | | std::shared_ptr<CMap> CMapCache::getCMap(const std::string &collection, const std::string &cMapName) |
415 | 190k | { |
416 | 190k | int i, j; |
417 | | |
418 | 190k | if (cache[0] && cache[0]->match(collection, cMapName)) { |
419 | 154k | return cache[0]; |
420 | 154k | } |
421 | 98.2k | for (i = 1; i < cMapCacheSize; ++i) { |
422 | 78.1k | if (cache[i] && cache[i]->match(collection, cMapName)) { |
423 | 15.5k | std::shared_ptr<CMap> cmap = cache[i]; |
424 | 33.4k | for (j = i; j >= 1; --j) { |
425 | 17.8k | cache[j] = cache[j - 1]; |
426 | 17.8k | } |
427 | 15.5k | cache[0] = cmap; |
428 | 15.5k | return cmap; |
429 | 15.5k | } |
430 | 78.1k | } |
431 | 20.1k | std::shared_ptr<CMap> cmap = CMap::parse(this, collection, cMapName); |
432 | 20.1k | if (cmap) { |
433 | 55.4k | for (j = cMapCacheSize - 1; j >= 1; --j) { |
434 | 41.6k | cache[j] = cache[j - 1]; |
435 | 41.6k | } |
436 | 13.8k | cache[0] = cmap; |
437 | 13.8k | return cmap; |
438 | 13.8k | } |
439 | 6.23k | return {}; |
440 | 20.1k | } |