Coverage Report

Created: 2026-02-10 07:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/poppler/poppler/CMap.cc
Line
Count
Source
1
//========================================================================
2
//
3
// CMap.cc
4
//
5
// Copyright 2001-2003 Glyph & Cog, LLC
6
//
7
//========================================================================
8
9
//========================================================================
10
//
11
// Modified under the Poppler project - http://poppler.freedesktop.org
12
//
13
// All changes made under the Poppler project to this file are licensed
14
// under GPL version 2 or later
15
//
16
// Copyright (C) 2008 Koji Otani <sho@bbr.jp>
17
// Copyright (C) 2008, 2009, 2017-2021, 2024, 2025 Albert Astals Cid <aacid@kde.org>
18
// Copyright (C) 2013 Fabio D'Urso <fabiodurso@hotmail.it>
19
// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
20
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
21
// Copyright (C) 2019 LE GARREC Vincent <legarrec.vincent@gmail.com>
22
// Copyright (C) 2025, 2026 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
23
// Copyright (C) 2025 Arnav V <arnav0872@gmail.com>
24
//
25
// To see a description of the changes please see the Changelog file that
26
// came with your tarball or type make ChangeLog if you are building from git
27
//
28
//========================================================================
29
30
#include <config.h>
31
32
#include <cstdio>
33
#include <cstdlib>
34
#include <cstring>
35
#include "goo/gmem.h"
36
#include "goo/gfile.h"
37
#include "goo/GooString.h"
38
#include "Error.h"
39
#include "GlobalParams.h"
40
#include "PSTokenizer.h"
41
#include "CMap.h"
42
#include "Object.h"
43
44
//------------------------------------------------------------------------
45
46
struct CMapVectorEntry
47
{
48
    bool isVector;
49
    union {
50
        CMapVectorEntry *vector;
51
        CID cid;
52
    };
53
};
54
55
//------------------------------------------------------------------------
56
57
// Character source for PSTokenizer backed by a C stdio stream.
// 'data' is the FILE* to read from; the result is whatever fgetc() returns
// (the next byte, or EOF).
static int getCharFromFile(void *data)
{
    FILE *const file = static_cast<FILE *>(data);
    return fgetc(file);
}
61
62
static int getCharFromStream(void *data)
63
30.7M
{
64
30.7M
    return ((Stream *)data)->getChar();
65
30.7M
}
66
67
//------------------------------------------------------------------------
68
69
std::shared_ptr<CMap> CMap::parse(const std::string &collectionA, Object *obj)
70
195k
{
71
195k
    RefRecursionChecker recursion;
72
195k
    return parse(collectionA, obj, recursion);
73
195k
}
74
75
std::shared_ptr<CMap> CMap::parse(const std::string &collectionA, Object *obj, RefRecursionChecker &recursion)
76
195k
{
77
195k
    std::shared_ptr<CMap> cMap;
78
79
195k
    if (obj->isName()) {
80
189k
        const GooString cMapNameA(obj->getNameString());
81
189k
        if (!(cMap = globalParams->getCMap(collectionA, cMapNameA.toStr()))) {
82
6.21k
            error(errSyntaxError, -1, "Unknown CMap '{0:t}' for character collection '{1:s}'", &cMapNameA, collectionA.c_str());
83
6.21k
        }
84
189k
    } else if (obj->isStream()) {
85
4.91k
        if (!(cMap = CMap::parse(nullptr, collectionA, obj->getStream(), recursion))) {
86
0
            error(errSyntaxError, -1, "Invalid CMap in Type 0 font");
87
0
        }
88
4.91k
    } else {
89
916
        error(errSyntaxError, -1, "Invalid Encoding in Type 0 font");
90
916
        return {};
91
916
    }
92
194k
    return cMap;
93
195k
}
94
95
// Loads a named CMap for a character collection.
// If globalParams can locate a CMap file, the file is parsed (using 'cache'
// for any nested usecmap lookups) and closed. Otherwise the names
// "Identity", "Identity-H" (horizontal) and "Identity-V" (vertical) produce
// an identity CMap. Any other name is reported as an error and yields an
// empty pointer.
std::shared_ptr<CMap> CMap::parse(CMapCache *cache, const std::string &collectionA, const std::string &cMapNameA)
{
    FILE *const cMapFile = globalParams->findCMapFile(collectionA, cMapNameA);
    if (cMapFile) {
        std::shared_ptr<CMap> parsed(new CMap(std::make_unique<GooString>(collectionA), std::make_unique<GooString>(cMapNameA)));
        parsed->parse2(cache, &getCharFromFile, cMapFile);

        fclose(cMapFile);

        return parsed;
    }

    // No file on disk: check for an identity CMap.
    const bool horizontalIdentity = cMapNameA == "Identity" || cMapNameA == "Identity-H";
    if (horizontalIdentity || cMapNameA == "Identity-V") {
        const GfxFont::WritingMode mode = horizontalIdentity ? GfxFont::WritingMode::Horizontal : GfxFont::WritingMode::Vertical;
        return std::shared_ptr<CMap>(new CMap(std::make_unique<GooString>(collectionA), std::make_unique<GooString>(cMapNameA), mode));
    }

    error(errSyntaxError, -1, "Couldn't find '{0:s}' CMap file for '{1:s}' collection", cMapNameA.c_str(), collectionA.c_str());
    return {};
}
120
121
// Parses a CMap embedded in a stream.
// The stream dictionary's "UseCMap" entry, if present, is applied first;
// its reference is registered with 'recursion' and an empty pointer is
// returned if that registration is refused. The stream body is then parsed
// if it can be rewound, and the stream is closed before returning. The
// resulting CMap has no name.
std::shared_ptr<CMap> CMap::parse(CMapCache *cache, const std::string &collectionA, Stream *str, RefRecursionChecker &recursion)
{
    std::shared_ptr<CMap> result(new CMap(std::make_unique<GooString>(collectionA), nullptr));

    Ref useCMapRef;
    Object useCMapObj = str->getDict()->lookup("UseCMap", &useCMapRef);
    if (!recursion.insert(useCMapRef)) {
        return {};
    }
    if (!useCMapObj.isNull()) {
        result->useCMap(&useCMapObj, recursion);
    }

    const bool rewound = str->rewind();
    if (rewound) {
        result->parse2(cache, &getCharFromStream, str);
    }
    str->close();
    return result;
}
139
140
// Tokenizes a CMap program read through getCharFunc(data) and applies the
// operators this parser understands:
//   /Name usecmap               - merge another CMap via useCMap(cache, Name)
//   /WMode n                    - n == 1 selects vertical writing, anything
//                                 else horizontal
//   begincidchar..endcidchar    - "<code> cid" entries
//   begincidrange..endcidrange  - "<start> <end> firstCid" entries
// Any other token is only remembered as the previous token (tok1) so that
// the operand preceding an operator is available when the operator is seen.
// Malformed entries are reported with error() and skipped; a block that is
// terminated in the middle of an entry is abandoned.
void CMap::parse2(CMapCache *cache, int (*getCharFunc)(void *), void *data)
{
    char tok1[256], tok2[256], tok3[256];
    int n1, n2, n3;

    // The tokenizer only lives for the duration of this call.
    PSTokenizer pst(getCharFunc, data);

    pst.getToken(tok1, sizeof(tok1), &n1);
    while (pst.getToken(tok2, sizeof(tok2), &n2)) {
        if (!strcmp(tok2, "usecmap")) {
            if (tok1[0] == '/') {
                useCMap(cache, tok1 + 1);
            }
            pst.getToken(tok1, sizeof(tok1), &n1);
        } else if (!strcmp(tok1, "/WMode")) {
            const int wModeI = atoi(tok2);
            wMode = wModeI == 1 ? GfxFont::WritingMode::Vertical : GfxFont::WritingMode::Horizontal;
            pst.getToken(tok1, sizeof(tok1), &n1);
        } else if (!strcmp(tok2, "begincidchar")) {
            while (pst.getToken(tok1, sizeof(tok1), &n1)) {
                if (!strcmp(tok1, "endcidchar")) {
                    break;
                }
                if (!pst.getToken(tok2, sizeof(tok2), &n2) || !strcmp(tok2, "endcidchar")) {
                    error(errSyntaxError, -1, "Illegal entry in cidchar block in CMap");
                    break;
                }
                // The code must look like <hh..>: at least one byte and an
                // even number of hex digits. Length is validated before the
                // last character is inspected.
                if (n1 < 4 || (n1 & 1) != 0 || tok1[0] != '<' || tok1[n1 - 1] != '>') {
                    error(errSyntaxError, -1, "Illegal entry in cidchar block in CMap");
                    continue;
                }
                tok1[n1 - 1] = '\0';
                unsigned int code = 0;
                if (sscanf(tok1 + 1, "%x", &code) != 1) {
                    error(errSyntaxError, -1, "Illegal entry in cidchar block in CMap");
                    continue;
                }
                const unsigned int nBytes = (n1 - 2) / 2;
                addCIDs(code, code, nBytes, static_cast<CID>(atoi(tok2)));
            }
            pst.getToken(tok1, sizeof(tok1), &n1);
        } else if (!strcmp(tok2, "begincidrange")) {
            while (pst.getToken(tok1, sizeof(tok1), &n1)) {
                if (!strcmp(tok1, "endcidrange")) {
                    break;
                }
                if (!pst.getToken(tok2, sizeof(tok2), &n2) || !strcmp(tok2, "endcidrange") || !pst.getToken(tok3, sizeof(tok3), &n3) || !strcmp(tok3, "endcidrange")) {
                    error(errSyntaxError, -1, "Illegal entry in cidrange block in CMap");
                    break;
                }
                if (tok1[0] == '<' && tok2[0] == '<' && n1 == n2 && n1 >= 4 && (n1 & 1) == 0) {
                    tok1[n1 - 1] = tok2[n1 - 1] = '\0';
                    unsigned int start = 0;
                    unsigned int end = 0;
                    // Reject entries whose bounds are not valid hex instead
                    // of mapping them with leftover or zero values.
                    if (sscanf(tok1 + 1, "%x", &start) != 1 || sscanf(tok2 + 1, "%x", &end) != 1) {
                        error(errSyntaxError, -1, "Illegal entry in cidrange block in CMap");
                        continue;
                    }
                    const unsigned int nBytes = (n1 - 2) / 2;
                    addCIDs(start, end, nBytes, static_cast<CID>(atoi(tok3)));
                }
            }
            pst.getToken(tok1, sizeof(tok1), &n1);
        } else {
            // Both buffers have the same size, so the copy cannot overflow.
            strcpy(tok1, tok2);
        }
    }
}
205
206
5.28k
// Builds an empty, non-identity CMap: horizontal writing mode and a freshly
// allocated 256-entry root table in which every entry maps to CID 0.
CMap::CMap(std::unique_ptr<GooString> &&collectionA, std::unique_ptr<GooString> &&cMapNameA) : collection(std::move(collectionA)), cMapName(std::move(cMapNameA))
{
    isIdent = false;
    wMode = GfxFont::WritingMode::Horizontal;

    vector = static_cast<CMapVectorEntry *>(gmallocn(256, sizeof(CMapVectorEntry)));
    for (CMapVectorEntry *entry = vector; entry != vector + 256; ++entry) {
        entry->isVector = false;
        entry->cid = 0;
    }
}
216
217
13.4k
// Builds an identity CMap with the given writing mode. Identity CMaps carry
// no lookup table (vector stays null).
CMap::CMap(std::unique_ptr<GooString> &&collectionA, std::unique_ptr<GooString> &&cMapNameA, GfxFont::WritingMode wModeA) : collection(std::move(collectionA)), cMapName(std::move(cMapNameA))
{
    vector = nullptr;
    wMode = wModeA;
    isIdent = true;
}
223
224
// Merges the CMap called 'useName' (same collection) into this one: the
// identity flag is taken over and, if the other CMap has a lookup table,
// its entries are copied into ours. Does nothing if the CMap is not found.
void CMap::useCMap(CMapCache *cache, const char *useName)
{
    const GooString useNameStr(useName);

    // If cache is non-NULL, we already hold the lock and can call
    // CMapCache::getCMap() directly; otherwise we have to go through
    // GlobalParams::getCMap() so that the lock gets acquired.
    const std::shared_ptr<CMap> subCMap = cache ? cache->getCMap(collection->toStr(), useNameStr.toStr()) : globalParams->getCMap(collection->toStr(), useNameStr.toStr());

    if (subCMap) {
        isIdent = subCMap->isIdent;
        if (CMapVectorEntry *const sourceTable = subCMap->vector) {
            copyVector(vector, sourceTable);
        }
    }
}
246
247
// Merges the CMap described by 'obj' (parsed for this CMap's collection)
// into this one: the identity flag is taken over and, if the other CMap has
// a lookup table, its entries are copied into ours. Does nothing if parsing
// yields no CMap.
void CMap::useCMap(Object *obj, RefRecursionChecker &recursion)
{
    const std::shared_ptr<CMap> parent = CMap::parse(collection->toStr(), obj, recursion);
    if (parent) {
        isIdent = parent->isIdent;
        if (CMapVectorEntry *const sourceTable = parent->vector) {
            copyVector(vector, sourceTable);
        }
    }
}
258
259
// Recursively copies the 256-entry table 'src' into 'dest'.
// Sub-tables present in src are created in dest on demand (zero-filled) and
// then copied; plain CID entries overwrite dest's CID. A plain CID in src
// that would land on an existing sub-table in dest is reported as a
// collision and left untouched.
void CMap::copyVector(CMapVectorEntry *dest, CMapVectorEntry *src)
{
    for (int slot = 0; slot < 256; ++slot) {
        CMapVectorEntry &to = dest[slot];
        const CMapVectorEntry &from = src[slot];

        if (!from.isVector) {
            if (to.isVector) {
                error(errSyntaxError, -1, "Collision in usecmap");
            } else {
                to.cid = from.cid;
            }
            continue;
        }

        if (!to.isVector) {
            to.isVector = true;
            to.vector = static_cast<CMapVectorEntry *>(gmallocn(256, sizeof(CMapVectorEntry)));
            for (int k = 0; k < 256; ++k) {
                to.vector[k].isVector = false;
                to.vector[k].cid = 0;
            }
        }
        copyVector(to.vector, from.vector);
    }
}
283
284
void CMap::addCIDs(unsigned int start, unsigned int end, unsigned int nBytes, CID firstCID)
285
3.07M
{
286
3.07M
    if (nBytes > 4) {
287
11.1k
        error(errSyntaxError, -1, "Illegal entry in cidchar block in CMap");
288
11.1k
        return;
289
11.1k
    }
290
291
3.06M
    const unsigned int start1 = start & 0xffffff00;
292
3.06M
    const unsigned int end1 = end & 0xffffff00;
293
8.36M
    for (unsigned int i = start1; i <= end1; i += 0x100) {
294
5.29M
        CMapVectorEntry *vec = vector;
295
16.0M
        for (unsigned int j = nBytes - 1; j >= 1; --j) {
296
10.7M
            const int byte = (i >> (8 * j)) & 0xff;
297
10.7M
            if (!vec[byte].isVector) {
298
2.16M
                vec[byte].isVector = true;
299
2.16M
                vec[byte].vector = (CMapVectorEntry *)gmallocn(256, sizeof(CMapVectorEntry));
300
555M
                for (unsigned int k = 0; k < 256; ++k) {
301
553M
                    vec[byte].vector[k].isVector = false;
302
553M
                    vec[byte].vector[k].cid = 0;
303
553M
                }
304
2.16M
            }
305
10.7M
            vec = vec[byte].vector;
306
10.7M
        }
307
5.29M
        const int byte0 = (i < start) ? (start & 0xff) : 0;
308
5.29M
        const int byte1 = (i + 0xff > end) ? (end & 0xff) : 0xff;
309
582M
        for (int byte = byte0; byte <= byte1; ++byte) {
310
576M
            if (vec[byte].isVector) {
311
3.19k
                error(errSyntaxError, -1, "Invalid CID ({0:ux} [{1:ud} bytes]) in CMap", i, nBytes);
312
576M
            } else {
313
576M
                vec[byte].cid = firstCID + ((i + byte) - start);
314
576M
            }
315
576M
        }
316
5.29M
    }
317
3.06M
}
318
319
// Releases the lookup table tree, if this CMap has one (identity CMaps are
// constructed without a table).
CMap::~CMap()
{
    if (vector != nullptr) {
        freeCMapVector(vector);
    }
}
325
326
// Frees a 256-entry table together with every sub-table reachable from it.
void CMap::freeCMapVector(CMapVectorEntry *vec)
{
    for (CMapVectorEntry *entry = vec; entry != vec + 256; ++entry) {
        if (entry->isVector) {
            freeCMapVector(entry->vector);
        }
    }
    gfree(vec);
}
337
338
bool CMap::match(const std::string &collectionA, const std::string &cMapNameA)
339
193k
{
340
193k
    return !collection->compare(collectionA) && !cMapName->compare(cMapNameA);
341
193k
}
342
343
// Decodes one character code from the first 'len' bytes of 's'.
//
// With a lookup table, bytes are consumed one at a time, descending through
// sub-tables until a plain entry is reached; *c receives the accumulated
// code, *nUsed the number of bytes consumed, and the entry's CID is returned.
// If no entry is reached (no table, or the input ends inside a sub-table
// chain):
//   - an identity CMap with at least two bytes available consumes two bytes
//     and returns the big-endian 16-bit value as both code and CID;
//   - otherwise one byte is consumed, *c is that byte and the CID is 0.
CID CMap::getCID(const char *s, int len, CharCode *c, int *nUsed)
{
    CharCode code = 0;
    int consumed = 0;

    for (CMapVectorEntry *table = vector; table && consumed < len;) {
        const int byte = s[consumed++] & 0xff;
        code = (code << 8) | byte;

        const CMapVectorEntry &entry = table[byte];
        if (!entry.isVector) {
            *c = code;
            *nUsed = consumed;
            return entry.cid;
        }
        table = entry.vector;
    }

    if (isIdent && len >= 2) {
        // identity CMap
        const CharCode twoByteCode = ((s[0] & 0xff) << 8) + (s[1] & 0xff);
        *nUsed = 2;
        *c = twoByteCode;
        return twoByteCode;
    }

    *nUsed = 1;
    *c = s[0] & 0xff;
    return 0;
}
372
373
void CMap::setReverseMapVector(unsigned int startCode, CMapVectorEntry *vec, unsigned int *rmap, unsigned int rmapSize, unsigned int ncand)
374
41.4k
{
375
41.4k
    int i;
376
377
41.4k
    if (vec == nullptr) {
378
0
        return;
379
0
    }
380
10.6M
    for (i = 0; i < 256; i++) {
381
10.6M
        if (vec[i].isVector) {
382
41.2k
            setReverseMapVector((startCode + i) << 8, vec[i].vector, rmap, rmapSize, ncand);
383
10.5M
        } else {
384
10.5M
            unsigned int cid = vec[i].cid;
385
386
10.5M
            if (cid < rmapSize) {
387
10.5M
                unsigned int cand;
388
389
21.6M
                for (cand = 0; cand < ncand; cand++) {
390
14.2M
                    unsigned int code = startCode + i;
391
14.2M
                    unsigned int idx = cid * ncand + cand;
392
14.2M
                    if (rmap[idx] == 0) {
393
1.84M
                        rmap[idx] = code;
394
1.84M
                        break;
395
1.84M
                    }
396
12.4M
                    if (rmap[idx] == code) {
397
1.35M
                        break;
398
1.35M
                    }
399
12.4M
                }
400
10.5M
            }
401
10.5M
        }
402
10.6M
    }
403
41.4k
}
404
405
void CMap::setReverseMap(unsigned int *rmap, unsigned int rmapSize, unsigned int ncand)
406
200
{
407
200
    setReverseMapVector(0, vector, rmap, rmapSize, ncand);
408
200
}
409
410
//------------------------------------------------------------------------
411
412
131k
// Compiler-generated default construction; the constructor does no work of its own.
CMapCache::CMapCache() = default;
413
414
std::shared_ptr<CMap> CMapCache::getCMap(const std::string &collection, const std::string &cMapName)
415
190k
{
416
190k
    int i, j;
417
418
190k
    if (cache[0] && cache[0]->match(collection, cMapName)) {
419
154k
        return cache[0];
420
154k
    }
421
98.2k
    for (i = 1; i < cMapCacheSize; ++i) {
422
78.1k
        if (cache[i] && cache[i]->match(collection, cMapName)) {
423
15.5k
            std::shared_ptr<CMap> cmap = cache[i];
424
33.4k
            for (j = i; j >= 1; --j) {
425
17.8k
                cache[j] = cache[j - 1];
426
17.8k
            }
427
15.5k
            cache[0] = cmap;
428
15.5k
            return cmap;
429
15.5k
        }
430
78.1k
    }
431
20.1k
    std::shared_ptr<CMap> cmap = CMap::parse(this, collection, cMapName);
432
20.1k
    if (cmap) {
433
55.4k
        for (j = cMapCacheSize - 1; j >= 1; --j) {
434
41.6k
            cache[j] = cache[j - 1];
435
41.6k
        }
436
13.8k
        cache[0] = cmap;
437
13.8k
        return cmap;
438
13.8k
    }
439
6.23k
    return {};
440
20.1k
}