/src/xpdf-4.05/xpdf/UnicodeMap.cc
Line | Count | Source (jump to first uncovered line) |
1 | | //======================================================================== |
2 | | // |
3 | | // UnicodeMap.cc |
4 | | // |
5 | | // Copyright 2001-2003 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #include <stdio.h> |
12 | | #include <string.h> |
13 | | #include "gmem.h" |
14 | | #include "gmempp.h" |
15 | | #include "gfile.h" |
16 | | #include "GString.h" |
17 | | #include "GList.h" |
18 | | #include "Error.h" |
19 | | #include "GlobalParams.h" |
20 | | #include "UnicodeMap.h" |
21 | | |
22 | | //------------------------------------------------------------------------ |
23 | | |
24 | | #define maxExtCode 16 |
25 | | |
26 | | struct UnicodeMapExt { |
27 | | Unicode u; // Unicode char |
28 | | char code[maxExtCode]; |
29 | | Guint nBytes; |
30 | | }; |
31 | | |
32 | | //------------------------------------------------------------------------ |
33 | | |
34 | 0 | UnicodeMap *UnicodeMap::parse(GString *encodingNameA) { |
35 | 0 | FILE *f; |
36 | 0 | UnicodeMap *map; |
37 | 0 | UnicodeMapRange *range; |
38 | 0 | UnicodeMapExt *eMap; |
39 | 0 | int size, eMapsSize; |
40 | 0 | char buf[256]; |
41 | 0 | int line, nBytes, i, x; |
42 | 0 | char *tok1, *tok2, *tok3; |
43 | |
|
44 | 0 | if (!(f = globalParams->getUnicodeMapFile(encodingNameA))) { |
45 | 0 | error(errSyntaxError, -1, |
46 | 0 | "Couldn't find unicodeMap file for the '{0:t}' encoding", |
47 | 0 | encodingNameA); |
48 | 0 | return NULL; |
49 | 0 | } |
50 | | |
51 | 0 | map = new UnicodeMap(encodingNameA->copy()); |
52 | |
|
53 | 0 | size = 8; |
54 | 0 | map->ranges = (UnicodeMapRange *)gmallocn(size, sizeof(UnicodeMapRange)); |
55 | 0 | eMapsSize = 0; |
56 | |
|
57 | 0 | line = 1; |
58 | 0 | while (getLine(buf, sizeof(buf), f)) { |
59 | 0 | if ((tok1 = strtok(buf, " \t\r\n")) && |
60 | 0 | (tok2 = strtok(NULL, " \t\r\n"))) { |
61 | 0 | if (!(tok3 = strtok(NULL, " \t\r\n"))) { |
62 | 0 | tok3 = tok2; |
63 | 0 | tok2 = tok1; |
64 | 0 | } |
65 | 0 | nBytes = (int)strlen(tok3) / 2; |
66 | 0 | if (nBytes <= 4) { |
67 | 0 | if (map->len == size) { |
68 | 0 | size *= 2; |
69 | 0 | map->ranges = (UnicodeMapRange *) |
70 | 0 | greallocn(map->ranges, size, sizeof(UnicodeMapRange)); |
71 | 0 | } |
72 | 0 | range = &map->ranges[map->len]; |
73 | 0 | sscanf(tok1, "%x", &range->start); |
74 | 0 | sscanf(tok2, "%x", &range->end); |
75 | 0 | sscanf(tok3, "%x", &range->code); |
76 | 0 | range->nBytes = nBytes; |
77 | 0 | ++map->len; |
78 | 0 | } else if (tok2 == tok1) { |
79 | 0 | if (map->eMapsLen == eMapsSize) { |
80 | 0 | eMapsSize += 16; |
81 | 0 | map->eMaps = (UnicodeMapExt *) |
82 | 0 | greallocn(map->eMaps, eMapsSize, sizeof(UnicodeMapExt)); |
83 | 0 | } |
84 | 0 | eMap = &map->eMaps[map->eMapsLen]; |
85 | 0 | sscanf(tok1, "%x", &eMap->u); |
86 | 0 | for (i = 0; i < nBytes; ++i) { |
87 | 0 | sscanf(tok3 + i*2, "%2x", &x); |
88 | 0 | eMap->code[i] = (char)x; |
89 | 0 | } |
90 | 0 | eMap->nBytes = nBytes; |
91 | 0 | ++map->eMapsLen; |
92 | 0 | } else { |
93 | 0 | error(errSyntaxError, -1, |
94 | 0 | "Bad line ({0:d}) in unicodeMap file for the '{1:t}' encoding", |
95 | 0 | line, encodingNameA); |
96 | 0 | } |
97 | 0 | } else { |
98 | 0 | error(errSyntaxError, -1, |
99 | 0 | "Bad line ({0:d}) in unicodeMap file for the '{1:t}' encoding", |
100 | 0 | line, encodingNameA); |
101 | 0 | } |
102 | 0 | ++line; |
103 | 0 | } |
104 | |
|
105 | 0 | fclose(f); |
106 | |
|
107 | 0 | return map; |
108 | 0 | } |
109 | | |
110 | 0 | UnicodeMap::UnicodeMap(GString *encodingNameA) { |
111 | 0 | encodingName = encodingNameA; |
112 | 0 | unicodeOut = gFalse; |
113 | 0 | kind = unicodeMapUser; |
114 | 0 | ranges = NULL; |
115 | 0 | len = 0; |
116 | 0 | eMaps = NULL; |
117 | 0 | eMapsLen = 0; |
118 | 0 | refCnt = 1; |
119 | 0 | } |
120 | | |
121 | | UnicodeMap::UnicodeMap(const char *encodingNameA, GBool unicodeOutA, |
122 | 9.20k | UnicodeMapRange *rangesA, int lenA) { |
123 | 9.20k | encodingName = new GString(encodingNameA); |
124 | 9.20k | unicodeOut = unicodeOutA; |
125 | 9.20k | kind = unicodeMapResident; |
126 | 9.20k | ranges = rangesA; |
127 | 9.20k | len = lenA; |
128 | 9.20k | eMaps = NULL; |
129 | 9.20k | eMapsLen = 0; |
130 | 9.20k | refCnt = 1; |
131 | 9.20k | } |
132 | | |
133 | | UnicodeMap::UnicodeMap(const char *encodingNameA, GBool unicodeOutA, |
134 | 4.60k | UnicodeMapFunc funcA) { |
135 | 4.60k | encodingName = new GString(encodingNameA); |
136 | 4.60k | unicodeOut = unicodeOutA; |
137 | 4.60k | kind = unicodeMapFunc; |
138 | 4.60k | func = funcA; |
139 | 4.60k | eMaps = NULL; |
140 | 4.60k | eMapsLen = 0; |
141 | 4.60k | refCnt = 1; |
142 | 4.60k | } |
143 | | |
144 | 13.8k | UnicodeMap::~UnicodeMap() { |
145 | 13.8k | delete encodingName; |
146 | 13.8k | if (kind == unicodeMapUser && ranges) { |
147 | 0 | gfree(ranges); |
148 | 0 | } |
149 | 13.8k | if (eMaps) { |
150 | 0 | gfree(eMaps); |
151 | 0 | } |
152 | 13.8k | } |
153 | | |
154 | 0 | void UnicodeMap::incRefCnt() { |
155 | 0 | #if MULTITHREADED |
156 | 0 | gAtomicIncrement(&refCnt); |
157 | | #else |
158 | | ++refCnt; |
159 | | #endif |
160 | 0 | } |
161 | | |
162 | 0 | void UnicodeMap::decRefCnt() { |
163 | 0 | GBool done; |
164 | |
|
165 | 0 | #if MULTITHREADED |
166 | 0 | done = gAtomicDecrement(&refCnt) == 0; |
167 | | #else |
168 | | done = --refCnt == 0; |
169 | | #endif |
170 | 0 | if (done) { |
171 | 0 | delete this; |
172 | 0 | } |
173 | 0 | } |
174 | | |
175 | 0 | GBool UnicodeMap::match(GString *encodingNameA) { |
176 | 0 | return !encodingName->cmp(encodingNameA); |
177 | 0 | } |
178 | | |
179 | 0 | int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) { |
180 | 0 | int a, b, m, n, i, j; |
181 | 0 | Guint code; |
182 | |
|
183 | 0 | if (kind == unicodeMapFunc) { |
184 | 0 | return (*func)(u, buf, bufSize); |
185 | 0 | } |
186 | | |
187 | 0 | a = 0; |
188 | 0 | b = len; |
189 | 0 | if (u >= ranges[a].start) { |
190 | | // invariant: ranges[a].start <= u < ranges[b].start |
191 | 0 | while (b - a > 1) { |
192 | 0 | m = (a + b) / 2; |
193 | 0 | if (u >= ranges[m].start) { |
194 | 0 | a = m; |
195 | 0 | } else if (u < ranges[m].start) { |
196 | 0 | b = m; |
197 | 0 | } |
198 | 0 | } |
199 | 0 | if (u <= ranges[a].end) { |
200 | 0 | n = ranges[a].nBytes; |
201 | 0 | if (n > bufSize) { |
202 | 0 | return 0; |
203 | 0 | } |
204 | 0 | code = ranges[a].code + (u - ranges[a].start); |
205 | 0 | for (i = n - 1; i >= 0; --i) { |
206 | 0 | buf[i] = (char)(code & 0xff); |
207 | 0 | code >>= 8; |
208 | 0 | } |
209 | 0 | return n; |
210 | 0 | } |
211 | 0 | } |
212 | | |
213 | 0 | for (i = 0; i < eMapsLen; ++i) { |
214 | 0 | if (eMaps[i].u == u) { |
215 | 0 | n = eMaps[i].nBytes; |
216 | 0 | for (j = 0; j < n; ++j) { |
217 | 0 | buf[j] = eMaps[i].code[j]; |
218 | 0 | } |
219 | 0 | return n; |
220 | 0 | } |
221 | 0 | } |
222 | | |
223 | 0 | return 0; |
224 | 0 | } |
225 | | |
226 | | //------------------------------------------------------------------------ |
227 | | |
228 | 2.30k | UnicodeMapCache::UnicodeMapCache() { |
229 | 2.30k | int i; |
230 | | |
231 | 11.5k | for (i = 0; i < unicodeMapCacheSize; ++i) { |
232 | 9.20k | cache[i] = NULL; |
233 | 9.20k | } |
234 | 2.30k | } |
235 | | |
236 | 2.30k | UnicodeMapCache::~UnicodeMapCache() { |
237 | 2.30k | int i; |
238 | | |
239 | 11.5k | for (i = 0; i < unicodeMapCacheSize; ++i) { |
240 | 9.20k | if (cache[i]) { |
241 | 0 | cache[i]->decRefCnt(); |
242 | 0 | } |
243 | 9.20k | } |
244 | 2.30k | } |
245 | | |
246 | 0 | UnicodeMap *UnicodeMapCache::getUnicodeMap(GString *encodingName) { |
247 | 0 | UnicodeMap *map; |
248 | 0 | int i, j; |
249 | |
|
250 | 0 | if (cache[0] && cache[0]->match(encodingName)) { |
251 | 0 | cache[0]->incRefCnt(); |
252 | 0 | return cache[0]; |
253 | 0 | } |
254 | 0 | for (i = 1; i < unicodeMapCacheSize; ++i) { |
255 | 0 | if (cache[i] && cache[i]->match(encodingName)) { |
256 | 0 | map = cache[i]; |
257 | 0 | for (j = i; j >= 1; --j) { |
258 | 0 | cache[j] = cache[j - 1]; |
259 | 0 | } |
260 | 0 | cache[0] = map; |
261 | 0 | map->incRefCnt(); |
262 | 0 | return map; |
263 | 0 | } |
264 | 0 | } |
265 | 0 | if ((map = UnicodeMap::parse(encodingName))) { |
266 | 0 | if (cache[unicodeMapCacheSize - 1]) { |
267 | 0 | cache[unicodeMapCacheSize - 1]->decRefCnt(); |
268 | 0 | } |
269 | 0 | for (j = unicodeMapCacheSize - 1; j >= 1; --j) { |
270 | 0 | cache[j] = cache[j - 1]; |
271 | 0 | } |
272 | 0 | cache[0] = map; |
273 | 0 | map->incRefCnt(); |
274 | 0 | return map; |
275 | 0 | } |
276 | 0 | return NULL; |
277 | 0 | } |