/src/xpdf-4.04/xpdf/UnicodeMap.cc
Line | Count | Source (jump to first uncovered line) |
1 | | //======================================================================== |
2 | | // |
3 | | // UnicodeMap.cc |
4 | | // |
5 | | // Copyright 2001-2003 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #ifdef USE_GCC_PRAGMAS |
12 | | #pragma implementation |
13 | | #endif |
14 | | |
15 | | #include <stdio.h> |
16 | | #include <string.h> |
17 | | #include "gmem.h" |
18 | | #include "gmempp.h" |
19 | | #include "gfile.h" |
20 | | #include "GString.h" |
21 | | #include "GList.h" |
22 | | #include "Error.h" |
23 | | #include "GlobalParams.h" |
24 | | #include "UnicodeMap.h" |
25 | | |
26 | | //------------------------------------------------------------------------ |
27 | | |
28 | | #define maxExtCode 16 |
29 | | |
30 | | struct UnicodeMapExt { |
31 | | Unicode u; // Unicode char |
32 | | char code[maxExtCode]; |
33 | | Guint nBytes; |
34 | | }; |
35 | | |
36 | | //------------------------------------------------------------------------ |
37 | | |
38 | 0 | UnicodeMap *UnicodeMap::parse(GString *encodingNameA) { |
39 | 0 | FILE *f; |
40 | 0 | UnicodeMap *map; |
41 | 0 | UnicodeMapRange *range; |
42 | 0 | UnicodeMapExt *eMap; |
43 | 0 | int size, eMapsSize; |
44 | 0 | char buf[256]; |
45 | 0 | int line, nBytes, i, x; |
46 | 0 | char *tok1, *tok2, *tok3; |
47 | |
|
48 | 0 | if (!(f = globalParams->getUnicodeMapFile(encodingNameA))) { |
49 | 0 | error(errSyntaxError, -1, |
50 | 0 | "Couldn't find unicodeMap file for the '{0:t}' encoding", |
51 | 0 | encodingNameA); |
52 | 0 | return NULL; |
53 | 0 | } |
54 | | |
55 | 0 | map = new UnicodeMap(encodingNameA->copy()); |
56 | |
|
57 | 0 | size = 8; |
58 | 0 | map->ranges = (UnicodeMapRange *)gmallocn(size, sizeof(UnicodeMapRange)); |
59 | 0 | eMapsSize = 0; |
60 | |
|
61 | 0 | line = 1; |
62 | 0 | while (getLine(buf, sizeof(buf), f)) { |
63 | 0 | if ((tok1 = strtok(buf, " \t\r\n")) && |
64 | 0 | (tok2 = strtok(NULL, " \t\r\n"))) { |
65 | 0 | if (!(tok3 = strtok(NULL, " \t\r\n"))) { |
66 | 0 | tok3 = tok2; |
67 | 0 | tok2 = tok1; |
68 | 0 | } |
69 | 0 | nBytes = (int)strlen(tok3) / 2; |
70 | 0 | if (nBytes <= 4) { |
71 | 0 | if (map->len == size) { |
72 | 0 | size *= 2; |
73 | 0 | map->ranges = (UnicodeMapRange *) |
74 | 0 | greallocn(map->ranges, size, sizeof(UnicodeMapRange)); |
75 | 0 | } |
76 | 0 | range = &map->ranges[map->len]; |
77 | 0 | sscanf(tok1, "%x", &range->start); |
78 | 0 | sscanf(tok2, "%x", &range->end); |
79 | 0 | sscanf(tok3, "%x", &range->code); |
80 | 0 | range->nBytes = nBytes; |
81 | 0 | ++map->len; |
82 | 0 | } else if (tok2 == tok1) { |
83 | 0 | if (map->eMapsLen == eMapsSize) { |
84 | 0 | eMapsSize += 16; |
85 | 0 | map->eMaps = (UnicodeMapExt *) |
86 | 0 | greallocn(map->eMaps, eMapsSize, sizeof(UnicodeMapExt)); |
87 | 0 | } |
88 | 0 | eMap = &map->eMaps[map->eMapsLen]; |
89 | 0 | sscanf(tok1, "%x", &eMap->u); |
90 | 0 | for (i = 0; i < nBytes; ++i) { |
91 | 0 | sscanf(tok3 + i*2, "%2x", &x); |
92 | 0 | eMap->code[i] = (char)x; |
93 | 0 | } |
94 | 0 | eMap->nBytes = nBytes; |
95 | 0 | ++map->eMapsLen; |
96 | 0 | } else { |
97 | 0 | error(errSyntaxError, -1, |
98 | 0 | "Bad line ({0:d}) in unicodeMap file for the '{1:t}' encoding", |
99 | 0 | line, encodingNameA); |
100 | 0 | } |
101 | 0 | } else { |
102 | 0 | error(errSyntaxError, -1, |
103 | 0 | "Bad line ({0:d}) in unicodeMap file for the '{1:t}' encoding", |
104 | 0 | line, encodingNameA); |
105 | 0 | } |
106 | 0 | ++line; |
107 | 0 | } |
108 | |
|
109 | 0 | fclose(f); |
110 | |
|
111 | 0 | return map; |
112 | 0 | } |
113 | | |
114 | 0 | UnicodeMap::UnicodeMap(GString *encodingNameA) { |
115 | 0 | encodingName = encodingNameA; |
116 | 0 | unicodeOut = gFalse; |
117 | 0 | kind = unicodeMapUser; |
118 | 0 | ranges = NULL; |
119 | 0 | len = 0; |
120 | 0 | eMaps = NULL; |
121 | 0 | eMapsLen = 0; |
122 | 0 | refCnt = 1; |
123 | 0 | } |
124 | | |
125 | | UnicodeMap::UnicodeMap(const char *encodingNameA, GBool unicodeOutA, |
126 | 9.50k | UnicodeMapRange *rangesA, int lenA) { |
127 | 9.50k | encodingName = new GString(encodingNameA); |
128 | 9.50k | unicodeOut = unicodeOutA; |
129 | 9.50k | kind = unicodeMapResident; |
130 | 9.50k | ranges = rangesA; |
131 | 9.50k | len = lenA; |
132 | 9.50k | eMaps = NULL; |
133 | 9.50k | eMapsLen = 0; |
134 | 9.50k | refCnt = 1; |
135 | 9.50k | } |
136 | | |
137 | | UnicodeMap::UnicodeMap(const char *encodingNameA, GBool unicodeOutA, |
138 | 4.75k | UnicodeMapFunc funcA) { |
139 | 4.75k | encodingName = new GString(encodingNameA); |
140 | 4.75k | unicodeOut = unicodeOutA; |
141 | 4.75k | kind = unicodeMapFunc; |
142 | 4.75k | func = funcA; |
143 | 4.75k | eMaps = NULL; |
144 | 4.75k | eMapsLen = 0; |
145 | 4.75k | refCnt = 1; |
146 | 4.75k | } |
147 | | |
148 | 14.2k | UnicodeMap::~UnicodeMap() { |
149 | 14.2k | delete encodingName; |
150 | 14.2k | if (kind == unicodeMapUser && ranges) { |
151 | 0 | gfree(ranges); |
152 | 0 | } |
153 | 14.2k | if (eMaps) { |
154 | 0 | gfree(eMaps); |
155 | 0 | } |
156 | 14.2k | } |
157 | | |
158 | 0 | void UnicodeMap::incRefCnt() { |
159 | 0 | #if MULTITHREADED |
160 | 0 | gAtomicIncrement(&refCnt); |
161 | | #else |
162 | | ++refCnt; |
163 | | #endif |
164 | 0 | } |
165 | | |
166 | 0 | void UnicodeMap::decRefCnt() { |
167 | 0 | GBool done; |
168 | |
|
169 | 0 | #if MULTITHREADED |
170 | 0 | done = gAtomicDecrement(&refCnt) == 0; |
171 | | #else |
172 | | done = --refCnt == 0; |
173 | | #endif |
174 | 0 | if (done) { |
175 | 0 | delete this; |
176 | 0 | } |
177 | 0 | } |
178 | | |
179 | 0 | GBool UnicodeMap::match(GString *encodingNameA) { |
180 | 0 | return !encodingName->cmp(encodingNameA); |
181 | 0 | } |
182 | | |
183 | 0 | int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) { |
184 | 0 | int a, b, m, n, i, j; |
185 | 0 | Guint code; |
186 | |
|
187 | 0 | if (kind == unicodeMapFunc) { |
188 | 0 | return (*func)(u, buf, bufSize); |
189 | 0 | } |
190 | | |
191 | 0 | a = 0; |
192 | 0 | b = len; |
193 | 0 | if (u >= ranges[a].start) { |
194 | | // invariant: ranges[a].start <= u < ranges[b].start |
195 | 0 | while (b - a > 1) { |
196 | 0 | m = (a + b) / 2; |
197 | 0 | if (u >= ranges[m].start) { |
198 | 0 | a = m; |
199 | 0 | } else if (u < ranges[m].start) { |
200 | 0 | b = m; |
201 | 0 | } |
202 | 0 | } |
203 | 0 | if (u <= ranges[a].end) { |
204 | 0 | n = ranges[a].nBytes; |
205 | 0 | if (n > bufSize) { |
206 | 0 | return 0; |
207 | 0 | } |
208 | 0 | code = ranges[a].code + (u - ranges[a].start); |
209 | 0 | for (i = n - 1; i >= 0; --i) { |
210 | 0 | buf[i] = (char)(code & 0xff); |
211 | 0 | code >>= 8; |
212 | 0 | } |
213 | 0 | return n; |
214 | 0 | } |
215 | 0 | } |
216 | | |
217 | 0 | for (i = 0; i < eMapsLen; ++i) { |
218 | 0 | if (eMaps[i].u == u) { |
219 | 0 | n = eMaps[i].nBytes; |
220 | 0 | for (j = 0; j < n; ++j) { |
221 | 0 | buf[j] = eMaps[i].code[j]; |
222 | 0 | } |
223 | 0 | return n; |
224 | 0 | } |
225 | 0 | } |
226 | | |
227 | 0 | return 0; |
228 | 0 | } |
229 | | |
230 | | //------------------------------------------------------------------------ |
231 | | |
232 | 2.37k | UnicodeMapCache::UnicodeMapCache() { |
233 | 2.37k | int i; |
234 | | |
235 | 11.8k | for (i = 0; i < unicodeMapCacheSize; ++i) { |
236 | 9.50k | cache[i] = NULL; |
237 | 9.50k | } |
238 | 2.37k | } |
239 | | |
240 | 2.37k | UnicodeMapCache::~UnicodeMapCache() { |
241 | 2.37k | int i; |
242 | | |
243 | 11.8k | for (i = 0; i < unicodeMapCacheSize; ++i) { |
244 | 9.50k | if (cache[i]) { |
245 | 0 | cache[i]->decRefCnt(); |
246 | 0 | } |
247 | 9.50k | } |
248 | 2.37k | } |
249 | | |
250 | 0 | UnicodeMap *UnicodeMapCache::getUnicodeMap(GString *encodingName) { |
251 | 0 | UnicodeMap *map; |
252 | 0 | int i, j; |
253 | |
|
254 | 0 | if (cache[0] && cache[0]->match(encodingName)) { |
255 | 0 | cache[0]->incRefCnt(); |
256 | 0 | return cache[0]; |
257 | 0 | } |
258 | 0 | for (i = 1; i < unicodeMapCacheSize; ++i) { |
259 | 0 | if (cache[i] && cache[i]->match(encodingName)) { |
260 | 0 | map = cache[i]; |
261 | 0 | for (j = i; j >= 1; --j) { |
262 | 0 | cache[j] = cache[j - 1]; |
263 | 0 | } |
264 | 0 | cache[0] = map; |
265 | 0 | map->incRefCnt(); |
266 | 0 | return map; |
267 | 0 | } |
268 | 0 | } |
269 | 0 | if ((map = UnicodeMap::parse(encodingName))) { |
270 | 0 | if (cache[unicodeMapCacheSize - 1]) { |
271 | 0 | cache[unicodeMapCacheSize - 1]->decRefCnt(); |
272 | 0 | } |
273 | 0 | for (j = unicodeMapCacheSize - 1; j >= 1; --j) { |
274 | 0 | cache[j] = cache[j - 1]; |
275 | 0 | } |
276 | 0 | cache[0] = map; |
277 | 0 | map->incRefCnt(); |
278 | 0 | return map; |
279 | 0 | } |
280 | 0 | return NULL; |
281 | 0 | } |