/src/ghostpdl/base/gsfcmap.c
Line | Count | Source |
1 | | /* Copyright (C) 2001-2026 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* CMap character decoding */ |
18 | | #include "memory_.h" |
19 | | #include "string_.h" |
20 | | #include "gx.h" |
21 | | #include "gserrors.h" |
22 | | #include "gsstruct.h" |
23 | | #include "gsutil.h" /* for gs_next_ids */ |
24 | | #include "gxfcmap.h" |
25 | | #include "gxdevice.h" |
26 | | |
27 | | typedef struct gs_cmap_identity_s { |
28 | | GS_CMAP_COMMON; |
29 | | int num_bytes; |
30 | | int varying_bytes; |
31 | | int code; /* 0 or num_bytes */ |
32 | | } gs_cmap_identity_t; |
33 | | |
34 | | /* GC descriptors */ |
35 | | public_st_cmap(); |
36 | | gs_public_st_suffix_add0_local(st_cmap_identity, gs_cmap_identity_t, |
37 | | "gs_cmap_identity_t", cmap_ptrs, cmap_data, |
38 | | st_cmap); |
39 | | |
40 | | /* ---------------- Client procedures ---------------- */ |
41 | | |
42 | | /* ------ Initialization/creation ------ */ |
43 | | |
44 | | /* |
45 | | * Create an Identity CMap. |
46 | | */ |
47 | | static uint |
48 | | get_integer_bytes(const byte *src, int count) |
49 | 0 | { |
50 | 0 | uint v = 0; |
51 | 0 | int i; |
52 | |
|
53 | 0 | for (i = 0; i < count; ++i) |
54 | 0 | v = (v << 8) + src[i]; |
55 | 0 | return v; |
56 | 0 | } |
57 | | static int |
58 | | identity_decode_next(const gs_cmap_t *pcmap, const gs_const_string *str, |
59 | | uint *pindex, uint *pfidx, |
60 | | gs_char *pchr, gs_glyph *pglyph) |
61 | 0 | { |
62 | 0 | const gs_cmap_identity_t *const pcimap = |
63 | 0 | (const gs_cmap_identity_t *)pcmap; |
64 | 0 | int num_bytes = pcimap->num_bytes; |
65 | 0 | uint value; |
66 | |
|
67 | 0 | if (str->size < *pindex + num_bytes) { |
68 | 0 | *pglyph = GS_NO_GLYPH; |
69 | 0 | return (*pindex == str->size ? 2 : -1); |
70 | 0 | } |
71 | 0 | value = get_integer_bytes(str->data + *pindex, num_bytes); |
72 | 0 | *pglyph = GS_MIN_CID_GLYPH + value; |
73 | 0 | *pchr = value; |
74 | 0 | *pindex += num_bytes; |
75 | 0 | *pfidx = 0; |
76 | 0 | return pcimap->code; |
77 | 0 | } |
78 | | static int |
79 | | identity_next_range(gs_cmap_ranges_enum_t *penum) |
80 | 0 | { |
81 | 0 | if (penum->index == 0) { |
82 | 0 | const gs_cmap_identity_t *const pcimap = |
83 | 0 | (const gs_cmap_identity_t *)penum->cmap; |
84 | |
|
85 | 0 | memset(penum->range.first, 0, pcimap->num_bytes); |
86 | 0 | memset(penum->range.last, 0xff, pcimap->num_bytes); |
87 | 0 | penum->range.size = pcimap->num_bytes; |
88 | 0 | penum->index = 1; |
89 | 0 | return 0; |
90 | 0 | } |
91 | 0 | return 1; |
92 | 0 | } |
93 | | static const gs_cmap_ranges_enum_procs_t identity_range_procs = { |
94 | | identity_next_range |
95 | | }; |
96 | | static void |
97 | | identity_enum_ranges(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *pre) |
98 | 0 | { |
99 | 0 | gs_cmap_ranges_enum_setup(pre, pcmap, &identity_range_procs); |
100 | 0 | } |
101 | | static int |
102 | | identity_next_lookup(gs_memory_t *mem, gs_cmap_lookups_enum_t *penum) |
103 | 0 | { |
104 | 0 | penum->entry.value.data = 0L; |
105 | 0 | if (penum->index[0] == 0) { |
106 | 0 | const gs_cmap_identity_t *const pcimap = |
107 | 0 | (const gs_cmap_identity_t *)penum->cmap; |
108 | 0 | int num_bytes = pcimap->num_bytes; |
109 | |
|
110 | 0 | memset(penum->entry.key[0], 0, num_bytes); |
111 | 0 | memset(penum->entry.key[1], 0xff, num_bytes); |
112 | 0 | memset(penum->entry.key[1], 0, num_bytes - pcimap->varying_bytes); |
113 | 0 | penum->entry.key_size = num_bytes; |
114 | 0 | penum->entry.key_is_range = true; |
115 | 0 | penum->entry.value_type = |
116 | 0 | (pcimap->code ? CODE_VALUE_CHARS : CODE_VALUE_CID); |
117 | 0 | penum->entry.value.size = num_bytes; |
118 | 0 | penum->entry.font_index = 0; |
119 | 0 | penum->index[0] = 1; |
120 | 0 | return 0; |
121 | 0 | } |
122 | 0 | return 1; |
123 | 0 | } |
124 | | static int |
125 | | no_next_lookup(gs_memory_t *mem, gs_cmap_lookups_enum_t *penum) |
126 | 3.86k | { |
127 | 3.86k | penum->entry.value.data = 0L; |
128 | 3.86k | return 1; |
129 | 3.86k | } |
130 | | static int |
131 | | identity_next_entry(gs_cmap_lookups_enum_t *penum) |
132 | 0 | { |
133 | 0 | const gs_cmap_identity_t *const pcimap = |
134 | 0 | (const gs_cmap_identity_t *)penum->cmap; |
135 | 0 | int num_bytes = pcimap->num_bytes; |
136 | 0 | int i = num_bytes - pcimap->varying_bytes; |
137 | |
|
138 | 0 | memcpy(penum->temp_value, penum->entry.key[0], num_bytes); |
139 | 0 | memcpy(penum->entry.key[0], penum->entry.key[1], i); |
140 | 0 | while (--i >= 0) |
141 | 0 | if (++(penum->entry.key[1][i]) != 0) { |
142 | 0 | penum->entry.value.data = penum->temp_value; |
143 | 0 | return 0; |
144 | 0 | } |
145 | 0 | return 1; |
146 | 0 | } |
147 | | |
148 | | static const gs_cmap_lookups_enum_procs_t identity_lookup_procs = { |
149 | | identity_next_lookup, identity_next_entry |
150 | | }; |
151 | | const gs_cmap_lookups_enum_procs_t gs_cmap_no_lookups_procs = { |
152 | | no_next_lookup, 0 |
153 | | }; |
154 | | static void |
155 | | identity_enum_lookups(const gs_cmap_t *pcmap, int which, |
156 | | gs_cmap_lookups_enum_t *pre) |
157 | 0 | { |
158 | 0 | gs_cmap_lookups_enum_setup(pre, pcmap, |
159 | 0 | (which ? &gs_cmap_no_lookups_procs : |
160 | 0 | &identity_lookup_procs)); |
161 | 0 | } |
162 | | static bool |
163 | | identity_is_identity(const gs_cmap_t *pcmap, int font_index_only) |
164 | 0 | { |
165 | 0 | return true; |
166 | 0 | } |
167 | | |
168 | | static const gs_cmap_procs_t identity_procs = { |
169 | | identity_decode_next, identity_enum_ranges, identity_enum_lookups, identity_is_identity |
170 | | }; |
171 | | |
172 | | static int |
173 | | gs_cmap_identity_alloc(gs_cmap_t **ppcmap, int num_bytes, int varying_bytes, |
174 | | int return_code, const char *cmap_name, int wmode, |
175 | | gs_memory_t *mem) |
176 | 0 | { |
177 | | /* |
178 | | * We could allow any value of num_bytes between 1 and |
179 | | * min(MAX_CMAP_CODE_SIZE, 4), but if num_bytes != 2, we can't name |
180 | | * the result "Identity-[HV]". |
181 | | */ |
182 | 0 | static const gs_cid_system_info_t identity_cidsi = { |
183 | 0 | { (const byte *)"Adobe", 5 }, |
184 | 0 | { (const byte *)"Identity", 8 }, |
185 | 0 | 0 |
186 | 0 | }; |
187 | 0 | int code; |
188 | 0 | gs_cmap_identity_t *pcimap; |
189 | |
|
190 | 0 | if (num_bytes != 2) |
191 | 0 | return_error(gs_error_rangecheck); |
192 | 0 | code = gs_cmap_alloc(ppcmap, &st_cmap_identity, wmode, |
193 | 0 | (const byte *)cmap_name, strlen(cmap_name), |
194 | 0 | &identity_cidsi, 1, &identity_procs, mem); |
195 | 0 | if (code < 0) |
196 | 0 | return code; |
197 | 0 | pcimap = (gs_cmap_identity_t *)*ppcmap; |
198 | 0 | pcimap->num_bytes = num_bytes; |
199 | 0 | pcimap->varying_bytes = varying_bytes; |
200 | 0 | pcimap->code = return_code; |
201 | 0 | return 0; |
202 | 0 | } |
203 | | int |
204 | | gs_cmap_create_identity(gs_cmap_t **ppcmap, int num_bytes, int wmode, |
205 | | gs_memory_t *mem) |
206 | 0 | { |
207 | 0 | return gs_cmap_identity_alloc(ppcmap, num_bytes, num_bytes, 0, |
208 | 0 | (wmode ? "Identity-V" : "Identity-H"), |
209 | 0 | wmode, mem); |
210 | 0 | } |
211 | | int |
212 | | gs_cmap_create_char_identity(gs_cmap_t **ppcmap, int num_bytes, int wmode, |
213 | | gs_memory_t *mem) |
214 | 0 | { |
215 | 0 | return gs_cmap_identity_alloc(ppcmap, num_bytes, 1, num_bytes, |
216 | 0 | (wmode ? "Identity-BF-V" : "Identity-BF-H"), |
217 | 0 | wmode, mem); |
218 | 0 | } |
219 | | |
220 | | /* ------ Check identity ------ */ |
221 | | |
222 | | /* |
223 | | * Check for identity CMap. Uses a fast check for special cases. |
224 | | */ |
225 | | bool |
226 | | gs_cmap_is_identity(const gs_cmap_t *pcmap, int font_index_only) |
227 | 71 | { |
228 | 71 | return pcmap->procs->is_identity(pcmap, font_index_only); |
229 | 71 | } |
230 | | |
231 | | /* ------ Decoding ------ */ |
232 | | |
233 | | /* |
234 | | * Decode and map a character from a string using a CMap. |
235 | | * See gsfcmap.h for details. |
236 | | */ |
237 | | int |
238 | | gs_cmap_decode_next(const gs_cmap_t *pcmap, const gs_const_string *str, |
239 | | uint *pindex, uint *pfidx, |
240 | | gs_char *pchr, gs_glyph *pglyph) |
241 | 3.39M | { |
242 | 3.39M | return pcmap->procs->decode_next(pcmap, str, pindex, pfidx, pchr, pglyph); |
243 | 3.39M | } |
244 | | |
245 | | /* ------ Enumeration ------ */ |
246 | | |
247 | | /* |
248 | | * Initialize the enumeration of the code space ranges, and enumerate |
249 | | * the next range. See gxfcmap.h for details. |
250 | | */ |
251 | | void |
252 | | gs_cmap_ranges_enum_init(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *penum) |
253 | 3.92k | { |
254 | 3.92k | pcmap->procs->enum_ranges(pcmap, penum); |
255 | 3.92k | } |
256 | | int |
257 | | gs_cmap_enum_next_range(gs_cmap_ranges_enum_t *penum) |
258 | 7.86k | { |
259 | 7.86k | return penum->procs->next_range(penum); |
260 | 7.86k | } |
261 | | |
262 | | /* |
263 | | * Initialize the enumeration of the lookups, and enumerate the |
264 | | * next lookup or entry. See gxfcmap.h for details. |
265 | | */ |
266 | | void |
267 | | gs_cmap_lookups_enum_init(const gs_cmap_t *pcmap, int which, |
268 | | gs_cmap_lookups_enum_t *penum) |
269 | 679k | { |
270 | 679k | pcmap->procs->enum_lookups(pcmap, which, penum); |
271 | 679k | } |
272 | | int |
273 | | gs_cmap_enum_next_lookup(gs_memory_t *mem, gs_cmap_lookups_enum_t *penum) |
274 | 11.1M | { |
275 | 11.1M | return penum->procs->next_lookup(mem, penum); |
276 | 11.1M | } |
277 | | int |
278 | | gs_cmap_enum_next_entry(gs_cmap_lookups_enum_t *penum) |
279 | 21.6M | { |
280 | 21.6M | return penum->procs->next_entry(penum); |
281 | 21.6M | } |
282 | | |
283 | | /* ---------------- Implementation procedures ---------------- */ |
284 | | |
285 | | /* ------ Initialization/creation ------ */ |
286 | | |
287 | | /* |
288 | | * Initialize a just-allocated CMap, to ensure that all pointers are clean |
289 | | * for the GC. Note that this only initializes the common part. |
290 | | */ |
291 | | void |
292 | | gs_cmap_init(const gs_memory_t *mem, gs_cmap_t *pcmap, int num_fonts) |
293 | 76.2k | { |
294 | 76.2k | memset(pcmap, 0, sizeof(*pcmap)); |
295 | | /* We reserve a range of IDs for pdfwrite needs, |
296 | | to allow an identification of submaps for a particular subfont. |
297 | | */ |
298 | 76.2k | pcmap->id = gs_next_ids(mem, num_fonts); |
299 | 76.2k | pcmap->num_fonts = num_fonts; |
300 | 76.2k | uid_set_invalid(&pcmap->uid); |
301 | 76.2k | } |
302 | | |
303 | | /* |
304 | | * Allocate and initialize (the common part of) a CMap. |
305 | | */ |
306 | | int |
307 | | gs_cmap_alloc(gs_cmap_t **ppcmap, const gs_memory_struct_type_t *pstype, |
308 | | int wmode, const byte *map_name, uint name_size, |
309 | | const gs_cid_system_info_t *pcidsi_in, int num_fonts, |
310 | | const gs_cmap_procs_t *procs, gs_memory_t *mem) |
311 | 76.2k | { |
312 | 76.2k | gs_cmap_t *pcmap = |
313 | 76.2k | gs_alloc_struct(mem, gs_cmap_t, pstype, "gs_cmap_alloc(CMap)"); |
314 | 76.2k | gs_cid_system_info_t *pcidsi = |
315 | 76.2k | gs_alloc_struct_array(mem, num_fonts, gs_cid_system_info_t, |
316 | 76.2k | &st_cid_system_info_element, |
317 | 76.2k | "gs_cmap_alloc(CIDSystemInfo)"); |
318 | | |
319 | 76.2k | if (pcmap == 0 || pcidsi == 0) { |
320 | 0 | gs_free_object(mem, pcidsi, "gs_cmap_alloc(CIDSystemInfo)"); |
321 | 0 | gs_free_object(mem, pcmap, "gs_cmap_alloc(CMap)"); |
322 | 0 | return_error(gs_error_VMerror); |
323 | 0 | } |
324 | 76.2k | gs_cmap_init(mem, pcmap, num_fonts); /* id, uid, num_fonts */ |
325 | 76.2k | pcmap->CMapType = 1; |
326 | 76.2k | pcmap->CMapName.data = map_name; |
327 | 76.2k | pcmap->CMapName.size = name_size; |
328 | 76.2k | if (pcidsi_in) |
329 | 0 | memcpy(pcidsi, pcidsi_in, sizeof(*pcidsi) * num_fonts); |
330 | 76.2k | else |
331 | 76.2k | memset(pcidsi, 0, sizeof(*pcidsi) * num_fonts); |
332 | 76.2k | pcmap->CIDSystemInfo = pcidsi; |
333 | 76.2k | pcmap->CMapVersion = 1.0; |
334 | | /* uid = 0, UIDOffset = 0 */ |
335 | 76.2k | pcmap->WMode = wmode; |
336 | | /* from_Unicode = 0 */ |
337 | | /* not glyph_name, glyph_name_data */ |
338 | 76.2k | pcmap->procs = procs; |
339 | 76.2k | *ppcmap = pcmap; |
340 | 76.2k | return 0; |
341 | 76.2k | } |
342 | | |
343 | | int gs_cmap_free(gs_cmap_t *pcmap, gs_memory_t *mem) |
344 | 76.2k | { |
345 | 76.2k | gs_free_object(mem, pcmap->CIDSystemInfo, "gs_cmap_free(CIDSystemInfo)"); |
346 | 76.2k | gs_free_object(mem, pcmap, "gs_cmap_free(CMap)"); |
347 | 76.2k | return 0; |
348 | 76.2k | } |
349 | | |
350 | | /* |
351 | | * Initialize an enumerator with convenient defaults (index = 0). |
352 | | */ |
353 | | void |
354 | | gs_cmap_ranges_enum_setup(gs_cmap_ranges_enum_t *penum, |
355 | | const gs_cmap_t *pcmap, |
356 | | const gs_cmap_ranges_enum_procs_t *procs) |
357 | 3.92k | { |
358 | 3.92k | penum->cmap = pcmap; |
359 | 3.92k | penum->procs = procs; |
360 | 3.92k | penum->index = 0; |
361 | 3.92k | } |
362 | | void |
363 | | gs_cmap_lookups_enum_setup(gs_cmap_lookups_enum_t *penum, |
364 | | const gs_cmap_t *pcmap, |
365 | | const gs_cmap_lookups_enum_procs_t *procs) |
366 | 679k | { |
367 | 679k | penum->cmap = pcmap; |
368 | 679k | penum->procs = procs; |
369 | 679k | penum->index[0] = penum->index[1] = 0; |
370 | 679k | } |
371 | | |
372 | | /* |
373 | | * For a random CMap, compute whether it is identity. |
374 | | * It is not applicable to gs_cmap_ToUnicode_t due to |
375 | | * different sizes of domain keys and range values. |
376 | | * Note we reject CMaps with Registry=Artifex |
377 | | * to force embedding special instandard CMaps, |
378 | | * which are not commonly in use yet. |
379 | | */ |
380 | | bool |
381 | | gs_cmap_compute_identity(const gs_cmap_t *pcmap, int font_index_only) |
382 | 71 | { |
383 | 71 | const int which = 0; |
384 | 71 | gs_cmap_lookups_enum_t lenum; |
385 | 71 | int code; |
386 | | |
387 | 71 | if (!bytes_compare(pcmap->CIDSystemInfo->Registry.data, pcmap->CIDSystemInfo->Registry.size, |
388 | 71 | (const byte *)"Artifex", 7)) |
389 | 0 | return false; |
390 | 71 | for (gs_cmap_lookups_enum_init(pcmap, which, &lenum); |
391 | 299 | (code = gs_cmap_enum_next_lookup(NULL, &lenum)) == 0; ) { |
392 | 289 | if (font_index_only >= 0 && lenum.entry.font_index != font_index_only) |
393 | 0 | continue; |
394 | 289 | if (font_index_only < 0 && lenum.entry.font_index > 0) |
395 | 0 | return false; |
396 | 517 | while (gs_cmap_enum_next_entry(&lenum) == 0) { |
397 | 289 | switch (lenum.entry.value_type) { |
398 | 289 | case CODE_VALUE_CID: |
399 | 289 | break; |
400 | 0 | case CODE_VALUE_CHARS: |
401 | 0 | return false; /* Not implemented yet. */ |
402 | 0 | case CODE_VALUE_GLYPH: |
403 | 0 | return false; |
404 | 0 | default : |
405 | 0 | return false; /* Must not happen. */ |
406 | 289 | } |
407 | 289 | if (lenum.entry.key_size != lenum.entry.value.size) |
408 | 24 | return false; |
409 | 265 | if (memcmp(lenum.entry.key[0], lenum.entry.value.data, |
410 | 265 | lenum.entry.key_size)) |
411 | 37 | return false; |
412 | 265 | } |
413 | 289 | } |
414 | 10 | return true; |
415 | 71 | } |
416 | | |
417 | | /* ================= ToUnicode CMap ========================= */ |
418 | | |
419 | | /* |
420 | | * This kind of CMap keeps a character mapping from an arbitrary |
421 | | * PS encoding to Unicode, as defined in the PDF reference, "ToUnicode CMaps". |
422 | | * It represents ranges in closure data, without using |
423 | | * gx_cmap_lookup_range_t. A special function, gs_cmap_ToUnicode_add_pair, |
424 | | * writes code pairs into the closure data. |
425 | | */ |
426 | | |
427 | | static const int gs_cmap_ToUnicode_code_bytes = 2; |
428 | | |
429 | | gs_public_st_suffix_add0(st_cmap_ToUnicode, gs_cmap_ToUnicode_t, |
430 | | "gs_cmap_ToUnicode_t", cmap_ToUnicode_enum_ptrs, cmap_ToUnicode_reloc_ptrs, |
431 | | st_cmap); |
432 | | |
433 | | static int |
434 | | gs_cmap_ToUnicode_next_range(gs_cmap_ranges_enum_t *penum) |
435 | 7.73k | { const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)penum->cmap; |
436 | 7.73k | if (penum->index == 0) { |
437 | 3.86k | memset(penum->range.first, 0, cmap->key_size); |
438 | 3.86k | memset(penum->range.last, 0xff, cmap->key_size); |
439 | 3.86k | penum->range.size = cmap->key_size; |
440 | 3.86k | penum->index = 1; |
441 | 3.86k | return 0; |
442 | 3.86k | } |
443 | 3.86k | return 1; |
444 | 7.73k | } |
445 | | |
446 | | static const gs_cmap_ranges_enum_procs_t gs_cmap_ToUnicode_range_procs = { |
447 | | gs_cmap_ToUnicode_next_range |
448 | | }; |
449 | | |
450 | | static int |
451 | | gs_cmap_ToUnicode_decode_next(const gs_cmap_t *pcmap, const gs_const_string *str, |
452 | | uint *pindex, uint *pfidx, |
453 | | gs_char *pchr, gs_glyph *pglyph) |
454 | 0 | { |
455 | 0 | return_error(gs_error_unregistered); |
456 | 0 | } |
457 | | |
458 | | static void |
459 | | gs_cmap_ToUnicode_enum_ranges(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *pre) |
460 | 3.86k | { |
461 | 3.86k | gs_cmap_ranges_enum_setup(pre, pcmap, &gs_cmap_ToUnicode_range_procs); |
462 | 3.86k | } |
463 | | |
464 | | static int |
465 | | gs_cmap_ToUnicode_next_lookup(gs_memory_t *mem, gs_cmap_lookups_enum_t *penum) |
466 | 7.73k | { const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)penum->cmap; |
467 | | |
468 | 7.73k | if (penum->index[0]++ > 0) |
469 | 3.86k | return 1; |
470 | 3.86k | penum->index[1] = 0; |
471 | 3.86k | penum->entry.key_is_range = true; |
472 | 3.86k | penum->entry.value_type = CODE_VALUE_CHARS; |
473 | 3.86k | penum->entry.key_size = cmap->key_size; |
474 | 3.86k | penum->entry.value.size = gs_cmap_ToUnicode_code_bytes; |
475 | 3.86k | penum->entry.font_index = 0; |
476 | 3.86k | penum->entry.value.data = gs_alloc_bytes(mem, cmap->value_size, "working ToUnicode buffer"); |
477 | 3.86k | penum->entry.value.size = cmap->value_size; |
478 | 3.86k | return 0; |
479 | 7.73k | } |
480 | | |
481 | | static int |
482 | | gs_cmap_ToUnicode_next_entry(gs_cmap_lookups_enum_t *penum) |
483 | 148k | { const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)penum->cmap; |
484 | 148k | const uchar *map = cmap->glyph_name_data; |
485 | 148k | const int num_codes = cmap->num_codes; |
486 | 148k | uint index = penum->index[1], i, j; |
487 | 148k | uchar c0, c1, c2; |
488 | | |
489 | 56.7M | for (i = index; i < num_codes; i++) |
490 | 56.7M | if (map[i * (cmap->value_size + 2)] != 0 || map[i * (cmap->value_size + 2) + 1] != 0) |
491 | 144k | break; |
492 | 148k | if (i >= num_codes) |
493 | 3.86k | return 1; |
494 | 144k | c0 = map[i * (cmap->value_size + 2) + 2]; |
495 | 144k | if (cmap->value_size > 1) |
496 | 144k | c1 = map[i * (cmap->value_size + 2) + 3]; |
497 | 0 | else |
498 | 0 | c1 = 0; |
499 | 144k | for (j = i + 1, c2 = c1 + 1; j < num_codes; j++, c2++) { |
500 | | /* Due to PDF spec, *bfrange boundaries may differ |
501 | | in the last byte only. */ |
502 | 144k | if (j % 256 == 0) |
503 | 26 | break; |
504 | 144k | if ((uchar)c2 == 0) |
505 | 16 | break; |
506 | 144k | if (map[j * (cmap->value_size + 2) + 2] != c0 || map[i * (cmap->value_size + 2) + 3] != c2) |
507 | 144k | break; |
508 | 144k | } |
509 | 144k | penum->index[1] = j; |
510 | 144k | if (cmap->key_size > 1) { |
511 | 26.5k | penum->entry.key[0][0] = (uchar)(i >> 8); |
512 | 26.5k | penum->entry.key[0][cmap->key_size - 1] = (uchar)(i & 0xFF); |
513 | 26.5k | penum->entry.key[1][0] = (uchar)(j >> 8); |
514 | 26.5k | penum->entry.key[1][cmap->key_size - 1] = (uchar)((j - 1) & 0xFF); |
515 | 117k | } else { |
516 | 117k | penum->entry.key[0][0] = (uchar)(i); |
517 | 117k | penum->entry.key[1][0] = (uchar)(j - 1); |
518 | 117k | } |
519 | 144k | c0 = map[i * (cmap->value_size + 2)]; |
520 | 144k | c1 = map[i * (cmap->value_size + 2) + 1]; |
521 | 144k | penum->entry.value.size = (c0 << 8) + c1; |
522 | 144k | memcpy((void *)penum->entry.value.data, map + (i * (cmap->value_size + 2)) + 2, |
523 | 144k | penum->entry.value.size); |
524 | 144k | return 0; |
525 | 148k | } |
526 | | |
527 | | static const gs_cmap_lookups_enum_procs_t gs_cmap_ToUnicode_lookup_procs = { |
528 | | gs_cmap_ToUnicode_next_lookup, gs_cmap_ToUnicode_next_entry |
529 | | }; |
530 | | |
531 | | static void |
532 | | gs_cmap_ToUnicode_enum_lookups(const gs_cmap_t *pcmap, int which, |
533 | | gs_cmap_lookups_enum_t *pre) |
534 | 7.73k | { |
535 | 7.73k | gs_cmap_lookups_enum_setup(pre, pcmap, |
536 | 7.73k | (which ? &gs_cmap_no_lookups_procs : /* fixme */ |
537 | 7.73k | &gs_cmap_ToUnicode_lookup_procs)); |
538 | 7.73k | } |
539 | | |
540 | | static bool |
541 | | gs_cmap_ToUnicode_is_identity(const gs_cmap_t *pcmap, int font_index_only) |
542 | 0 | { const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)pcmap; |
543 | 0 | return cmap->is_identity; |
544 | 0 | } |
545 | | |
546 | | static const gs_cmap_procs_t gs_cmap_ToUnicode_procs = { |
547 | | gs_cmap_ToUnicode_decode_next, |
548 | | gs_cmap_ToUnicode_enum_ranges, |
549 | | gs_cmap_ToUnicode_enum_lookups, |
550 | | gs_cmap_ToUnicode_is_identity |
551 | | }; |
552 | | |
553 | | /* |
554 | | * Allocate and initialize a ToUnicode CMap. |
555 | | */ |
556 | | int |
557 | | gs_cmap_ToUnicode_alloc(gs_memory_t *mem, int id, int num_codes, int key_size, int value_size, gs_cmap_t **ppcmap) |
558 | 4.17k | { int code; |
559 | 4.17k | uchar *map, *cmap_name = NULL; |
560 | 4.17k | gs_cmap_ToUnicode_t *cmap; |
561 | 4.17k | int name_len = 0; |
562 | | # if 0 |
563 | | /* We don't write a CMap name to ToUnicode CMaps, |
564 | | * becsue (1) there is no conventional method for |
565 | | * generating them, and (2) Acrobat Reader ignores them. |
566 | | * But we'd like to keep this code until beta-testing completes, |
567 | | * and we ensure that other viewers do not need the names. |
568 | | */ |
569 | | char sid[10], *pref = "aux-"; |
570 | | int sid_len, pref_len = strlen(pref); |
571 | | |
572 | | gs_snprintf(sid, sizeof(sid), "%d", id); |
573 | | sid_len = strlen(sid); |
574 | | name_len = pref_len + sid_len; |
575 | | cmap_name = gs_alloc_string(mem, name_len, "gs_cmap_ToUnicode_alloc"); |
576 | | if (cmap_name == 0) |
577 | | return_error(gs_error_VMerror); |
578 | | memcpy(cmap_name, pref, pref_len); |
579 | | memcpy(cmap_name + pref_len, sid, sid_len); |
580 | | # endif |
581 | | /* code is sacrifical here */ |
582 | | /* Realistically, we don't expect code maps larger than 2Gb |
583 | | * Although this creation code handles that, later code to populate |
584 | | * the map doesn't. |
585 | | */ |
586 | 4.17k | if (check_int_multiply(num_codes, value_size + 2, &code) < 0) { |
587 | 0 | return_error(gs_error_VMerror); |
588 | 0 | } |
589 | | |
590 | 4.17k | code = gs_cmap_alloc(ppcmap, &st_cmap_ToUnicode, |
591 | 4.17k | 0, cmap_name, name_len, NULL, 0, &gs_cmap_ToUnicode_procs, mem); |
592 | 4.17k | if (code < 0) |
593 | 0 | return code; |
594 | 4.17k | map = (uchar *)gs_alloc_bytes(mem, |
595 | 4.17k | (size_t)num_codes * (value_size + 2), |
596 | 4.17k | "gs_cmap_ToUnicode_alloc"); |
597 | 4.17k | if (map == NULL) { |
598 | 0 | gs_cmap_free(*ppcmap, mem); |
599 | 0 | *ppcmap = NULL; |
600 | 0 | return_error(gs_error_VMerror); |
601 | 0 | } |
602 | 4.17k | memset(map, 0, (size_t)num_codes * (value_size + 2)); |
603 | 4.17k | cmap = (gs_cmap_ToUnicode_t *)*ppcmap; |
604 | 4.17k | cmap->glyph_name_data = map; |
605 | 4.17k | cmap->CMapType = 2; |
606 | 4.17k | cmap->num_fonts = 1; |
607 | 4.17k | cmap->key_size = key_size; |
608 | 4.17k | cmap->value_size = value_size; |
609 | 4.17k | cmap->num_codes = num_codes; |
610 | 4.17k | cmap->ToUnicode = true; |
611 | 4.17k | cmap->is_identity = true; |
612 | 4.17k | return 0; |
613 | 4.17k | } |
614 | | |
615 | | int gs_cmap_ToUnicode_free(gs_memory_t *mem, gs_cmap_t *pcmap) |
616 | 4.17k | { |
617 | 4.17k | gs_free_object(mem, pcmap->glyph_name_data, "Free ToUnicode glyph data"); |
618 | 4.17k | gs_cmap_free(pcmap, mem); |
619 | 4.17k | return 0; |
620 | 4.17k | } |
621 | | |
622 | | /* Ths function is called when we discover that the value length we are using to |
623 | | * store Unicode code points is too small for a new value. It increases |
624 | | * the size of the map, and of each entry in the map, which is why we have to |
625 | | * use a for loop rather than a memcpy. Note that when we increase the number |
626 | | * of bytes used for a map entry, unused bytes are stored at the end, the initial |
627 | | * 2 bytes are the length (in bytes) actually used by ths entry. |
628 | | */ |
629 | | int |
630 | | gs_cmap_ToUnicode_realloc(gs_memory_t *mem, int new_value_size, gs_cmap_t **ppcmap) |
631 | 48 | { |
632 | 48 | gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)*ppcmap; |
633 | 48 | uchar *new_ptr, *new_map, *old_map = cmap->glyph_name_data; |
634 | 48 | int i; |
635 | | |
636 | 48 | new_map = (uchar *)gs_alloc_bytes(mem, |
637 | 48 | (size_t)cmap->num_codes * |
638 | 48 | (new_value_size + 2), |
639 | 48 | "gs_cmap_ToUnicode_alloc"); |
640 | 48 | if (new_map == NULL) { |
641 | 0 | return_error(gs_error_VMerror); |
642 | 0 | } |
643 | 48 | new_ptr = new_map; |
644 | 48 | memset(new_map, 0, (size_t)cmap->num_codes * (new_value_size + 2)); |
645 | | |
646 | 1.77M | for (i=0;i<cmap->num_codes;i++) { |
647 | 1.77M | memcpy(new_ptr, old_map, cmap->value_size + 2); |
648 | 1.77M | old_map += cmap->value_size + 2; |
649 | 1.77M | new_ptr += new_value_size + 2; |
650 | 1.77M | } |
651 | 48 | gs_free_object(mem, cmap->glyph_name_data, "Free (realloc) ToUnicode glyph data"); |
652 | 48 | cmap->glyph_name_data = new_map; |
653 | 48 | cmap->value_size = new_value_size; |
654 | 48 | return 0; |
655 | 48 | } |
656 | | |
657 | | int gs_cmap_ToUnicode_check_pair(gs_cmap_t *pcmap, int code0) |
658 | 787k | { |
659 | 787k | gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)pcmap; |
660 | 787k | uchar *map = pcmap->glyph_name_data; |
661 | 787k | const int num_codes = ((gs_cmap_ToUnicode_t *)pcmap)->num_codes; |
662 | | |
663 | 787k | if (code0 >= num_codes) |
664 | 0 | return 0; |
665 | 787k | if(map[code0 * (cmap->value_size + 2)] == 0 && map[code0 * (cmap->value_size + 2) + 1] == 0) |
666 | 97.2k | return 0; |
667 | 690k | return 1; |
668 | 787k | } |
669 | | |
670 | | /* |
671 | | * Write a code pair to ToUnicode CMap. |
672 | | */ |
673 | | void |
674 | | gs_cmap_ToUnicode_add_pair(gs_cmap_t *pcmap, int code0, ushort *u, unsigned int length) |
675 | 251k | { |
676 | 251k | gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)pcmap; |
677 | 251k | uchar *map = pcmap->glyph_name_data, *unicode = (uchar *)u; |
678 | 251k | const int num_codes = ((gs_cmap_ToUnicode_t *)pcmap)->num_codes; |
679 | 251k | int i, code1 = 0; |
680 | | |
681 | 251k | if (code0 < 0 || code0 >= num_codes) |
682 | 28 | return; /* must not happen. */ |
683 | 251k | map[code0 * (cmap->value_size + 2)] = (uchar)(length >> 8); |
684 | 251k | map[code0 * (cmap->value_size + 2) + 1] = (uchar)(length & 0xFF); |
685 | | |
686 | 251k | memcpy(&map[(code0 * (cmap->value_size + 2)) + 2], unicode, length); |
687 | 251k | if (length <= 4) { |
688 | 754k | for (i=0;i<length;i++) { |
689 | 503k | code1 = (code1 << 8) + unicode[i]; |
690 | 503k | } |
691 | 251k | cmap->is_identity &= (code0 == code1); |
692 | 251k | } |
693 | 251k | } |