/src/ghostpdl/base/gschar0.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2023 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Composite font decoding for Ghostscript library */ |
18 | | #include "memory_.h" |
19 | | #include "gx.h" |
20 | | #include "gserrors.h" |
21 | | #include "gsstruct.h" |
22 | | #include "gsfcmap.h" |
23 | | #include "gxfcmap.h" |
24 | | #include "gxfixed.h" |
25 | | #include "gxdevice.h" |
26 | | #include "gxfont.h" |
27 | | #include "gxfont0.h" |
28 | | #include "gxfcid.h" |
29 | | #include "gxtext.h" |
30 | | |
31 | | /* Stack up modal composite fonts, down to a non-modal or base font: starting from the font at pte->fstack.depth, repeatedly push the descendant selected by Encoding[0] of each modal composite font. Returns 0 after storing the new depth in pte->fstack.depth, or gs_error_invalidfont (leaving pte->fstack.depth unchanged) if the depth has already reached MAX_FONT_STACK. */ |
32 | | static int |
33 | | gs_stack_modal_fonts(gs_text_enum_t *pte) |
34 | 2.08M | { |
35 | 2.08M | int fdepth = pte->fstack.depth; |
36 | 2.08M | gs_font *cfont = pte->fstack.items[fdepth].font; |
37 | |
38 | 2.08M | while (cfont->FontType == ft_composite) { |
39 | 2.08M | gs_font_type0 *const cmfont = (gs_font_type0 *) cfont; |
40 | |
41 | 2.08M | if (!fmap_type_is_modal(cmfont->data.FMapType)) /* a non-modal composite font ends the descent */ |
42 | 2.08M | break; |
43 | 0 | if (fdepth == MAX_FONT_STACK) |
44 | 0 | return_error(gs_error_invalidfont); |
45 | 0 | fdepth++; |
46 | 0 | cfont = cmfont->data.FDepVector[cmfont->data.Encoding[0]]; /* descend through the first Encoding entry */ |
47 | 0 | pte->fstack.items[fdepth].font = cfont; |
48 | 0 | pte->fstack.items[fdepth - 1].index = 0; /* the parent records index 0, matching the Encoding[0] choice above */ |
49 | 0 | if_debug2m('j', pte->memory, "[j]stacking depth=%d font="PRI_INTPTR"\n", |
50 | 0 | fdepth, (intptr_t)cfont); |
51 | 0 | } |
52 | 2.08M | pte->fstack.depth = fdepth; |
53 | 2.08M | return 0; |
54 | 2.08M | } |
55 | | /* Initialize the composite font stack for a show enumerator: pfont becomes the root entry (depth 0, index 0), then gs_stack_modal_fonts stacks any modal descendants. */ |
56 | | /* Return gs_error_invalidfont if the text operation is neither TEXT_FROM_STRING nor TEXT_FROM_BYTES (i.e. the data is not a byte string); otherwise return the result of gs_stack_modal_fonts. */ |
57 | | int |
58 | | gs_type0_init_fstack(gs_text_enum_t *pte, gs_font * pfont) |
59 | 2.08M | { |
60 | 2.08M | if (!(pte->text.operation & (TEXT_FROM_STRING | TEXT_FROM_BYTES))) |
61 | 0 | return_error(gs_error_invalidfont); |
62 | 2.08M | if_debug1m('j', pte->memory, "[j]stacking depth=0 font="PRI_INTPTR"\n", |
63 | 2.08M | (intptr_t)pfont); |
64 | 2.08M | pte->fstack.depth = 0; |
65 | 2.08M | pte->fstack.items[0].font = pfont; |
66 | 2.08M | pte->fstack.items[0].index = 0; |
67 | 2.08M | return gs_stack_modal_fonts(pte); |
68 | 2.08M | } |
69 | | |
70 | | /* Select the descendant of a composite font designated by font index fidx, and record it on the font stack. */ |
71 | | /* Uses free variables: pte, orig_depth; may set free variable changed to 1 (when the new depth exceeds orig_depth or the font stored at that depth differs). */ |
72 | | /* Uses pdata, uses & updates fdepth, sets pfont. Exits the enclosing function with gs_error_rangecheck if fidx >= pdata->encoding_size, or with gs_error_invalidfont if fdepth == MAX_FONT_STACK. */ |
73 | | #define select_descendant(pfont, pdata, fidx, fdepth)\ |
74 | 5.55M | if (fidx >= pdata->encoding_size)\ |
75 | 5.55M | return_error(gs_error_rangecheck);\ |
76 | 5.55M | if (fdepth == MAX_FONT_STACK)\ |
77 | 5.55M | return_error(gs_error_invalidfont);\ |
78 | 5.55M | pfont = pdata->FDepVector[pdata->Encoding[fidx]];\ |
79 | 5.55M | pte->fstack.items[fdepth].index = fidx;\ |
80 | 5.55M | if (++fdepth > orig_depth || pfont != pte->fstack.items[fdepth].font) {\ |
81 | 2.13M | pte->fstack.items[fdepth].font = pfont;\ |
82 | 2.13M | pte->fstack.items[fdepth].index = 0;\ |
83 | 2.13M | changed = 1;\ |
84 | 3.41M | } else {\ |
85 | 3.41M | } |
86 | | |
87 | | /* Get the root EscChar of a composite font, which overrides the EscChar */ |
88 | | /* of descendant fonts. The root is pte->fstack.items[0].font, which is cast to gs_font_type0 without checking its FontType. */ |
89 | | static uint |
90 | | root_esc_char(const gs_text_enum_t *pte) |
91 | 0 | { |
92 | 0 | return ((gs_font_type0 *) (pte->fstack.items[0].font))->data.EscChar; |
93 | 0 | } |
94 | | |
95 | | /* Get the next character or glyph from a composite string, storing the results in *pchr and *pglyph. */ |
96 | | /* If we run off the end of the string in the middle of a */ |
97 | | /* multi-byte sequence, return gs_error_rangecheck. */ |
98 | | /* If the string is empty or already fully consumed, return 2. */ |
99 | | /* If the current (base) font changed, return 1. Otherwise, return 0. Other negative codes (gs_error_invalidfont, or whatever gs_cmap_decode_next returns) are passed back to the caller. */ |
100 | | int |
101 | | gs_type0_next_char_glyph(gs_text_enum_t *pte, gs_char *pchr, gs_glyph *pglyph) |
102 | 7.40M | { |
103 | 7.40M | const byte *str = pte->text.data.bytes; |
104 | 7.40M | const byte *p = str + pte->index; |
105 | 7.40M | const byte *end = str + pte->text.size; |
106 | 7.40M | int fdepth = pte->fstack.depth; |
107 | 7.40M | int orig_depth = fdepth; |
108 | 7.40M | gs_font *pfont; |
109 | |
110 | 11.1M | #define pfont0 ((gs_font_type0 *)pfont) |
111 | 7.40M | gs_type0_data *pdata; |
112 | 7.40M | uint fidx; |
113 | 7.40M | gs_char chr; |
114 | 7.40M | gs_glyph glyph = GS_NO_GLYPH; |
115 | 7.40M | int changed = 0; |
116 | |
117 | 7.40M | pte->FontBBox_as_Metrics2.x = pte->FontBBox_as_Metrics2.y = 0; /* reset here; filled in again at 'done' when the selected font is a CID font */ |
118 | |
119 | 7.40M | #define need_left(n)\ |
120 | 7.40M | if ( end - p < n ) return_error(gs_error_rangecheck) |
121 | |
122 | | /* |
123 | | * Although the Adobe documentation doesn't say anything about this, |
124 | | * if the root font is modal and the very first character of the |
125 | | * string being decoded is an escape or shift character, then |
126 | | * font selection via the escape mechanism works down from the root, |
127 | | * rather than up from the lowest modal font. (This was first |
128 | | * reported by Norio Katayama, and confirmed by someone at Adobe.) |
129 | | */ |
130 | |
131 | 7.40M | if (pte->index == 0) { |
132 | 2.13M | int idepth = 0; |
133 | |
134 | 2.13M | pfont = pte->fstack.items[0].font; |
135 | 2.13M | for (; pfont->FontType == ft_composite;) { |
136 | 2.13M | fmap_type fmt = (pdata = &pfont0->data)->FMapType; |
137 | |
138 | 2.13M | if (p == end) |
139 | 486 | return 2; |
140 | 2.13M | chr = *p; |
141 | 2.13M | switch (fmt) { |
142 | 0 | case fmap_escape: |
143 | 0 | if (chr != root_esc_char(pte)) |
144 | 0 | break; |
145 | 0 | need_left(2); |
146 | 0 | fidx = p[1]; |
147 | 0 | p += 2; |
148 | 0 | if_debug1m('j', pte->memory, "[j]from root: escape %d\n", fidx); |
149 | 0 | rdown:select_descendant(pfont, pdata, fidx, idepth); |
150 | 0 | if_debug2m('j', pte->memory, "[j]... new depth=%d, new font="PRI_INTPTR"\n", |
151 | 0 | idepth, (intptr_t)pfont); |
152 | 0 | continue; |
153 | 0 | case fmap_double_escape: |
154 | 0 | if (chr != root_esc_char(pte)) |
155 | 0 | break; |
156 | 0 | need_left(2); |
157 | 0 | fidx = p[1]; |
158 | 0 | p += 2; |
159 | 0 | if (fidx == chr) { |
160 | 0 | need_left(1); |
161 | 0 | fidx = *p++ + 256; |
162 | 0 | } |
163 | 0 | if_debug1m('j', pte->memory, "[j]from root: double escape %d\n", fidx); |
164 | 0 | goto rdown; |
165 | 0 | case fmap_shift: |
166 | 0 | if (chr == pdata->ShiftIn) |
167 | 0 | fidx = 0; |
168 | 0 | else if (chr == pdata->ShiftOut) |
169 | 0 | fidx = 1; |
170 | 0 | else |
171 | 0 | break; |
172 | 0 | p++; |
173 | 0 | if_debug1m('j', pte->memory, "[j]from root: shift %d\n", fidx); |
174 | 0 | goto rdown; |
175 | 2.13M | default: |
176 | 2.13M | break; |
177 | 2.13M | } |
178 | 2.13M | break; |
179 | 2.13M | } |
180 | | /* If we saw any initial escapes or shifts, */ |
181 | | /* compute a new initial base font. */ |
182 | 2.13M | if (idepth != 0) { |
183 | 0 | int code; |
184 |

185 | 0 | pte->fstack.depth = idepth; |
186 | 0 | code = gs_stack_modal_fonts(pte); |
187 | 0 | if (code < 0) |
188 | 0 | return code; |
189 | 0 | if (pte->fstack.depth > idepth) |
190 | 0 | changed = 1; |
191 | 0 | orig_depth = fdepth = pte->fstack.depth; |
192 | 0 | } |
193 | 2.13M | } |
194 | | /* Handle initial escapes or shifts, working up the font stack from the current depth towards the root. */ |
195 | |
196 | 7.40M | up:if (p == end) |
197 | 1.84M | return 2; |
198 | 5.55M | chr = *p; |
199 | 8.97M | while (fdepth > 0) { |
200 | 3.41M | pfont = pte->fstack.items[fdepth - 1].font; |
201 | 3.41M | pdata = &pfont0->data; |
202 | 3.41M | switch (pdata->FMapType) { |
203 | 3.41M | default: /* non-modal */ |
204 | 3.41M | fdepth--; |
205 | 3.41M | continue; |
206 | |
207 | 0 | case fmap_escape: |
208 | 0 | if (chr != root_esc_char(pte)) |
209 | 0 | break; |
210 | 0 | need_left(2); |
211 | 0 | fidx = *++p; |
212 | 0 | if_debug1m('j', pte->memory, "[j]next: escape %d\n", fidx); |
213 | | /* Per Adobe, if we get an escape at the root, */ |
214 | | /* treat it as an ordinary character (font index). */ |
215 | 0 | if (fidx == chr && fdepth > 1) { |
216 | 0 | fdepth--; |
217 | 0 | goto up; |
218 | 0 | } |
219 | 0 | down:if (++p == end) /* step past the last byte of the escape/shift sequence; nothing after it means no character */ |
220 | 0 | return 2; |
221 | 0 | chr = *p; |
222 | 0 | fdepth--; |
223 | 0 | do { |
224 | 0 | select_descendant(pfont, pdata, fidx, fdepth); |
225 | 0 | if_debug3m('j', pte->memory, "[j]down from modal: new depth=%d, index=%d, new font="PRI_INTPTR"\n", |
226 | 0 | fdepth, fidx, (intptr_t)pfont); |
227 | 0 | if (pfont->FontType != ft_composite) |
228 | 0 | break; |
229 | 0 | pdata = &pfont0->data; |
230 | 0 | fidx = 0; |
231 | 0 | } |
232 | 0 | while (pdata->FMapType == fmap_escape); |
233 | 0 | continue; |
234 | |
235 | 0 | case fmap_double_escape: |
236 | 0 | if (chr != root_esc_char(pte)) |
237 | 0 | break; |
238 | 0 | need_left(2); |
239 | 0 | fidx = *++p; |
240 | 0 | if (fidx == chr) { |
241 | 0 | need_left(2); |
242 | 0 | fidx = *++p + 256; |
243 | 0 | } |
244 | 0 | if_debug1m('j', pte->memory, "[j]next: double escape %d\n", fidx); |
245 | 0 | goto down; |
246 | |
247 | 0 | case fmap_shift: |
248 | 0 | if (chr == pdata->ShiftIn) |
249 | 0 | fidx = 0; |
250 | 0 | else if (chr == pdata->ShiftOut) |
251 | 0 | fidx = 1; |
252 | 0 | else |
253 | 0 | break; |
254 | 0 | if_debug1m('j', pte->memory, "[j]next: shift %d\n", fidx); |
255 | 0 | goto down; |
256 | 3.41M | } |
257 | 0 | break; |
258 | 3.41M | } |
259 | | /* At this point, chr == *p. */ |
260 | | /* (This is important to know for CMap'ed fonts.) */ |
261 | 5.55M | p++; |
262 | |
263 | | /* |
264 | | * Now handle non-modal descendants. |
265 | | * The PostScript language manual has some confusing |
266 | | * wording about the parent supplying the "first part" |
267 | | * of the child's decoding information; what this means |
268 | | * is not (as one might imagine) the font index, but |
269 | | * simply the first byte of the data. |
270 | | */ |
271 | |
272 | 11.0M | while ((pfont = pte->fstack.items[fdepth].font)->FontType == ft_composite) { |
273 | 5.55M | pdata = &pfont0->data; |
274 | 5.55M | switch (pdata->FMapType) { |
275 | 0 | default: /* can't happen */ |
276 | 0 | return_error(gs_error_invalidfont); |
277 | |
278 | 0 | case fmap_8_8: |
279 | 0 | need_left(1); |
280 | 0 | fidx = chr; |
281 | 0 | chr = *p++; |
282 | 0 | if_debug2m('J', pte->memory, "[J]8/8 index=%d, char=%ld\n", |
283 | 0 | fidx, chr); |
284 | 0 | break; |
285 | |
286 | 0 | case fmap_1_7: |
287 | 0 | fidx = chr >> 7; |
288 | 0 | chr &= 0x7f; |
289 | 0 | if_debug2m('J', pte->memory, "[J]1/7 index=%d, char=%ld\n", |
290 | 0 | fidx, chr); |
291 | 0 | break; |
292 | |
293 | 0 | case fmap_9_7: |
294 | 0 | need_left(1); |
295 | 0 | fidx = ((uint) chr << 1) + (*p >> 7); |
296 | 0 | chr = *p & 0x7f; |
297 | 0 | if_debug2m('J', pte->memory, "[J]9/7 index=%d, char=%ld\n", |
298 | 0 | fidx, chr); |
299 | 0 | p++; |
300 | 0 | break; |
301 | |
302 | 0 | case fmap_SubsVector: |
303 | 0 | { |
304 | 0 | int width = pdata->subs_width; |
305 | 0 | uint subs_count = pdata->subs_size; |
306 | 0 | const byte *psv = pdata->SubsVector.data; |
307 |

308 | 0 | #define subs_loop(subs_elt, width)\ |
309 | 0 | while ( subs_count != 0 && tchr >= (schr = subs_elt) )\ |
310 | 0 | subs_count--, tchr -= schr, psv += width;\ |
311 | 0 | chr = tchr; p += width - 1; break |
312 |

313 | 0 | switch (width) { /* each case leaves this switch through the break at the end of subs_loop */ |
314 | 0 | default: /* can't happen */ |
315 | 0 | return_error(gs_error_invalidfont); |
316 | 0 | case 1: |
317 | 0 | { |
318 | 0 | byte tchr = (byte) chr, schr; |
319 |

320 | 0 | subs_loop(*psv, 1); |
321 | 0 | } |
322 | 0 | case 2: |
323 | 0 | need_left(1); |
324 | 0 | #define w2(p) (((ushort)*p << 8) + p[1]) |
325 | 0 | { |
326 | 0 | ushort tchr = ((ushort) chr << 8) + *p, |
327 | 0 | schr; |
328 |

329 | 0 | subs_loop(w2(psv), 2); |
330 | 0 | } |
331 | 0 | case 3: |
332 | 0 | need_left(2); |
333 | 0 | #define w3(p) (((ulong)*p << 16) + ((uint)p[1] << 8) + p[2]) |
334 | 0 | { |
335 | 0 | ulong tchr = ((ulong) chr << 16) + w2(p), |
336 | 0 | schr; |
337 |

338 | 0 | subs_loop(w3(psv), 3); |
339 | 0 | } |
340 | 0 | case 4: |
341 | 0 | need_left(3); |
342 | 0 | #define w4(p) (((ulong)*p << 24) + ((ulong)p[1] << 16) + ((uint)p[2] << 8) + p[3]) |
343 | 0 | { |
344 | 0 | ulong tchr = ((ulong) chr << 24) + w3(p), |
345 | 0 | schr; |
346 |

347 | 0 | subs_loop(w4(psv), 4); |
348 | 0 | } |
349 | 0 | #undef w2 |
350 | 0 | #undef w3 |
351 | 0 | #undef w4 |
352 | 0 | #undef subs_loop |
353 | 0 | } |
354 | 0 | fidx = pdata->subs_size - subs_count; /* number of SubsVector entries stepped over by subs_loop */ |
355 | 0 | if_debug2m('J', pte->memory, "[J]SubsVector index=%d, char=%ld\n", |
356 | 0 | fidx, chr); |
357 | 0 | break; |
358 | 0 | } |
359 | |
360 | 5.55M | case fmap_CMap: |
361 | 5.55M | { |
362 | 5.55M | gs_const_string cstr; |
363 | 5.55M | uint mindex = p - str - 1; /* p was incremented */ |
364 | 5.55M | int code; |
365 | |
366 | | /* |
367 | | * When decoding an FMapType4 or 5, the value |
368 | | * of chr is modified; when an FMapType9 (CMap) |
369 | | * composite font is used as a descendant font, |
370 | | * we have to pass the text including a modified |
371 | | * chr. Check whether chr has been modified, and |
372 | | * if so, construct and pass a modified buffer. |
373 | | */ |
374 | 5.55M | if (*(p - 1) != chr) { |
375 | 0 | byte substr[MAX_CMAP_CODE_SIZE]; |
376 | 0 | int submindex = 0; |
377 | 0 | if_debug2m('j', pte->memory, |
378 | 0 | "[j] *(p-1) 0x%02x != chr 0x%02x, modified str should be passed\n", |
379 | 0 | *(p-1), (byte)chr); |
380 | 0 | memcpy(substr, p - 1, |
381 | 0 | min(MAX_CMAP_CODE_SIZE, end - p + 1)); |
382 | 0 | substr[0] = chr; |
383 | 0 | cstr.data = substr; |
384 | 0 | cstr.size = min(MAX_CMAP_CODE_SIZE, end - p + 1); |
385 | 0 | if (gs_debug_c('j')) { |
386 | 0 | dmlprintf(pfont->memory, "[j] original str("); |
387 | 0 | debug_print_string_hex(pfont->memory, str, end - str); |
388 | 0 | dmlprintf(pfont->memory, ") -> modified substr("); |
389 | 0 | debug_print_string_hex(pfont->memory, cstr.data, cstr.size); |
390 | 0 | dmlprintf(pfont->memory, ")\n"); |
391 | 0 | } |
392 | 0 | code = gs_cmap_decode_next(pdata->CMap, &cstr, |
393 | 0 | (uint*) &submindex, &fidx, &chr, &glyph); |
394 | 0 | mindex += submindex; |
395 | 5.55M | } else { |
396 | 5.55M | cstr.data = str; |
397 | 5.55M | cstr.size = end - str; |
398 | 5.55M | code = gs_cmap_decode_next(pdata->CMap, &cstr, &mindex, |
399 | 5.55M | &fidx, &chr, &glyph); |
400 | 5.55M | } |
401 | 5.55M | if (code < 0) |
402 | 0 | return code; |
403 | 5.55M | pte->cmap_code = code; /* hack for widthshow */ |
404 | 5.55M | p = str + mindex; |
405 | 5.55M | if_debug3m('J', pte->memory, "[J]CMap returns %d, chr=0x%lx, glyph="PRI_INTPTR"\n", |
406 | 5.55M | code, (ulong)chr, (intptr_t)glyph); |
407 | 5.55M | if (code == 0) { |
408 | 5.55M | if (glyph == GS_NO_GLYPH) { |
409 | 44.5k | glyph = GS_MIN_CID_GLYPH; |
410 | 44.5k | if_debug0m('J', pte->memory, "... undefined\n"); |
411 | | /* Must select a descendant font anyway, we can't use the type 0 |
412 | | * even for the /.notdef... |
413 | | */ |
414 | 44.5k | select_descendant(pfont, pdata, fidx, fdepth); |
415 | 44.5k | goto done; |
416 | 44.5k | } |
417 | 5.55M | } else |
418 | 0 | chr = (gs_char) glyph, glyph = GS_NO_GLYPH; |
419 | | /****** RESCAN chr IF DESCENDANT IS CMAP'ED ******/ |
420 | 5.51M | break; |
421 | 5.55M | } |
422 | 5.55M | } |
423 | |
424 | 16.5M | select_descendant(pfont, pdata, fidx, fdepth); |
425 | 16.5M | if_debug2m('J', pte->memory, "... new depth=%d, new font="PRI_INTPTR"\n", |
426 | 16.5M | fdepth, (intptr_t)pfont); |
427 | 16.5M | } |
428 | 5.55M | done: |
429 | | /* FontBBox may be used as metrics2 with WMode=1 : |
430 | | */ |
431 | 5.55M | if (pte->fstack.items[fdepth].font->FontType == ft_CID_encrypted || |
432 | 5.55M | pte->fstack.items[fdepth].font->FontType == ft_CID_TrueType |
433 | 5.55M | ) { |
434 | 5.55M | gs_font_base *pfb = (gs_font_base *)pte->fstack.items[fdepth].font; |
435 | |
436 | 5.55M | pte->FontBBox_as_Metrics2 = pfb->FontBBox.q; |
437 | 5.55M | } |
438 | |
439 | | /* Set fstack.items[fdepth].index to CIDFont FDArray index or 0 otherwise */ |
440 | 5.55M | fidx = 0; |
441 | 5.55M | if (pte->fstack.items[fdepth].font->FontType == ft_CID_encrypted) { |
442 | 222k | int code, font_index; |
443 | 222k | pfont = pte->fstack.items[fdepth].font; |
444 | 222k | code = ((gs_font_cid0 *)pfont)->cidata.glyph_data((gs_font_base *)pfont, |
445 | 222k | glyph, NULL, &font_index); |
446 | 222k | if (code < 0) { /* failed to load glyph data, reload glyph for CID 0 */ |
447 | 169k | code = ((gs_font_cid0 *)pfont)->cidata.glyph_data((gs_font_base *)pfont, |
448 | 169k | (gs_glyph)(GS_MIN_CID_GLYPH + 0), NULL, &font_index); |
449 | 169k | if (code < 0) |
450 | 29 | return_error(gs_error_invalidfont); |
451 | 169k | } |
452 | 222k | fidx = (uint)font_index; |
453 | 222k | if (!changed && pte->fstack.items[fdepth].index != fidx) |
454 | 2.28k | changed = 1; |
455 | 222k | } |
456 | 5.55M | pte->fstack.items[fdepth].index = fidx; |
457 | |
458 | 5.55M | *pchr = chr; |
459 | 5.55M | *pglyph = glyph; |
460 | | /* Update the pointer into the original string, but only if */ |
461 | | /* we didn't switch over to parsing a code from a CMap. NOTE(review): str is not reassigned anywhere in this function, so the test below looks always true - confirm before relying on it. */ |
462 | 5.55M | if (str == pte->text.data.bytes) |
463 | 5.55M | pte->index = p - str; |
464 | 5.55M | pte->fstack.depth = fdepth; |
465 | 5.55M | if_debug4m('J', pte->memory, "[J]depth=%d font="PRI_INTPTR" index=%d changed=%d\n", |
466 | 5.55M | fdepth, (intptr_t)pte->fstack.items[fdepth].font, |
467 | 5.55M | pte->fstack.items[fdepth].index, changed); |
468 | 5.55M | return changed; |
469 | 5.55M | } |
470 | | #undef pfont0 |