/src/ghostpdl/base/gscencs.c
Line | Count | Source |
1 | | /* Copyright (C) 2001-2023 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Compact C representation of built-in encodings */ |
18 | | |
19 | | #include "memory_.h" |
20 | | #include "gscedata.h" |
21 | | #include "gscencs.h" |
22 | | #include "gserrors.h" |
23 | | |
24 | | /* |
25 | | * The actual encoding data tables in gscedata.c, and the internal |
26 | | * interface definitions in gscedata.h, are generated by toolbin/encs2c.ps, |
27 | | * q.v. |
28 | | * |
29 | | * In the encoding tables in gscedata.c, each glyph is represented by a |
30 | | * ushort (16-bit) value. A bias of gs_c_min_std_encoding_glyph is added |
31 | | * or subtracted to form a gs_glyph value. |
32 | | */ |
33 | | |
34 | | /* |
35 | | * gscedata.[hc] defines the following tables: |
36 | | * const char gs_c_known_encoding_chars[NUM_CHARS] -- |
37 | | * the character table. |
38 | | * const int gs_c_known_encoding_offsets[NUM_INDIRECT_LEN] -- |
39 | | * the starting offsets of the names of a given length in the |
40 | | * character table. |
41 | | * const ushort *const gs_c_known_encodings[] -- |
42 | | * pointers to the encodings per se. |
43 | | * const ushort gs_c_known_encoding_lengths[] -- |
44 | | * lengths of the encodings. |
45 | | */ |
46 | | |
47 | | const gs_glyph gs_c_min_std_encoding_glyph = GS_MIN_CID_GLYPH - 0x10000; |
48 | | |
49 | | /* |
50 | | * Encode a character in a known encoding. The only use for glyph numbers |
51 | | * returned by this procedure is to pass them to gs_c_glyph_name or gs_c_decode. |
52 | | */ |
53 | | gs_glyph |
54 | | gs_c_known_encode(gs_char ch, int ei) |
55 | 136M | { |
56 | 136M | if (ei < 0 || ei >= gs_c_known_encoding_count || |
57 | 136M | ch >= gs_c_known_encoding_lengths[ei] |
58 | 136M | ) |
59 | 33.0k | return GS_NO_GLYPH; |
60 | 136M | return gs_c_min_std_encoding_glyph + gs_c_known_encodings[ei][ch]; |
61 | 136M | } |
62 | | |
63 | | /* |
64 | | * Decode a gs_c_glyph_name glyph with a known encoding. |
65 | | */ |
66 | | gs_char |
67 | | gs_c_decode(gs_glyph glyph, int ei) |
68 | 546k | { |
69 | | /* Do a binary search for glyph, using gx_c_known_encodings_reverse */ |
70 | 546k | const ushort *const encoding = gs_c_known_encodings[ei]; |
71 | 546k | const ushort *const reverse = gs_c_known_encodings_reverse[ei]; |
72 | 546k | int first_index = 0; |
73 | 546k | int last_index = gs_c_known_encoding_reverse_lengths[ei]; |
74 | 3.72M | while (first_index < last_index) { |
75 | 3.71M | const int test_index = (first_index + last_index) / 2; |
76 | 3.71M | const gs_glyph test_glyph = |
77 | 3.71M | gs_c_min_std_encoding_glyph + encoding[reverse[test_index]]; |
78 | 3.71M | if (glyph < test_glyph) |
79 | 1.87M | last_index = test_index; |
80 | 1.84M | else if (glyph > test_glyph) |
81 | 1.30M | first_index = test_index + 1; |
82 | 538k | else |
83 | 538k | return reverse[test_index]; |
84 | 3.71M | } |
85 | 7.82k | return GS_NO_CHAR; |
86 | 546k | } |
87 | | |
88 | | /* |
89 | | * Convert a glyph number returned by gs_c_known_encode to a string. |
90 | | */ |
91 | | int |
92 | | gs_c_glyph_name(gs_glyph glyph, gs_const_string *pstr) |
93 | 146M | { |
94 | 146M | uint n = (uint)(glyph - gs_c_min_std_encoding_glyph); |
95 | 146M | uint len = N_LEN(n); |
96 | 146M | uint off = N_OFFSET(n); |
97 | | |
98 | | #ifdef DEBUG |
99 | | if (len == 0 || len > gs_c_known_encoding_max_length || |
100 | | off >= gs_c_known_encoding_offsets[len + 1] - |
101 | | gs_c_known_encoding_offsets[len] || |
102 | | off % len != 0 |
103 | | ) |
104 | | return_error(gs_error_rangecheck); |
105 | | #endif |
106 | 146M | pstr->data = (const byte *) |
107 | 146M | &gs_c_known_encoding_chars[gs_c_known_encoding_offsets[len] + off]; |
108 | 146M | pstr->size = len; |
109 | 146M | return 0; |
110 | 146M | } |
111 | | |
112 | | /* |
113 | | * Test whether a string is one that was returned by gs_c_glyph_name. |
114 | | */ |
115 | | bool |
116 | | gs_is_c_glyph_name(const byte *str, uint len) |
117 | 58.5M | { |
118 | 58.5M | return str >= (const byte *)gs_c_known_encoding_chars && |
119 | 58.5M | str < (const byte *)gs_c_known_encoding_chars + gs_c_known_encoding_total_chars; |
120 | 58.5M | } |
121 | | |
122 | | /* |
123 | | * Return the glyph number corresponding to a string (the inverse of |
124 | | * gs_c_glyph_name), or GS_NO_GLYPH if the glyph name is not known. |
125 | | */ |
126 | | gs_glyph |
127 | | gs_c_name_glyph(const byte *str, uint len) |
128 | 19.9M | { |
129 | 19.9M | if (len == 0 || len > gs_c_known_encoding_max_length) |
130 | 25.5k | return GS_NO_GLYPH; |
131 | | /* Binary search the character table. */ |
132 | 19.8M | { |
133 | 19.8M | uint base = gs_c_known_encoding_offsets[len]; |
134 | 19.8M | const byte *bot = (const byte *)&gs_c_known_encoding_chars[base]; |
135 | 19.8M | uint count = (gs_c_known_encoding_offsets[len + 1] - base) / len; |
136 | 19.8M | uint a = 0, b = count; /* know b > 0 */ |
137 | 19.8M | const byte *probe; |
138 | | |
139 | 117M | while (a < b) { /* know will execute at least once */ |
140 | 108M | uint m = (a + b) >> 1; |
141 | 108M | int cmp; |
142 | | |
143 | 108M | probe = bot + m * len; |
144 | 108M | cmp = memcmp(str, probe, len); |
145 | 108M | if (cmp == 0) |
146 | 10.7M | return gs_c_min_std_encoding_glyph + N(len, probe - bot); |
147 | 97.4M | else if (cmp > 0) |
148 | 46.5M | a = m + 1; |
149 | 50.8M | else |
150 | 50.8M | b = m; |
151 | 108M | } |
152 | 19.8M | } |
153 | | |
154 | 9.12M | return GS_NO_GLYPH; |
155 | 19.8M | } |