/src/fluent-bit/lib/onigmo/enc/shift_jis.h
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | shift_jis.h - Onigmo (Oniguruma-mod) (regular expression library) |
3 | | **********************************************************************/ |
4 | | /*- |
5 | | * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> |
6 | | * Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp> |
7 | | * All rights reserved. |
8 | | * |
9 | | * Redistribution and use in source and binary forms, with or without |
10 | | * modification, are permitted provided that the following conditions |
11 | | * are met: |
12 | | * 1. Redistributions of source code must retain the above copyright |
13 | | * notice, this list of conditions and the following disclaimer. |
14 | | * 2. Redistributions in binary form must reproduce the above copyright |
15 | | * notice, this list of conditions and the following disclaimer in the |
16 | | * documentation and/or other materials provided with the distribution. |
17 | | * |
18 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
19 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
22 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 | | * SUCH DAMAGE. |
29 | | */ |
30 | | |
31 | | #include "regenc.h" |
32 | | |
/*
 * Encoded length, in bytes, of a character whose first byte is the index.
 * Entries are 2 for 0x81-0x9f and 0xe0-0xfc, and 1 for every other byte.
 */
static const int EncLen_SJIS[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 1, 1, 1
};
51 | | |
/*
 * Non-zero when the index byte may appear as the second byte of a
 * two-byte character: 0x40-0x7e and 0x80-0xfc.  All other bytes are 0.
 */
static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 0, 0, 0
};
70 | | |
71 | | static const OnigPairCaseFoldCodes CaseFoldMap[] = { |
72 | | /* Fullwidth Alphabet */ |
73 | | { 0x8260, 0x8281 }, |
74 | | { 0x8261, 0x8282 }, |
75 | | { 0x8262, 0x8283 }, |
76 | | { 0x8263, 0x8284 }, |
77 | | { 0x8264, 0x8285 }, |
78 | | { 0x8265, 0x8286 }, |
79 | | { 0x8266, 0x8287 }, |
80 | | { 0x8267, 0x8288 }, |
81 | | { 0x8268, 0x8289 }, |
82 | | { 0x8269, 0x828a }, |
83 | | { 0x826a, 0x828b }, |
84 | | { 0x826b, 0x828c }, |
85 | | { 0x826c, 0x828d }, |
86 | | { 0x826d, 0x828e }, |
87 | | { 0x826e, 0x828f }, |
88 | | { 0x826f, 0x8290 }, |
89 | | { 0x8270, 0x8291 }, |
90 | | { 0x8271, 0x8292 }, |
91 | | { 0x8272, 0x8293 }, |
92 | | { 0x8273, 0x8294 }, |
93 | | { 0x8274, 0x8295 }, |
94 | | { 0x8275, 0x8296 }, |
95 | | { 0x8276, 0x8297 }, |
96 | | { 0x8277, 0x8298 }, |
97 | | { 0x8278, 0x8299 }, |
98 | | { 0x8279, 0x829a }, |
99 | | |
100 | | /* Greek */ |
101 | | { 0x839f, 0x83bf }, |
102 | | { 0x83a0, 0x83c0 }, |
103 | | { 0x83a1, 0x83c1 }, |
104 | | { 0x83a2, 0x83c2 }, |
105 | | { 0x83a3, 0x83c3 }, |
106 | | { 0x83a4, 0x83c4 }, |
107 | | { 0x83a5, 0x83c5 }, |
108 | | { 0x83a6, 0x83c6 }, |
109 | | { 0x83a7, 0x83c7 }, |
110 | | { 0x83a8, 0x83c8 }, |
111 | | { 0x83a9, 0x83c9 }, |
112 | | { 0x83aa, 0x83ca }, |
113 | | { 0x83ab, 0x83cb }, |
114 | | { 0x83ac, 0x83cc }, |
115 | | { 0x83ad, 0x83cd }, |
116 | | { 0x83ae, 0x83ce }, |
117 | | { 0x83af, 0x83cf }, |
118 | | { 0x83b0, 0x83d0 }, |
119 | | { 0x83b1, 0x83d1 }, |
120 | | { 0x83b2, 0x83d2 }, |
121 | | { 0x83b3, 0x83d3 }, |
122 | | { 0x83b4, 0x83d4 }, |
123 | | { 0x83b5, 0x83d5 }, |
124 | | { 0x83b6, 0x83d6 }, |
125 | | |
126 | | /* Cyrillic */ |
127 | | { 0x8440, 0x8470 }, |
128 | | { 0x8441, 0x8471 }, |
129 | | { 0x8442, 0x8472 }, |
130 | | { 0x8443, 0x8473 }, |
131 | | { 0x8444, 0x8474 }, |
132 | | { 0x8445, 0x8475 }, |
133 | | { 0x8446, 0x8476 }, |
134 | | { 0x8447, 0x8477 }, |
135 | | { 0x8448, 0x8478 }, |
136 | | { 0x8449, 0x8479 }, |
137 | | { 0x844a, 0x847a }, |
138 | | { 0x844b, 0x847b }, |
139 | | { 0x844c, 0x847c }, |
140 | | { 0x844d, 0x847d }, |
141 | | { 0x844e, 0x847e }, |
142 | | { 0x844f, 0x8480 }, |
143 | | { 0x8450, 0x8481 }, |
144 | | { 0x8451, 0x8482 }, |
145 | | { 0x8452, 0x8483 }, |
146 | | { 0x8453, 0x8484 }, |
147 | | { 0x8454, 0x8485 }, |
148 | | { 0x8455, 0x8486 }, |
149 | | { 0x8456, 0x8487 }, |
150 | | { 0x8457, 0x8488 }, |
151 | | { 0x8458, 0x8489 }, |
152 | | { 0x8459, 0x848a }, |
153 | | { 0x845a, 0x848b }, |
154 | | { 0x845b, 0x848c }, |
155 | | { 0x845c, 0x848d }, |
156 | | { 0x845d, 0x848e }, |
157 | | { 0x845e, 0x848f }, |
158 | | { 0x845f, 0x8490 }, |
159 | | { 0x8460, 0x8491 }, |
160 | | }; |
161 | | |
/* Non-zero when `byte` starts a multi-byte character (EncLen_SJIS entry > 1). */
#define SJIS_ISMB_FIRST(byte)  (EncLen_SJIS[(byte)] > 1)
/* Non-zero when `byte` may be the second byte of a two-byte character. */
#define SJIS_ISMB_TRAIL(byte)  (SJIS_CAN_BE_TRAIL_TABLE[(byte)])
164 | | |
/*
 * Byte-level validation automaton used by mbc_enc_len().
 *
 * trans[state][byte] is the next state.  Negative values are terminal:
 * ACCEPT ends a character, FAILURE marks an invalid byte.  S0 is the
 * state before the first byte, S1 the state after a lead byte.
 */
typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t;
#define A ACCEPT
#define F FAILURE
static const signed char trans[][0x100] = {
  { /* S0: first byte.  1 means "go to S1" (lead byte). */
    /* 0x00 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x10 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x20 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x30 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x40 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x50 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x60 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x70 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x80 */ F, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ F, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0xb0 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0xc0 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0xd0 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, F, F, F
  },
  { /* S1: second byte of a two-byte character. */
    /* 0x00 */ F, F, F, F, F, F, F, F,  F, F, F, F, F, F, F, F,
    /* 0x10 */ F, F, F, F, F, F, F, F,  F, F, F, F, F, F, F, F,
    /* 0x20 */ F, F, F, F, F, F, F, F,  F, F, F, F, F, F, F, F,
    /* 0x30 */ F, F, F, F, F, F, F, F,  F, F, F, F, F, F, F, F,
    /* 0x40 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x50 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x60 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x70 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, F,
    /* 0x80 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0x90 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0xa0 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0xb0 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0xc0 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0xd0 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0xe0 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, A, A, A,
    /* 0xf0 */ A, A, A, A, A, A, A, A,  A, A, A, A, A, F, F, F
  }
};
#undef A
#undef F
208 | | |
209 | | static int |
210 | | mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) |
211 | 0 | { |
212 | 0 | int firstbyte = *p++; |
213 | 0 | state_t s; |
214 | 0 | s = trans[0][firstbyte]; |
215 | 0 | if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) : |
216 | 0 | ONIGENC_CONSTRUCT_MBCLEN_INVALID(); |
217 | 0 | if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_SJIS[firstbyte]-1); |
218 | 0 | s = trans[s][*p++]; |
219 | 0 | return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) : |
220 | 0 | ONIGENC_CONSTRUCT_MBCLEN_INVALID(); |
221 | 0 | } |
222 | | |
223 | | static int |
224 | | code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) |
225 | 0 | { |
226 | 0 | if (code < 256) { |
227 | 0 | if (EncLen_SJIS[(int )code] == 1) |
228 | 0 | return 1; |
229 | 0 | else |
230 | 0 | return ONIGERR_INVALID_CODE_POINT_VALUE; |
231 | 0 | } |
232 | 0 | else if (code <= 0xffff) { |
233 | 0 | int low = code & 0xff; |
234 | 0 | if (! SJIS_ISMB_TRAIL(low)) |
235 | 0 | return ONIGERR_INVALID_CODE_POINT_VALUE; |
236 | 0 | return 2; |
237 | 0 | } |
238 | 0 | else |
239 | 0 | return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; |
240 | 0 | } |
241 | | |
242 | | static OnigCodePoint |
243 | | mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) |
244 | 0 | { |
245 | 0 | int c, i, len; |
246 | 0 | OnigCodePoint n; |
247 | |
|
248 | 0 | len = mbc_enc_len(p, end, enc); |
249 | 0 | c = *p++; |
250 | 0 | n = c; |
251 | 0 | if (len == 1) return n; |
252 | | |
253 | 0 | for (i = 1; i < len; i++) { |
254 | 0 | if (p >= end) break; |
255 | 0 | c = *p++; |
256 | 0 | n <<= 8; n += c; |
257 | 0 | } |
258 | 0 | return n; |
259 | 0 | } |
260 | | |
261 | | static int |
262 | | code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) |
263 | 0 | { |
264 | 0 | UChar *p = buf; |
265 | |
|
266 | 0 | if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); |
267 | 0 | *p++ = (UChar )(code & 0xff); |
268 | |
|
269 | | #if 0 |
270 | | if (mbc_enc_len(buf, p, enc) != (p - buf)) |
271 | | return REGERR_INVALID_CODE_POINT_VALUE; |
272 | | #endif |
273 | 0 | return (int )(p - buf); |
274 | 0 | } |
275 | | |
276 | | static int |
277 | | apply_all_case_fold(OnigCaseFoldType flag, |
278 | | OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc) |
279 | 0 | { |
280 | 0 | return onigenc_apply_all_case_fold_with_map( |
281 | 0 | numberof(CaseFoldMap), CaseFoldMap, 0, |
282 | 0 | flag, f, arg); |
283 | 0 | } |
284 | | |
285 | | static OnigCodePoint |
286 | | get_lower_case(OnigCodePoint code) |
287 | 0 | { |
288 | 0 | if (ONIGENC_IS_IN_RANGE(code, 0x8260, 0x8279)) { |
289 | | /* Fullwidth Alphabet */ |
290 | 0 | return (OnigCodePoint )(code + 0x0021); |
291 | 0 | } |
292 | 0 | else if (ONIGENC_IS_IN_RANGE(code, 0x839f, 0x83b6)) { |
293 | | /* Greek */ |
294 | 0 | return (OnigCodePoint )(code + 0x0020); |
295 | 0 | } |
296 | 0 | else if (ONIGENC_IS_IN_RANGE(code, 0x8440, 0x8460)) { |
297 | | /* Cyrillic */ |
298 | 0 | int d = (code >= 0x844f) ? 1 : 0; |
299 | 0 | return (OnigCodePoint )(code + (0x0030 + d)); |
300 | 0 | } |
301 | 0 | return code; |
302 | 0 | } |
303 | | |
304 | | static OnigCodePoint |
305 | | get_upper_case(OnigCodePoint code) |
306 | 0 | { |
307 | 0 | if (ONIGENC_IS_IN_RANGE(code, 0x8281, 0x829a)) { |
308 | | /* Fullwidth Alphabet */ |
309 | 0 | return (OnigCodePoint )(code - 0x0021); |
310 | 0 | } |
311 | 0 | else if (ONIGENC_IS_IN_RANGE(code, 0x83bf, 0x83d6)) { |
312 | | /* Greek */ |
313 | 0 | return (OnigCodePoint )(code - 0x0020); |
314 | 0 | } |
315 | 0 | else if (ONIGENC_IS_IN_RANGE(code, 0x8470, 0x847e) || |
316 | 0 | ONIGENC_IS_IN_RANGE(code, 0x8480, 0x8491)) { |
317 | | /* Cyrillic */ |
318 | 0 | int d = (code >= 0x8480) ? 1 : 0; |
319 | 0 | return (OnigCodePoint )(code - (0x0030 - d)); |
320 | 0 | } |
321 | 0 | return code; |
322 | 0 | } |
323 | | |
324 | | static int |
325 | | get_case_fold_codes_by_str(OnigCaseFoldType flag, |
326 | | const OnigUChar* p, const OnigUChar* end, |
327 | | OnigCaseFoldCodeItem items[], OnigEncoding enc) |
328 | 0 | { |
329 | 0 | int len; |
330 | 0 | OnigCodePoint code, code_lo, code_up; |
331 | |
|
332 | 0 | code = mbc_to_code(p, end, enc); |
333 | 0 | if (ONIGENC_IS_ASCII_CODE(code)) |
334 | 0 | return onigenc_ascii_get_case_fold_codes_by_str(flag, p, end, items, enc); |
335 | | |
336 | 0 | len = mbc_enc_len(p, end, enc); |
337 | 0 | code_lo = get_lower_case(code); |
338 | 0 | code_up = get_upper_case(code); |
339 | |
|
340 | 0 | if (code != code_lo) { |
341 | 0 | items[0].byte_len = len; |
342 | 0 | items[0].code_len = 1; |
343 | 0 | items[0].code[0] = code_lo; |
344 | 0 | return 1; |
345 | 0 | } |
346 | 0 | else if (code != code_up) { |
347 | 0 | items[0].byte_len = len; |
348 | 0 | items[0].code_len = 1; |
349 | 0 | items[0].code[0] = code_up; |
350 | 0 | return 1; |
351 | 0 | } |
352 | | |
353 | 0 | return 0; |
354 | 0 | } |
355 | | |
356 | | static int |
357 | | mbc_case_fold(OnigCaseFoldType flag, |
358 | | const UChar** pp, const UChar* end, UChar* lower, |
359 | | OnigEncoding enc) |
360 | 0 | { |
361 | 0 | const UChar* p = *pp; |
362 | |
|
363 | 0 | if (ONIGENC_IS_MBC_ASCII(p)) { |
364 | 0 | *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); |
365 | 0 | (*pp)++; |
366 | 0 | return 1; |
367 | 0 | } |
368 | 0 | else { |
369 | 0 | OnigCodePoint code; |
370 | 0 | int len; |
371 | |
|
372 | 0 | code = get_lower_case(mbc_to_code(p, end, enc)); |
373 | 0 | len = code_to_mbc(code, lower, enc); |
374 | 0 | (*pp) += len; |
375 | 0 | return len; /* return byte length of converted char to lower */ |
376 | 0 | } |
377 | 0 | } |
378 | | |
/* Disabled code (excluded by #if 0): an is_mbc_ambiguous hook that would
 * simply forward to onigenc_mbn_is_mbc_ambiguous().  As written it refers
 * to `enc`, which is not one of its parameters, so it cannot be re-enabled
 * without changes. */
379 | | #if 0 |
380 | | static int |
381 | | is_mbc_ambiguous(OnigCaseFoldType flag, |
382 | | const UChar** pp, const UChar* end) |
383 | | { |
384 | | return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end); |
385 | | |
386 | | } |
387 | | #endif |
388 | | |
/* Disabled code (excluded by #if 0): a two-argument variant of
 * is_code_ctype that has no property-list branch.  It calls
 * code_to_mbclen() with a single argument, whereas the code_to_mbclen()
 * defined in this file takes (code, enc). */
389 | | #if 0 |
390 | | static int |
391 | | is_code_ctype(OnigCodePoint code, unsigned int ctype) |
392 | | { |
393 | | if (code < 128) |
394 | | return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); |
395 | | else { |
396 | | if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { |
397 | | return (code_to_mbclen(code) > 1 ? TRUE : FALSE); |
398 | | } |
399 | | } |
400 | | |
401 | | return FALSE; |
402 | | } |
403 | | #endif |
404 | | |
405 | | static UChar* |
406 | | left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc) |
407 | 0 | { |
408 | 0 | const UChar *p; |
409 | 0 | int len; |
410 | |
|
411 | 0 | if (s <= start) return (UChar* )s; |
412 | 0 | p = s; |
413 | |
|
414 | 0 | if (SJIS_ISMB_TRAIL(*p)) { |
415 | 0 | while (p > start) { |
416 | 0 | if (! SJIS_ISMB_FIRST(*--p)) { |
417 | 0 | p++; |
418 | 0 | break; |
419 | 0 | } |
420 | 0 | } |
421 | 0 | } |
422 | 0 | len = mbc_enc_len(p, end, enc); |
423 | 0 | if (p + len > s) return (UChar* )p; |
424 | 0 | p += len; |
425 | 0 | return (UChar* )(p + ((s - p) & ~1)); |
426 | 0 | } |
427 | | |
428 | | static int |
429 | | is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc ARG_UNUSED) |
430 | 0 | { |
431 | 0 | const UChar c = *s; |
432 | 0 | return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE); |
433 | 0 | } |
434 | | |
435 | | |
436 | | static const OnigCodePoint CR_Hiragana[] = { |
437 | | 1, |
438 | | 0x829f, 0x82f1 |
439 | | }; /* CR_Hiragana */ |
440 | | |
441 | | static const OnigCodePoint CR_Katakana[] = { |
442 | | 4, |
443 | | 0x00a6, 0x00af, |
444 | | 0x00b1, 0x00dd, |
445 | | 0x8340, 0x837e, |
446 | | 0x8380, 0x8396, |
447 | | }; /* CR_Katakana */ |
448 | | |
449 | | #ifdef ENC_CP932 |
450 | | static const OnigCodePoint CR_Han[] = { |
451 | | 6, |
452 | | 0x8157, 0x8157, |
453 | | 0x889f, 0x9872, /* Kanji level 1 */ |
454 | | 0x989f, 0x9ffc, /* Kanji level 2 */ |
455 | | 0xe040, 0xeaa4, /* Kanji level 2 */ |
456 | | 0xed40, 0xeeec, /* NEC-selected IBM extended characters (without symbols) */ |
457 | | 0xfa5c, 0xfc4b, /* IBM extended characters (without symbols) */ |
458 | | }; /* CR_Han */ |
459 | | #else |
460 | | static const OnigCodePoint CR_Han[] = { |
461 | | 4, |
462 | | 0x8157, 0x8157, |
463 | | 0x889f, 0x9872, /* Kanji level 1 */ |
464 | | 0x989f, 0x9ffc, /* Kanji level 2 */ |
465 | | 0xe040, 0xeaa4, /* Kanji level 2 */ |
466 | | }; /* CR_Han */ |
467 | | #endif |
468 | | |
469 | | static const OnigCodePoint CR_Latin[] = { |
470 | | 4, |
471 | | 0x0041, 0x005a, |
472 | | 0x0061, 0x007a, |
473 | | 0x8260, 0x8279, |
474 | | 0x8281, 0x829a, |
475 | | }; /* CR_Latin */ |
476 | | |
477 | | static const OnigCodePoint CR_Greek[] = { |
478 | | 2, |
479 | | 0x839f, 0x83b6, |
480 | | 0x83bf, 0x83d6, |
481 | | }; /* CR_Greek */ |
482 | | |
483 | | static const OnigCodePoint CR_Cyrillic[] = { |
484 | | 3, |
485 | | 0x8440, 0x8460, |
486 | | 0x8470, 0x847f, |
487 | | 0x8480, 0x8491, |
488 | | }; /* CR_Cyrillic */ |
489 | | |
490 | | #include "enc/jis/props.h" |
491 | | |
492 | | static int |
493 | | property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end) |
494 | 0 | { |
495 | 0 | const UChar *s = p, *e = end; |
496 | 0 | const struct enc_property *prop = |
497 | 0 | onig_jis_property((const char* )s, (unsigned int )(e - s)); |
498 | |
|
499 | 0 | if (!prop) { |
500 | 0 | return onigenc_minimum_property_name_to_ctype(enc, s, e); |
501 | 0 | } |
502 | | |
503 | 0 | return (int )prop->ctype; |
504 | 0 | } |
505 | | |
506 | | static int |
507 | | is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) |
508 | 0 | { |
509 | 0 | if (ctype <= ONIGENC_MAX_STD_CTYPE) { |
510 | 0 | if (code < 128) |
511 | 0 | return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); |
512 | 0 | else { |
513 | 0 | if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { |
514 | 0 | return TRUE; |
515 | 0 | } |
516 | 0 | } |
517 | 0 | } |
518 | 0 | else { |
519 | 0 | ctype -= (ONIGENC_MAX_STD_CTYPE + 1); |
520 | 0 | if (ctype >= (unsigned int )PropertyListNum) |
521 | 0 | return ONIGERR_TYPE_BUG; |
522 | | |
523 | 0 | return onig_is_in_code_range((UChar* )PropertyList[ctype], code); |
524 | 0 | } |
525 | | |
526 | 0 | return FALSE; |
527 | 0 | } |
528 | | |
529 | | static int |
530 | | get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, |
531 | | const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED) |
532 | 0 | { |
533 | 0 | if (ctype <= ONIGENC_MAX_STD_CTYPE) { |
534 | 0 | return ONIG_NO_SUPPORT_CONFIG; |
535 | 0 | } |
536 | 0 | else { |
537 | 0 | *sb_out = 0x80; |
538 | |
|
539 | 0 | ctype -= (ONIGENC_MAX_STD_CTYPE + 1); |
540 | 0 | if (ctype >= (OnigCtype )PropertyListNum) |
541 | 0 | return ONIGERR_TYPE_BUG; |
542 | | |
543 | 0 | *ranges = PropertyList[ctype]; |
544 | 0 | return 0; |
545 | 0 | } |
546 | 0 | } |