/src/php-src/oniguruma/src/regenc.c
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | regenc.c - Oniguruma (regular expression library) |
3 | | **********************************************************************/ |
4 | | /*- |
5 | | * Copyright (c) 2002-2020 K.Kosako |
6 | | * All rights reserved. |
7 | | * |
8 | | * Redistribution and use in source and binary forms, with or without |
9 | | * modification, are permitted provided that the following conditions |
10 | | * are met: |
11 | | * 1. Redistributions of source code must retain the above copyright |
12 | | * notice, this list of conditions and the following disclaimer. |
13 | | * 2. Redistributions in binary form must reproduce the above copyright |
14 | | * notice, this list of conditions and the following disclaimer in the |
15 | | * documentation and/or other materials provided with the distribution. |
16 | | * |
17 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
18 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
21 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
22 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
23 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
24 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
25 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
26 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
27 | | * SUCH DAMAGE. |
28 | | */ |
29 | | |
30 | | #include "regint.h" |
31 | | |
32 | 0 | #define LARGE_S 0x53 |
33 | 0 | #define SMALL_S 0x73 |
34 | | |
35 | | OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; |
36 | | |
37 | 6.08k | #define INITED_LIST_SIZE 20 |
38 | | |
39 | | static int InitedListNum; |
40 | | |
41 | | static struct { |
42 | | OnigEncoding enc; |
43 | | int inited; |
44 | | } InitedList[INITED_LIST_SIZE]; |
45 | | |
46 | | static int |
47 | | enc_inited_entry(OnigEncoding enc) |
48 | 6.08k | { |
49 | 6.08k | int i; |
50 | | |
51 | 6.08k | for (i = 0; i < InitedListNum; i++) { |
52 | 0 | if (InitedList[i].enc == enc) { |
53 | 0 | InitedList[i].inited = 1; |
54 | 0 | return i; |
55 | 0 | } |
56 | 0 | } |
57 | | |
58 | 6.08k | i = InitedListNum; |
59 | 6.08k | if (i < INITED_LIST_SIZE - 1) { |
60 | 6.08k | InitedList[i].enc = enc; |
61 | 6.08k | InitedList[i].inited = 1; |
62 | 6.08k | InitedListNum++; |
63 | 6.08k | return i; |
64 | 6.08k | } |
65 | | |
66 | 0 | return -1; |
67 | 0 | } |
68 | | |
69 | | static int |
70 | | enc_is_inited(OnigEncoding enc) |
71 | 6.08k | { |
72 | 6.08k | int i; |
73 | | |
74 | 6.08k | for (i = 0; i < InitedListNum; i++) { |
75 | 0 | if (InitedList[i].enc == enc) { |
76 | 0 | return InitedList[i].inited; |
77 | 0 | } |
78 | 0 | } |
79 | | |
80 | 6.08k | return 0; |
81 | 6.08k | } |
82 | | |
/* Non-zero once onigenc_init() has run. */
static int OnigEncInited;

/*
 * Set the OnigEncInited flag if it is not set yet.
 * Always returns 0.
 */
extern int
onigenc_init(void)
{
  if (OnigEncInited == 0)
    OnigEncInited = 1;

  return 0;
}
93 | | |
94 | | extern int |
95 | | onigenc_end(void) |
96 | 0 | { |
97 | 0 | int i; |
98 | |
|
99 | 0 | for (i = 0; i < InitedListNum; i++) { |
100 | 0 | InitedList[i].enc = 0; |
101 | 0 | InitedList[i].inited = 0; |
102 | 0 | } |
103 | 0 | InitedListNum = 0; |
104 | |
|
105 | 0 | OnigEncInited = 0; |
106 | 0 | return ONIG_NORMAL; |
107 | 0 | } |
108 | | |
109 | | extern int |
110 | | onig_initialize_encoding(OnigEncoding enc) |
111 | 6.08k | { |
112 | 6.08k | int r; |
113 | | |
114 | 6.08k | if (enc != ONIG_ENCODING_ASCII && |
115 | 0 | ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) { |
116 | 0 | OnigEncoding ascii = ONIG_ENCODING_ASCII; |
117 | 0 | if (ascii->init != 0 && enc_is_inited(ascii) == 0) { |
118 | 0 | r = ascii->init(); |
119 | 0 | if (r != ONIG_NORMAL) return r; |
120 | 0 | enc_inited_entry(ascii); |
121 | 0 | } |
122 | 0 | } |
123 | | |
124 | 6.08k | if (enc->init != 0 && |
125 | 6.08k | enc_is_inited(enc) == 0) { |
126 | 6.08k | r = (enc->init)(); |
127 | 6.08k | if (r == ONIG_NORMAL) |
128 | 6.08k | enc_inited_entry(enc); |
129 | 6.08k | return r; |
130 | 6.08k | } |
131 | | |
132 | 0 | return 0; |
133 | 0 | } |
134 | | |
135 | | extern OnigEncoding |
136 | | onigenc_get_default_encoding(void) |
137 | 0 | { |
138 | 0 | return OnigEncDefaultCharEncoding; |
139 | 0 | } |
140 | | |
141 | | extern int |
142 | | onigenc_set_default_encoding(OnigEncoding enc) |
143 | 0 | { |
144 | 0 | OnigEncDefaultCharEncoding = enc; |
145 | 0 | return 0; |
146 | 0 | } |
147 | | |
148 | | extern UChar* |
149 | | onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end) |
150 | 42.6k | { |
151 | 42.6k | int slen, term_len, i; |
152 | 42.6k | UChar *r; |
153 | | |
154 | 42.6k | slen = (int )(end - s); |
155 | 42.6k | term_len = ONIGENC_MBC_MINLEN(enc); |
156 | | |
157 | 42.6k | r = (UChar* )xmalloc(slen + term_len); |
158 | 42.6k | CHECK_NULL_RETURN(r); |
159 | 42.6k | xmemcpy(r, s, slen); |
160 | | |
161 | 85.2k | for (i = 0; i < term_len; i++) |
162 | 42.6k | r[slen + i] = (UChar )0; |
163 | | |
164 | 42.6k | return r; |
165 | 42.6k | } |
166 | | |
167 | | extern UChar* |
168 | | onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) |
169 | 0 | { |
170 | 0 | UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); |
171 | 0 | if (p < s) { |
172 | 0 | p += enclen(enc, p); |
173 | 0 | } |
174 | 0 | return p; |
175 | 0 | } |
176 | | |
177 | | extern UChar* |
178 | | onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, |
179 | | const UChar* start, const UChar* s, const UChar** prev) |
180 | 0 | { |
181 | 0 | UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); |
182 | |
|
183 | 0 | if (p < s) { |
184 | 0 | if (prev) *prev = (const UChar* )p; |
185 | 0 | p += enclen(enc, p); |
186 | 0 | } |
187 | 0 | else { |
188 | 0 | if (prev) |
189 | 0 | *prev = onigenc_get_prev_char_head(enc, start, p); |
190 | 0 | } |
191 | 0 | return p; |
192 | 0 | } |
193 | | |
194 | | extern UChar* |
195 | | onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) |
196 | 0 | { |
197 | 0 | if (s <= start) |
198 | 0 | return (UChar* )NULL; |
199 | | |
200 | 0 | return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); |
201 | 0 | } |
202 | | |
203 | | extern UChar* |
204 | | onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n) |
205 | 0 | { |
206 | 0 | while (ONIG_IS_NOT_NULL(s) && n-- > 0) { |
207 | 0 | if (s <= start) |
208 | 0 | return (UChar* )NULL; |
209 | | |
210 | 0 | s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); |
211 | 0 | } |
212 | 0 | return (UChar* )s; |
213 | 0 | } |
214 | | |
215 | | extern UChar* |
216 | | onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) |
217 | 0 | { |
218 | 0 | UChar* q = (UChar* )p; |
219 | 0 | while (n-- > 0) { |
220 | 0 | q += ONIGENC_MBC_ENC_LEN(enc, q); |
221 | 0 | } |
222 | 0 | return (q <= end ? q : NULL); |
223 | 0 | } |
224 | | |
225 | | extern int |
226 | | onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) |
227 | 0 | { |
228 | 0 | int n = 0; |
229 | 0 | UChar* q = (UChar* )p; |
230 | |
|
231 | 0 | while (q < end) { |
232 | 0 | q += ONIGENC_MBC_ENC_LEN(enc, q); |
233 | 0 | n++; |
234 | 0 | } |
235 | 0 | return n; |
236 | 0 | } |
237 | | |
238 | | extern int |
239 | | onigenc_strlen_null(OnigEncoding enc, const UChar* s) |
240 | 0 | { |
241 | 0 | int n = 0; |
242 | 0 | UChar* p = (UChar* )s; |
243 | |
|
244 | 0 | while (1) { |
245 | 0 | if (*p == '\0') { |
246 | 0 | UChar* q; |
247 | 0 | int len = ONIGENC_MBC_MINLEN(enc); |
248 | |
|
249 | 0 | if (len == 1) return n; |
250 | 0 | q = p + 1; |
251 | 0 | while (len > 1) { |
252 | 0 | if (*q != '\0') break; |
253 | 0 | q++; |
254 | 0 | len--; |
255 | 0 | } |
256 | 0 | if (len == 1) return n; |
257 | 0 | } |
258 | 0 | p += ONIGENC_MBC_ENC_LEN(enc, p); |
259 | 0 | n++; |
260 | 0 | } |
261 | 0 | } |
262 | | |
263 | | extern int |
264 | | onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) |
265 | 42.6k | { |
266 | 42.6k | const UChar* start = s; |
267 | 42.6k | const UChar* p = s; |
268 | | |
269 | 280k | while (1) { |
270 | 280k | if (*p == '\0') { |
271 | 42.6k | const UChar* q; |
272 | 42.6k | int len = ONIGENC_MBC_MINLEN(enc); |
273 | | |
274 | 42.6k | if (len == 1) return (int )(p - start); |
275 | 0 | q = p + 1; |
276 | 0 | while (len > 1) { |
277 | 0 | if (*q != '\0') break; |
278 | 0 | q++; |
279 | 0 | len--; |
280 | 0 | } |
281 | 0 | if (len == 1) return (int )(p - start); |
282 | 237k | } |
283 | 237k | p += ONIGENC_MBC_ENC_LEN(enc, p); |
284 | 237k | } |
285 | 42.6k | } |
286 | | |
/*
 * Byte-indexed lower-casing table (32 rows of 8 = 256 entries).
 * Entries 0x41-0x5A ('A'-'Z') hold 0x61-0x7A ('a'-'z'); every other entry
 * holds its own index, i.e. the byte is left unchanged.
 */
287 | | const UChar OnigEncAsciiToLowerCaseTable[] = { |
288 | | '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', |
289 | | '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', |
290 | | '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', |
291 | | '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', |
292 | | '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', |
293 | | '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', |
294 | | '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', |
295 | | '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', |
296 | | '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', |
297 | | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', |
298 | | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', |
299 | | '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', |
300 | | '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', |
301 | | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', |
302 | | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', |
303 | | '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', |
304 | | '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', |
305 | | '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', |
306 | | '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', |
307 | | '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', |
308 | | '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', |
309 | | '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', |
310 | | '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', |
311 | | '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', |
312 | | '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', |
313 | | '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', |
314 | | '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', |
315 | | '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', |
316 | | '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', |
317 | | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', |
318 | | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', |
319 | | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', |
320 | | }; |
321 | | |
322 | | #ifdef USE_UPPER_CASE_TABLE |
/*
 * Byte-indexed upper-casing table (256 entries), compiled only when
 * USE_UPPER_CASE_TABLE is defined.
 * Entries 0x61-0x7A ('a'-'z') hold 0x41-0x5A ('A'-'Z'); every other entry
 * holds its own index, i.e. the byte is left unchanged.
 */
323 | | const UChar OnigEncAsciiToUpperCaseTable[256] = { |
324 | | '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', |
325 | | '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', |
326 | | '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', |
327 | | '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', |
328 | | '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', |
329 | | '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', |
330 | | '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', |
331 | | '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', |
332 | | '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', |
333 | | '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', |
334 | | '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', |
335 | | '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', |
336 | | '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', |
337 | | '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', |
338 | | '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', |
339 | | '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', |
340 | | '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', |
341 | | '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', |
342 | | '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', |
343 | | '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', |
344 | | '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', |
345 | | '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', |
346 | | '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', |
347 | | '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', |
348 | | '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', |
349 | | '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', |
350 | | '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', |
351 | | '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', |
352 | | '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', |
353 | | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', |
354 | | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', |
355 | | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', |
356 | | }; |
357 | | #endif |
358 | | |
/*
 * Byte-indexed table of 16-bit character-type flag words (256 entries).
 * Only the first 128 entries (0x00-0x7F) are non-zero; entries for bytes
 * 0x80-0xFF are all 0x0000.
 * NOTE(review): the meaning of the individual bits is defined outside this
 * file chunk -- confirm against the ctype bit definitions before editing.
 */
359 | | const unsigned short OnigEncAsciiCtypeTable[256] = { |
360 | | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, |
361 | | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, |
362 | | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, |
363 | | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, |
364 | | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, |
365 | | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, |
366 | | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, |
367 | | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, |
368 | | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, |
369 | | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, |
370 | | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, |
371 | | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, |
372 | | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, |
373 | | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, |
374 | | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, |
375 | | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, |
376 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
377 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
378 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
379 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
380 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
381 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
382 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
383 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
384 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
385 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
386 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
387 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
388 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
389 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
390 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, |
391 | | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 |
392 | | }; |
393 | | |
/*
 * Byte-indexed lower-casing table for ISO-8859-1 (256 entries).
 * Entries 0x41-0x5A hold 0x61-0x7A, and entries 0xC0-0xDE hold 0xE0-0xFE,
 * except 0xD7, which holds its own value. Every other entry, including
 * 0xDF, holds its own index, i.e. the byte is left unchanged.
 */
394 | | const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { |
395 | | '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', |
396 | | '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', |
397 | | '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', |
398 | | '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', |
399 | | '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', |
400 | | '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', |
401 | | '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', |
402 | | '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', |
403 | | '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', |
404 | | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', |
405 | | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', |
406 | | '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', |
407 | | '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', |
408 | | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', |
409 | | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', |
410 | | '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', |
411 | | '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', |
412 | | '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', |
413 | | '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', |
414 | | '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', |
415 | | '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', |
416 | | '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', |
417 | | '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', |
418 | | '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', |
419 | | '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', |
420 | | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', |
421 | | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', |
422 | | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', |
423 | | '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', |
424 | | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', |
425 | | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', |
426 | | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' |
427 | | }; |
428 | | |
429 | | #ifdef USE_UPPER_CASE_TABLE |
/*
 * Byte-indexed upper-casing table for ISO-8859-1 (256 entries), compiled
 * only when USE_UPPER_CASE_TABLE is defined.
 * Entries 0x61-0x7A hold 0x41-0x5A, and entries 0xE0-0xFE hold 0xC0-0xDE,
 * except 0xF7, which holds its own value. Every other entry, including
 * 0xFF, holds its own index, i.e. the byte is left unchanged.
 */
430 | | const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { |
431 | | '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', |
432 | | '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', |
433 | | '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', |
434 | | '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', |
435 | | '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', |
436 | | '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', |
437 | | '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', |
438 | | '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', |
439 | | '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', |
440 | | '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', |
441 | | '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', |
442 | | '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', |
443 | | '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', |
444 | | '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', |
445 | | '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', |
446 | | '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', |
447 | | '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', |
448 | | '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', |
449 | | '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', |
450 | | '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', |
451 | | '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', |
452 | | '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', |
453 | | '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', |
454 | | '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', |
455 | | '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', |
456 | | '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', |
457 | | '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', |
458 | | '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', |
459 | | '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', |
460 | | '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', |
461 | | '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367', |
462 | | '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377', |
463 | | }; |
464 | | #endif |
465 | | |
466 | | extern void |
467 | | onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) |
468 | 0 | { |
469 | | /* nothing */ |
470 | | /* obsoleted. */ |
471 | 0 | } |
472 | | |
473 | | extern UChar* |
474 | | onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) |
475 | 0 | { |
476 | 0 | return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); |
477 | 0 | } |
478 | | |
/*
 * The 26 ASCII letter case pairs: each entry pairs an uppercase letter
 * code (0x41-0x5A, listed first) with its lowercase counterpart
 * (0x61-0x7A, listed second).
 */
479 | | const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { |
480 | | { 0x41, 0x61 }, |
481 | | { 0x42, 0x62 }, |
482 | | { 0x43, 0x63 }, |
483 | | { 0x44, 0x64 }, |
484 | | { 0x45, 0x65 }, |
485 | | { 0x46, 0x66 }, |
486 | | { 0x47, 0x67 }, |
487 | | { 0x48, 0x68 }, |
488 | | { 0x49, 0x69 }, |
489 | | { 0x4a, 0x6a }, |
490 | | { 0x4b, 0x6b }, |
491 | | { 0x4c, 0x6c }, |
492 | | { 0x4d, 0x6d }, |
493 | | { 0x4e, 0x6e }, |
494 | | { 0x4f, 0x6f }, |
495 | | { 0x50, 0x70 }, |
496 | | { 0x51, 0x71 }, |
497 | | { 0x52, 0x72 }, |
498 | | { 0x53, 0x73 }, |
499 | | { 0x54, 0x74 }, |
500 | | { 0x55, 0x75 }, |
501 | | { 0x56, 0x76 }, |
502 | | { 0x57, 0x77 }, |
503 | | { 0x58, 0x78 }, |
504 | | { 0x59, 0x79 }, |
505 | | { 0x5a, 0x7a } |
506 | | }; |
507 | | |
508 | | extern int |
509 | | onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, |
510 | | OnigApplyAllCaseFoldFunc f, void* arg) |
511 | 0 | { |
512 | 0 | OnigCodePoint code; |
513 | 0 | int i, r; |
514 | |
|
515 | 0 | for (i = 0; |
516 | 0 | i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); |
517 | 0 | i++) { |
518 | 0 | code = OnigAsciiLowerMap[i].to; |
519 | 0 | r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); |
520 | 0 | if (r != 0) return r; |
521 | | |
522 | 0 | code = OnigAsciiLowerMap[i].from; |
523 | 0 | r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); |
524 | 0 | if (r != 0) return r; |
525 | 0 | } |
526 | |
|
527 | 0 | return 0; |
528 | 0 | } |
529 | | |
530 | | extern int |
531 | | onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, |
532 | | const OnigUChar* p, const OnigUChar* end ARG_UNUSED, |
533 | | OnigCaseFoldCodeItem items[]) |
534 | 158k | { |
535 | 158k | if (0x41 <= *p && *p <= 0x5a) { |
536 | 0 | items[0].byte_len = 1; |
537 | 0 | items[0].code_len = 1; |
538 | 0 | items[0].code[0] = (OnigCodePoint )(*p + 0x20); |
539 | 0 | return 1; |
540 | 0 | } |
541 | 158k | else if (0x61 <= *p && *p <= 0x7a) { |
542 | 140k | items[0].byte_len = 1; |
543 | 140k | items[0].code_len = 1; |
544 | 140k | items[0].code[0] = (OnigCodePoint )(*p - 0x20); |
545 | 140k | return 1; |
546 | 140k | } |
547 | 18.2k | else |
548 | 18.2k | return 0; |
549 | 158k | } |
550 | | |
551 | | static int |
552 | | ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, |
553 | | OnigApplyAllCaseFoldFunc f, void* arg) |
554 | 0 | { |
555 | 0 | static OnigCodePoint ss[] = { SMALL_S, SMALL_S }; |
556 | |
|
557 | 0 | return (*f)((OnigCodePoint )0xdf, ss, 2, arg); |
558 | 0 | } |
559 | | |
560 | | extern int |
561 | | onigenc_apply_all_case_fold_with_map(int map_size, |
562 | | const OnigPairCaseFoldCodes map[], |
563 | | int ess_tsett_flag, OnigCaseFoldType flag, |
564 | | OnigApplyAllCaseFoldFunc f, void* arg) |
565 | 0 | { |
566 | 0 | OnigCodePoint code; |
567 | 0 | int i, r; |
568 | |
|
569 | 0 | r = onigenc_ascii_apply_all_case_fold(flag, f, arg); |
570 | 0 | if (r != 0) return r; |
571 | | |
572 | 0 | if (CASE_FOLD_IS_ASCII_ONLY(flag)) |
573 | 0 | return 0; |
574 | | |
575 | 0 | for (i = 0; i < map_size; i++) { |
576 | 0 | code = map[i].to; |
577 | 0 | r = (*f)(map[i].from, &code, 1, arg); |
578 | 0 | if (r != 0) return r; |
579 | | |
580 | 0 | code = map[i].from; |
581 | 0 | r = (*f)(map[i].to, &code, 1, arg); |
582 | 0 | if (r != 0) return r; |
583 | 0 | } |
584 | |
|
585 | 0 | if (ess_tsett_flag != 0) |
586 | 0 | return ss_apply_all_case_fold(flag, f, arg); |
587 | | |
588 | 0 | return 0; |
589 | 0 | } |
590 | | |
591 | | extern int |
592 | | onigenc_get_case_fold_codes_by_str_with_map(int map_size, |
593 | | const OnigPairCaseFoldCodes map[], |
594 | | int ess_tsett_flag, OnigCaseFoldType flag, |
595 | | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) |
596 | 0 | { |
597 | 0 | int i, j, n; |
598 | 0 | static OnigUChar sa[] = { LARGE_S, SMALL_S }; |
599 | |
|
600 | 0 | if (0x41 <= *p && *p <= 0x5a) { /* A - Z */ |
601 | 0 | if (*p == LARGE_S && ess_tsett_flag != 0 && end > p + 1 |
602 | 0 | && (*(p+1) == LARGE_S || *(p+1) == SMALL_S) /* SS */ |
603 | 0 | && CASE_FOLD_IS_NOT_ASCII_ONLY(flag)) { |
604 | 0 | ss_combination: |
605 | 0 | items[0].byte_len = 2; |
606 | 0 | items[0].code_len = 1; |
607 | 0 | items[0].code[0] = (OnigCodePoint )0xdf; |
608 | |
|
609 | 0 | n = 1; |
610 | 0 | for (i = 0; i < 2; i++) { |
611 | 0 | for (j = 0; j < 2; j++) { |
612 | 0 | if (sa[i] == *p && sa[j] == *(p+1)) |
613 | 0 | continue; |
614 | | |
615 | 0 | items[n].byte_len = 2; |
616 | 0 | items[n].code_len = 2; |
617 | 0 | items[n].code[0] = (OnigCodePoint )sa[i]; |
618 | 0 | items[n].code[1] = (OnigCodePoint )sa[j]; |
619 | 0 | n++; |
620 | 0 | } |
621 | 0 | } |
622 | 0 | return 4; |
623 | 0 | } |
624 | | |
625 | 0 | items[0].byte_len = 1; |
626 | 0 | items[0].code_len = 1; |
627 | 0 | items[0].code[0] = (OnigCodePoint )(*p + 0x20); |
628 | 0 | return 1; |
629 | 0 | } |
630 | 0 | else if (0x61 <= *p && *p <= 0x7a) { /* a - z */ |
631 | 0 | if (*p == SMALL_S && ess_tsett_flag != 0 && end > p + 1 |
632 | 0 | && (*(p+1) == SMALL_S || *(p+1) == LARGE_S) |
633 | 0 | && CASE_FOLD_IS_NOT_ASCII_ONLY(flag)) { |
634 | 0 | goto ss_combination; |
635 | 0 | } |
636 | | |
637 | 0 | items[0].byte_len = 1; |
638 | 0 | items[0].code_len = 1; |
639 | 0 | items[0].code[0] = (OnigCodePoint )(*p - 0x20); |
640 | 0 | return 1; |
641 | 0 | } |
642 | 0 | else if (*p == 0xdf && ess_tsett_flag != 0 |
643 | 0 | && CASE_FOLD_IS_NOT_ASCII_ONLY(flag)) { |
644 | 0 | items[0].byte_len = 1; |
645 | 0 | items[0].code_len = 2; |
646 | 0 | items[0].code[0] = (OnigCodePoint )'s'; |
647 | 0 | items[0].code[1] = (OnigCodePoint )'s'; |
648 | |
|
649 | 0 | items[1].byte_len = 1; |
650 | 0 | items[1].code_len = 2; |
651 | 0 | items[1].code[0] = (OnigCodePoint )'S'; |
652 | 0 | items[1].code[1] = (OnigCodePoint )'S'; |
653 | |
|
654 | 0 | items[2].byte_len = 1; |
655 | 0 | items[2].code_len = 2; |
656 | 0 | items[2].code[0] = (OnigCodePoint )'s'; |
657 | 0 | items[2].code[1] = (OnigCodePoint )'S'; |
658 | |
|
659 | 0 | items[3].byte_len = 1; |
660 | 0 | items[3].code_len = 2; |
661 | 0 | items[3].code[0] = (OnigCodePoint )'S'; |
662 | 0 | items[3].code[1] = (OnigCodePoint )'s'; |
663 | |
|
664 | 0 | return 4; |
665 | 0 | } |
666 | 0 | else { |
667 | 0 | int i; |
668 | |
|
669 | 0 | if (CASE_FOLD_IS_ASCII_ONLY(flag)) |
670 | 0 | return 0; |
671 | | |
672 | 0 | for (i = 0; i < map_size; i++) { |
673 | 0 | if (*p == map[i].from) { |
674 | 0 | items[0].byte_len = 1; |
675 | 0 | items[0].code_len = 1; |
676 | 0 | items[0].code[0] = map[i].to; |
677 | 0 | return 1; |
678 | 0 | } |
679 | 0 | else if (*p == map[i].to) { |
680 | 0 | items[0].byte_len = 1; |
681 | 0 | items[0].code_len = 1; |
682 | 0 | items[0].code[0] = map[i].from; |
683 | 0 | return 1; |
684 | 0 | } |
685 | 0 | } |
686 | 0 | } |
687 | |
|
688 | 0 | return 0; |
689 | 0 | } |
690 | | |
691 | | |
692 | | extern int |
693 | | onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, |
694 | | OnigCodePoint* sb_out ARG_UNUSED, |
695 | | const OnigCodePoint* ranges[] ARG_UNUSED) |
696 | 0 | { |
697 | 0 | return ONIG_NO_SUPPORT_CONFIG; |
698 | 0 | } |
699 | | |
700 | | extern int |
701 | | onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) |
702 | 0 | { |
703 | 0 | if (p < end) { |
704 | 0 | if (*p == NEWLINE_CODE) return 1; |
705 | 0 | } |
706 | 0 | return 0; |
707 | 0 | } |
708 | | |
709 | | /* for single byte encodings */ |
710 | | extern int |
711 | | onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, |
712 | | const UChar*end ARG_UNUSED, UChar* lower) |
713 | 0 | { |
714 | 0 | *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); |
715 | |
|
716 | 0 | (*p)++; |
717 | 0 | return 1; /* return byte length of converted char to lower */ |
718 | 0 | } |
719 | | |
720 | | extern int |
721 | | onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED) |
722 | 895k | { |
723 | 895k | return 1; |
724 | 895k | } |
725 | | |
726 | | extern OnigCodePoint |
727 | | onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) |
728 | 578k | { |
729 | 578k | return (OnigCodePoint )(*p); |
730 | 578k | } |
731 | | |
732 | | extern int |
733 | | onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED) |
734 | 280k | { |
735 | 280k | return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE); |
736 | 280k | } |
737 | | |
738 | | extern int |
739 | | onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf) |
740 | 0 | { |
741 | 0 | *buf = (UChar )(code & 0xff); |
742 | 0 | return 1; |
743 | 0 | } |
744 | | |
745 | | extern UChar* |
746 | | onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, |
747 | | const UChar* s) |
748 | 0 | { |
749 | 0 | return (UChar* )s; |
750 | 0 | } |
751 | | |
752 | | extern int |
753 | | onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, |
754 | | const UChar* end ARG_UNUSED) |
755 | 0 | { |
756 | 0 | return TRUE; |
757 | 0 | } |
758 | | |
759 | | extern int |
760 | | onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, |
761 | | const UChar* end ARG_UNUSED) |
762 | 0 | { |
763 | 0 | return FALSE; |
764 | 0 | } |
765 | | |
766 | | extern int |
767 | | onigenc_always_true_is_valid_mbc_string(const UChar* s ARG_UNUSED, |
768 | | const UChar* end ARG_UNUSED) |
769 | 18.2k | { |
770 | 18.2k | return TRUE; |
771 | 18.2k | } |
772 | | |
773 | | extern int |
774 | | onigenc_length_check_is_valid_mbc_string(OnigEncoding enc, |
775 | | const UChar* p, const UChar* end) |
776 | 0 | { |
777 | 0 | while (p < end) { |
778 | 0 | p += enclen(enc, p); |
779 | 0 | } |
780 | |
|
781 | 0 | if (p != end) |
782 | 0 | return FALSE; |
783 | 0 | else |
784 | 0 | return TRUE; |
785 | 0 | } |
786 | | |
787 | | extern int |
788 | | onigenc_is_valid_mbc_string(OnigEncoding enc, const UChar* s, const UChar* end) |
789 | 0 | { |
790 | 0 | return ONIGENC_IS_VALID_MBC_STRING(enc, s, end); |
791 | 0 | } |
792 | | |
793 | | extern OnigCodePoint |
794 | | onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) |
795 | 0 | { |
796 | 0 | int c, i, len; |
797 | 0 | OnigCodePoint n; |
798 | |
|
799 | 0 | len = enclen(enc, p); |
800 | 0 | n = (OnigCodePoint )(*p++); |
801 | 0 | if (len == 1) return n; |
802 | | |
803 | 0 | for (i = 1; i < len; i++) { |
804 | 0 | if (p >= end) break; |
805 | 0 | c = *p++; |
806 | 0 | n <<= 8; n += c; |
807 | 0 | } |
808 | 0 | return n; |
809 | 0 | } |
810 | | |
811 | | extern int |
812 | | onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, |
813 | | const UChar** pp, const UChar* end ARG_UNUSED, |
814 | | UChar* lower) |
815 | 0 | { |
816 | 0 | int len; |
817 | 0 | const UChar *p = *pp; |
818 | |
|
819 | 0 | if (ONIGENC_IS_MBC_ASCII(p)) { |
820 | 0 | *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); |
821 | 0 | (*pp)++; |
822 | 0 | return 1; |
823 | 0 | } |
824 | 0 | else { |
825 | 0 | int i; |
826 | |
|
827 | 0 | len = enclen(enc, p); |
828 | 0 | for (i = 0; i < len; i++) { |
829 | 0 | *lower++ = *p++; |
830 | 0 | } |
831 | 0 | (*pp) += len; |
832 | 0 | return len; /* return byte length of converted to lower char */ |
833 | 0 | } |
834 | 0 | } |
835 | | |
836 | | extern int |
837 | | onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) |
838 | 0 | { |
839 | 0 | UChar *p = buf; |
840 | |
|
841 | 0 | if ((code & 0xff00) != 0) { |
842 | 0 | *p++ = (UChar )((code >> 8) & 0xff); |
843 | 0 | } |
844 | 0 | *p++ = (UChar )(code & 0xff); |
845 | |
|
846 | 0 | #if 1 |
847 | 0 | if (enclen(enc, buf) != (p - buf)) |
848 | 0 | return ONIGERR_INVALID_CODE_POINT_VALUE; |
849 | 0 | #endif |
850 | 0 | return (int )(p - buf); |
851 | 0 | } |
852 | | |
853 | | extern int |
854 | | onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) |
855 | 0 | { |
856 | 0 | UChar *p = buf; |
857 | |
|
858 | 0 | if ((code & 0xff000000) != 0) { |
859 | 0 | *p++ = (UChar )((code >> 24) & 0xff); |
860 | 0 | } |
861 | 0 | if ((code & 0xff0000) != 0 || p != buf) { |
862 | 0 | *p++ = (UChar )((code >> 16) & 0xff); |
863 | 0 | } |
864 | 0 | if ((code & 0xff00) != 0 || p != buf) { |
865 | 0 | *p++ = (UChar )((code >> 8) & 0xff); |
866 | 0 | } |
867 | 0 | *p++ = (UChar )(code & 0xff); |
868 | |
|
869 | 0 | #if 1 |
870 | 0 | if (enclen(enc, buf) != (p - buf)) |
871 | 0 | return ONIGERR_INVALID_CODE_POINT_VALUE; |
872 | 0 | #endif |
873 | 0 | return (int )(p - buf); |
874 | 0 | } |
875 | | |
876 | | extern int |
877 | | onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) |
878 | 0 | { |
879 | 0 | static PosixBracketEntryType PBS[] = { |
880 | 0 | { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, |
881 | 0 | { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, |
882 | 0 | { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, |
883 | 0 | { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, |
884 | 0 | { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, |
885 | 0 | { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, |
886 | 0 | { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, |
887 | 0 | { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, |
888 | 0 | { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, |
889 | 0 | { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, |
890 | 0 | { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, |
891 | 0 | { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, |
892 | 0 | { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, |
893 | 0 | { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 }, |
894 | 0 | { (UChar* )NULL, -1, 0 } |
895 | 0 | }; |
896 | |
|
897 | 0 | PosixBracketEntryType *pb; |
898 | 0 | int len; |
899 | |
|
900 | 0 | len = onigenc_strlen(enc, p, end); |
901 | 0 | for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { |
902 | 0 | if (len == pb->len && |
903 | 0 | onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) |
904 | 0 | return pb->ctype; |
905 | 0 | } |
906 | |
|
907 | 0 | return ONIGERR_INVALID_CHAR_PROPERTY_NAME; |
908 | 0 | } |
909 | | |
910 | | extern int |
911 | | onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end) |
912 | 0 | { |
913 | 0 | OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end); |
914 | |
|
915 | 0 | if (code > ASCII_LIMIT) return 0; |
916 | | |
917 | 0 | return ONIGENC_IS_ASCII_CODE_WORD(code); |
918 | 0 | } |
919 | | |
920 | | extern int |
921 | | onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, |
922 | | unsigned int ctype) |
923 | 0 | { |
924 | 0 | if (code < 128) |
925 | 0 | return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); |
926 | 0 | else { |
927 | 0 | if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { |
928 | 0 | return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); |
929 | 0 | } |
930 | 0 | } |
931 | | |
932 | 0 | return FALSE; |
933 | 0 | } |
934 | | |
935 | | extern int |
936 | | onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, |
937 | | unsigned int ctype) |
938 | 0 | { |
939 | 0 | if (code < 128) |
940 | 0 | return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); |
941 | 0 | else { |
942 | 0 | if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { |
943 | 0 | return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); |
944 | 0 | } |
945 | 0 | } |
946 | | |
947 | 0 | return FALSE; |
948 | 0 | } |
949 | | |
950 | | extern int |
951 | | onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, |
952 | | const UChar* sascii /* ascii */, int n) |
953 | 0 | { |
954 | 0 | int x, c; |
955 | |
|
956 | 0 | while (n-- > 0) { |
957 | 0 | if (p >= end) return (int )(*sascii); |
958 | | |
959 | 0 | c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); |
960 | 0 | x = *sascii - c; |
961 | 0 | if (x) return x; |
962 | | |
963 | 0 | sascii++; |
964 | 0 | p += enclen(enc, p); |
965 | 0 | } |
966 | 0 | return 0; |
967 | 0 | } |
968 | | |
969 | | extern int |
970 | | onig_codes_cmp(OnigCodePoint a[], OnigCodePoint b[], int n) |
971 | 0 | { |
972 | 0 | int i; |
973 | |
|
974 | 0 | for (i = 0; i < n; i++) { |
975 | 0 | if (a[i] != b[i]) |
976 | 0 | return -1; |
977 | 0 | } |
978 | |
|
979 | 0 | return 0; |
980 | 0 | } |
981 | | |
982 | | extern int |
983 | | onig_codes_byte_at(OnigCodePoint codes[], int at) |
984 | 0 | { |
985 | 0 | int index; |
986 | 0 | int b; |
987 | 0 | OnigCodePoint code; |
988 | |
|
989 | 0 | index = at / 3; |
990 | 0 | b = at % 3; |
991 | 0 | code = codes[index]; |
992 | |
|
993 | 0 | return ((code >> ((2 - b) * 8)) & 0xff); |
994 | 0 | } |