/src/fluent-bit/lib/onigmo/regenc.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /**********************************************************************  | 
2  |  |   regenc.c -  Onigmo (Oniguruma-mod) (regular expression library)  | 
3  |  | **********************************************************************/  | 
4  |  | /*-  | 
5  |  |  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>  | 
6  |  |  * Copyright (c) 2011-2019  K.Takata  <kentkt AT csc DOT jp>  | 
7  |  |  * All rights reserved.  | 
8  |  |  *  | 
9  |  |  * Redistribution and use in source and binary forms, with or without  | 
10  |  |  * modification, are permitted provided that the following conditions  | 
11  |  |  * are met:  | 
12  |  |  * 1. Redistributions of source code must retain the above copyright  | 
13  |  |  *    notice, this list of conditions and the following disclaimer.  | 
14  |  |  * 2. Redistributions in binary form must reproduce the above copyright  | 
15  |  |  *    notice, this list of conditions and the following disclaimer in the  | 
16  |  |  *    documentation and/or other materials provided with the distribution.  | 
17  |  |  *  | 
18  |  |  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND  | 
19  |  |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE  | 
20  |  |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE  | 
21  |  |  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE  | 
22  |  |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  | 
23  |  |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS  | 
24  |  |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)  | 
25  |  |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT  | 
26  |  |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY  | 
27  |  |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF  | 
28  |  |  * SUCH DAMAGE.  | 
29  |  |  */  | 
30  |  |  | 
31  |  | #include "regint.h"  | 
32  |  |  | 
33  |  | OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;  | 
34  |  |  | 
/* Encoding initialisation hook.  It performs no work and always reports
 * success by returning 0. */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
40  |  |  | 
41  |  | extern OnigEncoding  | 
42  |  | onigenc_get_default_encoding(void)  | 
43  | 0  | { | 
44  | 0  |   return OnigEncDefaultCharEncoding;  | 
45  | 0  | }  | 
46  |  |  | 
47  |  | extern int  | 
48  |  | onigenc_set_default_encoding(OnigEncoding enc)  | 
49  | 0  | { | 
50  | 0  |   OnigEncDefaultCharEncoding = enc;  | 
51  | 0  |   return 0;  | 
52  | 0  | }  | 
53  |  |  | 
54  |  | extern int  | 
55  |  | onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)  | 
56  | 3.45k  | { | 
57  | 3.45k  |   int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);  | 
58  | 3.45k  |   if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) { | 
59  | 3.45k  |     ret = ONIGENC_MBCLEN_CHARFOUND_LEN(ret);  | 
60  | 3.45k  |     if (ret > (int)(e - p)) ret = (int)(e - p); // just for case  | 
61  | 3.45k  |     return ret;  | 
62  | 3.45k  |   }  | 
63  | 0  |   else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) { | 
64  | 0  |     return (int)(e - p);  | 
65  | 0  |   }  | 
66  | 0  |   return p < e ? 1 : 0;  | 
67  | 3.45k  | }  | 
68  |  |  | 
69  |  | extern int  | 
70  |  | onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)  | 
71  | 0  | { | 
72  | 0  |   int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);  | 
73  | 0  |   if (ONIGENC_MBCLEN_CHARFOUND_P(ret))  | 
74  | 0  |     return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);  | 
75  | 0  |   else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))  | 
76  | 0  |     return (int )(e - p) + ONIGENC_MBCLEN_NEEDMORE_LEN(ret);  | 
77  | 0  |   return 1;  | 
78  | 0  | }  | 
79  |  |  | 
80  |  | extern UChar*  | 
81  |  | onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)  | 
82  | 0  | { | 
83  | 0  |   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);  | 
84  | 0  |   if (p < s) { | 
85  | 0  |     p += enclen(enc, p, end);  | 
86  | 0  |   }  | 
87  | 0  |   return p;  | 
88  | 0  | }  | 
89  |  |  | 
90  |  | extern UChar*  | 
91  |  | onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,  | 
92  |  |            const UChar* start, const UChar* s, const UChar* end, const UChar** prev)  | 
93  | 0  | { | 
94  | 0  |   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);  | 
95  |  | 
  | 
96  | 0  |   if (p < s) { | 
97  | 0  |     if (prev) *prev = (const UChar* )p;  | 
98  | 0  |     p += enclen(enc, p, end);  | 
99  | 0  |   }  | 
100  | 0  |   else { | 
101  | 0  |     if (prev) *prev = (const UChar* )NULL; /* Sorry */  | 
102  | 0  |   }  | 
103  | 0  |   return p;  | 
104  | 0  | }  | 
105  |  |  | 
106  |  | extern UChar*  | 
107  |  | onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)  | 
108  | 0  | { | 
109  | 0  |   if (s <= start)  | 
110  | 0  |     return (UChar* )NULL;  | 
111  |  |  | 
112  | 0  |   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);  | 
113  | 0  | }  | 
114  |  |  | 
115  |  | extern UChar*  | 
116  |  | onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)  | 
117  | 0  | { | 
118  | 0  |   while (ONIG_IS_NOT_NULL(s) && n-- > 0) { | 
119  | 0  |     if (s <= start)  | 
120  | 0  |       return (UChar* )NULL;  | 
121  |  |  | 
122  | 0  |     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);  | 
123  | 0  |   }  | 
124  | 0  |   return (UChar* )s;  | 
125  | 0  | }  | 
126  |  |  | 
127  |  | extern UChar*  | 
128  |  | onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)  | 
129  | 0  | { | 
130  | 0  |   UChar* q = (UChar* )p;  | 
131  | 0  |   while (n-- > 0) { | 
132  | 0  |     q += ONIGENC_MBC_ENC_LEN(enc, q, end);  | 
133  | 0  |   }  | 
134  | 0  |   return (q <= end ? q : NULL);  | 
135  | 0  | }  | 
136  |  |  | 
137  |  | extern int  | 
138  |  | onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)  | 
139  | 0  | { | 
140  | 0  |   int n = 0;  | 
141  | 0  |   UChar* q = (UChar* )p;  | 
142  |  | 
  | 
143  | 0  |   while (q < end) { | 
144  | 0  |     q += ONIGENC_MBC_ENC_LEN(enc, q, end);  | 
145  | 0  |     n++;  | 
146  | 0  |   }  | 
147  | 0  |   return n;  | 
148  | 0  | }  | 
149  |  |  | 
150  |  | extern int  | 
151  |  | onigenc_strlen_null(OnigEncoding enc, const UChar* s)  | 
152  | 0  | { | 
153  | 0  |   int n = 0;  | 
154  | 0  |   UChar* p = (UChar* )s;  | 
155  | 0  |   UChar* e;  | 
156  |  | 
  | 
157  | 0  |   while (1) { | 
158  | 0  |     if (*p == '\0') { | 
159  | 0  |       UChar* q;  | 
160  | 0  |       int len = ONIGENC_MBC_MINLEN(enc);  | 
161  |  | 
  | 
162  | 0  |       if (len == 1) return n;  | 
163  | 0  |       q = p + 1;  | 
164  | 0  |       while (len > 1) { | 
165  | 0  |         if (*q != '\0') break;  | 
166  | 0  |         q++;  | 
167  | 0  |         len--;  | 
168  | 0  |       }  | 
169  | 0  |       if (len == 1) return n;  | 
170  | 0  |     }  | 
171  | 0  |     e = p + ONIGENC_MBC_MAXLEN(enc);  | 
172  | 0  |     p += ONIGENC_MBC_ENC_LEN(enc, p, e);  | 
173  | 0  |     n++;  | 
174  | 0  |   }  | 
175  | 0  | }  | 
176  |  |  | 
177  |  | extern int  | 
178  |  | onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)  | 
179  | 0  | { | 
180  | 0  |   UChar* start = (UChar* )s;  | 
181  | 0  |   UChar* p = (UChar* )s;  | 
182  | 0  |   UChar* e;  | 
183  |  | 
  | 
184  | 0  |   while (1) { | 
185  | 0  |     if (*p == '\0') { | 
186  | 0  |       UChar* q;  | 
187  | 0  |       int len = ONIGENC_MBC_MINLEN(enc);  | 
188  |  | 
  | 
189  | 0  |       if (len == 1) return (int )(p - start);  | 
190  | 0  |       q = p + 1;  | 
191  | 0  |       while (len > 1) { | 
192  | 0  |         if (*q != '\0') break;  | 
193  | 0  |         q++;  | 
194  | 0  |         len--;  | 
195  | 0  |       }  | 
196  | 0  |       if (len == 1) return (int )(p - start);  | 
197  | 0  |     }  | 
198  | 0  |     e = p + ONIGENC_MBC_MAXLEN(enc);  | 
199  | 0  |     p += ONIGENC_MBC_ENC_LEN(enc, p, e);  | 
200  | 0  |   }  | 
201  | 0  | }  | 
202  |  |  | 
203  |  | const UChar OnigEncAsciiToLowerCaseTable[] = { | 
204  |  |   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',  | 
205  |  |   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',  | 
206  |  |   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',  | 
207  |  |   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',  | 
208  |  |   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',  | 
209  |  |   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',  | 
210  |  |   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',  | 
211  |  |   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',  | 
212  |  |   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',  | 
213  |  |   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',  | 
214  |  |   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',  | 
215  |  |   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',  | 
216  |  |   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',  | 
217  |  |   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',  | 
218  |  |   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',  | 
219  |  |   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',  | 
220  |  |   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',  | 
221  |  |   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',  | 
222  |  |   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',  | 
223  |  |   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',  | 
224  |  |   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',  | 
225  |  |   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',  | 
226  |  |   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',  | 
227  |  |   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',  | 
228  |  |   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',  | 
229  |  |   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',  | 
230  |  |   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',  | 
231  |  |   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',  | 
232  |  |   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',  | 
233  |  |   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',  | 
234  |  |   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',  | 
235  |  |   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',  | 
236  |  | };  | 
237  |  |  | 
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case byte, ASCII letters only: entries 0x61..0x7a hold
 * 0x41..0x5a, every other entry holds its own index.  Only built when
 * USE_UPPER_CASE_TABLE is defined.
 */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  /* 0x00 */ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  /* 0x08 */ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  /* 0x10 */ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  /* 0x18 */ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  /* 0x20 */ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  /* 0x28 */ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  /* 0x30 */ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  /* 0x38 */ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0x40 */ '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  /* 0x48 */ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  /* 0x50 */ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  /* 0x58 */ '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 */ '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  /* 0x68 */ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  /* 0x70 */ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  /* 0x78 */ '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  /* 0x80 */ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  /* 0x88 */ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  /* 0x90 */ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  /* 0x98 */ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  /* 0xa0 */ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  /* 0xa8 */ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  /* 0xb0 */ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  /* 0xb8 */ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  /* 0xc0 */ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  /* 0xc8 */ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  /* 0xd0 */ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  /* 0xd8 */ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  /* 0xe0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 0xe8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 0xf0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 0xf8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
274  |  |  | 
/*
 * Per-byte character-type bit masks for ASCII, indexed by byte value.
 * Entries 0x80..0xff are all zero.
 */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0x88 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0x90 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0x98 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xa0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xa8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xb0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xb8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xc0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xc8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xd0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xd8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xe0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xe8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xf0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xf8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
309  |  |  | 
310  |  | const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { | 
311  |  |   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',  | 
312  |  |   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',  | 
313  |  |   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',  | 
314  |  |   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',  | 
315  |  |   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',  | 
316  |  |   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',  | 
317  |  |   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',  | 
318  |  |   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',  | 
319  |  |   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',  | 
320  |  |   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',  | 
321  |  |   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',  | 
322  |  |   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',  | 
323  |  |   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',  | 
324  |  |   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',  | 
325  |  |   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',  | 
326  |  |   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',  | 
327  |  |   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',  | 
328  |  |   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',  | 
329  |  |   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',  | 
330  |  |   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',  | 
331  |  |   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',  | 
332  |  |   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',  | 
333  |  |   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',  | 
334  |  |   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',  | 
335  |  |   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',  | 
336  |  |   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',  | 
337  |  |   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',  | 
338  |  |   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',  | 
339  |  |   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',  | 
340  |  |   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',  | 
341  |  |   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',  | 
342  |  |   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'  | 
343  |  | };  | 
344  |  |  | 
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case byte for ISO-8859-1.  In addition to the ASCII letters
 * (0x61..0x7a -> 0x41..0x5a), entries 0xe0..0xfe hold 0xc0..0xde, except
 * 0xf7 which holds itself.  Every other entry (including 0xff) holds its own
 * index.  Only built when USE_UPPER_CASE_TABLE is defined.
 */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  /* 0x00 */ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  /* 0x08 */ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  /* 0x10 */ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  /* 0x18 */ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  /* 0x20 */ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  /* 0x28 */ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  /* 0x30 */ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  /* 0x38 */ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0x40 */ '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  /* 0x48 */ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  /* 0x50 */ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  /* 0x58 */ '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 */ '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  /* 0x68 */ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  /* 0x70 */ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  /* 0x78 */ '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  /* 0x80 */ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  /* 0x88 */ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  /* 0x90 */ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  /* 0x98 */ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  /* 0xa0 */ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  /* 0xa8 */ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  /* 0xb0 */ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  /* 0xb8 */ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  /* 0xc0 */ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  /* 0xc8 */ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  /* 0xd0 */ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  /* 0xd8 */ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  /* 0xe0 */ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  /* 0xe8 */ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  /* 0xf0 */ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
  /* 0xf8 */ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
381  |  |  | 
#if 0
/* Obsolete no-op, excluded from the build by the surrounding #if 0. */
extern void
onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
{
  /* obsoleted: intentionally does nothing */
}
#endif
390  |  |  | 
391  |  | extern UChar*  | 
392  |  | onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)  | 
393  | 0  | { | 
394  | 0  |   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);  | 
395  | 0  | }  | 
396  |  |  | 
397  |  | const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { | 
398  |  |   { 0x41, 0x61 }, | 
399  |  |   { 0x42, 0x62 }, | 
400  |  |   { 0x43, 0x63 }, | 
401  |  |   { 0x44, 0x64 }, | 
402  |  |   { 0x45, 0x65 }, | 
403  |  |   { 0x46, 0x66 }, | 
404  |  |   { 0x47, 0x67 }, | 
405  |  |   { 0x48, 0x68 }, | 
406  |  |   { 0x49, 0x69 }, | 
407  |  |   { 0x4a, 0x6a }, | 
408  |  |   { 0x4b, 0x6b }, | 
409  |  |   { 0x4c, 0x6c }, | 
410  |  |   { 0x4d, 0x6d }, | 
411  |  |   { 0x4e, 0x6e }, | 
412  |  |   { 0x4f, 0x6f }, | 
413  |  |   { 0x50, 0x70 }, | 
414  |  |   { 0x51, 0x71 }, | 
415  |  |   { 0x52, 0x72 }, | 
416  |  |   { 0x53, 0x73 }, | 
417  |  |   { 0x54, 0x74 }, | 
418  |  |   { 0x55, 0x75 }, | 
419  |  |   { 0x56, 0x76 }, | 
420  |  |   { 0x57, 0x77 }, | 
421  |  |   { 0x58, 0x78 }, | 
422  |  |   { 0x59, 0x79 }, | 
423  |  |   { 0x5a, 0x7a } | 
424  |  | };  | 
425  |  |  | 
426  |  | extern int  | 
427  |  | onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,  | 
428  |  |           OnigApplyAllCaseFoldFunc f, void* arg,  | 
429  |  |           OnigEncoding enc ARG_UNUSED)  | 
430  | 0  | { | 
431  | 0  |   OnigCodePoint code;  | 
432  | 0  |   int i, r;  | 
433  |  | 
  | 
434  | 0  |   for (i = 0; i < numberof(OnigAsciiLowerMap); i++) { | 
435  | 0  |     code = OnigAsciiLowerMap[i].to;  | 
436  | 0  |     r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);  | 
437  | 0  |     if (r != 0) return r;  | 
438  |  |  | 
439  | 0  |     code = OnigAsciiLowerMap[i].from;  | 
440  | 0  |     r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);  | 
441  | 0  |     if (r != 0) return r;  | 
442  | 0  |   }  | 
443  |  |  | 
444  | 0  |   return 0;  | 
445  | 0  | }  | 
446  |  |  | 
447  |  | extern int  | 
448  |  | onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,  | 
449  |  |    const OnigUChar* p, const OnigUChar* end ARG_UNUSED,  | 
450  |  |    OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)  | 
451  | 0  | { | 
452  | 0  |   if (0x41 <= *p && *p <= 0x5a) { | 
453  | 0  |     items[0].byte_len = 1;  | 
454  | 0  |     items[0].code_len = 1;  | 
455  | 0  |     items[0].code[0] = (OnigCodePoint )(*p + 0x20);  | 
456  | 0  |     return 1;  | 
457  | 0  |   }  | 
458  | 0  |   else if (0x61 <= *p && *p <= 0x7a) { | 
459  | 0  |     items[0].byte_len = 1;  | 
460  | 0  |     items[0].code_len = 1;  | 
461  | 0  |     items[0].code[0] = (OnigCodePoint )(*p - 0x20);  | 
462  | 0  |     return 1;  | 
463  | 0  |   }  | 
464  | 0  |   else  | 
465  | 0  |     return 0;  | 
466  | 0  | }  | 
467  |  |  | 
468  |  | static int  | 
469  |  | ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,  | 
470  |  |            OnigApplyAllCaseFoldFunc f, void* arg)  | 
471  | 0  | { | 
472  | 0  |   OnigCodePoint ss[] = { 0x73, 0x73 }; | 
473  |  | 
  | 
474  | 0  |   return (*f)((OnigCodePoint )0xdf, ss, 2, arg);  | 
475  | 0  | }  | 
476  |  |  | 
477  |  | extern int  | 
478  |  | onigenc_apply_all_case_fold_with_map(int map_size,  | 
479  |  |     const OnigPairCaseFoldCodes map[],  | 
480  |  |     int ess_tsett_flag, OnigCaseFoldType flag,  | 
481  |  |     OnigApplyAllCaseFoldFunc f, void* arg)  | 
482  | 0  | { | 
483  | 0  |   OnigCodePoint code;  | 
484  | 0  |   int i, r;  | 
485  |  | 
  | 
486  | 0  |   r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);  | 
487  | 0  |   if (r != 0) return r;  | 
488  |  |  | 
489  | 0  |   for (i = 0; i < map_size; i++) { | 
490  | 0  |     code = map[i].to;  | 
491  | 0  |     r = (*f)(map[i].from, &code, 1, arg);  | 
492  | 0  |     if (r != 0) return r;  | 
493  |  |  | 
494  | 0  |     code = map[i].from;  | 
495  | 0  |     r = (*f)(map[i].to, &code, 1, arg);  | 
496  | 0  |     if (r != 0) return r;  | 
497  | 0  |   }  | 
498  |  |  | 
499  | 0  |   if (ess_tsett_flag != 0)  | 
500  | 0  |     return ss_apply_all_case_fold(flag, f, arg);  | 
501  |  |  | 
502  | 0  |   return 0;  | 
503  | 0  | }  | 
504  |  |  | 
505  |  | extern int  | 
506  |  | onigenc_get_case_fold_codes_by_str_with_map(int map_size,  | 
507  |  |     const OnigPairCaseFoldCodes map[],  | 
508  |  |     int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,  | 
509  |  |     const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])  | 
510  | 0  | { | 
511  | 0  |   if (0x41 <= *p && *p <= 0x5a) { | 
512  | 0  |     items[0].byte_len = 1;  | 
513  | 0  |     items[0].code_len = 1;  | 
514  | 0  |     items[0].code[0] = (OnigCodePoint )(*p + 0x20);  | 
515  | 0  |     if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1  | 
516  | 0  |   && (*(p+1) == 0x53 || *(p+1) == 0x73)) { | 
517  |  |       /* SS */  | 
518  | 0  |       items[1].byte_len = 2;  | 
519  | 0  |       items[1].code_len = 1;  | 
520  | 0  |       items[1].code[0] = (OnigCodePoint )0xdf;  | 
521  | 0  |       return 2;  | 
522  | 0  |     }  | 
523  | 0  |     else  | 
524  | 0  |       return 1;  | 
525  | 0  |   }  | 
526  | 0  |   else if (0x61 <= *p && *p <= 0x7a) { | 
527  | 0  |     items[0].byte_len = 1;  | 
528  | 0  |     items[0].code_len = 1;  | 
529  | 0  |     items[0].code[0] = (OnigCodePoint )(*p - 0x20);  | 
530  | 0  |     if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1  | 
531  | 0  |   && (*(p+1) == 0x73 || *(p+1) == 0x53)) { | 
532  |  |       /* ss */  | 
533  | 0  |       items[1].byte_len = 2;  | 
534  | 0  |       items[1].code_len = 1;  | 
535  | 0  |       items[1].code[0] = (OnigCodePoint )0xdf;  | 
536  | 0  |       return 2;  | 
537  | 0  |     }  | 
538  | 0  |     else  | 
539  | 0  |       return 1;  | 
540  | 0  |   }  | 
541  | 0  |   else if (*p == 0xdf && ess_tsett_flag != 0) { | 
542  | 0  |     items[0].byte_len = 1;  | 
543  | 0  |     items[0].code_len = 2;  | 
544  | 0  |     items[0].code[0] = (OnigCodePoint )'s';  | 
545  | 0  |     items[0].code[1] = (OnigCodePoint )'s';  | 
546  |  | 
  | 
547  | 0  |     items[1].byte_len = 1;  | 
548  | 0  |     items[1].code_len = 2;  | 
549  | 0  |     items[1].code[0] = (OnigCodePoint )'S';  | 
550  | 0  |     items[1].code[1] = (OnigCodePoint )'S';  | 
551  |  | 
  | 
552  | 0  |     items[2].byte_len = 1;  | 
553  | 0  |     items[2].code_len = 2;  | 
554  | 0  |     items[2].code[0] = (OnigCodePoint )'s';  | 
555  | 0  |     items[2].code[1] = (OnigCodePoint )'S';  | 
556  |  | 
  | 
557  | 0  |     items[3].byte_len = 1;  | 
558  | 0  |     items[3].code_len = 2;  | 
559  | 0  |     items[3].code[0] = (OnigCodePoint )'S';  | 
560  | 0  |     items[3].code[1] = (OnigCodePoint )'s';  | 
561  |  | 
  | 
562  | 0  |     return 4;  | 
563  | 0  |   }  | 
564  | 0  |   else { | 
565  | 0  |     int i;  | 
566  |  | 
  | 
567  | 0  |     for (i = 0; i < map_size; i++) { | 
568  | 0  |       if (*p == map[i].from) { | 
569  | 0  |   items[0].byte_len = 1;  | 
570  | 0  |   items[0].code_len = 1;  | 
571  | 0  |   items[0].code[0] = map[i].to;  | 
572  | 0  |   return 1;  | 
573  | 0  |       }  | 
574  | 0  |       else if (*p == map[i].to) { | 
575  | 0  |   items[0].byte_len = 1;  | 
576  | 0  |   items[0].code_len = 1;  | 
577  | 0  |   items[0].code[0] = map[i].from;  | 
578  | 0  |   return 1;  | 
579  | 0  |       }  | 
580  | 0  |     }  | 
581  | 0  |   }  | 
582  |  |  | 
583  | 0  |   return 0;  | 
584  | 0  | }  | 
585  |  |  | 
586  |  |  | 
587  |  | extern int  | 
588  |  | onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,  | 
589  |  |    OnigCodePoint* sb_out ARG_UNUSED,  | 
590  |  |    const OnigCodePoint* ranges[] ARG_UNUSED,  | 
591  |  |    OnigEncoding enc)  | 
592  | 0  | { | 
593  | 0  |   return ONIG_NO_SUPPORT_CONFIG;  | 
594  | 0  | }  | 
595  |  |  | 
596  |  | extern int  | 
597  |  | onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)  | 
598  | 0  | { | 
599  | 0  |   if (p < end) { | 
600  | 0  |     if (*p == 0x0a) return 1;  | 
601  | 0  |   }  | 
602  | 0  |   return 0;  | 
603  | 0  | }  | 
604  |  |  | 
605  |  | /* for single byte encodings */  | 
606  |  | extern int  | 
607  |  | onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,  | 
608  |  |           const UChar* end, UChar* lower, OnigEncoding enc ARG_UNUSED)  | 
609  | 0  | { | 
610  | 0  |   *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);  | 
611  |  | 
  | 
612  | 0  |   (*p)++;  | 
613  | 0  |   return 1; /* return byte length of converted char to lower */  | 
614  | 0  | }  | 
615  |  |  | 
#if 0
/* Disabled by #if 0: advances *PP by one byte and returns
 * ONIGENC_IS_ASCII_CODE_CASE_AMBIG for the byte that was skipped. */
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
             const UChar** pp, const UChar* end ARG_UNUSED)
{
  const UChar* here = *pp;

  *pp = here + 1;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*here);
}
#endif
627  |  |  | 
628  |  | extern int  | 
629  |  | onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,  | 
630  |  |         OnigEncoding enc ARG_UNUSED)  | 
631  | 0  | { | 
632  | 0  |   return 1;  | 
633  | 0  | }  | 
634  |  |  | 
635  |  | extern OnigCodePoint  | 
636  |  | onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,  | 
637  |  |         OnigEncoding enc ARG_UNUSED)  | 
638  | 0  | { | 
639  | 0  |   return (OnigCodePoint )(*p);  | 
640  | 0  | }  | 
641  |  |  | 
642  |  | extern int  | 
643  |  | onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)  | 
644  | 0  | { | 
645  | 0  |   if (code > 0xff)  | 
646  | 0  |     return ONIGERR_INVALID_CODE_POINT_VALUE;  | 
647  | 0  |   return 1;  | 
648  | 0  | }  | 
649  |  |  | 
650  |  | extern int  | 
651  |  | onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)  | 
652  | 0  | { | 
653  | 0  |   if (code > 0xff) { | 
654  |  | #ifdef RUBY  | 
655  |  |     rb_raise(rb_eRangeError, "%u out of char range", code);  | 
656  |  | #else  | 
657  | 0  |     return ONIGERR_INVALID_CODE_POINT_VALUE;  | 
658  | 0  | #endif  | 
659  | 0  |   }  | 
660  | 0  |   *buf = (UChar )(code & 0xff);  | 
661  | 0  |   return 1;  | 
662  | 0  | }  | 
663  |  |  | 
664  |  | extern UChar*  | 
665  |  | onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,  | 
666  |  |             const UChar* s,  | 
667  |  |             const UChar* end ARG_UNUSED,  | 
668  |  |             OnigEncoding enc ARG_UNUSED)  | 
669  | 0  | { | 
670  | 0  |   return (UChar* )s;  | 
671  | 0  | }  | 
672  |  |  | 
673  |  | extern int  | 
674  |  | onigenc_always_true_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,  | 
675  |  |                const UChar* end ARG_UNUSED,  | 
676  |  |                OnigEncoding enc ARG_UNUSED)  | 
677  | 22  | { | 
678  | 22  |   return TRUE;  | 
679  | 22  | }  | 
680  |  |  | 
681  |  | extern int  | 
682  |  | onigenc_always_false_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,  | 
683  |  |                 const UChar* end ARG_UNUSED,  | 
684  |  |                 OnigEncoding enc ARG_UNUSED)  | 
685  | 0  | { | 
686  | 0  |   return FALSE;  | 
687  | 0  | }  | 
688  |  |  | 
689  |  | extern int  | 
690  |  | onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,  | 
691  |  |                             OnigEncoding enc ARG_UNUSED)  | 
692  | 0  | { | 
693  | 0  |   if (code < 128)  | 
694  | 0  |     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);  | 
695  | 0  |   else  | 
696  | 0  |     return FALSE;  | 
697  | 0  | }  | 
698  |  |  | 
699  |  | extern OnigCodePoint  | 
700  |  | onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)  | 
701  | 0  | { | 
702  | 0  |   int c, i, len;  | 
703  | 0  |   OnigCodePoint n;  | 
704  |  | 
  | 
705  | 0  |   len = enclen(enc, p, end);  | 
706  | 0  |   n = (OnigCodePoint )(*p++);  | 
707  | 0  |   if (len == 1) return n;  | 
708  |  |  | 
709  | 0  |   for (i = 1; i < len; i++) { | 
710  | 0  |     if (p >= end) break;  | 
711  | 0  |     c = *p++;  | 
712  | 0  |     n <<= 8;  n += c;  | 
713  | 0  |   }  | 
714  | 0  |   return n;  | 
715  | 0  | }  | 
716  |  |  | 
717  |  | extern int  | 
718  |  | onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,  | 
719  |  |                           const UChar** pp, const UChar* end ARG_UNUSED,  | 
720  |  |         UChar* lower)  | 
721  | 0  | { | 
722  | 0  |   int len;  | 
723  | 0  |   const UChar *p = *pp;  | 
724  |  | 
  | 
725  | 0  |   if (ONIGENC_IS_MBC_ASCII(p)) { | 
726  | 0  |     *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);  | 
727  | 0  |     (*pp)++;  | 
728  | 0  |     return 1;  | 
729  | 0  |   }  | 
730  | 0  |   else { | 
731  | 0  |     int i;  | 
732  |  | 
  | 
733  | 0  |     len = enclen(enc, p, end);  | 
734  | 0  |     for (i = 0; i < len; i++) { | 
735  | 0  |       *lower++ = *p++;  | 
736  | 0  |     }  | 
737  | 0  |     (*pp) += len;  | 
738  | 0  |     return len; /* return byte length of converted to lower char */  | 
739  | 0  |   }  | 
740  | 0  | }  | 
741  |  |  | 
742  |  | #if 0  | 
/*
 * NOTE: this function sits inside an `#if 0` region and is not compiled.
 *
 * If ONIGENC_IS_MBC_ASCII(*pp) holds, advances *pp by one byte and returns
 * ONIGENC_IS_ASCII_CODE_CASE_AMBIG() of the byte that was there.
 * Otherwise advances *pp by enclen(enc, p) and returns FALSE.
 * flag and end are not used in the body.
 */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
                             const UChar** pp, const UChar* end ARG_UNUSED)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    (*pp)++;
    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
  }

  /* NOTE(review): enclen is called with two arguments here, whereas the
     compiled code in this file passes three (enc, p, end). */
  (*pp) += enclen(enc, p);
  return FALSE;
}
757  |  | #endif  | 
758  |  |  | 
759  |  | extern int  | 
760  |  | onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)  | 
761  | 0  | { | 
762  | 0  |   if (code <= 0xff) return 1;  | 
763  | 0  |   if (code <= 0xffff) return 2;  | 
764  | 0  |   return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;  | 
765  | 0  | }  | 
766  |  |  | 
767  |  | extern int  | 
768  |  | onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)  | 
769  | 0  | { | 
770  | 0  |        if ((code & 0xff000000) != 0) return 4;  | 
771  | 0  |   else if ((code & 0xff0000) != 0) return 3;  | 
772  | 0  |   else if ((code & 0xff00) != 0) return 2;  | 
773  | 0  |   else return 1;  | 
774  | 0  | }  | 
775  |  |  | 
776  |  | extern int  | 
777  |  | onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)  | 
778  | 0  | { | 
779  | 0  |   UChar *p = buf;  | 
780  |  | 
  | 
781  | 0  |   if ((code & 0xff00) != 0) { | 
782  | 0  |     *p++ = (UChar )((code >>  8) & 0xff);  | 
783  | 0  |   }  | 
784  | 0  |   *p++ = (UChar )(code & 0xff);  | 
785  |  | 
  | 
786  | 0  | #if 1  | 
787  | 0  |   if (enclen(enc, buf, p) != (p - buf))  | 
788  | 0  |     return ONIGERR_INVALID_CODE_POINT_VALUE;  | 
789  | 0  | #endif  | 
790  | 0  |   return (int )(p - buf);  | 
791  | 0  | }  | 
792  |  |  | 
793  |  | extern int  | 
794  |  | onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)  | 
795  | 0  | { | 
796  | 0  |   UChar *p = buf;  | 
797  |  | 
  | 
798  | 0  |   if ((code & 0xff000000) != 0) { | 
799  | 0  |     *p++ = (UChar )((code >> 24) & 0xff);  | 
800  | 0  |   }  | 
801  | 0  |   if ((code & 0xff0000) != 0 || p != buf) { | 
802  | 0  |     *p++ = (UChar )((code >> 16) & 0xff);  | 
803  | 0  |   }  | 
804  | 0  |   if ((code & 0xff00) != 0 || p != buf) { | 
805  | 0  |     *p++ = (UChar )((code >> 8) & 0xff);  | 
806  | 0  |   }  | 
807  | 0  |   *p++ = (UChar )(code & 0xff);  | 
808  |  | 
  | 
809  | 0  | #if 1  | 
810  | 0  |   if (enclen(enc, buf, p) != (p - buf))  | 
811  | 0  |     return ONIGERR_INVALID_CODE_POINT_VALUE;  | 
812  | 0  | #endif  | 
813  | 0  |   return (int )(p - buf);  | 
814  | 0  | }  | 
815  |  |  | 
816  |  | extern int  | 
817  |  | onigenc_minimum_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end)  | 
818  | 0  | { | 
819  | 0  |   static const PosixBracketEntryType PBS[] = { | 
820  | 0  |     POSIX_BRACKET_ENTRY_INIT("Alnum",  ONIGENC_CTYPE_ALNUM), | 
821  | 0  |     POSIX_BRACKET_ENTRY_INIT("Alpha",  ONIGENC_CTYPE_ALPHA), | 
822  | 0  |     POSIX_BRACKET_ENTRY_INIT("Blank",  ONIGENC_CTYPE_BLANK), | 
823  | 0  |     POSIX_BRACKET_ENTRY_INIT("Cntrl",  ONIGENC_CTYPE_CNTRL), | 
824  | 0  |     POSIX_BRACKET_ENTRY_INIT("Digit",  ONIGENC_CTYPE_DIGIT), | 
825  | 0  |     POSIX_BRACKET_ENTRY_INIT("Graph",  ONIGENC_CTYPE_GRAPH), | 
826  | 0  |     POSIX_BRACKET_ENTRY_INIT("Lower",  ONIGENC_CTYPE_LOWER), | 
827  | 0  |     POSIX_BRACKET_ENTRY_INIT("Print",  ONIGENC_CTYPE_PRINT), | 
828  | 0  |     POSIX_BRACKET_ENTRY_INIT("Punct",  ONIGENC_CTYPE_PUNCT), | 
829  | 0  |     POSIX_BRACKET_ENTRY_INIT("Space",  ONIGENC_CTYPE_SPACE), | 
830  | 0  |     POSIX_BRACKET_ENTRY_INIT("Upper",  ONIGENC_CTYPE_UPPER), | 
831  | 0  |     POSIX_BRACKET_ENTRY_INIT("XDigit", ONIGENC_CTYPE_XDIGIT), | 
832  | 0  |     POSIX_BRACKET_ENTRY_INIT("ASCII",  ONIGENC_CTYPE_ASCII), | 
833  | 0  |     POSIX_BRACKET_ENTRY_INIT("Word",   ONIGENC_CTYPE_WORD), | 
834  | 0  |   };  | 
835  |  | 
  | 
836  | 0  |   const PosixBracketEntryType *pb;  | 
837  | 0  |   int len;  | 
838  |  | 
  | 
839  | 0  |   len = onigenc_strlen(enc, p, end);  | 
840  | 0  |   for (pb = PBS; pb < PBS + numberof(PBS); pb++) { | 
841  | 0  |     if (len == pb->len &&  | 
842  | 0  |         onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)  | 
843  | 0  |       return pb->ctype;  | 
844  | 0  |   }  | 
845  |  |  | 
846  | 0  |   return ONIGERR_INVALID_CHAR_PROPERTY_NAME;  | 
847  | 0  | }  | 
848  |  |  | 
849  |  | extern int  | 
850  |  | onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,  | 
851  |  |         unsigned int ctype)  | 
852  | 0  | { | 
853  | 0  |   if (code < 128)  | 
854  | 0  |     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);  | 
855  | 0  |   else { | 
856  | 0  |     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { | 
857  | 0  |       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);  | 
858  | 0  |     }  | 
859  | 0  |   }  | 
860  |  |  | 
861  | 0  |   return FALSE;  | 
862  | 0  | }  | 
863  |  |  | 
864  |  | extern int  | 
865  |  | onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,  | 
866  |  |         unsigned int ctype)  | 
867  | 0  | { | 
868  | 0  |   if (code < 128)  | 
869  | 0  |     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);  | 
870  | 0  |   else { | 
871  | 0  |     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { | 
872  | 0  |       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);  | 
873  | 0  |     }  | 
874  | 0  |   }  | 
875  |  |  | 
876  | 0  |   return FALSE;  | 
877  | 0  | }  | 
878  |  |  | 
879  |  | extern int  | 
880  |  | onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,  | 
881  |  |                            const UChar* sascii /* ascii */, int n)  | 
882  | 0  | { | 
883  | 0  |   int x, c;  | 
884  |  | 
  | 
885  | 0  |   while (n-- > 0) { | 
886  | 0  |     if (p >= end) return (int )(*sascii);  | 
887  |  |  | 
888  | 0  |     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);  | 
889  | 0  |     x = *sascii - c;  | 
890  | 0  |     if (x) return x;  | 
891  |  |  | 
892  | 0  |     sascii++;  | 
893  | 0  |     p += enclen(enc, p, end);  | 
894  | 0  |   }  | 
895  | 0  |   return 0;  | 
896  | 0  | }  | 
897  |  |  | 
898  |  | extern int  | 
899  |  | onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end,  | 
900  |  |                             const UChar* sascii /* ascii */, int n)  | 
901  | 0  | { | 
902  | 0  |   int x, c;  | 
903  |  | 
  | 
904  | 0  |   while (n-- > 0) { | 
905  | 0  |     if (p >= end) return (int )(*sascii);  | 
906  |  |  | 
907  | 0  |     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);  | 
908  | 0  |     if (ONIGENC_IS_ASCII_CODE(c))  | 
909  | 0  |       c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);  | 
910  | 0  |     x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c;  | 
911  | 0  |     if (x) return x;  | 
912  |  |  | 
913  | 0  |     sascii++;  | 
914  | 0  |     p += enclen(enc, p, end);  | 
915  | 0  |   }  | 
916  | 0  |   return 0;  | 
917  | 0  | }  | 
918  |  |  | 
919  |  | #if 0  | 
920  |  | /* Property management */  | 
/*
 * NOTE: this function sits inside an `#if 0` region and is not compiled.
 *
 * Allocate or grow the pointer array *plist so that it holds new_size
 * entries.  A NULL list is allocated with xmalloc; an existing list is
 * resized with xrealloc.
 *
 * On success stores the (possibly moved) array in *plist, stores new_size in
 * *psize and returns 0.  On allocation failure returns ONIGERR_MEMORY and
 * leaves *plist and *psize unmodified.
 */
static int
resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
{
  size_t size;
  const OnigCodePoint **list = *plist;

  size = sizeof(OnigCodePoint*) * new_size;
  if (IS_NULL(list)) {
    list = (const OnigCodePoint** )xmalloc(size);
    if (IS_NULL(list)) return ONIGERR_MEMORY;
  }
  else {
    /* resize via a temporary so the original pointer survives a failure */
    const OnigCodePoint **tmp;
    tmp = (const OnigCodePoint** )xrealloc((void* )list, size);
    if (IS_NULL(tmp)) return ONIGERR_MEMORY;
    list = tmp;
  }

  *plist = list;
  *psize = new_size;

  return 0;
}
944  |  |  | 
/*
 * NOTE: this function sits inside an `#if 0` region and is not compiled.
 *
 * Append prop to the list *plist and register name in the hash table *table.
 *
 * The list is grown first when it is full (*psize <= *pnum): to
 * PROP_INIT_SIZE entries if it was empty, otherwise to double its size.
 * The table is created on first use with
 * onig_st_init_strend_table_with_size(PROP_INIT_SIZE).  *pnum is then
 * incremented and name is inserted with the value
 * (*pnum + ONIGENC_MAX_STD_CTYPE), using the already-incremented *pnum.
 *
 * Returns 0 on success, the error from resize_property_list() if growing
 * fails, or ONIGERR_MEMORY if the table cannot be created.
 */
extern int
onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
     hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
     int *psize)
{
#define PROP_INIT_SIZE     16

  int r;

  if (*psize <= *pnum) {
    int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
    r = resize_property_list(new_size, plist, psize);
    if (r != 0) return r;
  }

  (*plist)[*pnum] = prop;

  if (ONIG_IS_NULL(*table)) {
    *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
    /* on this failure path prop is already stored in the list, but *pnum
       has not been incremented yet */
    if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
  }

  *pnum = *pnum + 1;
  /* NOTE(review): the return value of onig_st_insert_strend is not checked */
  onig_st_insert_strend(*table, name, name + strlen((char* )name),
      (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
  return 0;
}
972  |  | #endif  | 
973  |  |  | 
974  |  | #ifdef USE_CASE_MAP_API  | 
975  |  | extern int  | 
976  |  | onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end,  | 
977  |  |           OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc)  | 
978  | 0  | { | 
979  | 0  |   OnigCodePoint code;  | 
980  | 0  |   OnigUChar *to_start = to;  | 
981  | 0  |   OnigCaseFoldType flags = *flagP;  | 
982  | 0  |   int codepoint_length;  | 
983  |  | 
  | 
984  | 0  |   while (*pp < end && to < to_end) { | 
985  | 0  |     codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);  | 
986  | 0  |     if (codepoint_length < 0)  | 
987  | 0  |       return codepoint_length; /* encoding invalid */  | 
988  | 0  |     code = ONIGENC_MBC_TO_CODE(enc, *pp, end);  | 
989  | 0  |     *pp += codepoint_length;  | 
990  |  | 
  | 
991  | 0  |     if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) { | 
992  | 0  |       flags |= ONIGENC_CASE_MODIFIED;  | 
993  | 0  |       code -= 'a' - 'A';  | 
994  | 0  |     } else if (code >= 'A' && code <= 'Z' &&  | 
995  | 0  |   (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { | 
996  | 0  |       flags |= ONIGENC_CASE_MODIFIED;  | 
997  | 0  |       code += 'a' - 'A';  | 
998  | 0  |     }  | 
999  | 0  |     to += ONIGENC_CODE_TO_MBC(enc, code, to);  | 
1000  | 0  |     if (flags & ONIGENC_CASE_TITLECASE)  /* switch from titlecase to lowercase for capitalize */  | 
1001  | 0  |       flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);  | 
1002  | 0  |   }  | 
1003  | 0  |   *flagP = flags;  | 
1004  | 0  |   return (int )(to - to_start);  | 
1005  | 0  | }  | 
1006  |  |  | 
1007  |  | extern int  | 
1008  |  | onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,  | 
1009  |  |           const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,  | 
1010  |  |           const struct OnigEncodingTypeST* enc)  | 
1011  | 0  | { | 
1012  | 0  |   OnigCodePoint code;  | 
1013  | 0  |   OnigUChar *to_start = to;  | 
1014  | 0  |   OnigCaseFoldType flags = *flagP;  | 
1015  |  | 
  | 
1016  | 0  |   while (*pp < end && to < to_end) { | 
1017  | 0  |     code = *(*pp)++;  | 
1018  |  | 
  | 
1019  | 0  |     if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) { | 
1020  | 0  |       flags |= ONIGENC_CASE_MODIFIED;  | 
1021  | 0  |       code -= 'a' - 'A';  | 
1022  | 0  |     } else if (code >= 'A' && code <= 'Z' &&  | 
1023  | 0  |   (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { | 
1024  | 0  |       flags |= ONIGENC_CASE_MODIFIED;  | 
1025  | 0  |       code += 'a' - 'A';  | 
1026  | 0  |     }  | 
1027  | 0  |     *to++ = code;  | 
1028  | 0  |     if (flags & ONIGENC_CASE_TITLECASE)  /* switch from titlecase to lowercase for capitalize */  | 
1029  | 0  |       flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);  | 
1030  | 0  |   }  | 
1031  | 0  |   *flagP = flags;  | 
1032  | 0  |   return (int )(to - to_start);  | 
1033  | 0  | }  | 
1034  |  | #endif  |