/src/oniguruma/src/iso8859_16.c
Line | Count | Source |
1 | | /********************************************************************** |
2 | | iso8859_16.c - Oniguruma (regular expression library) |
3 | | **********************************************************************/ |
4 | | /*- |
5 | | * Copyright (c) 2002-2025 K.Kosako |
6 | | * All rights reserved. |
7 | | * |
8 | | * Redistribution and use in source and binary forms, with or without |
9 | | * modification, are permitted provided that the following conditions |
10 | | * are met: |
11 | | * 1. Redistributions of source code must retain the above copyright |
12 | | * notice, this list of conditions and the following disclaimer. |
13 | | * 2. Redistributions in binary form must reproduce the above copyright |
14 | | * notice, this list of conditions and the following disclaimer in the |
15 | | * documentation and/or other materials provided with the distribution. |
16 | | * |
17 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
18 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
21 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
22 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
23 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
24 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
25 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
26 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
27 | | * SUCH DAMAGE. |
28 | | */ |
29 | | |
30 | | #include "regenc.h" |
31 | | |
/* Look up byte c in the ISO-8859-16 lower-case table. */
#define ENC_ISO_8859_16_TO_LOWER_CASE(c) \
  (EncISO_8859_16_ToLowerCaseTable[(c)])

/* Non-zero when the ctype table entry for code has the bit for ctype set. */
#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \
  ((EncISO_8859_16_CtypeTable[(code)] & CTYPE_TO_BIT((ctype))) != 0)
35 | | |
36 | | static const UChar EncISO_8859_16_ToLowerCaseTable[256] = { |
37 | | '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', |
38 | | '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', |
39 | | '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', |
40 | | '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', |
41 | | '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', |
42 | | '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', |
43 | | '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', |
44 | | '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', |
45 | | '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', |
46 | | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', |
47 | | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', |
48 | | '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', |
49 | | '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', |
50 | | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', |
51 | | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', |
52 | | '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', |
53 | | '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', |
54 | | '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', |
55 | | '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', |
56 | | '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', |
57 | | '\240', '\242', '\242', '\263', '\245', '\245', '\250', '\247', |
58 | | '\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277', |
59 | | '\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267', |
60 | | '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', |
61 | | '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', |
62 | | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', |
63 | | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', |
64 | | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', |
65 | | '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', |
66 | | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', |
67 | | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', |
68 | | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' |
69 | | }; |
70 | | |
/*
 * Character-type bit masks, indexed by byte value (8 entries per row).
 * ENC_IS_ISO_8859_16_CTYPE tests an entry against CTYPE_TO_BIT(ctype).
 */
static const unsigned short EncISO_8859_16_CtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,  /* 0x00 */
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,  /* 0x08 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,  /* 0x10 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,  /* 0x18 */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,  /* 0x20 */
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,  /* 0x28 */
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,  /* 0x30 */
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,  /* 0x38 */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,  /* 0x40 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,  /* 0x48 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,  /* 0x50 */
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,  /* 0x58 */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,  /* 0x60 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,  /* 0x68 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,  /* 0x70 */
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,  /* 0x78 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,  /* 0x80 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,  /* 0x88 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,  /* 0x90 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,  /* 0x98 */
  0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x01a0, 0x34a2, 0x00a0,  /* 0xa0 */
  0x30e2, 0x00a0, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x30e2, 0x34a2,  /* 0xa8 */
  0x00a0, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x01a0,  /* 0xb0 */
  0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x30e2,  /* 0xb8 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,  /* 0xc0 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,  /* 0xc8 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,  /* 0xd0 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,  /* 0xd8 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,  /* 0xe0 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,  /* 0xe8 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,  /* 0xf0 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2   /* 0xf8 */
};
105 | | |
106 | | static int |
107 | | mbc_case_fold(OnigCaseFoldType flag, |
108 | | const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) |
109 | 29.0k | { |
110 | 29.0k | const UChar* p = *pp; |
111 | | |
112 | 29.0k | if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { |
113 | 2.95k | *lower++ = 's'; |
114 | 2.95k | *lower = 's'; |
115 | 2.95k | (*pp)++; |
116 | 2.95k | return 2; |
117 | 2.95k | } |
118 | | |
119 | 26.1k | if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) || ONIGENC_IS_ASCII_CODE(*p)) |
120 | 18.3k | *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p); |
121 | 7.82k | else |
122 | 7.82k | *lower = *p; |
123 | | |
124 | 26.1k | (*pp)++; |
125 | 26.1k | return 1; /* return byte length of converted char to lower */ |
126 | 29.0k | } |
127 | | |
128 | | static int |
129 | | is_code_ctype(OnigCodePoint code, unsigned int ctype) |
130 | 333k | { |
131 | 333k | if (code < 256) { |
132 | 333k | if (ctype > ONIGENC_MAX_STD_CTYPE) |
133 | 0 | return FALSE; |
134 | 333k | else |
135 | 333k | return ENC_IS_ISO_8859_16_CTYPE(code, ctype); |
136 | 333k | } |
137 | 0 | else |
138 | 0 | return FALSE; |
139 | 333k | } |
140 | | |
141 | | static const OnigPairCaseFoldCodes CaseFoldMap[] = { |
142 | | { 0xa1, 0xa2 }, |
143 | | { 0xa3, 0xb3 }, |
144 | | { 0xa6, 0xa8 }, |
145 | | { 0xaa, 0xba }, |
146 | | { 0xac, 0xae }, |
147 | | { 0xaf, 0xbf }, |
148 | | |
149 | | { 0xb2, 0xb9 }, |
150 | | { 0xb4, 0xb8 }, |
151 | | { 0xbc, 0xbd }, |
152 | | { 0xbe, 0xff }, |
153 | | |
154 | | { 0xc0, 0xe0 }, |
155 | | { 0xc1, 0xe1 }, |
156 | | { 0xc2, 0xe2 }, |
157 | | { 0xc3, 0xe3 }, |
158 | | { 0xc4, 0xe4 }, |
159 | | { 0xc5, 0xe5 }, |
160 | | { 0xc6, 0xe6 }, |
161 | | { 0xc7, 0xe7 }, |
162 | | { 0xc8, 0xe8 }, |
163 | | { 0xc9, 0xe9 }, |
164 | | { 0xca, 0xea }, |
165 | | { 0xcb, 0xeb }, |
166 | | { 0xcc, 0xec }, |
167 | | { 0xcd, 0xed }, |
168 | | { 0xce, 0xee }, |
169 | | { 0xcf, 0xef }, |
170 | | |
171 | | { 0xd0, 0xf0 }, |
172 | | { 0xd1, 0xf1 }, |
173 | | { 0xd2, 0xf2 }, |
174 | | { 0xd3, 0xf3 }, |
175 | | { 0xd4, 0xf4 }, |
176 | | { 0xd5, 0xf5 }, |
177 | | { 0xd6, 0xf6 }, |
178 | | { 0xd7, 0xf7 }, |
179 | | { 0xd8, 0xf8 }, |
180 | | { 0xd9, 0xf9 }, |
181 | | { 0xda, 0xfa }, |
182 | | { 0xdb, 0xfb }, |
183 | | { 0xdc, 0xfc }, |
184 | | { 0xdd, 0xfd }, |
185 | | { 0xde, 0xfe } |
186 | | }; |
187 | | |
188 | | static int |
189 | | apply_all_case_fold(OnigCaseFoldType flag, |
190 | | OnigApplyAllCaseFoldFunc f, void* arg) |
191 | 422 | { |
192 | 422 | return onigenc_apply_all_case_fold_with_map( |
193 | 422 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, |
194 | 422 | flag, f, arg); |
195 | 422 | } |
196 | | |
197 | | static int |
198 | | get_case_fold_codes_by_str(OnigCaseFoldType flag, |
199 | | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) |
200 | 41.8k | { |
201 | 41.8k | return onigenc_get_case_fold_codes_by_str_with_map( |
202 | 41.8k | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, |
203 | 41.8k | flag, p, end, items); |
204 | 41.8k | } |
205 | | |
206 | | OnigEncodingType OnigEncodingISO_8859_16 = { |
207 | | onigenc_single_byte_mbc_enc_len, |
208 | | "ISO-8859-16", /* name */ |
209 | | 1, /* max enc length */ |
210 | | 1, /* min enc length */ |
211 | | onigenc_is_mbc_newline_0x0a, |
212 | | onigenc_single_byte_mbc_to_code, |
213 | | onigenc_single_byte_code_to_mbclen, |
214 | | onigenc_single_byte_code_to_mbc, |
215 | | mbc_case_fold, |
216 | | apply_all_case_fold, |
217 | | get_case_fold_codes_by_str, |
218 | | onigenc_minimum_property_name_to_ctype, |
219 | | is_code_ctype, |
220 | | onigenc_not_support_get_ctype_code_range, |
221 | | onigenc_single_byte_left_adjust_char_head, |
222 | | onigenc_always_true_is_allowed_reverse_match, |
223 | | NULL, /* init */ |
224 | | NULL, /* is_initialized */ |
225 | | onigenc_always_true_is_valid_mbc_string, |
226 | | ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, |
227 | | 0, 0 |
228 | | }; |