/src/glib/glib/pcre/pcre_tables.c
Line | Count | Source (jump to first uncovered line) |
1 | | /************************************************* |
2 | | * Perl-Compatible Regular Expressions * |
3 | | *************************************************/ |
4 | | |
5 | | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | | and semantics are as close as possible to those of the Perl 5 language. |
7 | | |
8 | | Written by Philip Hazel |
9 | | Copyright (c) 1997-2012 University of Cambridge |
10 | | |
11 | | ----------------------------------------------------------------------------- |
12 | | Redistribution and use in source and binary forms, with or without |
13 | | modification, are permitted provided that the following conditions are met: |
14 | | |
15 | | * Redistributions of source code must retain the above copyright notice, |
16 | | this list of conditions and the following disclaimer. |
17 | | |
18 | | * Redistributions in binary form must reproduce the above copyright |
19 | | notice, this list of conditions and the following disclaimer in the |
20 | | documentation and/or other materials provided with the distribution. |
21 | | |
22 | | * Neither the name of the University of Cambridge nor the names of its |
23 | | contributors may be used to endorse or promote products derived from |
24 | | this software without specific prior written permission. |
25 | | |
26 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
27 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
28 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
29 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
30 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
31 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
32 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
33 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
34 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
35 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
36 | | POSSIBILITY OF SUCH DAMAGE. |
37 | | ----------------------------------------------------------------------------- |
38 | | */ |
39 | | |
40 | | #ifndef PCRE_INCLUDED |
41 | | |
42 | | /* This module contains some fixed tables that are used by more than one of the |
43 | | PCRE code modules. The tables are also #included by the pcretest program, which |
44 | | uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name |
45 | | clashes with the library. */ |
46 | | |
47 | | |
48 | | #include "config.h" |
49 | | |
50 | | #include "pcre_internal.h" |
51 | | |
52 | | #endif /* PCRE_INCLUDED */ |
53 | | |
54 | | /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
55 | | the definition is next to the definition of the opcodes in pcre_internal.h. */ |
56 | | |
57 | | const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS }; |
58 | | |
59 | | |
60 | | |
61 | | /************************************************* |
62 | | * Tables for UTF-8 support * |
63 | | *************************************************/ |
64 | | |
65 | | /* These are the breakpoints for different numbers of bytes in a UTF-8 |
66 | | character: entry i is the largest value that can be encoded in i + 1 bytes. */ |
67 | | |
68 | | #if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \ |
69 | | || (defined PCRE_INCLUDED && defined SUPPORT_PCRE16) |
70 | | |
71 | | /* These tables are also required by pcretest in 16 bit mode. */ |
72 | | |
73 | | const int PRIV(utf8_table1)[] = |
74 | | { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; |
75 | | |
76 | | const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int); |
77 | | |
78 | | /* These are the indicator bits and the mask for the data bits to set in the |
79 | | first byte of a character, indexed by the number of additional bytes. */ |
80 | | |
81 | | const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; |
82 | | const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; |
83 | | |
84 | | /* Table of the number of extra bytes, indexed by the first byte masked with |
85 | | 0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ |
86 | | |
87 | | const pcre_uint8 PRIV(utf8_table4)[] = { |
88 | | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
89 | | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
90 | | 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
91 | | 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; |
92 | | |
93 | | #endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/ |
94 | | |
95 | | #ifdef SUPPORT_UTF |
96 | | |
97 | | /* Table to translate from a particular type value (ucp_Cc, ucp_Lu, ...) to the general category value (ucp_C, ucp_L, ...). */ |
98 | | |
99 | | const int PRIV(ucp_gentype)[] = { |
100 | | ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */ |
101 | | ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */ |
102 | | ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */ |
103 | | ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */ |
104 | | ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */ |
105 | | ucp_P, ucp_P, /* Ps, Po */ |
106 | | ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */ |
107 | | ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */ |
108 | | }; /* NOTE(review): the row comments name the members of each category but possibly not in index order; PRIV(ucp_typerange) treats Lu as the last L type and Ps as the last P type - confirm against the ucp_* enum. Every entry in a row has the same value, so only the comments are affected. */ |
109 | | |
110 | | #ifdef SUPPORT_JIT |
111 | | /* This table reverses PRIV(ucp_gentype): it holds one pair of particular |
112 | | type values per general category (C, L, M, N, P, S, Z, in that order), presumably the first and last particular type of that category - TODO confirm against the ucp_* enum. We can save the cost of a memory load. */ |
113 | | |
114 | | const int PRIV(ucp_typerange)[] = { |
115 | | ucp_Cc, ucp_Cs, |
116 | | ucp_Ll, ucp_Lu, |
117 | | ucp_Mc, ucp_Mn, |
118 | | ucp_Nd, ucp_No, |
119 | | ucp_Pc, ucp_Ps, |
120 | | ucp_Sc, ucp_So, |
121 | | ucp_Zl, ucp_Zs, |
122 | | }; |
123 | | #endif /* SUPPORT_JIT */ |
124 | | |
125 | | /* The pcre_utt[] table below translates Unicode property names into type and |
126 | | code values. It is searched by binary chop, so must be in collating sequence of |
127 | | name. Originally, the table contained pointers to the name strings in the first |
128 | | field of each entry. However, that leads to a large number of relocations when |
129 | | a shared library is dynamically loaded. A significant reduction is made by |
130 | | putting all the names into a single, large string and then using offsets in the |
131 | | table itself. Maintenance is more error-prone, but frequent changes to this |
132 | | data are unlikely. |
133 | | |
134 | | July 2008: There is now a script called maint/GenerateUtt.py that can be used |
135 | | to generate this data automatically instead of maintaining it by hand. |
136 | | |
137 | | The script was updated in March 2009 to generate a new EBCDIC-compliant |
138 | | version. Like all other character and string literals that are compared against |
139 | | the regular expression pattern, we must use STR_ macros instead of literal |
140 | | strings to make sure that UTF-8 support works on EBCDIC platforms. */ |
141 | | |
142 | | #define STRING_Any0 STR_A STR_n STR_y "\0" |
143 | | #define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0" |
144 | | #define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0" |
145 | | #define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0" |
146 | | #define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0" |
147 | | #define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0" |
148 | | #define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0" |
149 | | #define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0" |
150 | | #define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0" |
151 | | #define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0" |
152 | | #define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0" |
153 | | #define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0" |
154 | | #define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0" |
155 | | #define STRING_C0 STR_C "\0" |
156 | | #define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0" |
157 | | #define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0" |
158 | | #define STRING_Cc0 STR_C STR_c "\0" |
159 | | #define STRING_Cf0 STR_C STR_f "\0" |
160 | | #define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0" |
161 | | #define STRING_Cham0 STR_C STR_h STR_a STR_m "\0" |
162 | | #define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0" |
163 | | #define STRING_Cn0 STR_C STR_n "\0" |
164 | | #define STRING_Co0 STR_C STR_o "\0" |
165 | | #define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0" |
166 | | #define STRING_Coptic0 STR_C STR_o STR_p STR_t STR_i STR_c "\0" |
167 | | #define STRING_Cs0 STR_C STR_s "\0" |
168 | | #define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0" |
169 | | #define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0" |
170 | | #define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0" |
171 | | #define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0" |
172 | | #define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0" |
173 | | #define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" |
174 | | #define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0" |
175 | | #define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0" |
176 | | #define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0" |
177 | | #define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0" |
178 | | #define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0" |
179 | | #define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0" |
180 | | #define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0" |
181 | | #define STRING_Han0 STR_H STR_a STR_n "\0" |
182 | | #define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0" |
183 | | #define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0" |
184 | | #define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0" |
185 | | #define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0" |
186 | | #define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0" |
187 | | #define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0" |
188 | | #define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0" |
189 | | #define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0" |
190 | | #define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0" |
191 | | #define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0" |
192 | | #define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0" |
193 | | #define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0" |
194 | | #define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0" |
195 | | #define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0" |
196 | | #define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0" |
197 | | #define STRING_L0 STR_L "\0" |
198 | | #define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0" |
199 | | #define STRING_Lao0 STR_L STR_a STR_o "\0" |
200 | | #define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0" |
201 | | #define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0" |
202 | | #define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0" |
203 | | #define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0" |
204 | | #define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0" |
205 | | #define STRING_Ll0 STR_L STR_l "\0" |
206 | | #define STRING_Lm0 STR_L STR_m "\0" |
207 | | #define STRING_Lo0 STR_L STR_o "\0" |
208 | | #define STRING_Lt0 STR_L STR_t "\0" |
209 | | #define STRING_Lu0 STR_L STR_u "\0" |
210 | | #define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0" |
211 | | #define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0" |
212 | | #define STRING_M0 STR_M "\0" |
213 | | #define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0" |
214 | | #define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0" |
215 | | #define STRING_Mc0 STR_M STR_c "\0" |
216 | | #define STRING_Me0 STR_M STR_e "\0" |
217 | | #define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0" |
218 | | #define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0" |
219 | | #define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" |
220 | | #define STRING_Miao0 STR_M STR_i STR_a STR_o "\0" |
221 | | #define STRING_Mn0 STR_M STR_n "\0" |
222 | | #define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0" |
223 | | #define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0" |
224 | | #define STRING_N0 STR_N "\0" |
225 | | #define STRING_Nd0 STR_N STR_d "\0" |
226 | | #define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0" |
227 | | #define STRING_Nko0 STR_N STR_k STR_o "\0" |
228 | | #define STRING_Nl0 STR_N STR_l "\0" |
229 | | #define STRING_No0 STR_N STR_o "\0" |
230 | | #define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0" |
231 | | #define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0" |
232 | | #define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0" |
233 | | #define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0" |
234 | | #define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" |
235 | | #define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0" |
236 | | #define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0" |
237 | | #define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0" |
238 | | #define STRING_P0 STR_P "\0" |
239 | | #define STRING_Pc0 STR_P STR_c "\0" |
240 | | #define STRING_Pd0 STR_P STR_d "\0" |
241 | | #define STRING_Pe0 STR_P STR_e "\0" |
242 | | #define STRING_Pf0 STR_P STR_f "\0" |
243 | | #define STRING_Phags_Pa0 STR_P STR_h STR_a STR_g STR_s STR_UNDERSCORE STR_P STR_a "\0" |
244 | | #define STRING_Phoenician0 STR_P STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0" |
245 | | #define STRING_Pi0 STR_P STR_i "\0" |
246 | | #define STRING_Po0 STR_P STR_o "\0" |
247 | | #define STRING_Ps0 STR_P STR_s "\0" |
248 | | #define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0" |
249 | | #define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0" |
250 | | #define STRING_S0 STR_S "\0" |
251 | | #define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0" |
252 | | #define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0" |
253 | | #define STRING_Sc0 STR_S STR_c "\0" |
254 | | #define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0" |
255 | | #define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0" |
256 | | #define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0" |
257 | | #define STRING_Sk0 STR_S STR_k "\0" |
258 | | #define STRING_Sm0 STR_S STR_m "\0" |
259 | | #define STRING_So0 STR_S STR_o "\0" |
260 | | #define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0" |
261 | | #define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0" |
262 | | #define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0" |
263 | | #define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0" |
264 | | #define STRING_Tagalog0 STR_T STR_a STR_g STR_a STR_l STR_o STR_g "\0" |
265 | | #define STRING_Tagbanwa0 STR_T STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0" |
266 | | #define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0" |
267 | | #define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0" |
268 | | #define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0" |
269 | | #define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0" |
270 | | #define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0" |
271 | | #define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0" |
272 | | #define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0" |
273 | | #define STRING_Thai0 STR_T STR_h STR_a STR_i "\0" |
274 | | #define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0" |
275 | | #define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0" |
276 | | #define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0" |
277 | | #define STRING_Vai0 STR_V STR_a STR_i "\0" |
278 | | #define STRING_Xan0 STR_X STR_a STR_n "\0" |
279 | | #define STRING_Xps0 STR_X STR_p STR_s "\0" |
280 | | #define STRING_Xsp0 STR_X STR_s STR_p "\0" |
281 | | #define STRING_Xwd0 STR_X STR_w STR_d "\0" |
282 | | #define STRING_Yi0 STR_Y STR_i "\0" |
283 | | #define STRING_Z0 STR_Z "\0" |
284 | | #define STRING_Zl0 STR_Z STR_l "\0" |
285 | | #define STRING_Zp0 STR_Z STR_p "\0" |
286 | | #define STRING_Zs0 STR_Z STR_s "\0" |
287 | | |
288 | | const char PRIV(utt_names)[] = |
289 | | STRING_Any0 |
290 | | STRING_Arabic0 |
291 | | STRING_Armenian0 |
292 | | STRING_Avestan0 |
293 | | STRING_Balinese0 |
294 | | STRING_Bamum0 |
295 | | STRING_Batak0 |
296 | | STRING_Bengali0 |
297 | | STRING_Bopomofo0 |
298 | | STRING_Brahmi0 |
299 | | STRING_Braille0 |
300 | | STRING_Buginese0 |
301 | | STRING_Buhid0 |
302 | | STRING_C0 |
303 | | STRING_Canadian_Aboriginal0 |
304 | | STRING_Carian0 |
305 | | STRING_Cc0 |
306 | | STRING_Cf0 |
307 | | STRING_Chakma0 |
308 | | STRING_Cham0 |
309 | | STRING_Cherokee0 |
310 | | STRING_Cn0 |
311 | | STRING_Co0 |
312 | | STRING_Common0 |
313 | | STRING_Coptic0 |
314 | | STRING_Cs0 |
315 | | STRING_Cuneiform0 |
316 | | STRING_Cypriot0 |
317 | | STRING_Cyrillic0 |
318 | | STRING_Deseret0 |
319 | | STRING_Devanagari0 |
320 | | STRING_Egyptian_Hieroglyphs0 |
321 | | STRING_Ethiopic0 |
322 | | STRING_Georgian0 |
323 | | STRING_Glagolitic0 |
324 | | STRING_Gothic0 |
325 | | STRING_Greek0 |
326 | | STRING_Gujarati0 |
327 | | STRING_Gurmukhi0 |
328 | | STRING_Han0 |
329 | | STRING_Hangul0 |
330 | | STRING_Hanunoo0 |
331 | | STRING_Hebrew0 |
332 | | STRING_Hiragana0 |
333 | | STRING_Imperial_Aramaic0 |
334 | | STRING_Inherited0 |
335 | | STRING_Inscriptional_Pahlavi0 |
336 | | STRING_Inscriptional_Parthian0 |
337 | | STRING_Javanese0 |
338 | | STRING_Kaithi0 |
339 | | STRING_Kannada0 |
340 | | STRING_Katakana0 |
341 | | STRING_Kayah_Li0 |
342 | | STRING_Kharoshthi0 |
343 | | STRING_Khmer0 |
344 | | STRING_L0 |
345 | | STRING_L_AMPERSAND0 |
346 | | STRING_Lao0 |
347 | | STRING_Latin0 |
348 | | STRING_Lepcha0 |
349 | | STRING_Limbu0 |
350 | | STRING_Linear_B0 |
351 | | STRING_Lisu0 |
352 | | STRING_Ll0 |
353 | | STRING_Lm0 |
354 | | STRING_Lo0 |
355 | | STRING_Lt0 |
356 | | STRING_Lu0 |
357 | | STRING_Lycian0 |
358 | | STRING_Lydian0 |
359 | | STRING_M0 |
360 | | STRING_Malayalam0 |
361 | | STRING_Mandaic0 |
362 | | STRING_Mc0 |
363 | | STRING_Me0 |
364 | | STRING_Meetei_Mayek0 |
365 | | STRING_Meroitic_Cursive0 |
366 | | STRING_Meroitic_Hieroglyphs0 |
367 | | STRING_Miao0 |
368 | | STRING_Mn0 |
369 | | STRING_Mongolian0 |
370 | | STRING_Myanmar0 |
371 | | STRING_N0 |
372 | | STRING_Nd0 |
373 | | STRING_New_Tai_Lue0 |
374 | | STRING_Nko0 |
375 | | STRING_Nl0 |
376 | | STRING_No0 |
377 | | STRING_Ogham0 |
378 | | STRING_Ol_Chiki0 |
379 | | STRING_Old_Italic0 |
380 | | STRING_Old_Persian0 |
381 | | STRING_Old_South_Arabian0 |
382 | | STRING_Old_Turkic0 |
383 | | STRING_Oriya0 |
384 | | STRING_Osmanya0 |
385 | | STRING_P0 |
386 | | STRING_Pc0 |
387 | | STRING_Pd0 |
388 | | STRING_Pe0 |
389 | | STRING_Pf0 |
390 | | STRING_Phags_Pa0 |
391 | | STRING_Phoenician0 |
392 | | STRING_Pi0 |
393 | | STRING_Po0 |
394 | | STRING_Ps0 |
395 | | STRING_Rejang0 |
396 | | STRING_Runic0 |
397 | | STRING_S0 |
398 | | STRING_Samaritan0 |
399 | | STRING_Saurashtra0 |
400 | | STRING_Sc0 |
401 | | STRING_Sharada0 |
402 | | STRING_Shavian0 |
403 | | STRING_Sinhala0 |
404 | | STRING_Sk0 |
405 | | STRING_Sm0 |
406 | | STRING_So0 |
407 | | STRING_Sora_Sompeng0 |
408 | | STRING_Sundanese0 |
409 | | STRING_Syloti_Nagri0 |
410 | | STRING_Syriac0 |
411 | | STRING_Tagalog0 |
412 | | STRING_Tagbanwa0 |
413 | | STRING_Tai_Le0 |
414 | | STRING_Tai_Tham0 |
415 | | STRING_Tai_Viet0 |
416 | | STRING_Takri0 |
417 | | STRING_Tamil0 |
418 | | STRING_Telugu0 |
419 | | STRING_Thaana0 |
420 | | STRING_Thai0 |
421 | | STRING_Tibetan0 |
422 | | STRING_Tifinagh0 |
423 | | STRING_Ugaritic0 |
424 | | STRING_Vai0 |
425 | | STRING_Xan0 |
426 | | STRING_Xps0 |
427 | | STRING_Xsp0 |
428 | | STRING_Xwd0 |
429 | | STRING_Yi0 |
430 | | STRING_Z0 |
431 | | STRING_Zl0 |
432 | | STRING_Zp0 |
433 | | STRING_Zs0; |
434 | | |
435 | | const ucp_type_table PRIV(utt)[] = { |
436 | | { 0, PT_ANY, 0 }, |
437 | | { 4, PT_SC, ucp_Arabic }, |
438 | | { 11, PT_SC, ucp_Armenian }, |
439 | | { 20, PT_SC, ucp_Avestan }, |
440 | | { 28, PT_SC, ucp_Balinese }, |
441 | | { 37, PT_SC, ucp_Bamum }, |
442 | | { 43, PT_SC, ucp_Batak }, |
443 | | { 49, PT_SC, ucp_Bengali }, |
444 | | { 57, PT_SC, ucp_Bopomofo }, |
445 | | { 66, PT_SC, ucp_Brahmi }, |
446 | | { 73, PT_SC, ucp_Braille }, |
447 | | { 81, PT_SC, ucp_Buginese }, |
448 | | { 90, PT_SC, ucp_Buhid }, |
449 | | { 96, PT_GC, ucp_C }, |
450 | | { 98, PT_SC, ucp_Canadian_Aboriginal }, |
451 | | { 118, PT_SC, ucp_Carian }, |
452 | | { 125, PT_PC, ucp_Cc }, |
453 | | { 128, PT_PC, ucp_Cf }, |
454 | | { 131, PT_SC, ucp_Chakma }, |
455 | | { 138, PT_SC, ucp_Cham }, |
456 | | { 143, PT_SC, ucp_Cherokee }, |
457 | | { 152, PT_PC, ucp_Cn }, |
458 | | { 155, PT_PC, ucp_Co }, |
459 | | { 158, PT_SC, ucp_Common }, |
460 | | { 165, PT_SC, ucp_Coptic }, |
461 | | { 172, PT_PC, ucp_Cs }, |
462 | | { 175, PT_SC, ucp_Cuneiform }, |
463 | | { 185, PT_SC, ucp_Cypriot }, |
464 | | { 193, PT_SC, ucp_Cyrillic }, |
465 | | { 202, PT_SC, ucp_Deseret }, |
466 | | { 210, PT_SC, ucp_Devanagari }, |
467 | | { 221, PT_SC, ucp_Egyptian_Hieroglyphs }, |
468 | | { 242, PT_SC, ucp_Ethiopic }, |
469 | | { 251, PT_SC, ucp_Georgian }, |
470 | | { 260, PT_SC, ucp_Glagolitic }, |
471 | | { 271, PT_SC, ucp_Gothic }, |
472 | | { 278, PT_SC, ucp_Greek }, |
473 | | { 284, PT_SC, ucp_Gujarati }, |
474 | | { 293, PT_SC, ucp_Gurmukhi }, |
475 | | { 302, PT_SC, ucp_Han }, |
476 | | { 306, PT_SC, ucp_Hangul }, |
477 | | { 313, PT_SC, ucp_Hanunoo }, |
478 | | { 321, PT_SC, ucp_Hebrew }, |
479 | | { 328, PT_SC, ucp_Hiragana }, |
480 | | { 337, PT_SC, ucp_Imperial_Aramaic }, |
481 | | { 354, PT_SC, ucp_Inherited }, |
482 | | { 364, PT_SC, ucp_Inscriptional_Pahlavi }, |
483 | | { 386, PT_SC, ucp_Inscriptional_Parthian }, |
484 | | { 409, PT_SC, ucp_Javanese }, |
485 | | { 418, PT_SC, ucp_Kaithi }, |
486 | | { 425, PT_SC, ucp_Kannada }, |
487 | | { 433, PT_SC, ucp_Katakana }, |
488 | | { 442, PT_SC, ucp_Kayah_Li }, |
489 | | { 451, PT_SC, ucp_Kharoshthi }, |
490 | | { 462, PT_SC, ucp_Khmer }, |
491 | | { 468, PT_GC, ucp_L }, |
492 | | { 470, PT_LAMP, 0 }, |
493 | | { 473, PT_SC, ucp_Lao }, |
494 | | { 477, PT_SC, ucp_Latin }, |
495 | | { 483, PT_SC, ucp_Lepcha }, |
496 | | { 490, PT_SC, ucp_Limbu }, |
497 | | { 496, PT_SC, ucp_Linear_B }, |
498 | | { 505, PT_SC, ucp_Lisu }, |
499 | | { 510, PT_PC, ucp_Ll }, |
500 | | { 513, PT_PC, ucp_Lm }, |
501 | | { 516, PT_PC, ucp_Lo }, |
502 | | { 519, PT_PC, ucp_Lt }, |
503 | | { 522, PT_PC, ucp_Lu }, |
504 | | { 525, PT_SC, ucp_Lycian }, |
505 | | { 532, PT_SC, ucp_Lydian }, |
506 | | { 539, PT_GC, ucp_M }, |
507 | | { 541, PT_SC, ucp_Malayalam }, |
508 | | { 551, PT_SC, ucp_Mandaic }, |
509 | | { 559, PT_PC, ucp_Mc }, |
510 | | { 562, PT_PC, ucp_Me }, |
511 | | { 565, PT_SC, ucp_Meetei_Mayek }, |
512 | | { 578, PT_SC, ucp_Meroitic_Cursive }, |
513 | | { 595, PT_SC, ucp_Meroitic_Hieroglyphs }, |
514 | | { 616, PT_SC, ucp_Miao }, |
515 | | { 621, PT_PC, ucp_Mn }, |
516 | | { 624, PT_SC, ucp_Mongolian }, |
517 | | { 634, PT_SC, ucp_Myanmar }, |
518 | | { 642, PT_GC, ucp_N }, |
519 | | { 644, PT_PC, ucp_Nd }, |
520 | | { 647, PT_SC, ucp_New_Tai_Lue }, |
521 | | { 659, PT_SC, ucp_Nko }, |
522 | | { 663, PT_PC, ucp_Nl }, |
523 | | { 666, PT_PC, ucp_No }, |
524 | | { 669, PT_SC, ucp_Ogham }, |
525 | | { 675, PT_SC, ucp_Ol_Chiki }, |
526 | | { 684, PT_SC, ucp_Old_Italic }, |
527 | | { 695, PT_SC, ucp_Old_Persian }, |
528 | | { 707, PT_SC, ucp_Old_South_Arabian }, |
529 | | { 725, PT_SC, ucp_Old_Turkic }, |
530 | | { 736, PT_SC, ucp_Oriya }, |
531 | | { 742, PT_SC, ucp_Osmanya }, |
532 | | { 750, PT_GC, ucp_P }, |
533 | | { 752, PT_PC, ucp_Pc }, |
534 | | { 755, PT_PC, ucp_Pd }, |
535 | | { 758, PT_PC, ucp_Pe }, |
536 | | { 761, PT_PC, ucp_Pf }, |
537 | | { 764, PT_SC, ucp_Phags_Pa }, |
538 | | { 773, PT_SC, ucp_Phoenician }, |
539 | | { 784, PT_PC, ucp_Pi }, |
540 | | { 787, PT_PC, ucp_Po }, |
541 | | { 790, PT_PC, ucp_Ps }, |
542 | | { 793, PT_SC, ucp_Rejang }, |
543 | | { 800, PT_SC, ucp_Runic }, |
544 | | { 806, PT_GC, ucp_S }, |
545 | | { 808, PT_SC, ucp_Samaritan }, |
546 | | { 818, PT_SC, ucp_Saurashtra }, |
547 | | { 829, PT_PC, ucp_Sc }, |
548 | | { 832, PT_SC, ucp_Sharada }, |
549 | | { 840, PT_SC, ucp_Shavian }, |
550 | | { 848, PT_SC, ucp_Sinhala }, |
551 | | { 856, PT_PC, ucp_Sk }, |
552 | | { 859, PT_PC, ucp_Sm }, |
553 | | { 862, PT_PC, ucp_So }, |
554 | | { 865, PT_SC, ucp_Sora_Sompeng }, |
555 | | { 878, PT_SC, ucp_Sundanese }, |
556 | | { 888, PT_SC, ucp_Syloti_Nagri }, |
557 | | { 901, PT_SC, ucp_Syriac }, |
558 | | { 908, PT_SC, ucp_Tagalog }, |
559 | | { 916, PT_SC, ucp_Tagbanwa }, |
560 | | { 925, PT_SC, ucp_Tai_Le }, |
561 | | { 932, PT_SC, ucp_Tai_Tham }, |
562 | | { 941, PT_SC, ucp_Tai_Viet }, |
563 | | { 950, PT_SC, ucp_Takri }, |
564 | | { 956, PT_SC, ucp_Tamil }, |
565 | | { 962, PT_SC, ucp_Telugu }, |
566 | | { 969, PT_SC, ucp_Thaana }, |
567 | | { 976, PT_SC, ucp_Thai }, |
568 | | { 981, PT_SC, ucp_Tibetan }, |
569 | | { 989, PT_SC, ucp_Tifinagh }, |
570 | | { 998, PT_SC, ucp_Ugaritic }, |
571 | | { 1007, PT_SC, ucp_Vai }, |
572 | | { 1011, PT_ALNUM, 0 }, |
573 | | { 1015, PT_PXSPACE, 0 }, |
574 | | { 1019, PT_SPACE, 0 }, |
575 | | { 1023, PT_WORD, 0 }, |
576 | | { 1027, PT_SC, ucp_Yi }, |
577 | | { 1030, PT_GC, ucp_Z }, |
578 | | { 1032, PT_PC, ucp_Zl }, |
579 | | { 1035, PT_PC, ucp_Zp }, |
580 | | { 1038, PT_PC, ucp_Zs } |
581 | | }; |
582 | | |
583 | | const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); /* Number of entries in PRIV(utt). */ |
584 | | |
/* Find the other case of a character, using g_unichar_toupper() and
g_unichar_tolower() for the case mappings.

Arguments:
  c           the character value

Returns:      the upper-case mapping of c when that differs from c; otherwise
              the lower-case mapping of c, which is c itself when neither
              mapping changes the character */

unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
  const unsigned int upper = g_unichar_toupper(c);

  /* The lower-case mapping is consulted only when the upper-case mapping
  left the character unchanged. */
  return (upper != c) ? upper : g_unichar_tolower(c);
}
597 | | |
598 | | #endif /* SUPPORT_UTF */ |
599 | | |
600 | | /* End of pcre_tables.c */ |