/src/liblouis/liblouis/pattern.c
Line | Count | Source |
1 | | /* liblouis Braille Translation and Back-Translation Library |
2 | | |
3 | | Copyright (C) 2016 Mike Gray, American Printing House for the Blind |
4 | | |
5 | | This file is part of liblouis. |
6 | | |
7 | | liblouis is free software: you can redistribute it and/or modify it |
8 | | under the terms of the GNU Lesser General Public License as published |
9 | | by the Free Software Foundation, either version 2.1 of the License, or |
10 | | (at your option) any later version. |
11 | | |
12 | | liblouis is distributed in the hope that it will be useful, but |
13 | | WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | Lesser General Public License for more details. |
16 | | |
17 | | You should have received a copy of the GNU Lesser General Public |
18 | | License along with liblouis. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "config.h" |
22 | | |
23 | | #include <stdlib.h> |
24 | | #include <stdio.h> |
25 | | #include <string.h> |
26 | | #include <ctype.h> |
27 | | |
28 | | #include "internal.h" |
29 | | |
30 | | //#define CHECK_OUTPUT_DEFINED |
31 | | |
32 | | ///// |
33 | | |
34 | | // TODO: these functions are static and copied several times |
35 | | |
/* Lookup selector read by checkAttr(): a non-zero value makes it consult the
 * character table, zero makes it consult the dot-pattern table. */
int translation_direction = 1;
37 | | |
38 | | static TranslationTableCharacter * |
39 | 288k | findCharOrDots(widechar c, int m, const TranslationTableHeader *table) { |
40 | | /* Look up character or dot pattern in the appropriate |
41 | | * table. */ |
42 | 288k | static TranslationTableCharacter noChar = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, 32, 0, |
43 | 288k | 0 }; |
44 | 288k | static TranslationTableCharacter noDots = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, |
45 | 288k | LOU_DOTS, 0, 0 }; |
46 | 288k | TranslationTableCharacter *notFound; |
47 | 288k | TranslationTableCharacter *character; |
48 | 288k | TranslationTableOffset bucket; |
49 | 288k | unsigned long int makeHash = _lou_charHash(c); |
50 | 288k | if (m == 0) { |
51 | 288k | bucket = table->characters[makeHash]; |
52 | 288k | notFound = &noChar; |
53 | 288k | } else { |
54 | 0 | bucket = table->dots[makeHash]; |
55 | 0 | notFound = &noDots; |
56 | 0 | } |
57 | 288k | while (bucket) { |
58 | 77.0k | character = (TranslationTableCharacter *)&table->ruleArea[bucket]; |
59 | 77.0k | if (character->value == c) return character; |
60 | 0 | bucket = character->next; |
61 | 0 | } |
62 | 211k | notFound->value = c; |
63 | 211k | return notFound; |
64 | 288k | } |
65 | | |
66 | | static int |
67 | | checkAttr(const widechar c, const TranslationTableCharacterAttributes a, |
68 | 288k | const TranslationTableHeader *table) { |
69 | 288k | return (((findCharOrDots(c, translation_direction ? 0 : 1, table))->attributes & a) |
70 | 288k | ? 1 |
71 | 288k | : 0); |
72 | 288k | } |
73 | | |
74 | | ///// |
75 | | |
/* Type tags stored in the first slot of every compiled expression. */
enum pattern_type {
	PTN_ERROR = 0, /* placeholder for a slot that has not been compiled yet */

	PTN_START = 1, /* head of an expression chain */
	PTN_GROUP = 2, /* '(' ... ')' */
	PTN_NOT = 3,   /* '!' */

	PTN_ONE_MORE = 4,  /* '+' */
	PTN_ZERO_MORE = 5, /* '*' */
	PTN_OPTIONAL = 6,  /* '?' */

	PTN_ALTERNATE = 7, /* '|' */

	PTN_ANY = 8,		   /* '.' */
	PTN_ATTRIBUTES = 9,	   /* '%' */
	PTN_CHARS = 10,		   /* '[' ... ']' or a single literal character */
	PTN_HOOK = 11,		   /* '@' */
	PTN_END_OF_INPUT = 12, /* '^' or '$' */

	PTN_END = 0xffff, /* end of chain; also used as the "no link" index */
};
97 | | |
/* Accessors for one compiled expression stored at index `at` of `buffer`.
 *
 * Slot layout relative to `at`:
 *   +0 type, +1 index of previous expression, +2 index of next expression,
 *   +3.. expression specific data.
 *
 * The value accessors expand to lvalues, so they can be read and assigned.
 * Both arguments are parenthesized so that arbitrary expressions (for example
 * `ptr + 1`) can be passed as `buffer`.
 */
#define EXPR_TYPE_IN(at, buffer) ((buffer)[(at) + 0])
#define EXPR_PRV_IN(at, buffer) ((buffer)[(at) + 1])
#define EXPR_NXT_IN(at, buffer) ((buffer)[(at) + 2])
#define EXPR_DATA_0_IN(at, buffer) ((buffer)[(at) + 3])
#define EXPR_DATA_1_IN(at, buffer) ((buffer)[(at) + 4])
#define EXPR_DATA_2_IN(at, buffer) ((buffer)[(at) + 5])
#define EXPR_DATA_IN(at, buffer) ((widechar *)&(buffer)[(at) + 3])
#define EXPR_CONST_DATA_IN(at, buffer) ((const widechar *)&(buffer)[(at) + 3])

/* Shorthands that operate on a variable named `expr_data` in the caller's scope. */
#define EXPR_TYPE(at) EXPR_TYPE_IN((at), expr_data)
#define EXPR_PRV(at) EXPR_PRV_IN((at), expr_data)
#define EXPR_NXT(at) EXPR_NXT_IN((at), expr_data)
#define EXPR_DATA_0(at) EXPR_DATA_0_IN((at), expr_data)
#define EXPR_DATA_1(at) EXPR_DATA_1_IN((at), expr_data)
#define EXPR_DATA_2(at) EXPR_DATA_2_IN((at), expr_data)
#define EXPR_DATA(at) EXPR_DATA_IN((at), expr_data)
#define EXPR_CONST_DATA(at) EXPR_CONST_DATA_IN((at), expr_data)
115 | | |
/* Tracing hook. When CHECK_OUTPUT_DEFINED is set, CHECK_OUTPUT forwards the
 * caller's local matching state to do_output(); otherwise it expands to an
 * empty block. The expansion is a brace block, so a use does not need a
 * trailing semicolon. */
#if defined(CHECK_OUTPUT_DEFINED)

#if !defined(DEBUG)
#define DEBUG
#endif

/* values for the `type` argument of CHECK_OUTPUT */
#define START 0
#define CALL 1
#define RETURN 2
#define SHOW 3

#define CHECK_OUTPUT(type, ret, line, msg)                                              \
	{                                                                                   \
		do_output(type, ret, line, input[*input_crs], input_minmax, *input_crs,         \
				input_dir, expr_data, expr_crs, notOperator, loop_crs, loop_cnts, msg); \
	}

#else /* tracing disabled */

#define CHECK_OUTPUT(type, ret, line, msg) \
	{ ; }

#endif
140 | | |
141 | | struct expression { |
142 | | widechar type; |
143 | | widechar prv; |
144 | | widechar nxt; |
145 | | widechar data[1]; |
146 | | }; |
147 | | |
148 | | /* gdb won't know what this is unless it is actually used */ |
149 | | #ifdef DEBUG |
150 | | static struct expression *expr_debug; |
151 | | #endif |
152 | | |
153 | | //////////////////////////////////////////////////////////////////////////////// |
154 | | |
/* Indentation filler for the debug dumps. `space` is the index into `spaces`
 * from which the filler is printed; it starts at the terminating NUL, so the
 * initial filler is empty, and a smaller value prints more dots. */
static char spaces[] = "..............................";
static int space = (int)(sizeof spaces) - 1;
157 | | |
158 | | static void |
159 | | pattern_output_expression( |
160 | 0 | const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) { |
161 | 0 | int i; |
162 | 0 |
|
163 | 0 | if (expr_crs == PTN_END) return; |
164 | 0 |
|
165 | 0 | while (EXPR_TYPE(expr_crs) != PTN_END) { |
166 | 0 | printf("%s%d", &spaces[space], expr_crs); |
167 | 0 | if (expr_crs < 100) printf(" "); |
168 | 0 | if (expr_crs < 10) printf(" "); |
169 | 0 | for (i = 0; i < 13 - (30 - space); i++) printf(" "); |
170 | 0 |
|
171 | 0 | switch (EXPR_TYPE(expr_crs)) { |
172 | 0 | case PTN_START: |
173 | 0 |
|
174 | 0 | printf("START\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
175 | 0 | break; |
176 | 0 |
|
177 | 0 | case PTN_GROUP: |
178 | 0 |
|
179 | 0 | printf("( \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs), |
180 | 0 | EXPR_DATA_0(expr_crs)); |
181 | 0 | space--; |
182 | 0 | if (space < 0) space = 0; |
183 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
184 | 0 | space++; |
185 | 0 | if (space > 30) space = 30; |
186 | 0 | break; |
187 | 0 |
|
188 | 0 | case PTN_NOT: |
189 | 0 |
|
190 | 0 | printf("! \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs), |
191 | 0 | EXPR_DATA_0(expr_crs)); |
192 | 0 | space--; |
193 | 0 | if (space < 0) space = 0; |
194 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
195 | 0 | space++; |
196 | 0 | if (space > 30) space = 30; |
197 | 0 | break; |
198 | 0 |
|
199 | 0 | case PTN_ONE_MORE: |
200 | 0 |
|
201 | 0 | printf("+ \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs), |
202 | 0 | EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs)); |
203 | 0 | space--; |
204 | 0 | if (space < 0) space = 0; |
205 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
206 | 0 | space++; |
207 | 0 | if (space > 30) space = 30; |
208 | 0 | break; |
209 | 0 |
|
210 | 0 | case PTN_ZERO_MORE: |
211 | 0 |
|
212 | 0 | printf("* \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs), |
213 | 0 | EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs)); |
214 | 0 | space--; |
215 | 0 | if (space < 0) space = 0; |
216 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
217 | 0 | space++; |
218 | 0 | if (space > 30) space = 30; |
219 | 0 | break; |
220 | 0 |
|
221 | 0 | case PTN_OPTIONAL: |
222 | 0 |
|
223 | 0 | printf("? \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs), |
224 | 0 | EXPR_DATA_0(expr_crs)); |
225 | 0 | space--; |
226 | 0 | if (space < 0) space = 0; |
227 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
228 | 0 | space++; |
229 | 0 | if (space > 30) space = 30; |
230 | 0 | break; |
231 | 0 |
|
232 | 0 | case PTN_ALTERNATE: |
233 | 0 |
|
234 | 0 | printf("| \t%d\t%d\t-> %d\t-> %d\n", EXPR_PRV(expr_crs), |
235 | 0 | EXPR_NXT(expr_crs), EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs)); |
236 | 0 | space--; |
237 | 0 | if (space < 0) space = 0; |
238 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
239 | 0 | pattern_output_expression(expr_data, EXPR_DATA_1(expr_crs), table); |
240 | 0 | space++; |
241 | 0 | if (space > 30) space = 30; |
242 | 0 | break; |
243 | 0 |
|
244 | 0 | case PTN_ANY: |
245 | 0 |
|
246 | 0 | printf(". \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
247 | 0 | break; |
248 | 0 |
|
249 | 0 | case PTN_ATTRIBUTES: |
250 | 0 |
|
251 | 0 | printf("%% \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
252 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0"); |
253 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1"); |
254 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2"); |
255 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3"); |
256 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4"); |
257 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5"); |
258 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6"); |
259 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7"); |
260 | 0 | if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^"); |
261 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_"); |
262 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#"); |
263 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a"); |
264 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u"); |
265 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l"); |
266 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf("."); |
267 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$"); |
268 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~"); |
269 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<"); |
270 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">"); |
271 | 0 | puts(""); |
272 | 0 | break; |
273 | 0 |
|
274 | 0 | case PTN_CHARS: |
275 | 0 |
|
276 | 0 | printf("[] \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
277 | 0 | for (i = 0; i < EXPR_DATA_0(expr_crs); i++) |
278 | 0 | printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]); |
279 | 0 | puts(""); |
280 | 0 | break; |
281 | 0 |
|
282 | 0 | case PTN_HOOK: |
283 | 0 |
|
284 | 0 | printf("@ \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
285 | 0 | for (i = 0; i < EXPR_DATA_0(expr_crs); i++) |
286 | 0 | printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]); |
287 | 0 | puts(""); |
288 | 0 | break; |
289 | 0 |
|
290 | 0 | case PTN_END_OF_INPUT: |
291 | 0 |
|
292 | 0 | printf("^ \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
293 | 0 | break; |
294 | 0 |
|
295 | 0 | default: |
296 | 0 |
|
297 | 0 | printf("%d? \t%d\t%d\n", EXPR_TYPE(expr_crs), EXPR_PRV(expr_crs), |
298 | 0 | EXPR_NXT(expr_crs)); |
299 | 0 | break; |
300 | 0 | } |
301 | 0 |
|
302 | 0 | expr_crs = EXPR_NXT(expr_crs); |
303 | 0 | } |
304 | 0 |
|
305 | 0 | printf("%s%d", &spaces[space], expr_crs); |
306 | 0 | if (expr_crs < 100) printf(" "); |
307 | 0 | if (expr_crs < 10) printf(" "); |
308 | 0 | for (i = 0; i < 13 - (30 - space); i++) printf(" "); |
309 | 0 | printf("END\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
310 | 0 | fflush(stdout); |
311 | 0 | return; |
312 | 0 | } |
313 | | |
314 | | static void |
315 | 0 | pattern_output(const widechar *expr_data, const TranslationTableHeader *table) { |
316 | 0 | printf("%d \tlength\n", expr_data[0]); |
317 | 0 | printf("%d \tloops\n", expr_data[1]); |
318 | 0 | if (expr_data[0] > 0 && expr_data[0] != PTN_END) |
319 | 0 | pattern_output_expression(expr_data, 2, table); |
320 | 0 | } |
321 | | |
322 | | static void |
323 | | pattern_print_expression( |
324 | 0 | const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) { |
325 | 0 | int i; |
326 | 0 |
|
327 | 0 | if (expr_crs == PTN_END) return; |
328 | 0 |
|
329 | 0 | while (EXPR_TYPE(expr_crs) != PTN_END) { |
330 | 0 | switch (EXPR_TYPE(expr_crs)) { |
331 | 0 | case PTN_START: |
332 | 0 | break; |
333 | 0 |
|
334 | 0 | case PTN_GROUP: |
335 | 0 |
|
336 | 0 | printf(" ("); |
337 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
338 | 0 | printf(") "); |
339 | 0 | break; |
340 | 0 |
|
341 | 0 | case PTN_NOT: |
342 | 0 |
|
343 | 0 | printf("!"); |
344 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
345 | 0 | break; |
346 | 0 |
|
347 | 0 | case PTN_ONE_MORE: |
348 | 0 |
|
349 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
350 | 0 | printf("+"); |
351 | 0 | break; |
352 | 0 |
|
353 | 0 | case PTN_ZERO_MORE: |
354 | 0 |
|
355 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
356 | 0 | printf("*"); |
357 | 0 | break; |
358 | 0 |
|
359 | 0 | case PTN_OPTIONAL: |
360 | 0 |
|
361 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
362 | 0 | printf("?"); |
363 | 0 | break; |
364 | 0 |
|
365 | 0 | case PTN_ALTERNATE: |
366 | 0 |
|
367 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
368 | 0 | printf(" | "); |
369 | 0 | pattern_print_expression(expr_data, EXPR_DATA_1(expr_crs), table); |
370 | 0 | break; |
371 | 0 |
|
372 | 0 | case PTN_ANY: |
373 | 0 |
|
374 | 0 | printf("."); |
375 | 0 | break; |
376 | 0 |
|
377 | 0 | case PTN_ATTRIBUTES: |
378 | 0 |
|
379 | 0 | printf("%%["); |
380 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0"); |
381 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1"); |
382 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2"); |
383 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3"); |
384 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4"); |
385 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5"); |
386 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6"); |
387 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7"); |
388 | 0 | if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^"); |
389 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_"); |
390 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#"); |
391 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a"); |
392 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u"); |
393 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l"); |
394 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf("."); |
395 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$"); |
396 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~"); |
397 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<"); |
398 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">"); |
399 | 0 | printf("]"); |
400 | 0 | break; |
401 | 0 |
|
402 | 0 | case PTN_CHARS: |
403 | 0 |
|
404 | 0 | if (EXPR_DATA_0(expr_crs) == 1) |
405 | 0 | printf("%c", EXPR_DATA_1(expr_crs)); |
406 | 0 | else { |
407 | 0 | printf("["); |
408 | 0 | for (i = 0; i < EXPR_DATA_0(expr_crs); i++) |
409 | 0 | printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]); |
410 | 0 | printf("]"); |
411 | 0 | } |
412 | 0 | break; |
413 | 0 |
|
414 | 0 | case PTN_HOOK: |
415 | 0 |
|
416 | 0 | printf("@["); |
417 | 0 | for (i = 0; i < EXPR_DATA_0(expr_crs); i++) |
418 | 0 | printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]); |
419 | 0 | printf("]"); |
420 | 0 | break; |
421 | 0 |
|
422 | 0 | case PTN_END_OF_INPUT: |
423 | 0 |
|
424 | 0 | printf("^"); |
425 | 0 | break; |
426 | 0 |
|
427 | 0 | // default: printf("%d?\n", EXPR_TYPE(expr_crs)); break; |
428 | 0 | } |
429 | 0 |
|
430 | 0 | expr_crs = EXPR_NXT(expr_crs); |
431 | 0 | } |
432 | 0 |
|
433 | 0 | return; |
434 | 0 | } |
435 | | |
436 | | static void |
437 | 0 | pattern_print(const widechar *expr_data, const TranslationTableHeader *table) { |
438 | 0 | if (expr_data[0] > 0 && expr_data[0] != PTN_END) |
439 | 0 | pattern_print_expression(expr_data, 2, table); |
440 | 0 | puts(""); |
441 | 0 | } |
442 | | |
443 | | #ifdef CHECK_OUTPUT_DEFINED |
444 | | |
/* Print one padding space for each of the limits 100000, 10000, 1000, 100
 * and 10 that `value` is below. */
static void
do_padd(const int value) {
	int limit;

	for (limit = 100000; limit >= 10; limit /= 10) {
		if (value < limit) printf(" ");
	}
}
453 | | |
/* Print one padding space for each of the limits 100 and 10 that `value` is
 * below. */
static void
do_pad(const int value) {
	int limit;

	for (limit = 100; limit >= 10; limit /= 10) {
		if (value < limit) printf(" ");
	}
}
459 | | |
460 | | static void |
461 | | do_output(const int type, const int ret, const int line, |
462 | | |
463 | | const int input, const int input_minmax, const int input_crs, const int input_dir, |
464 | | const widechar *expr_data, const int expr_crs, const int notOperator, |
465 | | const int loop_crs, const int *loop_cnts, |
466 | | |
467 | | const char *msg) { |
468 | | switch (type) { |
469 | | case START: |
470 | | |
471 | | space--; |
472 | | if (space < 0) space = 0; |
473 | | printf("|%s() ", &spaces[space]); |
474 | | break; |
475 | | |
476 | | case CALL: |
477 | | |
478 | | printf("|%s> ", &spaces[space]); |
479 | | break; |
480 | | |
481 | | case RETURN: |
482 | | |
483 | | printf("|%s<%d ", &spaces[space], ret); |
484 | | space++; |
485 | | if (space > 31) space = 31; |
486 | | break; |
487 | | |
488 | | case SHOW: |
489 | | |
490 | | printf("|%s ", &spaces[space]); |
491 | | break; |
492 | | } |
493 | | |
494 | | printf("%d ", line); |
495 | | do_padd(line); |
496 | | |
497 | | switch (expr_data[expr_crs]) { |
498 | | case PTN_ERROR: |
499 | | printf("# "); |
500 | | break; |
501 | | case PTN_START: |
502 | | printf("> "); |
503 | | break; |
504 | | case PTN_END_OF_INPUT: |
505 | | printf("^ "); |
506 | | break; |
507 | | case PTN_ALTERNATE: |
508 | | printf("| "); |
509 | | break; |
510 | | case PTN_OPTIONAL: |
511 | | printf("? "); |
512 | | break; |
513 | | case PTN_ONE_MORE: |
514 | | printf("+ "); |
515 | | break; |
516 | | case PTN_ZERO_MORE: |
517 | | printf("* "); |
518 | | break; |
519 | | case PTN_NOT: |
520 | | printf("! "); |
521 | | break; |
522 | | case PTN_GROUP: |
523 | | printf("( "); |
524 | | break; |
525 | | case PTN_ANY: |
526 | | printf(". "); |
527 | | break; |
528 | | case PTN_ATTRIBUTES: |
529 | | printf("%% "); |
530 | | break; |
531 | | case PTN_CHARS: |
532 | | printf("[ "); |
533 | | break; |
534 | | case PTN_HOOK: |
535 | | printf("@ "); |
536 | | break; |
537 | | case PTN_END: |
538 | | printf("< "); |
539 | | break; |
540 | | default: |
541 | | printf(" "); |
542 | | break; |
543 | | } |
544 | | printf("%d ", expr_crs); |
545 | | do_padd(expr_crs); |
546 | | |
547 | | if (input > 31 && input < 127) |
548 | | printf("%c ", input); |
549 | | else |
550 | | printf("_ "); |
551 | | |
552 | | if (input_crs * input_dir >= input_minmax * input_dir) |
553 | | printf("# "); |
554 | | else { |
555 | | printf("%d ", input_crs); |
556 | | do_pad(input_crs); |
557 | | } |
558 | | |
559 | | if (input_dir > 0) |
560 | | printf("<"); |
561 | | else |
562 | | printf(">"); |
563 | | printf("%d ", input_minmax); |
564 | | do_pad(input_minmax); |
565 | | |
566 | | if (notOperator) |
567 | | printf("! "); |
568 | | else |
569 | | printf(" "); |
570 | | |
571 | | if (loop_crs) { |
572 | | printf("%d ", loop_crs); |
573 | | do_pad(loop_crs); |
574 | | printf("%d ", loop_cnts[EXPR_DATA_1(loop_crs)]); |
575 | | do_pad(loop_cnts[EXPR_DATA_1(loop_crs)]); |
576 | | } else |
577 | | printf("- - "); |
578 | | if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE || EXPR_TYPE(expr_crs) == PTN_ZERO_MORE) { |
579 | | printf("%d ", loop_cnts[EXPR_DATA_1(expr_crs)]); |
580 | | do_pad(loop_cnts[EXPR_DATA_1(expr_crs)]); |
581 | | } else |
582 | | printf("- "); |
583 | | |
584 | | if (msg) printf("%s", msg); |
585 | | puts(""); |
586 | | } |
587 | | |
588 | | #endif |
589 | | |
590 | | //////////////////////////////////////////////////////////////////////////////// |
591 | | |
592 | | static int |
593 | | pattern_compile_1(const widechar *input, const int input_max, int *input_crs, |
594 | | widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts, |
595 | | TranslationTableHeader *table, const FileInfo *nested); |
596 | | |
597 | | static int |
598 | | pattern_compile_expression(const widechar *input, const int input_max, int *input_crs, |
599 | | widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts, |
600 | 6.19k | TranslationTableHeader *table, const FileInfo *nested) { |
601 | 6.19k | widechar *data; |
602 | 6.19k | int expr_start, expr_end, expr_sub, expr_crs_prv; |
603 | 6.19k | int input_end; |
604 | 6.19k | int attrs0, attrs1; |
605 | 6.19k | int set, esc, nest, i; |
606 | | |
607 | 6.19k | switch (input[*input_crs]) { |
608 | 0 | case '(': |
609 | |
|
610 | 0 | if (*expr_crs + 10 >= expr_max) return 0; |
611 | | |
612 | 0 | (*input_crs)++; |
613 | 0 | if (*input_crs >= input_max) return 0; |
614 | | |
615 | | /* find closing parenthesis */ |
616 | 0 | nest = esc = 0; |
617 | 0 | for (input_end = *input_crs; input_end < input_max; input_end++) { |
618 | 0 | if (input[input_end] == '\\' && !esc) { |
619 | 0 | esc = 1; |
620 | 0 | continue; |
621 | 0 | } |
622 | | |
623 | 0 | if (input[input_end] == '(' && !esc) |
624 | 0 | nest++; |
625 | 0 | else if (input[input_end] == ')' && !esc) { |
626 | 0 | if (nest) |
627 | 0 | nest--; |
628 | 0 | else |
629 | 0 | break; |
630 | 0 | } |
631 | | |
632 | 0 | esc = 0; |
633 | 0 | } |
634 | 0 | if (input_end >= input_max) return 0; |
635 | | |
636 | 0 | EXPR_TYPE(*expr_crs) = PTN_GROUP; |
637 | | |
638 | | /* compile sub expressions */ |
639 | 0 | expr_crs_prv = *expr_crs; |
640 | 0 | *expr_crs += 4; |
641 | 0 | EXPR_DATA_0(expr_crs_prv) = *expr_crs; |
642 | 0 | expr_sub = *expr_crs; |
643 | 0 | EXPR_TYPE(expr_sub) = PTN_ERROR; |
644 | 0 | EXPR_PRV(expr_sub) = PTN_END; |
645 | 0 | EXPR_NXT(expr_sub) = PTN_END; |
646 | 0 | if (!pattern_compile_1(input, input_end, input_crs, expr_data, expr_max, expr_crs, |
647 | 0 | loop_cnts, table, nested)) |
648 | 0 | return 0; |
649 | 0 | (*input_crs)++; |
650 | | |
651 | | /* reset end expression */ |
652 | 0 | expr_end = *expr_crs; |
653 | 0 | EXPR_NXT(expr_end) = expr_crs_prv; |
654 | |
|
655 | 0 | return *expr_crs += 3; |
656 | | |
657 | 39 | case '!': |
658 | | |
659 | 39 | if (*expr_crs + 10 >= expr_max) return 0; |
660 | | |
661 | 39 | (*input_crs)++; |
662 | 39 | EXPR_TYPE(*expr_crs) = PTN_NOT; |
663 | 39 | expr_crs_prv = *expr_crs; |
664 | 39 | *expr_crs += 4; |
665 | 39 | EXPR_DATA_0(expr_crs_prv) = *expr_crs; |
666 | | |
667 | | /* create start expression */ |
668 | 39 | expr_start = *expr_crs; |
669 | 39 | EXPR_TYPE(expr_start) = PTN_START; |
670 | 39 | EXPR_PRV(expr_start) = PTN_END; |
671 | 39 | *expr_crs += 3; |
672 | 39 | EXPR_NXT(expr_start) = *expr_crs; |
673 | | |
674 | | /* compile sub expression */ |
675 | 39 | expr_sub = *expr_crs; |
676 | 39 | EXPR_TYPE(expr_sub) = PTN_ERROR; |
677 | 39 | EXPR_PRV(expr_sub) = expr_start; |
678 | 39 | EXPR_NXT(expr_sub) = PTN_END; |
679 | | |
680 | 39 | if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max, |
681 | 39 | expr_crs, loop_cnts, table, nested)) |
682 | 0 | return 0; |
683 | | |
684 | 39 | if (*expr_crs + 3 >= expr_max) return 0; |
685 | | |
686 | 39 | EXPR_NXT(expr_sub) = *expr_crs; |
687 | | |
688 | | /* create end expression */ |
689 | 39 | expr_end = *expr_crs; |
690 | 39 | EXPR_TYPE(expr_end) = PTN_END; |
691 | 39 | EXPR_PRV(expr_end) = expr_sub; |
692 | 39 | EXPR_NXT(expr_end) = expr_crs_prv; |
693 | | |
694 | 39 | return *expr_crs += 3; |
695 | | |
696 | 3 | case '+': |
697 | | |
698 | 3 | if (*expr_crs + 5 >= expr_max) return 0; |
699 | 3 | EXPR_TYPE(*expr_crs) = PTN_ONE_MORE; |
700 | 3 | EXPR_DATA_1(*expr_crs) = (*loop_cnts)++; |
701 | 3 | (*input_crs)++; |
702 | 3 | return *expr_crs += 5; |
703 | | |
704 | 60 | case '*': |
705 | | |
706 | 60 | if (*expr_crs + 5 >= expr_max) return 0; |
707 | 60 | EXPR_TYPE(*expr_crs) = PTN_ZERO_MORE; |
708 | 60 | EXPR_DATA_1(*expr_crs) = (*loop_cnts)++; |
709 | 60 | (*input_crs)++; |
710 | 60 | return *expr_crs += 5; |
711 | | |
712 | 22 | case '?': |
713 | | |
714 | 22 | if (*expr_crs + 4 >= expr_max) return 0; |
715 | 22 | EXPR_TYPE(*expr_crs) = PTN_OPTIONAL; |
716 | 22 | (*input_crs)++; |
717 | 22 | return *expr_crs += 4; |
718 | | |
719 | 101 | case '|': |
720 | | |
721 | 101 | if (*expr_crs + 5 >= expr_max) return 0; |
722 | 101 | EXPR_TYPE(*expr_crs) = PTN_ALTERNATE; |
723 | 101 | (*input_crs)++; |
724 | 101 | return *expr_crs += 5; |
725 | | |
726 | 145 | case '.': |
727 | | |
728 | 145 | if (*expr_crs + 3 >= expr_max) return 0; |
729 | 145 | EXPR_TYPE(*expr_crs) = PTN_ANY; |
730 | 145 | (*input_crs)++; |
731 | 145 | return *expr_crs += 3; |
732 | | |
733 | 468 | case '%': |
734 | | |
735 | 468 | if (*expr_crs + 5 >= expr_max) return 0; |
736 | | |
737 | 468 | (*input_crs)++; |
738 | 468 | if (*input_crs >= input_max) return 0; |
739 | | |
740 | | /* find closing bracket */ |
741 | 468 | if (input[*input_crs] == '[') { |
742 | 184 | set = 1; |
743 | 184 | (*input_crs)++; |
744 | 736 | for (input_end = *input_crs; input_end < input_max; input_end++) |
745 | 736 | if (input[input_end] == ']') break; |
746 | 184 | if (input_end >= input_max) return 0; |
747 | 284 | } else { |
748 | 284 | set = 0; |
749 | 284 | input_end = *input_crs + 1; |
750 | 284 | } |
751 | | |
752 | 468 | EXPR_TYPE(*expr_crs) = PTN_ATTRIBUTES; |
753 | | |
754 | 468 | attrs0 = attrs1 = 0; |
755 | 1.30k | for (; (*input_crs) < input_end; (*input_crs)++) { |
756 | 836 | switch (input[*input_crs]) { |
757 | 0 | case '_': |
758 | 0 | attrs0 |= CTC_Space; |
759 | 0 | break; |
760 | 0 | case '#': |
761 | 0 | attrs0 |= CTC_Digit; |
762 | 0 | break; |
763 | 4 | case 'a': |
764 | 4 | attrs0 |= CTC_Letter; |
765 | 4 | break; |
766 | 0 | case 'u': |
767 | 0 | attrs0 |= CTC_UpperCase; |
768 | 0 | break; |
769 | 0 | case 'l': |
770 | 0 | attrs0 |= CTC_LowerCase; |
771 | 0 | break; |
772 | 138 | case '.': |
773 | 138 | attrs0 |= CTC_Punctuation; |
774 | 138 | break; |
775 | 0 | case '$': |
776 | 0 | attrs0 |= CTC_Sign; |
777 | 0 | break; |
778 | 0 | case 'm': |
779 | 0 | attrs0 |= CTC_Math; |
780 | 0 | break; |
781 | 0 | case '~': |
782 | 0 | attrs0 |= CTC_SeqDelimiter; |
783 | 0 | break; |
784 | 0 | case '<': |
785 | 0 | attrs0 |= CTC_SeqBefore; |
786 | 0 | break; |
787 | 0 | case '>': |
788 | 0 | attrs0 |= CTC_SeqAfter; |
789 | 0 | break; |
790 | | |
791 | 138 | case '0': |
792 | 138 | case '1': |
793 | 690 | case '2': |
794 | 690 | case '3': |
795 | 690 | case '4': |
796 | 690 | case '5': |
797 | 690 | case '6': |
798 | 690 | case '7': { |
799 | 690 | int k = input[*input_crs] - '0'; |
800 | 690 | TranslationTableCharacterAttributes a = table->numberedAttributes[k]; |
801 | 690 | if (!a) { |
802 | | // attribute not used before yet: assign it a value |
803 | 690 | a = table->numberedAttributes[k] = |
804 | 690 | table->nextNumberedCharacterClassAttribute; |
805 | 690 | if (a > CTC_UserDefined8) { |
806 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
807 | 0 | "%s:%d: error: Too many character attributes defined", |
808 | 0 | nested->fileName, nested->lineNumber); |
809 | 0 | return 0; |
810 | 0 | } |
811 | 690 | table->nextNumberedCharacterClassAttribute <<= 1; |
812 | 690 | } |
813 | 690 | attrs1 |= (a >> 16); |
814 | 690 | break; |
815 | 690 | } |
816 | 4 | case '^': |
817 | 4 | attrs1 |= (CTC_EndOfInput >> 16); |
818 | 4 | break; |
819 | | |
820 | 0 | default: |
821 | 0 | return 0; |
822 | 836 | } |
823 | 836 | } |
824 | 468 | EXPR_DATA_0(*expr_crs) = attrs1; |
825 | 468 | EXPR_DATA_1(*expr_crs) = attrs0; |
826 | | |
827 | 468 | if (set) (*input_crs)++; |
828 | 468 | return *expr_crs += 5; |
829 | | |
830 | 460 | case '[': |
831 | | |
832 | 460 | (*input_crs)++; |
833 | 460 | if (*input_crs >= input_max) return 0; |
834 | | |
835 | | /* find closing bracket */ |
836 | 460 | esc = 0; |
837 | 598 | for (input_end = *input_crs; input_end < input_max; input_end++) { |
838 | 598 | if (input[input_end] == '\\' && !esc) { |
839 | 0 | esc = 1; |
840 | 0 | continue; |
841 | 0 | } |
842 | | |
843 | 598 | if (input[input_end] == ']' && !esc) break; |
844 | 138 | esc = 0; |
845 | 138 | } |
846 | 460 | if (input_end >= input_max) return 0; |
847 | | |
848 | 460 | if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0; |
849 | | |
850 | 460 | EXPR_TYPE(*expr_crs) = PTN_CHARS; |
851 | | |
852 | 460 | esc = 0; |
853 | 460 | data = EXPR_DATA(*expr_crs); |
854 | 598 | for (i = 1; *input_crs < input_end; (*input_crs)++) { |
855 | 138 | if (input[*input_crs] == '\\' && !esc) { |
856 | 0 | esc = 1; |
857 | 0 | continue; |
858 | 0 | } |
859 | | |
860 | 138 | esc = 0; |
861 | 138 | data[i++] = (widechar)input[*input_crs]; |
862 | 138 | } |
863 | 460 | data[0] = i - 1; |
864 | 460 | (*input_crs)++; |
865 | 460 | return *expr_crs += 4 + data[0]; |
866 | | |
867 | 1 | case '@': |
868 | | |
869 | 1 | (*input_crs)++; |
870 | 1 | if (*input_crs >= input_max) return 0; |
871 | | |
872 | | /* find closing bracket */ |
873 | 1 | if (input[*input_crs] == '[') { |
874 | 0 | set = 1; |
875 | 0 | (*input_crs)++; |
876 | 0 | for (input_end = *input_crs; input_end < input_max; input_end++) |
877 | 0 | if (input[input_end] == ']') break; |
878 | 0 | if (input_end >= input_max) return 0; |
879 | 1 | } else { |
880 | 1 | set = 0; |
881 | 1 | input_end = *input_crs + 1; |
882 | 1 | } |
883 | | |
884 | 1 | if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0; |
885 | | |
886 | 1 | EXPR_TYPE(*expr_crs) = PTN_HOOK; |
887 | | |
888 | 1 | esc = 0; |
889 | 1 | data = EXPR_DATA(*expr_crs); |
890 | 2 | for (i = 1; *input_crs < input_end; (*input_crs)++) { |
891 | 1 | if (input[*input_crs] == '\\' && !esc) { |
892 | 0 | esc = 1; |
893 | 0 | continue; |
894 | 0 | } |
895 | | |
896 | 1 | esc = 0; |
897 | 1 | data[i++] = (widechar)input[*input_crs]; |
898 | 1 | } |
899 | 1 | data[0] = i - 1; |
900 | 1 | if (set) (*input_crs)++; |
901 | 1 | return *expr_crs += 4 + data[0]; |
902 | | |
903 | 0 | case '^': |
904 | 2 | case '$': |
905 | | |
906 | 2 | if (*expr_crs + 3 >= expr_max) return 0; |
907 | 2 | EXPR_TYPE(*expr_crs) = PTN_END_OF_INPUT; |
908 | 2 | (*input_crs)++; |
909 | 2 | return *expr_crs += 3; |
910 | | |
911 | 328 | case '\\': |
912 | | |
913 | 328 | (*input_crs)++; |
914 | 328 | if (*input_crs >= input_max) return 0; |
915 | | |
916 | 4.89k | default: |
917 | | |
918 | 4.89k | if (*expr_crs + 5 >= expr_max) return 0; |
919 | 4.89k | EXPR_TYPE(*expr_crs) = PTN_CHARS; |
920 | 4.89k | EXPR_DATA_0(*expr_crs) = 1; |
921 | 4.89k | EXPR_DATA_1(*expr_crs) = (widechar)input[*input_crs]; |
922 | 4.89k | (*input_crs)++; |
923 | 4.89k | return *expr_crs += 5; |
924 | 6.19k | } |
925 | 6.19k | } |
926 | | |
927 | | static int |
928 | | pattern_insert_alternate(const widechar *input, const int input_max, int *input_crs, |
929 | | widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts, |
930 | 0 | int expr_insert, TranslationTableHeader *table, const FileInfo *nested) { |
931 | 0 | int expr_group, expr_alt, expr_end; |
932 | 0 |
|
933 | 0 | if (EXPR_TYPE(*expr_crs) == PTN_START) return 0; |
934 | 0 |
|
935 | 0 | if (*expr_crs + 12 >= expr_max) return 0; |
936 | 0 |
|
937 | 0 | /* setup alternate expression */ |
938 | 0 | expr_alt = *expr_crs; |
939 | 0 | EXPR_TYPE(expr_alt) = PTN_ALTERNATE; |
940 | 0 | EXPR_PRV(expr_alt) = PTN_END; |
941 | 0 | EXPR_NXT(expr_alt) = PTN_END; |
942 | 0 | *expr_crs += 5; |
943 | 0 |
|
944 | 0 | /* setup group expression */ |
945 | 0 | expr_group = *expr_crs; |
946 | 0 | EXPR_TYPE(expr_group) = PTN_GROUP; |
947 | 0 | EXPR_PRV(expr_group) = PTN_END; |
948 | 0 | EXPR_NXT(expr_group) = PTN_END; |
949 | 0 | *expr_crs += 4; |
950 | 0 | EXPR_DATA_0(expr_group) = *expr_crs; |
951 | 0 |
|
952 | 0 | EXPR_TYPE(*expr_crs) = PTN_ERROR; |
953 | 0 | EXPR_PRV(*expr_crs) = PTN_END; |
954 | 0 | EXPR_NXT(*expr_crs) = PTN_END; |
955 | 0 | if (!pattern_compile_1(input, input_max, input_crs, expr_data, expr_max, expr_crs, |
956 | 0 | loop_cnts, table, nested)) |
957 | 0 | return 0; |
958 | 0 | expr_end = *expr_crs; |
959 | 0 | EXPR_NXT(expr_end) = expr_group; |
960 | 0 |
|
961 | 0 | /* setup last end expression */ |
962 | 0 | if (*expr_crs + 3 >= expr_max) return 0; |
963 | 0 | *expr_crs += 3; |
964 | 0 | EXPR_TYPE(*expr_crs) = PTN_END; |
965 | 0 | EXPR_NXT(*expr_crs) = PTN_END; |
966 | 0 |
|
967 | 0 | /* replace insert expression with group expression using last end expression */ |
968 | 0 | EXPR_NXT(EXPR_PRV(expr_insert)) = expr_group; |
969 | 0 | EXPR_PRV(expr_group) = EXPR_PRV(expr_insert); |
970 | 0 |
|
971 | 0 | EXPR_NXT(expr_group) = *expr_crs; |
972 | 0 | EXPR_PRV(*expr_crs) = expr_group; |
973 | 0 |
|
974 | 0 | /* link alternate and insert expressions before group end expression */ |
975 | 0 | EXPR_NXT(EXPR_PRV(expr_end)) = expr_alt; |
976 | 0 | EXPR_PRV(expr_alt) = EXPR_PRV(expr_end); |
977 | 0 |
|
978 | 0 | EXPR_NXT(expr_alt) = expr_insert; |
979 | 0 | EXPR_PRV(expr_insert) = expr_alt; |
980 | 0 |
|
981 | 0 | EXPR_NXT(expr_insert) = expr_end; |
982 | 0 | EXPR_PRV(expr_end) = expr_insert; |
983 | 0 |
|
984 | 0 | return *expr_crs; |
985 | 0 | } |
986 | | |
987 | | /* Compile all expression sequences, resolving character sets, attributes, |
988 | | * groups, nots, and hooks. Note that unlike the other compile functions, on |
989 | | * returning the expr_crs is set to the last end expression, not after it. |
990 | | */ |
991 | | static int |
992 | | pattern_compile_1(const widechar *input, const int input_max, int *input_crs, |
993 | | widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts, |
994 | 104 | TranslationTableHeader *table, const FileInfo *nested) { |
995 | 104 | int expr_crs_prv; |
996 | | |
997 | 104 | if (*expr_crs + 6 >= expr_max) return 0; |
998 | | |
999 | 104 | expr_crs_prv = *expr_crs; |
1000 | | |
1001 | | /* setup start expression */ |
1002 | 104 | EXPR_TYPE(*expr_crs) = PTN_START; |
1003 | 104 | EXPR_PRV(*expr_crs) = PTN_END; |
1004 | 104 | *expr_crs += 3; |
1005 | 104 | EXPR_NXT(expr_crs_prv) = *expr_crs; |
1006 | | |
1007 | | /* setup end expression */ |
1008 | 104 | EXPR_TYPE(*expr_crs) = PTN_END; |
1009 | 104 | EXPR_PRV(*expr_crs) = expr_crs_prv; |
1010 | 104 | EXPR_NXT(*expr_crs) = PTN_END; |
1011 | | |
1012 | 6.26k | while (*input_crs < input_max) { |
1013 | 6.15k | expr_crs_prv = *expr_crs; |
1014 | 6.15k | if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max, |
1015 | 6.15k | expr_crs, loop_cnts, table, nested)) |
1016 | 0 | return 0; |
1017 | | |
1018 | | /* setup end expression */ |
1019 | 6.15k | if (*expr_crs + 3 >= expr_max) return 0; |
1020 | 6.15k | EXPR_NXT(expr_crs_prv) = *expr_crs; |
1021 | 6.15k | EXPR_TYPE(*expr_crs) = PTN_END; |
1022 | 6.15k | EXPR_PRV(*expr_crs) = expr_crs_prv; |
1023 | 6.15k | EXPR_NXT(*expr_crs) = PTN_END; |
1024 | | |
1025 | | /* insert seqafterexpression before attributes of seqafterchars */ |
1026 | | // if(EXPR_TYPE(expr_crs_prv) == PTN_ATTRIBUTES) |
1027 | | // if(EXPR_DATA_1(expr_crs_prv) & CTC_SeqAfter) |
1028 | | // { |
1029 | | // i = 0; |
1030 | | // pattern_insert_alternate(table->seqAfterExpression, |
1031 | | // table->seqAfterExpressionLength, &i, expr_data, expr_max, |
1032 | | // expr_crs, loop_cnts, expr_crs_prv); |
1033 | | // } |
1034 | 6.15k | } |
1035 | | |
1036 | 104 | return *expr_crs; |
1037 | 104 | } |
1038 | | |
1039 | | /* Resolve optional and loop expressions. |
1040 | | */ |
1041 | | static int |
1042 | | pattern_compile_2( |
1043 | 143 | widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) { |
1044 | 143 | int expr_start, expr_end, expr_prv, expr_sub; |
1045 | | |
1046 | 6.48k | while (EXPR_TYPE(expr_at) != PTN_END) { |
1047 | 6.34k | if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT) { |
1048 | 39 | if (!pattern_compile_2(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs)) |
1049 | 0 | return 0; |
1050 | 39 | } |
1051 | | |
1052 | 6.34k | if (EXPR_TYPE(expr_at) == PTN_ZERO_MORE || EXPR_TYPE(expr_at) == PTN_ONE_MORE || |
1053 | 6.27k | EXPR_TYPE(expr_at) == PTN_OPTIONAL) { |
1054 | 85 | if (*expr_crs + 6 >= expr_max) return 0; |
1055 | | |
1056 | | /* get previous expressions, there must |
1057 | | * be at least something and a PTN_START */ |
1058 | 85 | expr_sub = EXPR_PRV(expr_at); |
1059 | 85 | if (EXPR_TYPE(expr_sub) == PTN_START) return 0; |
1060 | 85 | expr_prv = EXPR_PRV(expr_sub); |
1061 | | |
1062 | | /* create start expression */ |
1063 | 85 | expr_start = *expr_crs; |
1064 | 85 | EXPR_TYPE(expr_start) = PTN_START; |
1065 | 85 | EXPR_PRV(expr_start) = PTN_END; |
1066 | 85 | EXPR_NXT(expr_start) = expr_sub; |
1067 | 85 | *expr_crs += 3; |
1068 | | |
1069 | | /* create end expression */ |
1070 | 85 | expr_end = *expr_crs; |
1071 | 85 | EXPR_TYPE(expr_end) = PTN_END; |
1072 | 85 | EXPR_PRV(expr_end) = expr_sub; |
1073 | 85 | EXPR_NXT(expr_end) = expr_at; |
1074 | 85 | *expr_crs += 3; |
1075 | | |
1076 | | /* relink previous expression before sub expression */ |
1077 | 85 | EXPR_DATA_0(expr_at) = expr_start; |
1078 | 85 | EXPR_NXT(expr_prv) = expr_at; |
1079 | 85 | EXPR_PRV(expr_at) = expr_prv; |
1080 | | |
1081 | | /* relink sub expression to start and end */ |
1082 | 85 | EXPR_PRV(expr_sub) = expr_start; |
1083 | 85 | EXPR_NXT(expr_sub) = expr_end; |
1084 | 85 | } |
1085 | | |
1086 | 6.34k | expr_at = EXPR_NXT(expr_at); |
1087 | 6.34k | } |
1088 | | |
1089 | 143 | return 1; |
1090 | 143 | } |
1091 | | |
1092 | | /* Resolves alternative expressions. |
1093 | | */ |
1094 | | static int |
1095 | | pattern_compile_3( |
1096 | 329 | widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) { |
1097 | 329 | int expr_mrk, expr_start, expr_end, expr_sub_start, expr_sub_end; |
1098 | | |
1099 | 6.85k | while (EXPR_TYPE(expr_at) != PTN_END) { |
1100 | 6.52k | if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT || |
1101 | 6.48k | EXPR_TYPE(expr_at) == PTN_OPTIONAL || |
1102 | 6.46k | EXPR_TYPE(expr_at) == PTN_ZERO_MORE || |
1103 | 6.40k | EXPR_TYPE(expr_at) == PTN_ONE_MORE) { |
1104 | 124 | if (!pattern_compile_3(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs)) |
1105 | 0 | return 0; |
1106 | 124 | } |
1107 | | |
1108 | 6.52k | if (EXPR_TYPE(expr_at) == PTN_ALTERNATE) { |
1109 | 101 | if (*expr_crs + 12 >= expr_max) return 0; |
1110 | | |
1111 | | /* get previous start expression, |
1112 | | * can include alternate expressions */ |
1113 | 101 | expr_mrk = EXPR_PRV(expr_at); |
1114 | 101 | if (EXPR_TYPE(expr_mrk) == PTN_START) return 0; |
1115 | 101 | expr_sub_end = expr_mrk; |
1116 | 839 | while (EXPR_TYPE(expr_mrk) != PTN_START) expr_mrk = EXPR_PRV(expr_mrk); |
1117 | 101 | expr_sub_start = EXPR_NXT(expr_mrk); |
1118 | | |
1119 | | /* create first start expression */ |
1120 | 101 | expr_start = *expr_crs; |
1121 | 101 | EXPR_TYPE(expr_start) = PTN_START; |
1122 | 101 | EXPR_PRV(expr_start) = PTN_END; |
1123 | 101 | EXPR_NXT(expr_start) = expr_sub_start; |
1124 | 101 | *expr_crs += 3; |
1125 | | |
1126 | | /* create first end expression */ |
1127 | 101 | expr_end = *expr_crs; |
1128 | 101 | EXPR_TYPE(expr_end) = PTN_END; |
1129 | 101 | EXPR_PRV(expr_end) = expr_sub_end; |
1130 | 101 | EXPR_NXT(expr_end) = expr_at; |
1131 | 101 | *expr_crs += 3; |
1132 | | |
1133 | | /* relink previous expression before sub expression */ |
1134 | 101 | EXPR_DATA_0(expr_at) = expr_start; |
1135 | 101 | EXPR_NXT(expr_mrk) = expr_at; |
1136 | 101 | EXPR_PRV(expr_at) = expr_mrk; |
1137 | | |
1138 | | /* relink sub expression to start and end */ |
1139 | 101 | EXPR_PRV(expr_sub_start) = expr_start; |
1140 | 101 | EXPR_NXT(expr_sub_end) = expr_end; |
1141 | | |
1142 | | /* get following PTN_END or PTN_ALTERNATE expression */ |
1143 | 101 | expr_mrk = EXPR_NXT(expr_at); |
1144 | 101 | if (EXPR_TYPE(expr_mrk) == PTN_END || EXPR_TYPE(expr_mrk) == PTN_ALTERNATE) |
1145 | 0 | return 0; |
1146 | 101 | expr_sub_start = expr_mrk; |
1147 | 5.20k | while (EXPR_TYPE(expr_mrk) != PTN_END && EXPR_TYPE(expr_mrk) != PTN_ALTERNATE) |
1148 | 5.10k | expr_mrk = EXPR_NXT(expr_mrk); |
1149 | 101 | expr_sub_end = EXPR_PRV(expr_mrk); |
1150 | | |
1151 | | /* create first start expression */ |
1152 | 101 | expr_start = *expr_crs; |
1153 | 101 | EXPR_TYPE(expr_start) = PTN_START; |
1154 | 101 | EXPR_PRV(expr_start) = PTN_END; |
1155 | 101 | EXPR_NXT(expr_start) = expr_sub_start; |
1156 | 101 | *expr_crs += 3; |
1157 | | |
1158 | | /* create first end expression */ |
1159 | 101 | expr_end = *expr_crs; |
1160 | 101 | EXPR_TYPE(expr_end) = PTN_END; |
1161 | 101 | EXPR_PRV(expr_end) = expr_sub_end; |
1162 | 101 | EXPR_NXT(expr_end) = expr_at; |
1163 | 101 | *expr_crs += 3; |
1164 | | |
1165 | | /* relink following expression before sub expression */ |
1166 | 101 | EXPR_DATA_1(expr_at) = expr_start; |
1167 | 101 | EXPR_PRV(expr_mrk) = expr_at; |
1168 | 101 | EXPR_NXT(expr_at) = expr_mrk; |
1169 | | |
1170 | | /* relink sub expression to start and end */ |
1171 | 101 | EXPR_PRV(expr_sub_start) = expr_start; |
1172 | 101 | EXPR_NXT(expr_sub_end) = expr_end; |
1173 | | |
1174 | | /* check expressions were after alternate and got moved into |
1175 | | * a sub expression, previous expressions already checked */ |
1176 | 101 | if (!pattern_compile_3(expr_data, EXPR_DATA_1(expr_at), expr_max, expr_crs)) |
1177 | 0 | return 0; |
1178 | 101 | } |
1179 | | |
1180 | 6.52k | expr_at = EXPR_NXT(expr_at); |
1181 | 6.52k | } |
1182 | | |
1183 | 329 | return 1; |
1184 | 329 | } |
1185 | | |
1186 | | int EXPORT_CALL |
1187 | | _lou_pattern_compile(const widechar *input, const int input_max, widechar *expr_data, |
1188 | 104 | const int expr_max, TranslationTableHeader *table, const FileInfo *nested) { |
1189 | 104 | int input_crs; |
1190 | | |
1191 | 104 | input_crs = 0; |
1192 | 104 | expr_data[0] = 2; |
1193 | 104 | expr_data[1] = 0; |
1194 | | |
1195 | 104 | if (!pattern_compile_1(input, input_max, &input_crs, expr_data, expr_max, |
1196 | 104 | &expr_data[0], &expr_data[1], table, nested)) |
1197 | 0 | return 0; |
1198 | | |
1199 | | /* shift past the last end */ |
1200 | 104 | expr_data[0] += 3; |
1201 | | |
1202 | 104 | if (!pattern_compile_2(expr_data, 2, expr_max, &expr_data[0])) return 0; |
1203 | | |
1204 | 104 | if (!pattern_compile_3(expr_data, 2, expr_max, &expr_data[0])) return 0; |
1205 | | |
1206 | 104 | return expr_data[0]; |
1207 | 104 | } |
1208 | | |
1209 | | //////////////////////////////////////////////////////////////////////////////// |
1210 | | |
1211 | | static void |
1212 | | pattern_reverse_expression(widechar *expr_data, const int expr_start); |
1213 | | |
1214 | | static void |
1215 | 6.02k | pattern_reverse_branch(widechar *expr_data, const int expr_at) { |
1216 | 6.02k | widechar expr_swap; |
1217 | | |
1218 | 6.02k | switch (EXPR_TYPE(expr_at)) { |
1219 | 101 | case PTN_ALTERNATE: |
1220 | | |
1221 | 101 | pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at)); |
1222 | 101 | expr_swap = EXPR_DATA_0(expr_at); |
1223 | 101 | EXPR_DATA_0(expr_at) = EXPR_DATA_1(expr_at); |
1224 | 101 | EXPR_DATA_1(expr_at) = expr_swap; |
1225 | | |
1226 | 101 | case PTN_GROUP: |
1227 | 139 | case PTN_NOT: |
1228 | 142 | case PTN_ONE_MORE: |
1229 | 202 | case PTN_ZERO_MORE: |
1230 | 224 | case PTN_OPTIONAL: |
1231 | | |
1232 | 224 | pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at)); |
1233 | 6.02k | } |
1234 | 6.02k | } |
1235 | | |
1236 | | static void |
1237 | 377 | pattern_reverse_expression(widechar *expr_data, const int expr_start) { |
1238 | 377 | widechar expr_end, expr_crs, expr_prv; |
1239 | | |
1240 | 377 | expr_end = EXPR_NXT(expr_start); |
1241 | | |
1242 | | /* empty expression */ |
1243 | 377 | if (EXPR_TYPE(expr_end) == PTN_END) return; |
1244 | | |
1245 | | /* find end expression */ |
1246 | 6.40k | while (EXPR_TYPE(expr_end) != PTN_END) expr_end = EXPR_NXT(expr_end); |
1247 | | |
1248 | 377 | expr_crs = EXPR_PRV(expr_end); |
1249 | 377 | expr_prv = EXPR_PRV(expr_crs); |
1250 | | |
1251 | | /* relink expression before end expression */ |
1252 | 377 | EXPR_NXT(expr_start) = expr_crs; |
1253 | 377 | EXPR_PRV(expr_crs) = expr_start; |
1254 | 377 | EXPR_NXT(expr_crs) = expr_prv; |
1255 | | |
1256 | | /* reverse any branching expressions */ |
1257 | 377 | pattern_reverse_branch(expr_data, expr_crs); |
1258 | | |
1259 | 6.02k | while (expr_prv != expr_start) { |
1260 | | /* shift current expression */ |
1261 | 5.65k | expr_crs = expr_prv; |
1262 | 5.65k | expr_prv = EXPR_PRV(expr_prv); |
1263 | | |
1264 | | /* reverse any branching expressions */ |
1265 | 5.65k | pattern_reverse_branch(expr_data, expr_crs); |
1266 | | |
1267 | | /* relink current expression */ |
1268 | 5.65k | EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs); |
1269 | 5.65k | EXPR_NXT(expr_crs) = expr_prv; |
1270 | 5.65k | } |
1271 | | |
1272 | | /* relink expression after start expression */ |
1273 | 377 | EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs); |
1274 | 377 | EXPR_NXT(expr_crs) = expr_end; |
1275 | 377 | EXPR_PRV(expr_end) = expr_crs; |
1276 | 377 | } |
1277 | | |
1278 | | void EXPORT_CALL |
1279 | 52 | _lou_pattern_reverse(widechar *expr_data) { |
1280 | 52 | pattern_reverse_expression(expr_data, 2); |
1281 | 52 | } |
1282 | | |
1283 | | //////////////////////////////////////////////////////////////////////////////// |
1284 | | |
1285 | | static int |
1286 | 898k | pattern_check_chars(const widechar input_char, const widechar *expr_data) { |
1287 | 898k | int expr_cnt, i; |
1288 | | |
1289 | 898k | expr_cnt = expr_data[0] + 1; |
1290 | | |
1291 | 1.79M | for (i = 1; i < expr_cnt; i++) |
1292 | 898k | if (input_char == expr_data[i]) break; |
1293 | | |
1294 | 898k | if (i == expr_cnt) return 0; |
1295 | 696 | return 1; |
1296 | 898k | } |
1297 | | |
1298 | | static int |
1299 | | pattern_check_attrs(const widechar input_char, const widechar *expr_data, |
1300 | 288k | const TranslationTableHeader *table) { |
1301 | 288k | int attrs; |
1302 | | |
1303 | 288k | attrs = ((expr_data[0] << 16) | expr_data[1]) & ~(CTC_EndOfInput | CTC_EmpMatch); |
1304 | 288k | if (!checkAttr(input_char, attrs, table)) return 0; |
1305 | 0 | return 1; |
1306 | 288k | } |
1307 | | |
1308 | | static int |
1309 | | pattern_check_expression(const widechar *const input, int *input_crs, |
1310 | | const int input_minmax, const int input_dir, const widechar *const expr_data, |
1311 | | int (*hook)(const widechar input, const int data_len), widechar *hook_data, |
1312 | | const int hook_max, int expr_crs, int notOperator, int loop_crs, int *loop_cnts, |
1313 | 1.24M | const TranslationTableHeader *table) { |
1314 | 1.24M | int input_crs_prv, input_start, attrs, ret, i; |
1315 | 1.24M | const widechar *data; |
1316 | | |
1317 | 1.24M | data = NULL; |
1318 | | |
1319 | | /* save input_crs to know if loop consumed input */ |
1320 | 1.24M | input_start = *input_crs; |
1321 | | |
1322 | 1.24M | CHECK_OUTPUT(START, 0, __LINE__, "check start") |
1323 | | |
1324 | 6.68M | while (!(EXPR_TYPE(expr_crs) == PTN_END && EXPR_TYPE(expr_crs) == PTN_END)) { |
1325 | | /* end of input expression */ |
1326 | 6.68M | if (EXPR_TYPE(expr_crs) == PTN_END_OF_INPUT) { |
1327 | 0 | if (*input_crs * input_dir >= input_minmax * input_dir) { |
1328 | 0 | if (notOperator) |
1329 | 0 | CHECK_OUTPUT( |
1330 | 0 | RETURN, 0, __LINE__, "end of input failed: no input and not") |
1331 | 0 | else |
1332 | 0 | CHECK_OUTPUT(RETURN, 1, __LINE__, "end of input passed: no input") |
1333 | 0 | return !notOperator; |
1334 | 0 | } else { |
1335 | 0 | if (notOperator) |
1336 | 0 | CHECK_OUTPUT( |
1337 | 0 | RETURN, 1, __LINE__, "end of input passed: input and not") |
1338 | 0 | else |
1339 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "end of input failed: input") |
1340 | 0 | return notOperator; |
1341 | 0 | } |
1342 | 0 | } |
1343 | | |
1344 | | /* no more input */ |
1345 | 6.68M | if (*input_crs * input_dir >= input_minmax * input_dir) { |
1346 | 37.8k | switch (EXPR_TYPE(expr_crs)) { |
1347 | 1.67k | case PTN_ATTRIBUTES: |
1348 | | |
1349 | 1.67k | attrs = (EXPR_DATA_0(expr_crs) << 16); |
1350 | 1.67k | if (attrs & CTC_EndOfInput) { |
1351 | 1.67k | if (notOperator) { |
1352 | 1.46k | CHECK_OUTPUT(RETURN, 0, __LINE__, |
1353 | 1.46k | "attributes failed: end of input attribute: not") |
1354 | 1.46k | return 0; |
1355 | 1.46k | } |
1356 | 203 | CHECK_OUTPUT(RETURN, 1, __LINE__, |
1357 | 203 | "attributes passed: end of input attribute") |
1358 | 203 | return 1; |
1359 | 1.67k | } |
1360 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, |
1361 | 0 | "attributes failed: no end of input attribute") |
1362 | 0 | return 0; |
1363 | | |
1364 | 207 | case PTN_ANY: |
1365 | 4.88k | case PTN_CHARS: |
1366 | | |
1367 | 4.88k | CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed: no input") |
1368 | 4.88k | return 0; |
1369 | 37.8k | } |
1370 | | |
1371 | 31.3k | CHECK_OUTPUT(SHOW, 0, __LINE__, "no input") |
1372 | 31.3k | } |
1373 | | |
1374 | 6.68M | switch (EXPR_TYPE(expr_crs)) { |
1375 | | |
1376 | 1.57M | case PTN_START: |
1377 | | |
1378 | 1.57M | expr_crs = EXPR_NXT(expr_crs); |
1379 | 1.57M | CHECK_OUTPUT(SHOW, 0, __LINE__, "start next") |
1380 | 1.57M | break; |
1381 | | |
1382 | 0 | case PTN_GROUP: |
1383 | |
|
1384 | 0 | expr_crs = EXPR_DATA_0(expr_crs); |
1385 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "group next") |
1386 | 0 | break; |
1387 | | |
1388 | 321k | case PTN_NOT: |
1389 | | |
1390 | 321k | notOperator = !notOperator; |
1391 | 321k | expr_crs = EXPR_DATA_0(expr_crs); |
1392 | 321k | CHECK_OUTPUT(SHOW, 0, __LINE__, "not next") |
1393 | 321k | break; |
1394 | | |
1395 | 31.7k | case PTN_ONE_MORE: |
1396 | | |
1397 | 31.7k | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ start") |
1398 | | |
1399 | 2.68M | case PTN_ZERO_MORE: |
1400 | | |
1401 | | /* check if loop already started */ |
1402 | 2.68M | if (expr_crs == loop_crs) { |
1403 | 362k | loop_cnts[EXPR_DATA_1(loop_crs)]++; |
1404 | 362k | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop again") |
1405 | 2.32M | } else { |
1406 | | /* check if loop nested, wasn't running but has a count */ |
1407 | 2.32M | if (loop_cnts[EXPR_DATA_1(expr_crs)]) { |
1408 | 2.31M | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop already running") |
1409 | 2.31M | goto loop_next; |
1410 | 2.31M | } |
1411 | | |
1412 | | /* start loop */ |
1413 | 15.5k | loop_crs = expr_crs; |
1414 | 15.5k | loop_cnts[EXPR_DATA_1(loop_crs)] = 1; |
1415 | 15.5k | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop start") |
1416 | 15.5k | } |
1417 | | |
1418 | | /* start loop expression */ |
1419 | 378k | input_crs_prv = *input_crs; |
1420 | 378k | ret = pattern_check_expression(input, input_crs, input_minmax, input_dir, |
1421 | 378k | expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs), |
1422 | 378k | notOperator, loop_crs, loop_cnts, table); |
1423 | 378k | if (ret) { |
1424 | 73.5k | CHECK_OUTPUT(RETURN, 1, __LINE__, "loop passed") |
1425 | 73.5k | return 1; |
1426 | 73.5k | } |
1427 | 304k | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop failed") |
1428 | 304k | *input_crs = input_crs_prv; |
1429 | | |
1430 | | /* check loop count */ |
1431 | 304k | loop_cnts[EXPR_DATA_1(loop_crs)]--; |
1432 | 304k | if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE) { |
1433 | 65 | if (loop_cnts[EXPR_DATA_1(loop_crs)] < 1) { |
1434 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "loop+ failed") |
1435 | 0 | return 0; |
1436 | 0 | } else |
1437 | 65 | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ passed") |
1438 | 65 | } |
1439 | | |
1440 | | /* continue after loop */ |
1441 | 2.61M | loop_next: |
1442 | 2.61M | expr_crs = EXPR_NXT(expr_crs); |
1443 | 2.61M | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop next") |
1444 | 2.61M | break; |
1445 | | |
1446 | 870k | case PTN_OPTIONAL: |
1447 | | |
1448 | | /* save current state */ |
1449 | 870k | input_crs_prv = *input_crs; |
1450 | | |
1451 | | /* start optional expression */ |
1452 | 870k | CHECK_OUTPUT(CALL, 0, __LINE__, "option start") |
1453 | 870k | if (pattern_check_expression(input, input_crs, input_minmax, input_dir, |
1454 | 870k | expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs), |
1455 | 870k | notOperator, loop_crs, loop_cnts, table)) { |
1456 | 268 | CHECK_OUTPUT(RETURN, 1, __LINE__, "option passed") |
1457 | 268 | return 1; |
1458 | 268 | } |
1459 | 870k | CHECK_OUTPUT(SHOW, 0, __LINE__, "option failed") |
1460 | | |
1461 | | /* continue after optional expression */ |
1462 | 870k | *input_crs = input_crs_prv; |
1463 | 870k | CHECK_OUTPUT(SHOW, 0, __LINE__, "no option start") |
1464 | 870k | expr_crs = EXPR_NXT(expr_crs); |
1465 | 870k | break; |
1466 | | |
1467 | 213 | case PTN_ALTERNATE: |
1468 | | |
1469 | | /* save current state */ |
1470 | 213 | input_crs_prv = *input_crs; |
1471 | | |
1472 | | /* start first expression */ |
1473 | 213 | CHECK_OUTPUT(CALL, 0, __LINE__, "or 1 start") |
1474 | 213 | if (pattern_check_expression(input, input_crs, input_minmax, input_dir, |
1475 | 213 | expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs), |
1476 | 213 | notOperator, loop_crs, loop_cnts, table)) { |
1477 | 203 | CHECK_OUTPUT(RETURN, 1, __LINE__, "or 1 passed") |
1478 | 203 | return 1; |
1479 | 203 | } |
1480 | 10 | CHECK_OUTPUT(SHOW, 0, __LINE__, "or 1 failed") |
1481 | | |
1482 | | /* start second expression (no need to push) */ |
1483 | 10 | *input_crs = input_crs_prv; |
1484 | 10 | CHECK_OUTPUT(SHOW, 0, __LINE__, "or 2 start") |
1485 | 10 | expr_crs = EXPR_DATA_1(expr_crs); |
1486 | 10 | break; |
1487 | | |
1488 | 42.4k | case PTN_ANY: |
1489 | | |
1490 | 42.4k | CHECK_OUTPUT(SHOW, 0, __LINE__, "any") |
1491 | 42.4k | *input_crs += input_dir; |
1492 | 42.4k | expr_crs = EXPR_NXT(expr_crs); |
1493 | 42.4k | break; |
1494 | | |
1495 | 288k | case PTN_ATTRIBUTES: |
1496 | | |
1497 | 288k | ret = pattern_check_attrs( |
1498 | 288k | input[*input_crs], EXPR_CONST_DATA(expr_crs), table); |
1499 | 288k | if (ret && notOperator) { |
1500 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed: not"); |
1501 | 0 | return 0; |
1502 | 0 | } |
1503 | 288k | if (!ret && !notOperator) { |
1504 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed"); |
1505 | 0 | return 0; |
1506 | 0 | } |
1507 | 288k | CHECK_OUTPUT(SHOW, 0, __LINE__, "attributes passed") |
1508 | 288k | *input_crs += input_dir; |
1509 | 288k | expr_crs = EXPR_NXT(expr_crs); |
1510 | 288k | break; |
1511 | | |
1512 | 898k | case PTN_CHARS: |
1513 | | |
1514 | 898k | ret = pattern_check_chars(input[*input_crs], EXPR_CONST_DATA(expr_crs)); |
1515 | 898k | if (ret && notOperator) { |
1516 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed: not"); |
1517 | 0 | return 0; |
1518 | 0 | } |
1519 | 898k | if (!ret && !notOperator) { |
1520 | 866k | CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed"); |
1521 | 866k | return 0; |
1522 | 866k | } |
1523 | 32.4k | CHECK_OUTPUT(SHOW, 0, __LINE__, "chars passed") |
1524 | 32.4k | *input_crs += input_dir; |
1525 | 32.4k | expr_crs = EXPR_NXT(expr_crs); |
1526 | 32.4k | break; |
1527 | | |
1528 | 0 | case PTN_HOOK: |
1529 | |
|
1530 | 0 | if (hook == NULL) { |
1531 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed: NULL"); |
1532 | 0 | return 0; |
1533 | 0 | } |
1534 | | |
1535 | | /* copy expression data */ |
1536 | 0 | data = EXPR_CONST_DATA(expr_crs); |
1537 | 0 | for (i = 0; i < data[0]; i++) hook_data[i] = data[i + 1]; |
1538 | | |
1539 | | /* call hook function */ |
1540 | 0 | ret = hook(input[*input_crs], data[0]); |
1541 | 0 | if (ret && notOperator) { |
1542 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed: not"); |
1543 | 0 | return 0; |
1544 | 0 | } |
1545 | 0 | if (!ret && !notOperator) { |
1546 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed"); |
1547 | 0 | return 0; |
1548 | 0 | } |
1549 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "hook passed") |
1550 | 0 | *input_crs += input_dir; |
1551 | 0 | expr_crs = EXPR_NXT(expr_crs); |
1552 | 0 | break; |
1553 | | |
1554 | 0 | case PTN_END: |
1555 | 0 | break; |
1556 | | |
1557 | 0 | default: |
1558 | |
|
1559 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "unknown opcode") |
1560 | 0 | return 0; |
1561 | 6.68M | } |
1562 | | |
1563 | | /* check end expression */ |
1564 | 6.35M | while (EXPR_TYPE(expr_crs) == PTN_END) { |
1565 | 3.58M | CHECK_OUTPUT(SHOW, 0, __LINE__, "end") |
1566 | | |
1567 | | /* check for end of expressions */ |
1568 | 3.58M | if (EXPR_NXT(expr_crs) == PTN_END) break; |
1569 | | |
1570 | 3.58M | expr_crs = EXPR_NXT(expr_crs); |
1571 | | |
1572 | | /* returning loop */ |
1573 | 3.58M | if (EXPR_TYPE(expr_crs) == PTN_ZERO_MORE || |
1574 | 2.97M | EXPR_TYPE(expr_crs) == PTN_ONE_MORE) { |
1575 | 2.97M | CHECK_OUTPUT(SHOW, 0, __LINE__, "end loop") |
1576 | | |
1577 | | /* check that loop consumed input */ |
1578 | 2.97M | if (*input_crs == input_start) { |
1579 | 302k | CHECK_OUTPUT(RETURN, 0, __LINE__, "loop failed: did not consume") |
1580 | 302k | return 0; |
1581 | 302k | } |
1582 | | |
1583 | | /* loops do not continue to the next expression */ |
1584 | 2.67M | break; |
1585 | 2.97M | } |
1586 | | |
1587 | | /* returning not */ |
1588 | 609k | if (EXPR_TYPE(expr_crs) == PTN_NOT) notOperator = !notOperator; |
1589 | | |
1590 | 609k | expr_crs = EXPR_NXT(expr_crs); |
1591 | | |
1592 | 609k | CHECK_OUTPUT(SHOW, 0, __LINE__, "end next") |
1593 | 609k | } |
1594 | | |
1595 | 5.43M | CHECK_OUTPUT(SHOW, 0, __LINE__, "check next") |
1596 | 5.43M | } |
1597 | | |
1598 | 65 | CHECK_OUTPUT(RETURN, 1, __LINE__, "check passed: end of expression"); |
1599 | 65 | return 1; |
1600 | 1.24M | } |
1601 | | |
1602 | | static int |
1603 | | pattern_check_hook(const widechar *input, const int input_start, const int input_minmax, |
1604 | | const int input_dir, const widechar *expr_data, |
1605 | | int (*hook)(const widechar input, const int data_len), widechar *hook_data, |
1606 | 541 | const int hook_max, const TranslationTableHeader *table) { |
1607 | 541 | int input_crs, ret, *loop_cnts; |
1608 | | |
1609 | 541 | input_crs = input_start; |
1610 | 541 | loop_cnts = malloc(expr_data[1] * sizeof(int)); |
1611 | 541 | memset(loop_cnts, 0, expr_data[1] * sizeof(int)); |
1612 | 541 | ret = pattern_check_expression(input, &input_crs, input_minmax, input_dir, expr_data, |
1613 | 541 | hook, hook_data, hook_max, 2, 0, 0, loop_cnts, table); |
1614 | 541 | free(loop_cnts); |
1615 | 541 | return ret; |
1616 | 541 | } |
1617 | | |
1618 | | int EXPORT_CALL |
1619 | | _lou_pattern_check(const widechar *input, const int input_start, const int input_minmax, |
1620 | | const int input_dir, const widechar *expr_data, |
1621 | 541 | const TranslationTableHeader *table) { |
1622 | | #ifdef CHECK_OUTPUT_DEFINED |
1623 | | pattern_output(expr_data, table); |
1624 | | #endif |
1625 | 541 | return pattern_check_hook( |
1626 | 541 | input, input_start, input_minmax, input_dir, expr_data, NULL, NULL, 0, table); |
1627 | 541 | } |
1628 | | |
1629 | | //////////////////////////////////////////////////////////////////////////////// |