/src/liblouis/liblouis/pattern.c
Line | Count | Source |
1 | | /* liblouis Braille Translation and Back-Translation Library |
2 | | |
3 | | Copyright (C) 2016 Mike Gray, American Printing House for the Blind |
4 | | |
5 | | This file is part of liblouis. |
6 | | |
7 | | liblouis is free software: you can redistribute it and/or modify it |
8 | | under the terms of the GNU Lesser General Public License as published |
9 | | by the Free Software Foundation, either version 2.1 of the License, or |
10 | | (at your option) any later version. |
11 | | |
12 | | liblouis is distributed in the hope that it will be useful, but |
13 | | WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | Lesser General Public License for more details. |
16 | | |
17 | | You should have received a copy of the GNU Lesser General Public |
18 | | License along with liblouis. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "config.h" |
22 | | |
23 | | #include <stdlib.h> |
24 | | #include <stdio.h> |
25 | | #include <string.h> |
26 | | #include <ctype.h> |
27 | | |
28 | | #include "internal.h" |
29 | | |
30 | | // #define CHECK_OUTPUT_DEFINED |
31 | | |
32 | | ///// |
33 | | |
// TODO: these functions are static and copied several times

/* Direction flag consulted by checkAttr(): when it is non-zero the lookup is
 * done in the table's character entries, when it is zero the dot-pattern
 * entries are used instead. */
int translation_direction = 1;
37 | | |
38 | | static TranslationTableCharacter * |
39 | 300 | findCharOrDots(widechar c, int m, const TranslationTableHeader *table) { |
40 | | /* Look up character or dot pattern in the appropriate |
41 | | * table. */ |
42 | 300 | static TranslationTableCharacter noChar = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, 32, 0, |
43 | 300 | 0 }; |
44 | 300 | static TranslationTableCharacter noDots = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, |
45 | 300 | LOU_DOTS, 0, 0 }; |
46 | 300 | TranslationTableCharacter *notFound; |
47 | 300 | TranslationTableCharacter *character; |
48 | 300 | TranslationTableOffset bucket; |
49 | 300 | unsigned long int makeHash = _lou_charHash(c); |
50 | 300 | if (m == 0) { |
51 | 300 | bucket = table->characters[makeHash]; |
52 | 300 | notFound = &noChar; |
53 | 300 | } else { |
54 | 0 | bucket = table->dots[makeHash]; |
55 | 0 | notFound = &noDots; |
56 | 0 | } |
57 | 300 | while (bucket) { |
58 | 222 | character = (TranslationTableCharacter *)&table->ruleArea[bucket]; |
59 | 222 | if (character->value == c) return character; |
60 | 0 | bucket = character->next; |
61 | 0 | } |
62 | 78 | notFound->value = c; |
63 | 78 | return notFound; |
64 | 300 | } |
65 | | |
66 | | static int |
67 | | checkAttr(const widechar c, const TranslationTableCharacterAttributes a, |
68 | 300 | const TranslationTableHeader *table) { |
69 | 300 | return (((findCharOrDots(c, translation_direction ? 0 : 1, table))->attributes & a) |
70 | 300 | ? 1 |
71 | 300 | : 0); |
72 | 300 | } |
73 | | |
74 | | ///// |
75 | | |
/* Node types stored in the first cell of every compiled expression. */
enum pattern_type {
	PTN_ERROR = 0, /* placeholder for a node that has not been compiled yet */

	PTN_START = 1, /* head of an expression list */
	PTN_GROUP = 2, /* ( ... ) */
	PTN_NOT = 3,   /* ! */

	PTN_ONE_MORE = 4,  /* + */
	PTN_ZERO_MORE = 5, /* * */
	PTN_OPTIONAL = 6,  /* ? */

	PTN_ALTERNATE = 7, /* | */

	PTN_ANY = 8,          /* . */
	PTN_ATTRIBUTES = 9,   /* % */
	PTN_CHARS = 10,       /* [ ... ] or a literal character */
	PTN_HOOK = 11,        /* @ */
	PTN_END_OF_INPUT = 12, /* ^ or $ */

	/* tail of an expression list; also used as the "no node" offset */
	PTN_END = 0xffff,
};
97 | | |
/* Accessors for one expression node stored flat in a widechar buffer.
 * Relative to the node offset `at` the cells are:
 *   +0 node type, +1 offset of the previous node, +2 offset of the next node,
 *   +3 and up: node specific data.
 * Both arguments are parenthesized so that arbitrary expressions (for example
 * `buf + 1`) can be passed as the buffer. */
#define EXPR_TYPE_IN(at, buffer) ((buffer)[(at) + 0])
#define EXPR_PRV_IN(at, buffer) ((buffer)[(at) + 1])
#define EXPR_NXT_IN(at, buffer) ((buffer)[(at) + 2])
#define EXPR_DATA_0_IN(at, buffer) ((buffer)[(at) + 3])
#define EXPR_DATA_1_IN(at, buffer) ((buffer)[(at) + 4])
#define EXPR_DATA_2_IN(at, buffer) ((buffer)[(at) + 5])
#define EXPR_DATA_IN(at, buffer) ((widechar *)&(buffer)[(at) + 3])
#define EXPR_CONST_DATA_IN(at, buffer) ((const widechar *)&(buffer)[(at) + 3])

/* Short forms that operate on a variable named `expr_data` in the current
 * scope. */
#define EXPR_TYPE(at) EXPR_TYPE_IN((at), expr_data)
#define EXPR_PRV(at) EXPR_PRV_IN((at), expr_data)
#define EXPR_NXT(at) EXPR_NXT_IN((at), expr_data)
#define EXPR_DATA_0(at) EXPR_DATA_0_IN((at), expr_data)
#define EXPR_DATA_1(at) EXPR_DATA_1_IN((at), expr_data)
#define EXPR_DATA_2(at) EXPR_DATA_2_IN((at), expr_data)
#define EXPR_DATA(at) EXPR_DATA_IN((at), expr_data)
#define EXPR_CONST_DATA(at) EXPR_CONST_DATA_IN((at), expr_data)
115 | | |
#ifndef CHECK_OUTPUT_DEFINED

/* Tracing is compiled out: every CHECK_OUTPUT() becomes an empty block. */
#define CHECK_OUTPUT(type, ret, line, msg) \
	{ ; }

#else

/* Tracing implies DEBUG. */
#ifndef DEBUG
#define DEBUG

#endif

/* Values for the `type` argument of CHECK_OUTPUT(). */
#define START 0
#define CALL 1
#define RETURN 2
#define SHOW 3

/* Forwards the trace request to do_output() together with the matcher state
 * found in the variables of the calling scope. */
#define CHECK_OUTPUT(type, ret, line, msg)                                          \
	{                                                                               \
		do_output(type, ret, line, input[*input_crs], input_minmax, *input_crs,     \
				input_dir, expr_data, expr_crs, notOperator, loop_crs, loop_cnts, msg); \
	}

#endif
140 | | |
/* Struct view of one expression node, using the same cell order that the
 * EXPR_* macros index: type, previous node offset, next node offset, then the
 * first data cell. */
struct expression {
	widechar type;
	widechar prv;
	widechar nxt;
	widechar data[1];
};

/* gdb won't know what this is unless it is actually used */
#ifdef DEBUG
static struct expression *expr_debug;
#endif
152 | | |
153 | | //////////////////////////////////////////////////////////////////////////////// |
154 | | |
/* Indentation state shared by the debug printers: each line is prefixed with
 * &spaces[space], so a smaller `space` prints more dots.  The printers lower
 * `space` before descending into a sub-expression and raise it afterwards. */
static char spaces[] = "..............................";
static int space = 30;
157 | | |
158 | | static void |
159 | | pattern_output_expression( |
160 | 0 | const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) { |
161 | 0 | int i; |
162 | 0 |
|
163 | 0 | if (expr_crs == PTN_END) return; |
164 | 0 |
|
165 | 0 | while (EXPR_TYPE(expr_crs) != PTN_END) { |
166 | 0 | printf("%s%d", &spaces[space], expr_crs); |
167 | 0 | if (expr_crs < 100) printf(" "); |
168 | 0 | if (expr_crs < 10) printf(" "); |
169 | 0 | for (i = 0; i < 13 - (30 - space); i++) printf(" "); |
170 | 0 |
|
171 | 0 | switch (EXPR_TYPE(expr_crs)) { |
172 | 0 | case PTN_START: |
173 | 0 |
|
174 | 0 | printf("START\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
175 | 0 | break; |
176 | 0 |
|
177 | 0 | case PTN_GROUP: |
178 | 0 |
|
179 | 0 | printf("( \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs), |
180 | 0 | EXPR_DATA_0(expr_crs)); |
181 | 0 | space--; |
182 | 0 | if (space < 0) space = 0; |
183 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
184 | 0 | space++; |
185 | 0 | if (space > 30) space = 30; |
186 | 0 | break; |
187 | 0 |
|
188 | 0 | case PTN_NOT: |
189 | 0 |
|
190 | 0 | printf("! \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs), |
191 | 0 | EXPR_DATA_0(expr_crs)); |
192 | 0 | space--; |
193 | 0 | if (space < 0) space = 0; |
194 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
195 | 0 | space++; |
196 | 0 | if (space > 30) space = 30; |
197 | 0 | break; |
198 | 0 |
|
199 | 0 | case PTN_ONE_MORE: |
200 | 0 |
|
201 | 0 | printf("+ \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs), |
202 | 0 | EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs)); |
203 | 0 | space--; |
204 | 0 | if (space < 0) space = 0; |
205 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
206 | 0 | space++; |
207 | 0 | if (space > 30) space = 30; |
208 | 0 | break; |
209 | 0 |
|
210 | 0 | case PTN_ZERO_MORE: |
211 | 0 |
|
212 | 0 | printf("* \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs), |
213 | 0 | EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs)); |
214 | 0 | space--; |
215 | 0 | if (space < 0) space = 0; |
216 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
217 | 0 | space++; |
218 | 0 | if (space > 30) space = 30; |
219 | 0 | break; |
220 | 0 |
|
221 | 0 | case PTN_OPTIONAL: |
222 | 0 |
|
223 | 0 | printf("? \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs), |
224 | 0 | EXPR_DATA_0(expr_crs)); |
225 | 0 | space--; |
226 | 0 | if (space < 0) space = 0; |
227 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
228 | 0 | space++; |
229 | 0 | if (space > 30) space = 30; |
230 | 0 | break; |
231 | 0 |
|
232 | 0 | case PTN_ALTERNATE: |
233 | 0 |
|
234 | 0 | printf("| \t%d\t%d\t-> %d\t-> %d\n", EXPR_PRV(expr_crs), |
235 | 0 | EXPR_NXT(expr_crs), EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs)); |
236 | 0 | space--; |
237 | 0 | if (space < 0) space = 0; |
238 | 0 | pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
239 | 0 | pattern_output_expression(expr_data, EXPR_DATA_1(expr_crs), table); |
240 | 0 | space++; |
241 | 0 | if (space > 30) space = 30; |
242 | 0 | break; |
243 | 0 |
|
244 | 0 | case PTN_ANY: |
245 | 0 |
|
246 | 0 | printf(". \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
247 | 0 | break; |
248 | 0 |
|
249 | 0 | case PTN_ATTRIBUTES: |
250 | 0 |
|
251 | 0 | printf("%% \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
252 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0"); |
253 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1"); |
254 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2"); |
255 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3"); |
256 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4"); |
257 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5"); |
258 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6"); |
259 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7"); |
260 | 0 | if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^"); |
261 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_"); |
262 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#"); |
263 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a"); |
264 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u"); |
265 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l"); |
266 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf("."); |
267 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$"); |
268 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~"); |
269 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<"); |
270 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">"); |
271 | 0 | puts(""); |
272 | 0 | break; |
273 | 0 |
|
274 | 0 | case PTN_CHARS: |
275 | 0 |
|
276 | 0 | printf("[] \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
277 | 0 | for (i = 0; i < EXPR_DATA_0(expr_crs); i++) |
278 | 0 | printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]); |
279 | 0 | puts(""); |
280 | 0 | break; |
281 | 0 |
|
282 | 0 | case PTN_HOOK: |
283 | 0 |
|
284 | 0 | printf("@ \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
285 | 0 | for (i = 0; i < EXPR_DATA_0(expr_crs); i++) |
286 | 0 | printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]); |
287 | 0 | puts(""); |
288 | 0 | break; |
289 | 0 |
|
290 | 0 | case PTN_END_OF_INPUT: |
291 | 0 |
|
292 | 0 | printf("^ \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
293 | 0 | break; |
294 | 0 |
|
295 | 0 | default: |
296 | 0 |
|
297 | 0 | printf("%d? \t%d\t%d\n", EXPR_TYPE(expr_crs), EXPR_PRV(expr_crs), |
298 | 0 | EXPR_NXT(expr_crs)); |
299 | 0 | break; |
300 | 0 | } |
301 | 0 |
|
302 | 0 | expr_crs = EXPR_NXT(expr_crs); |
303 | 0 | } |
304 | 0 |
|
305 | 0 | printf("%s%d", &spaces[space], expr_crs); |
306 | 0 | if (expr_crs < 100) printf(" "); |
307 | 0 | if (expr_crs < 10) printf(" "); |
308 | 0 | for (i = 0; i < 13 - (30 - space); i++) printf(" "); |
309 | 0 | printf("END\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs)); |
310 | 0 | fflush(stdout); |
311 | 0 | return; |
312 | 0 | } |
313 | | |
314 | | static void |
315 | 0 | pattern_output(const widechar *expr_data, const TranslationTableHeader *table) { |
316 | 0 | printf("%d \tlength\n", expr_data[0]); |
317 | 0 | printf("%d \tloops\n", expr_data[1]); |
318 | 0 | if (expr_data[0] > 0 && expr_data[0] != PTN_END) |
319 | 0 | pattern_output_expression(expr_data, 2, table); |
320 | 0 | } |
321 | | |
322 | | static void |
323 | | pattern_print_expression( |
324 | 0 | const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) { |
325 | 0 | int i; |
326 | 0 |
|
327 | 0 | if (expr_crs == PTN_END) return; |
328 | 0 |
|
329 | 0 | while (EXPR_TYPE(expr_crs) != PTN_END) { |
330 | 0 | switch (EXPR_TYPE(expr_crs)) { |
331 | 0 | case PTN_START: |
332 | 0 | break; |
333 | 0 |
|
334 | 0 | case PTN_GROUP: |
335 | 0 |
|
336 | 0 | printf(" ("); |
337 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
338 | 0 | printf(") "); |
339 | 0 | break; |
340 | 0 |
|
341 | 0 | case PTN_NOT: |
342 | 0 |
|
343 | 0 | printf("!"); |
344 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
345 | 0 | break; |
346 | 0 |
|
347 | 0 | case PTN_ONE_MORE: |
348 | 0 |
|
349 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
350 | 0 | printf("+"); |
351 | 0 | break; |
352 | 0 |
|
353 | 0 | case PTN_ZERO_MORE: |
354 | 0 |
|
355 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
356 | 0 | printf("*"); |
357 | 0 | break; |
358 | 0 |
|
359 | 0 | case PTN_OPTIONAL: |
360 | 0 |
|
361 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
362 | 0 | printf("?"); |
363 | 0 | break; |
364 | 0 |
|
365 | 0 | case PTN_ALTERNATE: |
366 | 0 |
|
367 | 0 | pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table); |
368 | 0 | printf(" | "); |
369 | 0 | pattern_print_expression(expr_data, EXPR_DATA_1(expr_crs), table); |
370 | 0 | break; |
371 | 0 |
|
372 | 0 | case PTN_ANY: |
373 | 0 |
|
374 | 0 | printf("."); |
375 | 0 | break; |
376 | 0 |
|
377 | 0 | case PTN_ATTRIBUTES: |
378 | 0 |
|
379 | 0 | printf("%%["); |
380 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0"); |
381 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1"); |
382 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2"); |
383 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3"); |
384 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4"); |
385 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5"); |
386 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6"); |
387 | 0 | if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7"); |
388 | 0 | if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^"); |
389 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_"); |
390 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#"); |
391 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a"); |
392 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u"); |
393 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l"); |
394 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf("."); |
395 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$"); |
396 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~"); |
397 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<"); |
398 | 0 | if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">"); |
399 | 0 | printf("]"); |
400 | 0 | break; |
401 | 0 |
|
402 | 0 | case PTN_CHARS: |
403 | 0 |
|
404 | 0 | if (EXPR_DATA_0(expr_crs) == 1) |
405 | 0 | printf("%c", EXPR_DATA_1(expr_crs)); |
406 | 0 | else { |
407 | 0 | printf("["); |
408 | 0 | for (i = 0; i < EXPR_DATA_0(expr_crs); i++) |
409 | 0 | printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]); |
410 | 0 | printf("]"); |
411 | 0 | } |
412 | 0 | break; |
413 | 0 |
|
414 | 0 | case PTN_HOOK: |
415 | 0 |
|
416 | 0 | printf("@["); |
417 | 0 | for (i = 0; i < EXPR_DATA_0(expr_crs); i++) |
418 | 0 | printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]); |
419 | 0 | printf("]"); |
420 | 0 | break; |
421 | 0 |
|
422 | 0 | case PTN_END_OF_INPUT: |
423 | 0 |
|
424 | 0 | printf("^"); |
425 | 0 | break; |
426 | 0 |
|
427 | 0 | // default: printf("%d?\n", EXPR_TYPE(expr_crs)); break; |
428 | 0 | } |
429 | 0 |
|
430 | 0 | expr_crs = EXPR_NXT(expr_crs); |
431 | 0 | } |
432 | 0 |
|
433 | 0 | return; |
434 | 0 | } |
435 | | |
436 | | static void |
437 | 0 | pattern_print(const widechar *expr_data, const TranslationTableHeader *table) { |
438 | 0 | if (expr_data[0] > 0 && expr_data[0] != PTN_END) |
439 | 0 | pattern_print_expression(expr_data, 2, table); |
440 | 0 | puts(""); |
441 | 0 | } |
442 | | |
443 | | #ifdef CHECK_OUTPUT_DEFINED |
444 | | |
/* Write one blank for each of the thresholds 100000, 10000, 1000, 100 and 10
 * that `value` is below, so that numbers of up to six digits line up. */
static void
do_padd(const int value) {
	int limit;

	for (limit = 100000; limit >= 10; limit /= 10) {
		if (value < limit) printf(" ");
	}
}
453 | | |
/* Write one blank for each of the thresholds 100 and 10 that `value` is
 * below, so that numbers of up to three digits line up. */
static void
do_pad(const int value) {
	int limit;

	for (limit = 100; limit >= 10; limit /= 10) {
		if (value < limit) printf(" ");
	}
}
459 | | |
460 | | static void |
461 | | do_output(const int type, const int ret, const int line, |
462 | | |
463 | | const int input, const int input_minmax, const int input_crs, const int input_dir, |
464 | | const widechar *expr_data, const int expr_crs, const int notOperator, |
465 | | const int loop_crs, const int *loop_cnts, |
466 | | |
467 | | const char *msg) { |
468 | | switch (type) { |
469 | | case START: |
470 | | |
471 | | space--; |
472 | | if (space < 0) space = 0; |
473 | | printf("|%s() ", &spaces[space]); |
474 | | break; |
475 | | |
476 | | case CALL: |
477 | | |
478 | | printf("|%s> ", &spaces[space]); |
479 | | break; |
480 | | |
481 | | case RETURN: |
482 | | |
483 | | printf("|%s<%d ", &spaces[space], ret); |
484 | | space++; |
485 | | if (space > 31) space = 31; |
486 | | break; |
487 | | |
488 | | case SHOW: |
489 | | |
490 | | printf("|%s ", &spaces[space]); |
491 | | break; |
492 | | } |
493 | | |
494 | | printf("%d ", line); |
495 | | do_padd(line); |
496 | | |
497 | | switch (expr_data[expr_crs]) { |
498 | | case PTN_ERROR: |
499 | | printf("# "); |
500 | | break; |
501 | | case PTN_START: |
502 | | printf("> "); |
503 | | break; |
504 | | case PTN_END_OF_INPUT: |
505 | | printf("^ "); |
506 | | break; |
507 | | case PTN_ALTERNATE: |
508 | | printf("| "); |
509 | | break; |
510 | | case PTN_OPTIONAL: |
511 | | printf("? "); |
512 | | break; |
513 | | case PTN_ONE_MORE: |
514 | | printf("+ "); |
515 | | break; |
516 | | case PTN_ZERO_MORE: |
517 | | printf("* "); |
518 | | break; |
519 | | case PTN_NOT: |
520 | | printf("! "); |
521 | | break; |
522 | | case PTN_GROUP: |
523 | | printf("( "); |
524 | | break; |
525 | | case PTN_ANY: |
526 | | printf(". "); |
527 | | break; |
528 | | case PTN_ATTRIBUTES: |
529 | | printf("%% "); |
530 | | break; |
531 | | case PTN_CHARS: |
532 | | printf("[ "); |
533 | | break; |
534 | | case PTN_HOOK: |
535 | | printf("@ "); |
536 | | break; |
537 | | case PTN_END: |
538 | | printf("< "); |
539 | | break; |
540 | | default: |
541 | | printf(" "); |
542 | | break; |
543 | | } |
544 | | printf("%d ", expr_crs); |
545 | | do_padd(expr_crs); |
546 | | |
547 | | if (input > 31 && input < 127) |
548 | | printf("%c ", input); |
549 | | else |
550 | | printf("_ "); |
551 | | |
552 | | if (input_crs * input_dir >= input_minmax * input_dir) |
553 | | printf("# "); |
554 | | else { |
555 | | printf("%d ", input_crs); |
556 | | do_pad(input_crs); |
557 | | } |
558 | | |
559 | | if (input_dir > 0) |
560 | | printf("<"); |
561 | | else |
562 | | printf(">"); |
563 | | printf("%d ", input_minmax); |
564 | | do_pad(input_minmax); |
565 | | |
566 | | if (notOperator) |
567 | | printf("! "); |
568 | | else |
569 | | printf(" "); |
570 | | |
571 | | if (loop_crs) { |
572 | | printf("%d ", loop_crs); |
573 | | do_pad(loop_crs); |
574 | | printf("%d ", loop_cnts[EXPR_DATA_1(loop_crs)]); |
575 | | do_pad(loop_cnts[EXPR_DATA_1(loop_crs)]); |
576 | | } else |
577 | | printf("- - "); |
578 | | if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE || EXPR_TYPE(expr_crs) == PTN_ZERO_MORE) { |
579 | | printf("%d ", loop_cnts[EXPR_DATA_1(expr_crs)]); |
580 | | do_pad(loop_cnts[EXPR_DATA_1(expr_crs)]); |
581 | | } else |
582 | | printf("- "); |
583 | | |
584 | | if (msg) printf("%s", msg); |
585 | | puts(""); |
586 | | } |
587 | | |
588 | | #endif |
589 | | |
590 | | //////////////////////////////////////////////////////////////////////////////// |
591 | | |
/* Forward declaration: pattern_compile_expression() calls this to compile the
 * contents of a group before the function is defined. */
static int
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
		widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
		TranslationTableHeader *table, const FileInfo *nested);
596 | | |
597 | | static int |
598 | | pattern_compile_expression(const widechar *input, const int input_max, int *input_crs, |
599 | | widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts, |
600 | 41 | TranslationTableHeader *table, const FileInfo *nested) { |
601 | 41 | widechar *data; |
602 | 41 | int expr_start, expr_end, expr_sub, expr_crs_prv; |
603 | 41 | int input_end; |
604 | 41 | int attrs0, attrs1; |
605 | 41 | int set, esc, nest, i; |
606 | | |
607 | 41 | if (*input_crs >= input_max) return 0; |
608 | 41 | switch (input[*input_crs]) { |
609 | 0 | case '(': |
610 | |
|
611 | 0 | if (*expr_crs + 10 >= expr_max) return 0; |
612 | | |
613 | 0 | (*input_crs)++; |
614 | 0 | if (*input_crs >= input_max) return 0; |
615 | | |
616 | | /* find closing parenthesis */ |
617 | 0 | nest = esc = 0; |
618 | 0 | for (input_end = *input_crs; input_end < input_max; input_end++) { |
619 | 0 | if (input[input_end] == '\\' && !esc) { |
620 | 0 | esc = 1; |
621 | 0 | continue; |
622 | 0 | } |
623 | | |
624 | 0 | if (input[input_end] == '(' && !esc) |
625 | 0 | nest++; |
626 | 0 | else if (input[input_end] == ')' && !esc) { |
627 | 0 | if (nest) |
628 | 0 | nest--; |
629 | 0 | else |
630 | 0 | break; |
631 | 0 | } |
632 | | |
633 | 0 | esc = 0; |
634 | 0 | } |
635 | 0 | if (input_end >= input_max) return 0; |
636 | | |
637 | 0 | EXPR_TYPE(*expr_crs) = PTN_GROUP; |
638 | | |
639 | | /* compile sub expressions */ |
640 | 0 | expr_crs_prv = *expr_crs; |
641 | 0 | *expr_crs += 4; |
642 | 0 | EXPR_DATA_0(expr_crs_prv) = *expr_crs; |
643 | 0 | expr_sub = *expr_crs; |
644 | 0 | EXPR_TYPE(expr_sub) = PTN_ERROR; |
645 | 0 | EXPR_PRV(expr_sub) = PTN_END; |
646 | 0 | EXPR_NXT(expr_sub) = PTN_END; |
647 | 0 | if (!pattern_compile_1(input, input_end, input_crs, expr_data, expr_max, expr_crs, |
648 | 0 | loop_cnts, table, nested)) |
649 | 0 | return 0; |
650 | 0 | (*input_crs)++; |
651 | | |
652 | | /* reset end expression */ |
653 | 0 | expr_end = *expr_crs; |
654 | 0 | EXPR_NXT(expr_end) = expr_crs_prv; |
655 | |
|
656 | 0 | return *expr_crs += 3; |
657 | | |
658 | 10 | case '!': |
659 | | |
660 | 10 | if (*expr_crs + 10 >= expr_max) return 0; |
661 | | |
662 | 10 | (*input_crs)++; |
663 | 10 | EXPR_TYPE(*expr_crs) = PTN_NOT; |
664 | 10 | expr_crs_prv = *expr_crs; |
665 | 10 | *expr_crs += 4; |
666 | 10 | EXPR_DATA_0(expr_crs_prv) = *expr_crs; |
667 | | |
668 | | /* create start expression */ |
669 | 10 | expr_start = *expr_crs; |
670 | 10 | EXPR_TYPE(expr_start) = PTN_START; |
671 | 10 | EXPR_PRV(expr_start) = PTN_END; |
672 | 10 | *expr_crs += 3; |
673 | 10 | EXPR_NXT(expr_start) = *expr_crs; |
674 | | |
675 | | /* compile sub expression */ |
676 | 10 | expr_sub = *expr_crs; |
677 | 10 | EXPR_TYPE(expr_sub) = PTN_ERROR; |
678 | 10 | EXPR_PRV(expr_sub) = expr_start; |
679 | 10 | EXPR_NXT(expr_sub) = PTN_END; |
680 | | |
681 | 10 | if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max, |
682 | 10 | expr_crs, loop_cnts, table, nested)) |
683 | 0 | return 0; |
684 | | |
685 | 10 | if (*expr_crs + 3 >= expr_max) return 0; |
686 | | |
687 | 10 | EXPR_NXT(expr_sub) = *expr_crs; |
688 | | |
689 | | /* create end expression */ |
690 | 10 | expr_end = *expr_crs; |
691 | 10 | EXPR_TYPE(expr_end) = PTN_END; |
692 | 10 | EXPR_PRV(expr_end) = expr_sub; |
693 | 10 | EXPR_NXT(expr_end) = expr_crs_prv; |
694 | | |
695 | 10 | return *expr_crs += 3; |
696 | | |
697 | 0 | case '+': |
698 | |
|
699 | 0 | if (*expr_crs + 5 >= expr_max) return 0; |
700 | 0 | EXPR_TYPE(*expr_crs) = PTN_ONE_MORE; |
701 | 0 | EXPR_DATA_1(*expr_crs) = (*loop_cnts)++; |
702 | 0 | (*input_crs)++; |
703 | 0 | return *expr_crs += 5; |
704 | | |
705 | 1 | case '*': |
706 | | |
707 | 1 | if (*expr_crs + 5 >= expr_max) return 0; |
708 | 1 | EXPR_TYPE(*expr_crs) = PTN_ZERO_MORE; |
709 | 1 | EXPR_DATA_1(*expr_crs) = (*loop_cnts)++; |
710 | 1 | (*input_crs)++; |
711 | 1 | return *expr_crs += 5; |
712 | | |
713 | 0 | case '?': |
714 | |
|
715 | 0 | if (*expr_crs + 4 >= expr_max) return 0; |
716 | 0 | EXPR_TYPE(*expr_crs) = PTN_OPTIONAL; |
717 | 0 | (*input_crs)++; |
718 | 0 | return *expr_crs += 4; |
719 | | |
720 | 0 | case '|': |
721 | |
|
722 | 0 | if (*expr_crs + 5 >= expr_max) return 0; |
723 | 0 | EXPR_TYPE(*expr_crs) = PTN_ALTERNATE; |
724 | 0 | (*input_crs)++; |
725 | 0 | return *expr_crs += 5; |
726 | | |
727 | 2 | case '.': |
728 | | |
729 | 2 | if (*expr_crs + 3 >= expr_max) return 0; |
730 | 2 | EXPR_TYPE(*expr_crs) = PTN_ANY; |
731 | 2 | (*input_crs)++; |
732 | 2 | return *expr_crs += 3; |
733 | | |
734 | 2 | case '%': |
735 | | |
736 | 2 | if (*expr_crs + 5 >= expr_max) return 0; |
737 | | |
738 | 2 | (*input_crs)++; |
739 | 2 | if (*input_crs >= input_max) return 0; |
740 | | |
741 | | /* find closing bracket */ |
742 | 2 | if (input[*input_crs] == '[') { |
743 | 0 | set = 1; |
744 | 0 | (*input_crs)++; |
745 | 0 | for (input_end = *input_crs; input_end < input_max; input_end++) |
746 | 0 | if (input[input_end] == ']') break; |
747 | 0 | if (input_end >= input_max) return 0; |
748 | 2 | } else { |
749 | 2 | set = 0; |
750 | 2 | input_end = *input_crs + 1; |
751 | 2 | } |
752 | | |
753 | 2 | EXPR_TYPE(*expr_crs) = PTN_ATTRIBUTES; |
754 | | |
755 | 2 | attrs0 = attrs1 = 0; |
756 | 4 | for (; (*input_crs) < input_end; (*input_crs)++) { |
757 | 2 | switch (input[*input_crs]) { |
758 | 0 | case '_': |
759 | 0 | attrs0 |= CTC_Space; |
760 | 0 | break; |
761 | 0 | case '#': |
762 | 0 | attrs0 |= CTC_Digit; |
763 | 0 | break; |
764 | 0 | case 'a': |
765 | 0 | attrs0 |= CTC_Letter; |
766 | 0 | break; |
767 | 2 | case 'u': |
768 | 2 | attrs0 |= CTC_UpperCase; |
769 | 2 | break; |
770 | 0 | case 'l': |
771 | 0 | attrs0 |= CTC_LowerCase; |
772 | 0 | break; |
773 | 0 | case '.': |
774 | 0 | attrs0 |= CTC_Punctuation; |
775 | 0 | break; |
776 | 0 | case '$': |
777 | 0 | attrs0 |= CTC_Sign; |
778 | 0 | break; |
779 | 0 | case 'm': |
780 | 0 | attrs0 |= CTC_Math; |
781 | 0 | break; |
782 | 0 | case '~': |
783 | 0 | attrs0 |= CTC_SeqDelimiter; |
784 | 0 | break; |
785 | 0 | case '<': |
786 | 0 | attrs0 |= CTC_SeqBefore; |
787 | 0 | break; |
788 | 0 | case '>': |
789 | 0 | attrs0 |= CTC_SeqAfter; |
790 | 0 | break; |
791 | | |
792 | 0 | case '0': |
793 | 0 | case '1': |
794 | 0 | case '2': |
795 | 0 | case '3': |
796 | 0 | case '4': |
797 | 0 | case '5': |
798 | 0 | case '6': |
799 | 0 | case '7': { |
800 | 0 | int k = input[*input_crs] - '0'; |
801 | 0 | TranslationTableCharacterAttributes a = table->numberedAttributes[k]; |
802 | 0 | if (!a) { |
803 | | // attribute not used before yet: assign it a value |
804 | 0 | a = table->numberedAttributes[k] = |
805 | 0 | table->nextNumberedCharacterClassAttribute; |
806 | 0 | if (a > CTC_UserDefined8) { |
807 | 0 | _lou_logMessage(LOU_LOG_ERROR, |
808 | 0 | "%s:%d: error: Too many character attributes defined", |
809 | 0 | nested->fileName, nested->lineNumber); |
810 | 0 | return 0; |
811 | 0 | } |
812 | 0 | table->nextNumberedCharacterClassAttribute <<= 1; |
813 | 0 | } |
814 | 0 | attrs1 |= (a >> 16); |
815 | 0 | break; |
816 | 0 | } |
817 | 0 | case '^': |
818 | 0 | attrs1 |= (CTC_EndOfInput >> 16); |
819 | 0 | break; |
820 | | |
821 | 0 | default: |
822 | 0 | return 0; |
823 | 2 | } |
824 | 2 | } |
825 | 2 | EXPR_DATA_0(*expr_crs) = attrs1; |
826 | 2 | EXPR_DATA_1(*expr_crs) = attrs0; |
827 | | |
828 | 2 | if (set) (*input_crs)++; |
829 | 2 | return *expr_crs += 5; |
830 | | |
831 | 0 | case '[': |
832 | |
|
833 | 0 | (*input_crs)++; |
834 | 0 | if (*input_crs >= input_max) return 0; |
835 | | |
836 | | /* find closing bracket */ |
837 | 0 | esc = 0; |
838 | 0 | for (input_end = *input_crs; input_end < input_max; input_end++) { |
839 | 0 | if (input[input_end] == '\\' && !esc) { |
840 | 0 | esc = 1; |
841 | 0 | continue; |
842 | 0 | } |
843 | | |
844 | 0 | if (input[input_end] == ']' && !esc) break; |
845 | 0 | esc = 0; |
846 | 0 | } |
847 | 0 | if (input_end >= input_max) return 0; |
848 | | |
849 | 0 | if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0; |
850 | | |
851 | 0 | EXPR_TYPE(*expr_crs) = PTN_CHARS; |
852 | |
|
853 | 0 | esc = 0; |
854 | 0 | data = EXPR_DATA(*expr_crs); |
855 | 0 | for (i = 1; *input_crs < input_end; (*input_crs)++) { |
856 | 0 | if (input[*input_crs] == '\\' && !esc) { |
857 | 0 | esc = 1; |
858 | 0 | continue; |
859 | 0 | } |
860 | | |
861 | 0 | esc = 0; |
862 | 0 | data[i++] = (widechar)input[*input_crs]; |
863 | 0 | } |
864 | 0 | data[0] = i - 1; |
865 | 0 | (*input_crs)++; |
866 | 0 | return *expr_crs += 4 + data[0]; |
867 | | |
868 | 0 | case '@': |
869 | |
|
870 | 0 | (*input_crs)++; |
871 | 0 | if (*input_crs >= input_max) return 0; |
872 | | |
873 | | /* find closing bracket */ |
874 | 0 | if (input[*input_crs] == '[') { |
875 | 0 | set = 1; |
876 | 0 | (*input_crs)++; |
877 | 0 | for (input_end = *input_crs; input_end < input_max; input_end++) |
878 | 0 | if (input[input_end] == ']') break; |
879 | 0 | if (input_end >= input_max) return 0; |
880 | 0 | } else { |
881 | 0 | set = 0; |
882 | 0 | input_end = *input_crs + 1; |
883 | 0 | } |
884 | | |
885 | 0 | if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0; |
886 | | |
887 | 0 | EXPR_TYPE(*expr_crs) = PTN_HOOK; |
888 | |
|
889 | 0 | esc = 0; |
890 | 0 | data = EXPR_DATA(*expr_crs); |
891 | 0 | for (i = 1; *input_crs < input_end; (*input_crs)++) { |
892 | 0 | if (input[*input_crs] == '\\' && !esc) { |
893 | 0 | esc = 1; |
894 | 0 | continue; |
895 | 0 | } |
896 | | |
897 | 0 | esc = 0; |
898 | 0 | data[i++] = (widechar)input[*input_crs]; |
899 | 0 | } |
900 | 0 | data[0] = i - 1; |
901 | 0 | if (set) (*input_crs)++; |
902 | 0 | return *expr_crs += 4 + data[0]; |
903 | | |
904 | 0 | case '^': |
905 | 4 | case '$': |
906 | | |
907 | 4 | if (*expr_crs + 3 >= expr_max) return 0; |
908 | 4 | EXPR_TYPE(*expr_crs) = PTN_END_OF_INPUT; |
909 | 4 | (*input_crs)++; |
910 | 4 | return *expr_crs += 3; |
911 | | |
912 | 0 | case '\\': |
913 | |
|
914 | 0 | (*input_crs)++; |
915 | 0 | if (*input_crs >= input_max) return 0; |
916 | | |
917 | 22 | default: |
918 | | |
919 | 22 | if (*expr_crs + 5 >= expr_max) return 0; |
920 | 22 | EXPR_TYPE(*expr_crs) = PTN_CHARS; |
921 | 22 | EXPR_DATA_0(*expr_crs) = 1; |
922 | 22 | EXPR_DATA_1(*expr_crs) = (widechar)input[*input_crs]; |
923 | 22 | (*input_crs)++; |
924 | 22 | return *expr_crs += 5; |
925 | 41 | } |
926 | 41 | } |
927 | | |
928 | | static int |
929 | | pattern_insert_alternate(const widechar *input, const int input_max, int *input_crs, |
930 | | widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts, |
931 | 0 | int expr_insert, TranslationTableHeader *table, const FileInfo *nested) { |
932 | 0 | int expr_group, expr_alt, expr_end; |
933 | 0 |
|
934 | 0 | if (EXPR_TYPE(*expr_crs) == PTN_START) return 0; |
935 | 0 |
|
936 | 0 | if (*expr_crs + 12 >= expr_max) return 0; |
937 | 0 |
|
938 | 0 | /* setup alternate expression */ |
939 | 0 | expr_alt = *expr_crs; |
940 | 0 | EXPR_TYPE(expr_alt) = PTN_ALTERNATE; |
941 | 0 | EXPR_PRV(expr_alt) = PTN_END; |
942 | 0 | EXPR_NXT(expr_alt) = PTN_END; |
943 | 0 | *expr_crs += 5; |
944 | 0 |
|
945 | 0 | /* setup group expression */ |
946 | 0 | expr_group = *expr_crs; |
947 | 0 | EXPR_TYPE(expr_group) = PTN_GROUP; |
948 | 0 | EXPR_PRV(expr_group) = PTN_END; |
949 | 0 | EXPR_NXT(expr_group) = PTN_END; |
950 | 0 | *expr_crs += 4; |
951 | 0 | EXPR_DATA_0(expr_group) = *expr_crs; |
952 | 0 |
|
953 | 0 | EXPR_TYPE(*expr_crs) = PTN_ERROR; |
954 | 0 | EXPR_PRV(*expr_crs) = PTN_END; |
955 | 0 | EXPR_NXT(*expr_crs) = PTN_END; |
956 | 0 | if (!pattern_compile_1(input, input_max, input_crs, expr_data, expr_max, expr_crs, |
957 | 0 | loop_cnts, table, nested)) |
958 | 0 | return 0; |
959 | 0 | expr_end = *expr_crs; |
960 | 0 | EXPR_NXT(expr_end) = expr_group; |
961 | 0 |
|
962 | 0 | /* setup last end expression */ |
963 | 0 | if (*expr_crs + 3 >= expr_max) return 0; |
964 | 0 | *expr_crs += 3; |
965 | 0 | EXPR_TYPE(*expr_crs) = PTN_END; |
966 | 0 | EXPR_NXT(*expr_crs) = PTN_END; |
967 | 0 |
|
968 | 0 | /* replace insert expression with group expression using last end expression */ |
969 | 0 | EXPR_NXT(EXPR_PRV(expr_insert)) = expr_group; |
970 | 0 | EXPR_PRV(expr_group) = EXPR_PRV(expr_insert); |
971 | 0 |
|
972 | 0 | EXPR_NXT(expr_group) = *expr_crs; |
973 | 0 | EXPR_PRV(*expr_crs) = expr_group; |
974 | 0 |
|
975 | 0 | /* link alternate and insert expressions before group end expression */ |
976 | 0 | EXPR_NXT(EXPR_PRV(expr_end)) = expr_alt; |
977 | 0 | EXPR_PRV(expr_alt) = EXPR_PRV(expr_end); |
978 | 0 |
|
979 | 0 | EXPR_NXT(expr_alt) = expr_insert; |
980 | 0 | EXPR_PRV(expr_insert) = expr_alt; |
981 | 0 |
|
982 | 0 | EXPR_NXT(expr_insert) = expr_end; |
983 | 0 | EXPR_PRV(expr_end) = expr_insert; |
984 | 0 |
|
985 | 0 | return *expr_crs; |
986 | 0 | } |
987 | | |
988 | | /* Compile all expression sequences, resolving character sets, attributes, |
989 | | * groups, nots, and hooks. Note that unlike the other compile functions, on |
990 | | * returning the expr_crs is set to the last end expression, not after it. |
991 | | */ |
992 | | static int |
993 | | pattern_compile_1(const widechar *input, const int input_max, int *input_crs, |
994 | | widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts, |
995 | 4 | TranslationTableHeader *table, const FileInfo *nested) { |
996 | 4 | int expr_crs_prv; |
997 | | |
998 | 4 | if (*expr_crs + 6 >= expr_max) return 0; |
999 | | |
1000 | 4 | expr_crs_prv = *expr_crs; |
1001 | | |
1002 | | /* setup start expression */ |
1003 | 4 | EXPR_TYPE(*expr_crs) = PTN_START; |
1004 | 4 | EXPR_PRV(*expr_crs) = PTN_END; |
1005 | 4 | *expr_crs += 3; |
1006 | 4 | EXPR_NXT(expr_crs_prv) = *expr_crs; |
1007 | | |
1008 | | /* setup end expression */ |
1009 | 4 | EXPR_TYPE(*expr_crs) = PTN_END; |
1010 | 4 | EXPR_PRV(*expr_crs) = expr_crs_prv; |
1011 | 4 | EXPR_NXT(*expr_crs) = PTN_END; |
1012 | | |
1013 | 35 | while (*input_crs < input_max) { |
1014 | 31 | expr_crs_prv = *expr_crs; |
1015 | 31 | if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max, |
1016 | 31 | expr_crs, loop_cnts, table, nested)) |
1017 | 0 | return 0; |
1018 | | |
1019 | | /* setup end expression */ |
1020 | 31 | if (*expr_crs + 3 >= expr_max) return 0; |
1021 | 31 | EXPR_NXT(expr_crs_prv) = *expr_crs; |
1022 | 31 | EXPR_TYPE(*expr_crs) = PTN_END; |
1023 | 31 | EXPR_PRV(*expr_crs) = expr_crs_prv; |
1024 | 31 | EXPR_NXT(*expr_crs) = PTN_END; |
1025 | | |
1026 | | /* insert seqafterexpression before attributes of seqafterchars */ |
1027 | | // if(EXPR_TYPE(expr_crs_prv) == PTN_ATTRIBUTES) |
1028 | | // if(EXPR_DATA_1(expr_crs_prv) & CTC_SeqAfter) |
1029 | | // { |
1030 | | // i = 0; |
1031 | | // pattern_insert_alternate(table->seqAfterExpression, |
1032 | | // table->seqAfterExpressionLength, &i, expr_data, expr_max, |
1033 | | // expr_crs, loop_cnts, expr_crs_prv); |
1034 | | // } |
1035 | 31 | } |
1036 | | |
1037 | 4 | return *expr_crs; |
1038 | 4 | } |
1039 | | |
1040 | | /* Resolve optional and loop expressions. |
1041 | | */ |
1042 | | static int |
1043 | | pattern_compile_2( |
1044 | 14 | widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) { |
1045 | 14 | int expr_start, expr_end, expr_prv, expr_sub; |
1046 | | |
1047 | 69 | while (EXPR_TYPE(expr_at) != PTN_END) { |
1048 | 55 | if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT) { |
1049 | 10 | if (!pattern_compile_2(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs)) |
1050 | 0 | return 0; |
1051 | 10 | } |
1052 | | |
1053 | 55 | if (EXPR_TYPE(expr_at) == PTN_ZERO_MORE || EXPR_TYPE(expr_at) == PTN_ONE_MORE || |
1054 | 54 | EXPR_TYPE(expr_at) == PTN_OPTIONAL) { |
1055 | 1 | if (*expr_crs + 6 >= expr_max) return 0; |
1056 | | |
1057 | | /* get previous expressions, there must |
1058 | | * be at least something and a PTN_START */ |
1059 | 1 | expr_sub = EXPR_PRV(expr_at); |
1060 | 1 | if (EXPR_TYPE(expr_sub) == PTN_START) return 0; |
1061 | 1 | expr_prv = EXPR_PRV(expr_sub); |
1062 | | |
1063 | | /* create start expression */ |
1064 | 1 | expr_start = *expr_crs; |
1065 | 1 | EXPR_TYPE(expr_start) = PTN_START; |
1066 | 1 | EXPR_PRV(expr_start) = PTN_END; |
1067 | 1 | EXPR_NXT(expr_start) = expr_sub; |
1068 | 1 | *expr_crs += 3; |
1069 | | |
1070 | | /* create end expression */ |
1071 | 1 | expr_end = *expr_crs; |
1072 | 1 | EXPR_TYPE(expr_end) = PTN_END; |
1073 | 1 | EXPR_PRV(expr_end) = expr_sub; |
1074 | 1 | EXPR_NXT(expr_end) = expr_at; |
1075 | 1 | *expr_crs += 3; |
1076 | | |
1077 | | /* relink previous expression before sub expression */ |
1078 | 1 | EXPR_DATA_0(expr_at) = expr_start; |
1079 | 1 | EXPR_NXT(expr_prv) = expr_at; |
1080 | 1 | EXPR_PRV(expr_at) = expr_prv; |
1081 | | |
1082 | | /* relink sub expression to start and end */ |
1083 | 1 | EXPR_PRV(expr_sub) = expr_start; |
1084 | 1 | EXPR_NXT(expr_sub) = expr_end; |
1085 | 1 | } |
1086 | | |
1087 | 55 | expr_at = EXPR_NXT(expr_at); |
1088 | 55 | } |
1089 | | |
1090 | 14 | return 1; |
1091 | 14 | } |
1092 | | |
1093 | | /* Resolves alternative expressions. |
1094 | | */ |
1095 | | static int |
1096 | | pattern_compile_3( |
1097 | 15 | widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) { |
1098 | 15 | int expr_mrk, expr_start, expr_end, expr_sub_start, expr_sub_end; |
1099 | | |
1100 | 71 | while (EXPR_TYPE(expr_at) != PTN_END) { |
1101 | 56 | if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT || |
1102 | 46 | EXPR_TYPE(expr_at) == PTN_OPTIONAL || |
1103 | 46 | EXPR_TYPE(expr_at) == PTN_ZERO_MORE || |
1104 | 45 | EXPR_TYPE(expr_at) == PTN_ONE_MORE) { |
1105 | 11 | if (!pattern_compile_3(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs)) |
1106 | 0 | return 0; |
1107 | 11 | } |
1108 | | |
1109 | 56 | if (EXPR_TYPE(expr_at) == PTN_ALTERNATE) { |
1110 | 0 | if (*expr_crs + 12 >= expr_max) return 0; |
1111 | | |
1112 | | /* get previous start expression, |
1113 | | * can include alternate expressions */ |
1114 | 0 | expr_mrk = EXPR_PRV(expr_at); |
1115 | 0 | if (EXPR_TYPE(expr_mrk) == PTN_START) return 0; |
1116 | 0 | expr_sub_end = expr_mrk; |
1117 | 0 | while (EXPR_TYPE(expr_mrk) != PTN_START) expr_mrk = EXPR_PRV(expr_mrk); |
1118 | 0 | expr_sub_start = EXPR_NXT(expr_mrk); |
1119 | | |
1120 | | /* create first start expression */ |
1121 | 0 | expr_start = *expr_crs; |
1122 | 0 | EXPR_TYPE(expr_start) = PTN_START; |
1123 | 0 | EXPR_PRV(expr_start) = PTN_END; |
1124 | 0 | EXPR_NXT(expr_start) = expr_sub_start; |
1125 | 0 | *expr_crs += 3; |
1126 | | |
1127 | | /* create first end expression */ |
1128 | 0 | expr_end = *expr_crs; |
1129 | 0 | EXPR_TYPE(expr_end) = PTN_END; |
1130 | 0 | EXPR_PRV(expr_end) = expr_sub_end; |
1131 | 0 | EXPR_NXT(expr_end) = expr_at; |
1132 | 0 | *expr_crs += 3; |
1133 | | |
1134 | | /* relink previous expression before sub expression */ |
1135 | 0 | EXPR_DATA_0(expr_at) = expr_start; |
1136 | 0 | EXPR_NXT(expr_mrk) = expr_at; |
1137 | 0 | EXPR_PRV(expr_at) = expr_mrk; |
1138 | | |
1139 | | /* relink sub expression to start and end */ |
1140 | 0 | EXPR_PRV(expr_sub_start) = expr_start; |
1141 | 0 | EXPR_NXT(expr_sub_end) = expr_end; |
1142 | | |
1143 | | /* get following PTN_END or PTN_ALTERNATE expression */ |
1144 | 0 | expr_mrk = EXPR_NXT(expr_at); |
1145 | 0 | if (EXPR_TYPE(expr_mrk) == PTN_END || EXPR_TYPE(expr_mrk) == PTN_ALTERNATE) |
1146 | 0 | return 0; |
1147 | 0 | expr_sub_start = expr_mrk; |
1148 | 0 | while (EXPR_TYPE(expr_mrk) != PTN_END && EXPR_TYPE(expr_mrk) != PTN_ALTERNATE) |
1149 | 0 | expr_mrk = EXPR_NXT(expr_mrk); |
1150 | 0 | expr_sub_end = EXPR_PRV(expr_mrk); |
1151 | | |
1152 | | /* create first start expression */ |
1153 | 0 | expr_start = *expr_crs; |
1154 | 0 | EXPR_TYPE(expr_start) = PTN_START; |
1155 | 0 | EXPR_PRV(expr_start) = PTN_END; |
1156 | 0 | EXPR_NXT(expr_start) = expr_sub_start; |
1157 | 0 | *expr_crs += 3; |
1158 | | |
1159 | | /* create first end expression */ |
1160 | 0 | expr_end = *expr_crs; |
1161 | 0 | EXPR_TYPE(expr_end) = PTN_END; |
1162 | 0 | EXPR_PRV(expr_end) = expr_sub_end; |
1163 | 0 | EXPR_NXT(expr_end) = expr_at; |
1164 | 0 | *expr_crs += 3; |
1165 | | |
1166 | | /* relink following expression before sub expression */ |
1167 | 0 | EXPR_DATA_1(expr_at) = expr_start; |
1168 | 0 | EXPR_PRV(expr_mrk) = expr_at; |
1169 | 0 | EXPR_NXT(expr_at) = expr_mrk; |
1170 | | |
1171 | | /* relink sub expression to start and end */ |
1172 | 0 | EXPR_PRV(expr_sub_start) = expr_start; |
1173 | 0 | EXPR_NXT(expr_sub_end) = expr_end; |
1174 | | |
1175 | | /* check expressions were after alternate and got moved into |
1176 | | * a sub expression, previous expressions already checked */ |
1177 | 0 | if (!pattern_compile_3(expr_data, EXPR_DATA_1(expr_at), expr_max, expr_crs)) |
1178 | 0 | return 0; |
1179 | 0 | } |
1180 | | |
1181 | 56 | expr_at = EXPR_NXT(expr_at); |
1182 | 56 | } |
1183 | | |
1184 | 15 | return 1; |
1185 | 15 | } |
1186 | | |
1187 | | int EXPORT_CALL |
1188 | | _lou_pattern_compile(const widechar *input, const int input_max, widechar *expr_data, |
1189 | 4 | const int expr_max, TranslationTableHeader *table, const FileInfo *nested) { |
1190 | 4 | int input_crs; |
1191 | | |
1192 | 4 | input_crs = 0; |
1193 | 4 | expr_data[0] = 2; |
1194 | 4 | expr_data[1] = 0; |
1195 | | |
1196 | 4 | if (table == NULL || nested == NULL) return 0; |
1197 | | |
1198 | 4 | if (!pattern_compile_1(input, input_max, &input_crs, expr_data, expr_max, |
1199 | 4 | &expr_data[0], &expr_data[1], table, nested)) |
1200 | 0 | return 0; |
1201 | | |
1202 | | /* shift past the last end */ |
1203 | 4 | expr_data[0] += 3; |
1204 | | |
1205 | 4 | if (!pattern_compile_2(expr_data, 2, expr_max, &expr_data[0])) return 0; |
1206 | | |
1207 | 4 | if (!pattern_compile_3(expr_data, 2, expr_max, &expr_data[0])) return 0; |
1208 | | |
1209 | 4 | return expr_data[0]; |
1210 | 4 | } |
1211 | | |
1212 | | //////////////////////////////////////////////////////////////////////////////// |
1213 | | |
1214 | | static void |
1215 | | pattern_reverse_expression(widechar *expr_data, const int expr_start); |
1216 | | |
1217 | | static void |
1218 | 39 | pattern_reverse_branch(widechar *expr_data, const int expr_at) { |
1219 | 39 | widechar expr_swap; |
1220 | | |
1221 | 39 | switch (EXPR_TYPE(expr_at)) { |
1222 | 0 | case PTN_ALTERNATE: |
1223 | |
|
1224 | 0 | pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at)); |
1225 | 0 | expr_swap = EXPR_DATA_0(expr_at); |
1226 | 0 | EXPR_DATA_0(expr_at) = EXPR_DATA_1(expr_at); |
1227 | 0 | EXPR_DATA_1(expr_at) = expr_swap; |
1228 | |
|
1229 | 0 | case PTN_GROUP: |
1230 | 10 | case PTN_NOT: |
1231 | 10 | case PTN_ONE_MORE: |
1232 | 11 | case PTN_ZERO_MORE: |
1233 | 11 | case PTN_OPTIONAL: |
1234 | | |
1235 | 11 | pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at)); |
1236 | 39 | } |
1237 | 39 | } |
1238 | | |
1239 | | static void |
1240 | 13 | pattern_reverse_expression(widechar *expr_data, const int expr_start) { |
1241 | 13 | widechar expr_end, expr_crs, expr_prv; |
1242 | | |
1243 | 13 | expr_end = EXPR_NXT(expr_start); |
1244 | | |
1245 | | /* empty expression */ |
1246 | 13 | if (EXPR_TYPE(expr_end) == PTN_END) return; |
1247 | | |
1248 | | /* find end expression */ |
1249 | 52 | while (EXPR_TYPE(expr_end) != PTN_END) expr_end = EXPR_NXT(expr_end); |
1250 | | |
1251 | 13 | expr_crs = EXPR_PRV(expr_end); |
1252 | 13 | expr_prv = EXPR_PRV(expr_crs); |
1253 | | |
1254 | | /* relink expression before end expression */ |
1255 | 13 | EXPR_NXT(expr_start) = expr_crs; |
1256 | 13 | EXPR_PRV(expr_crs) = expr_start; |
1257 | 13 | EXPR_NXT(expr_crs) = expr_prv; |
1258 | | |
1259 | | /* reverse any branching expressions */ |
1260 | 13 | pattern_reverse_branch(expr_data, expr_crs); |
1261 | | |
1262 | 39 | while (expr_prv != expr_start) { |
1263 | | /* shift current expression */ |
1264 | 26 | expr_crs = expr_prv; |
1265 | 26 | expr_prv = EXPR_PRV(expr_prv); |
1266 | | |
1267 | | /* reverse any branching expressions */ |
1268 | 26 | pattern_reverse_branch(expr_data, expr_crs); |
1269 | | |
1270 | | /* relink current expression */ |
1271 | 26 | EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs); |
1272 | 26 | EXPR_NXT(expr_crs) = expr_prv; |
1273 | 26 | } |
1274 | | |
1275 | | /* relink expression after start expression */ |
1276 | 13 | EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs); |
1277 | 13 | EXPR_NXT(expr_crs) = expr_end; |
1278 | 13 | EXPR_PRV(expr_end) = expr_crs; |
1279 | 13 | } |
1280 | | |
1281 | | void EXPORT_CALL |
1282 | 2 | _lou_pattern_reverse(widechar *expr_data) { |
1283 | 2 | pattern_reverse_expression(expr_data, 2); |
1284 | 2 | } |
1285 | | |
1286 | | //////////////////////////////////////////////////////////////////////////////// |
1287 | | |
1288 | | static int |
1289 | 300 | pattern_check_chars(const widechar input_char, const widechar *expr_data) { |
1290 | 300 | int expr_cnt, i; |
1291 | | |
1292 | 300 | expr_cnt = expr_data[0] + 1; |
1293 | | |
1294 | 452 | for (i = 1; i < expr_cnt; i++) |
1295 | 300 | if (input_char == expr_data[i]) break; |
1296 | | |
1297 | 300 | if (i == expr_cnt) return 0; |
1298 | 148 | return 1; |
1299 | 300 | } |
1300 | | |
1301 | | static int |
1302 | | pattern_check_attrs(const widechar input_char, const widechar *expr_data, |
1303 | 300 | const TranslationTableHeader *table) { |
1304 | 300 | int attrs; |
1305 | | |
1306 | 300 | attrs = ((expr_data[0] << 16) | expr_data[1]) & ~(CTC_EndOfInput | CTC_EmpMatch); |
1307 | 300 | if (!checkAttr(input_char, attrs, table)) return 0; |
1308 | 0 | return 1; |
1309 | 300 | } |
1310 | | |
1311 | | static int |
1312 | | pattern_check_expression(const widechar *const input, int *input_crs, |
1313 | | const int input_minmax, const int input_dir, const widechar *const expr_data, |
1314 | | int (*hook)(const widechar input, const int data_len), widechar *hook_data, |
1315 | | const int hook_max, int expr_crs, int notOperator, int loop_crs, int *loop_cnts, |
1316 | 22.7k | const TranslationTableHeader *table) { |
1317 | 22.7k | int input_crs_prv, input_start, attrs, ret, i; |
1318 | 22.7k | const widechar *data; |
1319 | | |
1320 | 22.7k | data = NULL; |
1321 | | |
1322 | | /* save input_crs to know if loop consumed input */ |
1323 | 22.7k | input_start = *input_crs; |
1324 | | |
1325 | 22.7k | CHECK_OUTPUT(START, 0, __LINE__, "check start") |
1326 | | |
1327 | 114k | while (!(EXPR_TYPE(expr_crs) == PTN_END && EXPR_TYPE(expr_crs) == PTN_END)) { |
1328 | | /* end of input expression */ |
1329 | 114k | if (EXPR_TYPE(expr_crs) == PTN_END_OF_INPUT) { |
1330 | 229 | if (*input_crs * input_dir >= input_minmax * input_dir) { |
1331 | 77 | if (notOperator) |
1332 | 0 | CHECK_OUTPUT( |
1333 | 77 | RETURN, 0, __LINE__, "end of input failed: no input and not") |
1334 | 77 | else |
1335 | 77 | CHECK_OUTPUT(RETURN, 1, __LINE__, "end of input passed: no input") |
1336 | 77 | return !notOperator; |
1337 | 152 | } else { |
1338 | 152 | if (notOperator) |
1339 | 0 | CHECK_OUTPUT( |
1340 | 152 | RETURN, 1, __LINE__, "end of input passed: input and not") |
1341 | 152 | else |
1342 | 152 | CHECK_OUTPUT(RETURN, 0, __LINE__, "end of input failed: input") |
1343 | 152 | return notOperator; |
1344 | 152 | } |
1345 | 229 | } |
1346 | | |
1347 | | /* no more input */ |
1348 | 114k | if (*input_crs * input_dir >= input_minmax * input_dir) { |
1349 | 380 | switch (EXPR_TYPE(expr_crs)) { |
1350 | 0 | case PTN_ATTRIBUTES: |
1351 | |
|
1352 | 0 | attrs = (EXPR_DATA_0(expr_crs) << 16); |
1353 | 0 | if (attrs & CTC_EndOfInput) { |
1354 | 0 | if (notOperator) { |
1355 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, |
1356 | 0 | "attributes failed: end of input attribute: not") |
1357 | 0 | return 0; |
1358 | 0 | } |
1359 | 0 | CHECK_OUTPUT(RETURN, 1, __LINE__, |
1360 | 0 | "attributes passed: end of input attribute") |
1361 | 0 | return 1; |
1362 | 0 | } |
1363 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, |
1364 | 0 | "attributes failed: no end of input attribute") |
1365 | 0 | return 0; |
1366 | | |
1367 | 76 | case PTN_ANY: |
1368 | 76 | case PTN_CHARS: |
1369 | | |
1370 | 76 | CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed: no input") |
1371 | 76 | return 0; |
1372 | 380 | } |
1373 | | |
1374 | 304 | CHECK_OUTPUT(SHOW, 0, __LINE__, "no input") |
1375 | 304 | } |
1376 | | |
1377 | 114k | switch (EXPR_TYPE(expr_crs)) { |
1378 | | |
1379 | 45.8k | case PTN_START: |
1380 | | |
1381 | 45.8k | expr_crs = EXPR_NXT(expr_crs); |
1382 | 45.8k | CHECK_OUTPUT(SHOW, 0, __LINE__, "start next") |
1383 | 45.8k | break; |
1384 | | |
1385 | 0 | case PTN_GROUP: |
1386 | |
|
1387 | 0 | expr_crs = EXPR_DATA_0(expr_crs); |
1388 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "group next") |
1389 | 0 | break; |
1390 | | |
1391 | 23.0k | case PTN_NOT: |
1392 | | |
1393 | 23.0k | notOperator = !notOperator; |
1394 | 23.0k | expr_crs = EXPR_DATA_0(expr_crs); |
1395 | 23.0k | CHECK_OUTPUT(SHOW, 0, __LINE__, "not next") |
1396 | 23.0k | break; |
1397 | | |
1398 | 0 | case PTN_ONE_MORE: |
1399 | |
|
1400 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ start") |
1401 | |
|
1402 | 22.3k | case PTN_ZERO_MORE: |
1403 | | |
1404 | | /* check if loop already started */ |
1405 | 22.3k | if (expr_crs == loop_crs) { |
1406 | 22.3k | loop_cnts[EXPR_DATA_1(loop_crs)]++; |
1407 | 22.3k | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop again") |
1408 | 22.3k | } else { |
1409 | | /* check if loop nested, wasn't running but has a count */ |
1410 | 76 | if (loop_cnts[EXPR_DATA_1(expr_crs)]) { |
1411 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop already running") |
1412 | 0 | goto loop_next; |
1413 | 0 | } |
1414 | | |
1415 | | /* start loop */ |
1416 | 76 | loop_crs = expr_crs; |
1417 | 76 | loop_cnts[EXPR_DATA_1(loop_crs)] = 1; |
1418 | 76 | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop start") |
1419 | 76 | } |
1420 | | |
1421 | | /* start loop expression */ |
1422 | 22.3k | input_crs_prv = *input_crs; |
1423 | 22.3k | ret = pattern_check_expression(input, input_crs, input_minmax, input_dir, |
1424 | 22.3k | expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs), |
1425 | 22.3k | notOperator, loop_crs, loop_cnts, table); |
1426 | 22.3k | if (ret) { |
1427 | 22.3k | CHECK_OUTPUT(RETURN, 1, __LINE__, "loop passed") |
1428 | 22.3k | return 1; |
1429 | 22.3k | } |
1430 | 76 | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop failed") |
1431 | 76 | *input_crs = input_crs_prv; |
1432 | | |
1433 | | /* check loop count */ |
1434 | 76 | loop_cnts[EXPR_DATA_1(loop_crs)]--; |
1435 | 76 | if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE) { |
1436 | 0 | if (loop_cnts[EXPR_DATA_1(loop_crs)] < 1) { |
1437 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "loop+ failed") |
1438 | 0 | return 0; |
1439 | 0 | } else |
1440 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ passed") |
1441 | 0 | } |
1442 | | |
1443 | | /* continue after loop */ |
1444 | 76 | loop_next: |
1445 | 76 | expr_crs = EXPR_NXT(expr_crs); |
1446 | 76 | CHECK_OUTPUT(SHOW, 0, __LINE__, "loop next") |
1447 | 76 | break; |
1448 | | |
1449 | 0 | case PTN_OPTIONAL: |
1450 | | |
1451 | | /* save current state */ |
1452 | 0 | input_crs_prv = *input_crs; |
1453 | | |
1454 | | /* start optional expression */ |
1455 | 0 | CHECK_OUTPUT(CALL, 0, __LINE__, "option start") |
1456 | 0 | if (pattern_check_expression(input, input_crs, input_minmax, input_dir, |
1457 | 0 | expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs), |
1458 | 0 | notOperator, loop_crs, loop_cnts, table)) { |
1459 | 0 | CHECK_OUTPUT(RETURN, 1, __LINE__, "option passed") |
1460 | 0 | return 1; |
1461 | 0 | } |
1462 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "option failed") |
1463 | | |
1464 | | /* continue after optional expression */ |
1465 | 0 | *input_crs = input_crs_prv; |
1466 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "no option start") |
1467 | 0 | expr_crs = EXPR_NXT(expr_crs); |
1468 | 0 | break; |
1469 | | |
1470 | 0 | case PTN_ALTERNATE: |
1471 | | |
1472 | | /* save current state */ |
1473 | 0 | input_crs_prv = *input_crs; |
1474 | | |
1475 | | /* start first expression */ |
1476 | 0 | CHECK_OUTPUT(CALL, 0, __LINE__, "or 1 start") |
1477 | 0 | if (pattern_check_expression(input, input_crs, input_minmax, input_dir, |
1478 | 0 | expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs), |
1479 | 0 | notOperator, loop_crs, loop_cnts, table)) { |
1480 | 0 | CHECK_OUTPUT(RETURN, 1, __LINE__, "or 1 passed") |
1481 | 0 | return 1; |
1482 | 0 | } |
1483 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "or 1 failed") |
1484 | | |
1485 | | /* start second expression (no need to push) */ |
1486 | 0 | *input_crs = input_crs_prv; |
1487 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "or 2 start") |
1488 | 0 | expr_crs = EXPR_DATA_1(expr_crs); |
1489 | 0 | break; |
1490 | | |
1491 | 22.3k | case PTN_ANY: |
1492 | | |
1493 | 22.3k | CHECK_OUTPUT(SHOW, 0, __LINE__, "any") |
1494 | 22.3k | *input_crs += input_dir; |
1495 | 22.3k | expr_crs = EXPR_NXT(expr_crs); |
1496 | 22.3k | break; |
1497 | | |
1498 | 300 | case PTN_ATTRIBUTES: |
1499 | | |
1500 | 300 | ret = pattern_check_attrs( |
1501 | 300 | input[*input_crs], EXPR_CONST_DATA(expr_crs), table); |
1502 | 300 | if (ret && notOperator) { |
1503 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed: not"); |
1504 | 0 | return 0; |
1505 | 0 | } |
1506 | 300 | if (!ret && !notOperator) { |
1507 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed"); |
1508 | 0 | return 0; |
1509 | 0 | } |
1510 | 300 | CHECK_OUTPUT(SHOW, 0, __LINE__, "attributes passed") |
1511 | 300 | *input_crs += input_dir; |
1512 | 300 | expr_crs = EXPR_NXT(expr_crs); |
1513 | 300 | break; |
1514 | | |
1515 | 300 | case PTN_CHARS: |
1516 | | |
1517 | 300 | ret = pattern_check_chars(input[*input_crs], EXPR_CONST_DATA(expr_crs)); |
1518 | 300 | if (ret && notOperator) { |
1519 | 148 | CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed: not"); |
1520 | 148 | return 0; |
1521 | 148 | } |
1522 | 152 | if (!ret && !notOperator) { |
1523 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed"); |
1524 | 0 | return 0; |
1525 | 0 | } |
1526 | 152 | CHECK_OUTPUT(SHOW, 0, __LINE__, "chars passed") |
1527 | 152 | *input_crs += input_dir; |
1528 | 152 | expr_crs = EXPR_NXT(expr_crs); |
1529 | 152 | break; |
1530 | | |
1531 | 0 | case PTN_HOOK: |
1532 | |
|
1533 | 0 | if (hook == NULL) { |
1534 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed: NULL"); |
1535 | 0 | return 0; |
1536 | 0 | } |
1537 | | |
1538 | | /* copy expression data */ |
1539 | 0 | data = EXPR_CONST_DATA(expr_crs); |
1540 | 0 | for (i = 0; i < data[0]; i++) hook_data[i] = data[i + 1]; |
1541 | | |
1542 | | /* call hook function */ |
1543 | 0 | ret = hook(input[*input_crs], data[0]); |
1544 | 0 | if (ret && notOperator) { |
1545 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed: not"); |
1546 | 0 | return 0; |
1547 | 0 | } |
1548 | 0 | if (!ret && !notOperator) { |
1549 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed"); |
1550 | 0 | return 0; |
1551 | 0 | } |
1552 | 0 | CHECK_OUTPUT(SHOW, 0, __LINE__, "hook passed") |
1553 | 0 | *input_crs += input_dir; |
1554 | 0 | expr_crs = EXPR_NXT(expr_crs); |
1555 | 0 | break; |
1556 | | |
1557 | 0 | case PTN_END: |
1558 | 0 | break; |
1559 | | |
1560 | 0 | default: |
1561 | |
|
1562 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "unknown opcode") |
1563 | 0 | return 0; |
1564 | 114k | } |
1565 | | |
1566 | | /* check end expression */ |
1567 | 114k | while (EXPR_TYPE(expr_crs) == PTN_END) { |
1568 | 45.1k | CHECK_OUTPUT(SHOW, 0, __LINE__, "end") |
1569 | | |
1570 | | /* check for end of expressions */ |
1571 | 45.1k | if (EXPR_NXT(expr_crs) == PTN_END) break; |
1572 | | |
1573 | 45.1k | expr_crs = EXPR_NXT(expr_crs); |
1574 | | |
1575 | | /* returning loop */ |
1576 | 45.1k | if (EXPR_TYPE(expr_crs) == PTN_ZERO_MORE || |
1577 | 22.8k | EXPR_TYPE(expr_crs) == PTN_ONE_MORE) { |
1578 | 22.3k | CHECK_OUTPUT(SHOW, 0, __LINE__, "end loop") |
1579 | | |
1580 | | /* check that loop consumed input */ |
1581 | 22.3k | if (*input_crs == input_start) { |
1582 | 0 | CHECK_OUTPUT(RETURN, 0, __LINE__, "loop failed: did not consume") |
1583 | 0 | return 0; |
1584 | 0 | } |
1585 | | |
1586 | | /* loops do not continue to the next expression */ |
1587 | 22.3k | break; |
1588 | 22.3k | } |
1589 | | |
1590 | | /* returning not */ |
1591 | 22.8k | if (EXPR_TYPE(expr_crs) == PTN_NOT) notOperator = !notOperator; |
1592 | | |
1593 | 22.8k | expr_crs = EXPR_NXT(expr_crs); |
1594 | | |
1595 | 22.8k | CHECK_OUTPUT(SHOW, 0, __LINE__, "end next") |
1596 | 22.8k | } |
1597 | | |
1598 | 91.8k | CHECK_OUTPUT(SHOW, 0, __LINE__, "check next") |
1599 | 91.8k | } |
1600 | | |
1601 | 0 | CHECK_OUTPUT(RETURN, 1, __LINE__, "check passed: end of expression"); |
1602 | 0 | return 1; |
1603 | 22.7k | } |
1604 | | |
1605 | | static int |
1606 | | pattern_check_hook(const widechar *input, const int input_start, const int input_minmax, |
1607 | | const int input_dir, const widechar *expr_data, |
1608 | | int (*hook)(const widechar input, const int data_len), widechar *hook_data, |
1609 | 377 | const int hook_max, const TranslationTableHeader *table) { |
1610 | 377 | int input_crs, ret, *loop_cnts; |
1611 | | |
1612 | 377 | input_crs = input_start; |
1613 | 377 | loop_cnts = malloc(expr_data[1] * sizeof(int)); |
1614 | 377 | memset(loop_cnts, 0, expr_data[1] * sizeof(int)); |
1615 | 377 | ret = pattern_check_expression(input, &input_crs, input_minmax, input_dir, expr_data, |
1616 | 377 | hook, hook_data, hook_max, 2, 0, 0, loop_cnts, table); |
1617 | 377 | free(loop_cnts); |
1618 | 377 | return ret; |
1619 | 377 | } |
1620 | | |
1621 | | int EXPORT_CALL |
1622 | | _lou_pattern_check(const widechar *input, const int input_start, const int input_minmax, |
1623 | | const int input_dir, const widechar *expr_data, |
1624 | 377 | const TranslationTableHeader *table) { |
1625 | | #ifdef CHECK_OUTPUT_DEFINED |
1626 | | pattern_output(expr_data, table); |
1627 | | #endif |
1628 | 377 | return pattern_check_hook( |
1629 | 377 | input, input_start, input_minmax, input_dir, expr_data, NULL, NULL, 0, table); |
1630 | 377 | } |
1631 | | |
1632 | | //////////////////////////////////////////////////////////////////////////////// |