Coverage Report

Created: 2026-06-13 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/liblouis/liblouis/pattern.c
Line
Count
Source
1
/* liblouis Braille Translation and Back-Translation Library
2
3
   Copyright (C) 2016 Mike Gray, American Printing House for the Blind
4
5
   This file is part of liblouis.
6
7
   liblouis is free software: you can redistribute it and/or modify it
8
   under the terms of the GNU Lesser General Public License as published
9
   by the Free Software Foundation, either version 2.1 of the License, or
10
   (at your option) any later version.
11
12
   liblouis is distributed in the hope that it will be useful, but
13
   WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
   Lesser General Public License for more details.
16
17
   You should have received a copy of the GNU Lesser General Public
18
   License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
19
*/
20
21
#include "config.h"
22
23
#include <stdlib.h>
24
#include <stdio.h>
25
#include <string.h>
26
#include <ctype.h>
27
28
#include "internal.h"
29
30
// #define CHECK_OUTPUT_DEFINED
31
32
/////
33
34
// TODO: these functions are static and copied several times
35
36
/* Selects the table checkAttr() consults: a non-zero value makes it look the
 * character up in table->characters, zero makes it use table->dots. */
int translation_direction = 1;
37
38
static TranslationTableCharacter *
39
300
findCharOrDots(widechar c, int m, const TranslationTableHeader *table) {
40
  /* Look up character or dot pattern in the appropriate
41
   * table. */
42
300
  static TranslationTableCharacter noChar = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, 32, 0,
43
300
    0 };
44
300
  static TranslationTableCharacter noDots = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0,
45
300
    LOU_DOTS, 0, 0 };
46
300
  TranslationTableCharacter *notFound;
47
300
  TranslationTableCharacter *character;
48
300
  TranslationTableOffset bucket;
49
300
  unsigned long int makeHash = _lou_charHash(c);
50
300
  if (m == 0) {
51
300
    bucket = table->characters[makeHash];
52
300
    notFound = &noChar;
53
300
  } else {
54
0
    bucket = table->dots[makeHash];
55
0
    notFound = &noDots;
56
0
  }
57
300
  while (bucket) {
58
222
    character = (TranslationTableCharacter *)&table->ruleArea[bucket];
59
222
    if (character->value == c) return character;
60
0
    bucket = character->next;
61
0
  }
62
78
  notFound->value = c;
63
78
  return notFound;
64
300
}
65
66
static int
67
checkAttr(const widechar c, const TranslationTableCharacterAttributes a,
68
300
    const TranslationTableHeader *table) {
69
300
  return (((findCharOrDots(c, translation_direction ? 0 : 1, table))->attributes & a)
70
300
          ? 1
71
300
          : 0);
72
300
}
73
74
/////
75
76
/* Node types stored in the first cell of every compiled pattern node. */
enum pattern_type {
77
  /* placeholder written into a node slot before it has been compiled */
  PTN_ERROR,
78
79
  /* first node of an expression sequence */
  PTN_START,
80
  /* '(' ... ')' */
  PTN_GROUP,
81
  /* '!' */
  PTN_NOT,
82
83
  /* '+' */
  PTN_ONE_MORE,
84
  /* '*' */
  PTN_ZERO_MORE,
85
  /* '?' */
  PTN_OPTIONAL,
86
87
  /* '|' */
  PTN_ALTERNATE,
88
89
  /* '.' */
  PTN_ANY,
90
  /* '%' followed by one attribute or a '[...]' attribute set */
  PTN_ATTRIBUTES,
91
  /* '[...]' character set, or a single literal character */
  PTN_CHARS,
92
  /* '@' followed by one character or a '[...]' list */
  PTN_HOOK,
93
  /* '^' or '$' */
  PTN_END_OF_INPUT,
94
95
  /* last node of an expression sequence; the same value is also stored in
   * prv/nxt cells to mean "no node" */
  PTN_END = 0xffff,
96
};
97
98
550k
/* Accessors for one compiled pattern node stored in a flat buffer.  A node
 * at offset `at` occupies consecutive cells: type, prv, nxt, then its data.
 * The `buffer` argument is parenthesized so that pointer expressions such as
 * `base + n` index correctly.  Every accessor except the two *_DATA pointer
 * forms expands to an lvalue. */
#define EXPR_TYPE_IN(at, buffer) ((buffer)[(at) + 0])
#define EXPR_PRV_IN(at, buffer) ((buffer)[(at) + 1])
#define EXPR_NXT_IN(at, buffer) ((buffer)[(at) + 2])
#define EXPR_DATA_0_IN(at, buffer) ((buffer)[(at) + 3])
#define EXPR_DATA_1_IN(at, buffer) ((buffer)[(at) + 4])
#define EXPR_DATA_2_IN(at, buffer) ((buffer)[(at) + 5])
#define EXPR_DATA_IN(at, buffer) ((widechar *)&(buffer)[(at) + 3])
#define EXPR_CONST_DATA_IN(at, buffer) ((const widechar *)&(buffer)[(at) + 3])

/* Shorthands that operate on whatever variable named expr_data is in scope
 * at the point of use. */
#define EXPR_TYPE(at) EXPR_TYPE_IN((at), expr_data)
#define EXPR_PRV(at) EXPR_PRV_IN((at), expr_data)
#define EXPR_NXT(at) EXPR_NXT_IN((at), expr_data)
#define EXPR_DATA_0(at) EXPR_DATA_0_IN((at), expr_data)
#define EXPR_DATA_1(at) EXPR_DATA_1_IN((at), expr_data)
#define EXPR_DATA_2(at) EXPR_DATA_2_IN((at), expr_data)
#define EXPR_DATA(at) EXPR_DATA_IN((at), expr_data)
#define EXPR_CONST_DATA(at) EXPR_CONST_DATA_IN((at), expr_data)
115
116
/* Optional tracing hook.  With CHECK_OUTPUT_DEFINED set, CHECK_OUTPUT()
 * passes its arguments plus a fixed set of variables taken from the
 * expansion site to do_output(), and DEBUG is forced on.  Otherwise it
 * expands to an empty block.  Both forms are brace blocks, so the macro can
 * be used with or without a trailing semicolon. */
#ifdef CHECK_OUTPUT_DEFINED

#ifndef DEBUG
#define DEBUG
#endif

/* Event kinds accepted by do_output(). */
#define START 0
#define CALL 1
#define RETURN 2
#define SHOW 3

#define CHECK_OUTPUT(type, ret, line, msg)                                   \
  {                                                                          \
    do_output(type, ret, line, input[*input_crs], input_minmax, *input_crs, \
        input_dir, expr_data, expr_crs, notOperator, loop_crs, loop_cnts,   \
        msg);                                                                \
  }

#else

#define CHECK_OUTPUT(type, ret, line, msg) { ; }

#endif
140
141
/* Structured view of one pattern node.  The field order mirrors the cell
 * layout addressed by the EXPR_* macros: type, prv, nxt, then data. */
struct expression {
142
  widechar type;
143
  widechar prv;
144
  widechar nxt;
145
  widechar data[1];
146
};
147
148
/* gdb won't know what this is unless it is actually used */
149
#ifdef DEBUG
150
static struct expression *expr_debug;
151
#endif
152
153
////////////////////////////////////////////////////////////////////////////////
154
155
/* Indentation ruler for the debug dumps: printing &spaces[space] emits the
 * dots from index `space` to the end of the string. */
static char spaces[] = "..............................";
156
/* Current index into `spaces`; the dump routines decrement it when they
 * descend into a sub-expression and increment it again afterwards. */
static int space = 30;
157
158
static void
159
pattern_output_expression(
160
0
    const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) {
161
0
  int i;
162
0
163
0
  if (expr_crs == PTN_END) return;
164
0
165
0
  while (EXPR_TYPE(expr_crs) != PTN_END) {
166
0
    printf("%s%d", &spaces[space], expr_crs);
167
0
    if (expr_crs < 100) printf(" ");
168
0
    if (expr_crs < 10) printf(" ");
169
0
    for (i = 0; i < 13 - (30 - space); i++) printf(" ");
170
0
171
0
    switch (EXPR_TYPE(expr_crs)) {
172
0
    case PTN_START:
173
0
174
0
      printf("START\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
175
0
      break;
176
0
177
0
    case PTN_GROUP:
178
0
179
0
      printf("(    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
180
0
          EXPR_DATA_0(expr_crs));
181
0
      space--;
182
0
      if (space < 0) space = 0;
183
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
184
0
      space++;
185
0
      if (space > 30) space = 30;
186
0
      break;
187
0
188
0
    case PTN_NOT:
189
0
190
0
      printf("!    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
191
0
          EXPR_DATA_0(expr_crs));
192
0
      space--;
193
0
      if (space < 0) space = 0;
194
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
195
0
      space++;
196
0
      if (space > 30) space = 30;
197
0
      break;
198
0
199
0
    case PTN_ONE_MORE:
200
0
201
0
      printf("+    \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
202
0
          EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
203
0
      space--;
204
0
      if (space < 0) space = 0;
205
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
206
0
      space++;
207
0
      if (space > 30) space = 30;
208
0
      break;
209
0
210
0
    case PTN_ZERO_MORE:
211
0
212
0
      printf("*    \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
213
0
          EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
214
0
      space--;
215
0
      if (space < 0) space = 0;
216
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
217
0
      space++;
218
0
      if (space > 30) space = 30;
219
0
      break;
220
0
221
0
    case PTN_OPTIONAL:
222
0
223
0
      printf("?    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
224
0
          EXPR_DATA_0(expr_crs));
225
0
      space--;
226
0
      if (space < 0) space = 0;
227
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
228
0
      space++;
229
0
      if (space > 30) space = 30;
230
0
      break;
231
0
232
0
    case PTN_ALTERNATE:
233
0
234
0
      printf("|    \t%d\t%d\t-> %d\t-> %d\n", EXPR_PRV(expr_crs),
235
0
          EXPR_NXT(expr_crs), EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
236
0
      space--;
237
0
      if (space < 0) space = 0;
238
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
239
0
      pattern_output_expression(expr_data, EXPR_DATA_1(expr_crs), table);
240
0
      space++;
241
0
      if (space > 30) space = 30;
242
0
      break;
243
0
244
0
    case PTN_ANY:
245
0
246
0
      printf(".    \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
247
0
      break;
248
0
249
0
    case PTN_ATTRIBUTES:
250
0
251
0
      printf("%%    \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
252
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0");
253
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1");
254
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2");
255
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3");
256
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4");
257
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5");
258
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6");
259
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7");
260
0
      if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^");
261
0
      if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_");
262
0
      if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#");
263
0
      if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a");
264
0
      if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u");
265
0
      if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l");
266
0
      if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf(".");
267
0
      if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$");
268
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~");
269
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<");
270
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">");
271
0
      puts("");
272
0
      break;
273
0
274
0
    case PTN_CHARS:
275
0
276
0
      printf("[]   \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
277
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
278
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
279
0
      puts("");
280
0
      break;
281
0
282
0
    case PTN_HOOK:
283
0
284
0
      printf("@    \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
285
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
286
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
287
0
      puts("");
288
0
      break;
289
0
290
0
    case PTN_END_OF_INPUT:
291
0
292
0
      printf("^    \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
293
0
      break;
294
0
295
0
    default:
296
0
297
0
      printf("%d?    \t%d\t%d\n", EXPR_TYPE(expr_crs), EXPR_PRV(expr_crs),
298
0
          EXPR_NXT(expr_crs));
299
0
      break;
300
0
    }
301
0
302
0
    expr_crs = EXPR_NXT(expr_crs);
303
0
  }
304
0
305
0
  printf("%s%d", &spaces[space], expr_crs);
306
0
  if (expr_crs < 100) printf(" ");
307
0
  if (expr_crs < 10) printf(" ");
308
0
  for (i = 0; i < 13 - (30 - space); i++) printf(" ");
309
0
  printf("END\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
310
0
  fflush(stdout);
311
0
  return;
312
0
}
313
314
static void
315
0
pattern_output(const widechar *expr_data, const TranslationTableHeader *table) {
316
0
  printf("%d    \tlength\n", expr_data[0]);
317
0
  printf("%d    \tloops\n", expr_data[1]);
318
0
  if (expr_data[0] > 0 && expr_data[0] != PTN_END)
319
0
    pattern_output_expression(expr_data, 2, table);
320
0
}
321
322
static void
323
pattern_print_expression(
324
0
    const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) {
325
0
  int i;
326
0
327
0
  if (expr_crs == PTN_END) return;
328
0
329
0
  while (EXPR_TYPE(expr_crs) != PTN_END) {
330
0
    switch (EXPR_TYPE(expr_crs)) {
331
0
    case PTN_START:
332
0
      break;
333
0
334
0
    case PTN_GROUP:
335
0
336
0
      printf(" (");
337
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
338
0
      printf(") ");
339
0
      break;
340
0
341
0
    case PTN_NOT:
342
0
343
0
      printf("!");
344
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
345
0
      break;
346
0
347
0
    case PTN_ONE_MORE:
348
0
349
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
350
0
      printf("+");
351
0
      break;
352
0
353
0
    case PTN_ZERO_MORE:
354
0
355
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
356
0
      printf("*");
357
0
      break;
358
0
359
0
    case PTN_OPTIONAL:
360
0
361
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
362
0
      printf("?");
363
0
      break;
364
0
365
0
    case PTN_ALTERNATE:
366
0
367
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
368
0
      printf(" | ");
369
0
      pattern_print_expression(expr_data, EXPR_DATA_1(expr_crs), table);
370
0
      break;
371
0
372
0
    case PTN_ANY:
373
0
374
0
      printf(".");
375
0
      break;
376
0
377
0
    case PTN_ATTRIBUTES:
378
0
379
0
      printf("%%[");
380
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0");
381
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1");
382
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2");
383
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3");
384
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4");
385
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5");
386
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6");
387
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7");
388
0
      if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^");
389
0
      if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_");
390
0
      if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#");
391
0
      if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a");
392
0
      if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u");
393
0
      if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l");
394
0
      if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf(".");
395
0
      if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$");
396
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~");
397
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<");
398
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">");
399
0
      printf("]");
400
0
      break;
401
0
402
0
    case PTN_CHARS:
403
0
404
0
      if (EXPR_DATA_0(expr_crs) == 1)
405
0
        printf("%c", EXPR_DATA_1(expr_crs));
406
0
      else {
407
0
        printf("[");
408
0
        for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
409
0
          printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
410
0
        printf("]");
411
0
      }
412
0
      break;
413
0
414
0
    case PTN_HOOK:
415
0
416
0
      printf("@[");
417
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
418
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
419
0
      printf("]");
420
0
      break;
421
0
422
0
    case PTN_END_OF_INPUT:
423
0
424
0
      printf("^");
425
0
      break;
426
0
427
0
      // default:  printf("%d?\n", EXPR_TYPE(expr_crs));  break;
428
0
    }
429
0
430
0
    expr_crs = EXPR_NXT(expr_crs);
431
0
  }
432
0
433
0
  return;
434
0
}
435
436
static void
437
0
pattern_print(const widechar *expr_data, const TranslationTableHeader *table) {
438
0
  if (expr_data[0] > 0 && expr_data[0] != PTN_END)
439
0
    pattern_print_expression(expr_data, 2, table);
440
0
  puts("");
441
0
}
442
443
#ifdef CHECK_OUTPUT_DEFINED
444
445
/* Print one space for each of the thresholds 100000, 10000, 1000, 100 and
 * 10 that `value` is below. */
static void
do_padd(const int value) {
  int limit;

  for (limit = 100000; limit >= 10; limit /= 10)
    if (value < limit) printf(" ");
}
453
454
/* Print one space for each of the thresholds 100 and 10 that `value` is
 * below. */
static void
do_pad(const int value) {
  int limit;

  for (limit = 100; limit >= 10; limit /= 10)
    if (value < limit) printf(" ");
}
459
460
static void
461
do_output(const int type, const int ret, const int line,
462
463
    const int input, const int input_minmax, const int input_crs, const int input_dir,
464
    const widechar *expr_data, const int expr_crs, const int notOperator,
465
    const int loop_crs, const int *loop_cnts,
466
467
    const char *msg) {
468
  switch (type) {
469
  case START:
470
471
    space--;
472
    if (space < 0) space = 0;
473
    printf("|%s()  ", &spaces[space]);
474
    break;
475
476
  case CALL:
477
478
    printf("|%s>   ", &spaces[space]);
479
    break;
480
481
  case RETURN:
482
483
    printf("|%s<%d  ", &spaces[space], ret);
484
    space++;
485
    if (space > 31) space = 31;
486
    break;
487
488
  case SHOW:
489
490
    printf("|%s    ", &spaces[space]);
491
    break;
492
  }
493
494
  printf("%d ", line);
495
  do_padd(line);
496
497
  switch (expr_data[expr_crs]) {
498
  case PTN_ERROR:
499
    printf("# ");
500
    break;
501
  case PTN_START:
502
    printf("> ");
503
    break;
504
  case PTN_END_OF_INPUT:
505
    printf("^ ");
506
    break;
507
  case PTN_ALTERNATE:
508
    printf("| ");
509
    break;
510
  case PTN_OPTIONAL:
511
    printf("? ");
512
    break;
513
  case PTN_ONE_MORE:
514
    printf("+ ");
515
    break;
516
  case PTN_ZERO_MORE:
517
    printf("* ");
518
    break;
519
  case PTN_NOT:
520
    printf("! ");
521
    break;
522
  case PTN_GROUP:
523
    printf("( ");
524
    break;
525
  case PTN_ANY:
526
    printf(". ");
527
    break;
528
  case PTN_ATTRIBUTES:
529
    printf("%% ");
530
    break;
531
  case PTN_CHARS:
532
    printf("[ ");
533
    break;
534
  case PTN_HOOK:
535
    printf("@ ");
536
    break;
537
  case PTN_END:
538
    printf("< ");
539
    break;
540
  default:
541
    printf("  ");
542
    break;
543
  }
544
  printf("%d ", expr_crs);
545
  do_padd(expr_crs);
546
547
  if (input > 31 && input < 127)
548
    printf("%c ", input);
549
  else
550
    printf("_ ");
551
552
  if (input_crs * input_dir >= input_minmax * input_dir)
553
    printf("#   ");
554
  else {
555
    printf("%d ", input_crs);
556
    do_pad(input_crs);
557
  }
558
559
  if (input_dir > 0)
560
    printf("<");
561
  else
562
    printf(">");
563
  printf("%d ", input_minmax);
564
  do_pad(input_minmax);
565
566
  if (notOperator)
567
    printf("!   ");
568
  else
569
    printf("    ");
570
571
  if (loop_crs) {
572
    printf("%d ", loop_crs);
573
    do_pad(loop_crs);
574
    printf("%d ", loop_cnts[EXPR_DATA_1(loop_crs)]);
575
    do_pad(loop_cnts[EXPR_DATA_1(loop_crs)]);
576
  } else
577
    printf("-   -   ");
578
  if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE || EXPR_TYPE(expr_crs) == PTN_ZERO_MORE) {
579
    printf("%d ", loop_cnts[EXPR_DATA_1(expr_crs)]);
580
    do_pad(loop_cnts[EXPR_DATA_1(expr_crs)]);
581
  } else
582
    printf("-   ");
583
584
  if (msg) printf("%s", msg);
585
  puts("");
586
}
587
588
#endif
589
590
////////////////////////////////////////////////////////////////////////////////
591
592
static int
593
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
594
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
595
    TranslationTableHeader *table, const FileInfo *nested);
596
597
static int
598
pattern_compile_expression(const widechar *input, const int input_max, int *input_crs,
599
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
600
41
    TranslationTableHeader *table, const FileInfo *nested) {
601
41
  widechar *data;
602
41
  int expr_start, expr_end, expr_sub, expr_crs_prv;
603
41
  int input_end;
604
41
  int attrs0, attrs1;
605
41
  int set, esc, nest, i;
606
607
41
  if (*input_crs >= input_max) return 0;
608
41
  switch (input[*input_crs]) {
609
0
  case '(':
610
611
0
    if (*expr_crs + 10 >= expr_max) return 0;
612
613
0
    (*input_crs)++;
614
0
    if (*input_crs >= input_max) return 0;
615
616
    /* find closing parenthesis */
617
0
    nest = esc = 0;
618
0
    for (input_end = *input_crs; input_end < input_max; input_end++) {
619
0
      if (input[input_end] == '\\' && !esc) {
620
0
        esc = 1;
621
0
        continue;
622
0
      }
623
624
0
      if (input[input_end] == '(' && !esc)
625
0
        nest++;
626
0
      else if (input[input_end] == ')' && !esc) {
627
0
        if (nest)
628
0
          nest--;
629
0
        else
630
0
          break;
631
0
      }
632
633
0
      esc = 0;
634
0
    }
635
0
    if (input_end >= input_max) return 0;
636
637
0
    EXPR_TYPE(*expr_crs) = PTN_GROUP;
638
639
    /* compile sub expressions */
640
0
    expr_crs_prv = *expr_crs;
641
0
    *expr_crs += 4;
642
0
    EXPR_DATA_0(expr_crs_prv) = *expr_crs;
643
0
    expr_sub = *expr_crs;
644
0
    EXPR_TYPE(expr_sub) = PTN_ERROR;
645
0
    EXPR_PRV(expr_sub) = PTN_END;
646
0
    EXPR_NXT(expr_sub) = PTN_END;
647
0
    if (!pattern_compile_1(input, input_end, input_crs, expr_data, expr_max, expr_crs,
648
0
          loop_cnts, table, nested))
649
0
      return 0;
650
0
    (*input_crs)++;
651
652
    /* reset end expression */
653
0
    expr_end = *expr_crs;
654
0
    EXPR_NXT(expr_end) = expr_crs_prv;
655
656
0
    return *expr_crs += 3;
657
658
10
  case '!':
659
660
10
    if (*expr_crs + 10 >= expr_max) return 0;
661
662
10
    (*input_crs)++;
663
10
    EXPR_TYPE(*expr_crs) = PTN_NOT;
664
10
    expr_crs_prv = *expr_crs;
665
10
    *expr_crs += 4;
666
10
    EXPR_DATA_0(expr_crs_prv) = *expr_crs;
667
668
    /* create start expression */
669
10
    expr_start = *expr_crs;
670
10
    EXPR_TYPE(expr_start) = PTN_START;
671
10
    EXPR_PRV(expr_start) = PTN_END;
672
10
    *expr_crs += 3;
673
10
    EXPR_NXT(expr_start) = *expr_crs;
674
675
    /* compile sub expression */
676
10
    expr_sub = *expr_crs;
677
10
    EXPR_TYPE(expr_sub) = PTN_ERROR;
678
10
    EXPR_PRV(expr_sub) = expr_start;
679
10
    EXPR_NXT(expr_sub) = PTN_END;
680
681
10
    if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max,
682
10
          expr_crs, loop_cnts, table, nested))
683
0
      return 0;
684
685
10
    if (*expr_crs + 3 >= expr_max) return 0;
686
687
10
    EXPR_NXT(expr_sub) = *expr_crs;
688
689
    /* create end expression */
690
10
    expr_end = *expr_crs;
691
10
    EXPR_TYPE(expr_end) = PTN_END;
692
10
    EXPR_PRV(expr_end) = expr_sub;
693
10
    EXPR_NXT(expr_end) = expr_crs_prv;
694
695
10
    return *expr_crs += 3;
696
697
0
  case '+':
698
699
0
    if (*expr_crs + 5 >= expr_max) return 0;
700
0
    EXPR_TYPE(*expr_crs) = PTN_ONE_MORE;
701
0
    EXPR_DATA_1(*expr_crs) = (*loop_cnts)++;
702
0
    (*input_crs)++;
703
0
    return *expr_crs += 5;
704
705
1
  case '*':
706
707
1
    if (*expr_crs + 5 >= expr_max) return 0;
708
1
    EXPR_TYPE(*expr_crs) = PTN_ZERO_MORE;
709
1
    EXPR_DATA_1(*expr_crs) = (*loop_cnts)++;
710
1
    (*input_crs)++;
711
1
    return *expr_crs += 5;
712
713
0
  case '?':
714
715
0
    if (*expr_crs + 4 >= expr_max) return 0;
716
0
    EXPR_TYPE(*expr_crs) = PTN_OPTIONAL;
717
0
    (*input_crs)++;
718
0
    return *expr_crs += 4;
719
720
0
  case '|':
721
722
0
    if (*expr_crs + 5 >= expr_max) return 0;
723
0
    EXPR_TYPE(*expr_crs) = PTN_ALTERNATE;
724
0
    (*input_crs)++;
725
0
    return *expr_crs += 5;
726
727
2
  case '.':
728
729
2
    if (*expr_crs + 3 >= expr_max) return 0;
730
2
    EXPR_TYPE(*expr_crs) = PTN_ANY;
731
2
    (*input_crs)++;
732
2
    return *expr_crs += 3;
733
734
2
  case '%':
735
736
2
    if (*expr_crs + 5 >= expr_max) return 0;
737
738
2
    (*input_crs)++;
739
2
    if (*input_crs >= input_max) return 0;
740
741
    /* find closing bracket */
742
2
    if (input[*input_crs] == '[') {
743
0
      set = 1;
744
0
      (*input_crs)++;
745
0
      for (input_end = *input_crs; input_end < input_max; input_end++)
746
0
        if (input[input_end] == ']') break;
747
0
      if (input_end >= input_max) return 0;
748
2
    } else {
749
2
      set = 0;
750
2
      input_end = *input_crs + 1;
751
2
    }
752
753
2
    EXPR_TYPE(*expr_crs) = PTN_ATTRIBUTES;
754
755
2
    attrs0 = attrs1 = 0;
756
4
    for (; (*input_crs) < input_end; (*input_crs)++) {
757
2
      switch (input[*input_crs]) {
758
0
      case '_':
759
0
        attrs0 |= CTC_Space;
760
0
        break;
761
0
      case '#':
762
0
        attrs0 |= CTC_Digit;
763
0
        break;
764
0
      case 'a':
765
0
        attrs0 |= CTC_Letter;
766
0
        break;
767
2
      case 'u':
768
2
        attrs0 |= CTC_UpperCase;
769
2
        break;
770
0
      case 'l':
771
0
        attrs0 |= CTC_LowerCase;
772
0
        break;
773
0
      case '.':
774
0
        attrs0 |= CTC_Punctuation;
775
0
        break;
776
0
      case '$':
777
0
        attrs0 |= CTC_Sign;
778
0
        break;
779
0
      case 'm':
780
0
        attrs0 |= CTC_Math;
781
0
        break;
782
0
      case '~':
783
0
        attrs0 |= CTC_SeqDelimiter;
784
0
        break;
785
0
      case '<':
786
0
        attrs0 |= CTC_SeqBefore;
787
0
        break;
788
0
      case '>':
789
0
        attrs0 |= CTC_SeqAfter;
790
0
        break;
791
792
0
      case '0':
793
0
      case '1':
794
0
      case '2':
795
0
      case '3':
796
0
      case '4':
797
0
      case '5':
798
0
      case '6':
799
0
      case '7': {
800
0
        int k = input[*input_crs] - '0';
801
0
        TranslationTableCharacterAttributes a = table->numberedAttributes[k];
802
0
        if (!a) {
803
          // attribute not used before yet: assign it a value
804
0
          a = table->numberedAttributes[k] =
805
0
              table->nextNumberedCharacterClassAttribute;
806
0
          if (a > CTC_UserDefined8) {
807
0
            _lou_logMessage(LOU_LOG_ERROR,
808
0
                "%s:%d: error: Too many character attributes defined",
809
0
                nested->fileName, nested->lineNumber);
810
0
            return 0;
811
0
          }
812
0
          table->nextNumberedCharacterClassAttribute <<= 1;
813
0
        }
814
0
        attrs1 |= (a >> 16);
815
0
        break;
816
0
      }
817
0
      case '^':
818
0
        attrs1 |= (CTC_EndOfInput >> 16);
819
0
        break;
820
821
0
      default:
822
0
        return 0;
823
2
      }
824
2
    }
825
2
    EXPR_DATA_0(*expr_crs) = attrs1;
826
2
    EXPR_DATA_1(*expr_crs) = attrs0;
827
828
2
    if (set) (*input_crs)++;
829
2
    return *expr_crs += 5;
830
831
0
  case '[':
832
833
0
    (*input_crs)++;
834
0
    if (*input_crs >= input_max) return 0;
835
836
    /* find closing bracket */
837
0
    esc = 0;
838
0
    for (input_end = *input_crs; input_end < input_max; input_end++) {
839
0
      if (input[input_end] == '\\' && !esc) {
840
0
        esc = 1;
841
0
        continue;
842
0
      }
843
844
0
      if (input[input_end] == ']' && !esc) break;
845
0
      esc = 0;
846
0
    }
847
0
    if (input_end >= input_max) return 0;
848
849
0
    if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0;
850
851
0
    EXPR_TYPE(*expr_crs) = PTN_CHARS;
852
853
0
    esc = 0;
854
0
    data = EXPR_DATA(*expr_crs);
855
0
    for (i = 1; *input_crs < input_end; (*input_crs)++) {
856
0
      if (input[*input_crs] == '\\' && !esc) {
857
0
        esc = 1;
858
0
        continue;
859
0
      }
860
861
0
      esc = 0;
862
0
      data[i++] = (widechar)input[*input_crs];
863
0
    }
864
0
    data[0] = i - 1;
865
0
    (*input_crs)++;
866
0
    return *expr_crs += 4 + data[0];
867
868
0
  case '@':
869
870
0
    (*input_crs)++;
871
0
    if (*input_crs >= input_max) return 0;
872
873
    /* find closing bracket */
874
0
    if (input[*input_crs] == '[') {
875
0
      set = 1;
876
0
      (*input_crs)++;
877
0
      for (input_end = *input_crs; input_end < input_max; input_end++)
878
0
        if (input[input_end] == ']') break;
879
0
      if (input_end >= input_max) return 0;
880
0
    } else {
881
0
      set = 0;
882
0
      input_end = *input_crs + 1;
883
0
    }
884
885
0
    if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0;
886
887
0
    EXPR_TYPE(*expr_crs) = PTN_HOOK;
888
889
0
    esc = 0;
890
0
    data = EXPR_DATA(*expr_crs);
891
0
    for (i = 1; *input_crs < input_end; (*input_crs)++) {
892
0
      if (input[*input_crs] == '\\' && !esc) {
893
0
        esc = 1;
894
0
        continue;
895
0
      }
896
897
0
      esc = 0;
898
0
      data[i++] = (widechar)input[*input_crs];
899
0
    }
900
0
    data[0] = i - 1;
901
0
    if (set) (*input_crs)++;
902
0
    return *expr_crs += 4 + data[0];
903
904
0
  case '^':
905
4
  case '$':
906
907
4
    if (*expr_crs + 3 >= expr_max) return 0;
908
4
    EXPR_TYPE(*expr_crs) = PTN_END_OF_INPUT;
909
4
    (*input_crs)++;
910
4
    return *expr_crs += 3;
911
912
0
  case '\\':
913
914
0
    (*input_crs)++;
915
0
    if (*input_crs >= input_max) return 0;
916
917
22
  default:
918
919
22
    if (*expr_crs + 5 >= expr_max) return 0;
920
22
    EXPR_TYPE(*expr_crs) = PTN_CHARS;
921
22
    EXPR_DATA_0(*expr_crs) = 1;
922
22
    EXPR_DATA_1(*expr_crs) = (widechar)input[*input_crs];
923
22
    (*input_crs)++;
924
22
    return *expr_crs += 5;
925
41
  }
926
41
}
927
928
static int
929
pattern_insert_alternate(const widechar *input, const int input_max, int *input_crs,
930
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
931
0
    int expr_insert, TranslationTableHeader *table, const FileInfo *nested) {
932
0
  int expr_group, expr_alt, expr_end;
933
0
934
0
  if (EXPR_TYPE(*expr_crs) == PTN_START) return 0;
935
0
936
0
  if (*expr_crs + 12 >= expr_max) return 0;
937
0
938
0
  /* setup alternate expression */
939
0
  expr_alt = *expr_crs;
940
0
  EXPR_TYPE(expr_alt) = PTN_ALTERNATE;
941
0
  EXPR_PRV(expr_alt) = PTN_END;
942
0
  EXPR_NXT(expr_alt) = PTN_END;
943
0
  *expr_crs += 5;
944
0
945
0
  /* setup group expression */
946
0
  expr_group = *expr_crs;
947
0
  EXPR_TYPE(expr_group) = PTN_GROUP;
948
0
  EXPR_PRV(expr_group) = PTN_END;
949
0
  EXPR_NXT(expr_group) = PTN_END;
950
0
  *expr_crs += 4;
951
0
  EXPR_DATA_0(expr_group) = *expr_crs;
952
0
953
0
  EXPR_TYPE(*expr_crs) = PTN_ERROR;
954
0
  EXPR_PRV(*expr_crs) = PTN_END;
955
0
  EXPR_NXT(*expr_crs) = PTN_END;
956
0
  if (!pattern_compile_1(input, input_max, input_crs, expr_data, expr_max, expr_crs,
957
0
        loop_cnts, table, nested))
958
0
    return 0;
959
0
  expr_end = *expr_crs;
960
0
  EXPR_NXT(expr_end) = expr_group;
961
0
962
0
  /* setup last end expression */
963
0
  if (*expr_crs + 3 >= expr_max) return 0;
964
0
  *expr_crs += 3;
965
0
  EXPR_TYPE(*expr_crs) = PTN_END;
966
0
  EXPR_NXT(*expr_crs) = PTN_END;
967
0
968
0
  /* replace insert expression with group expression using last end expression */
969
0
  EXPR_NXT(EXPR_PRV(expr_insert)) = expr_group;
970
0
  EXPR_PRV(expr_group) = EXPR_PRV(expr_insert);
971
0
972
0
  EXPR_NXT(expr_group) = *expr_crs;
973
0
  EXPR_PRV(*expr_crs) = expr_group;
974
0
975
0
  /* link alternate and insert expressions before group end expression */
976
0
  EXPR_NXT(EXPR_PRV(expr_end)) = expr_alt;
977
0
  EXPR_PRV(expr_alt) = EXPR_PRV(expr_end);
978
0
979
0
  EXPR_NXT(expr_alt) = expr_insert;
980
0
  EXPR_PRV(expr_insert) = expr_alt;
981
0
982
0
  EXPR_NXT(expr_insert) = expr_end;
983
0
  EXPR_PRV(expr_end) = expr_insert;
984
0
985
0
  return *expr_crs;
986
0
}
987
988
/* Compile all expression sequences, resolving character sets, attributes,
989
 * groups, nots, and hooks.  Note that unlike the other compile functions, on
990
 * returning the expr_crs is set to the last end expression, not after it.
991
 */
992
static int
993
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
994
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
995
4
    TranslationTableHeader *table, const FileInfo *nested) {
996
4
  int expr_crs_prv;
997
998
4
  if (*expr_crs + 6 >= expr_max) return 0;
999
1000
4
  expr_crs_prv = *expr_crs;
1001
1002
  /* setup start expression */
1003
4
  EXPR_TYPE(*expr_crs) = PTN_START;
1004
4
  EXPR_PRV(*expr_crs) = PTN_END;
1005
4
  *expr_crs += 3;
1006
4
  EXPR_NXT(expr_crs_prv) = *expr_crs;
1007
1008
  /* setup end expression */
1009
4
  EXPR_TYPE(*expr_crs) = PTN_END;
1010
4
  EXPR_PRV(*expr_crs) = expr_crs_prv;
1011
4
  EXPR_NXT(*expr_crs) = PTN_END;
1012
1013
35
  while (*input_crs < input_max) {
1014
31
    expr_crs_prv = *expr_crs;
1015
31
    if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max,
1016
31
          expr_crs, loop_cnts, table, nested))
1017
0
      return 0;
1018
1019
    /* setup end expression */
1020
31
    if (*expr_crs + 3 >= expr_max) return 0;
1021
31
    EXPR_NXT(expr_crs_prv) = *expr_crs;
1022
31
    EXPR_TYPE(*expr_crs) = PTN_END;
1023
31
    EXPR_PRV(*expr_crs) = expr_crs_prv;
1024
31
    EXPR_NXT(*expr_crs) = PTN_END;
1025
1026
    /* insert seqafterexpression before attributes of seqafterchars */
1027
    // if(EXPR_TYPE(expr_crs_prv) == PTN_ATTRIBUTES)
1028
    // if(EXPR_DATA_1(expr_crs_prv) & CTC_SeqAfter)
1029
    // {
1030
    //  i = 0;
1031
    //  pattern_insert_alternate(table->seqAfterExpression,
1032
    //    table->seqAfterExpressionLength, &i, expr_data, expr_max,
1033
    //    expr_crs, loop_cnts, expr_crs_prv);
1034
    // }
1035
31
  }
1036
1037
4
  return *expr_crs;
1038
4
}
1039
1040
/* Resolve optional and loop expressions.
1041
 */
1042
static int
1043
pattern_compile_2(
1044
14
    widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) {
1045
14
  int expr_start, expr_end, expr_prv, expr_sub;
1046
1047
69
  while (EXPR_TYPE(expr_at) != PTN_END) {
1048
55
    if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT) {
1049
10
      if (!pattern_compile_2(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs))
1050
0
        return 0;
1051
10
    }
1052
1053
55
    if (EXPR_TYPE(expr_at) == PTN_ZERO_MORE || EXPR_TYPE(expr_at) == PTN_ONE_MORE ||
1054
54
        EXPR_TYPE(expr_at) == PTN_OPTIONAL) {
1055
1
      if (*expr_crs + 6 >= expr_max) return 0;
1056
1057
      /* get previous expressions, there must
1058
       * be at least something and a PTN_START */
1059
1
      expr_sub = EXPR_PRV(expr_at);
1060
1
      if (EXPR_TYPE(expr_sub) == PTN_START) return 0;
1061
1
      expr_prv = EXPR_PRV(expr_sub);
1062
1063
      /* create start expression */
1064
1
      expr_start = *expr_crs;
1065
1
      EXPR_TYPE(expr_start) = PTN_START;
1066
1
      EXPR_PRV(expr_start) = PTN_END;
1067
1
      EXPR_NXT(expr_start) = expr_sub;
1068
1
      *expr_crs += 3;
1069
1070
      /* create end expression */
1071
1
      expr_end = *expr_crs;
1072
1
      EXPR_TYPE(expr_end) = PTN_END;
1073
1
      EXPR_PRV(expr_end) = expr_sub;
1074
1
      EXPR_NXT(expr_end) = expr_at;
1075
1
      *expr_crs += 3;
1076
1077
      /* relink previous expression before sub expression */
1078
1
      EXPR_DATA_0(expr_at) = expr_start;
1079
1
      EXPR_NXT(expr_prv) = expr_at;
1080
1
      EXPR_PRV(expr_at) = expr_prv;
1081
1082
      /* relink sub expression to start and end */
1083
1
      EXPR_PRV(expr_sub) = expr_start;
1084
1
      EXPR_NXT(expr_sub) = expr_end;
1085
1
    }
1086
1087
55
    expr_at = EXPR_NXT(expr_at);
1088
55
  }
1089
1090
14
  return 1;
1091
14
}
1092
1093
/* Resolves alternative expressions.
1094
 */
1095
static int
1096
pattern_compile_3(
1097
15
    widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) {
1098
15
  int expr_mrk, expr_start, expr_end, expr_sub_start, expr_sub_end;
1099
1100
71
  while (EXPR_TYPE(expr_at) != PTN_END) {
1101
56
    if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT ||
1102
46
        EXPR_TYPE(expr_at) == PTN_OPTIONAL ||
1103
46
        EXPR_TYPE(expr_at) == PTN_ZERO_MORE ||
1104
45
        EXPR_TYPE(expr_at) == PTN_ONE_MORE) {
1105
11
      if (!pattern_compile_3(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs))
1106
0
        return 0;
1107
11
    }
1108
1109
56
    if (EXPR_TYPE(expr_at) == PTN_ALTERNATE) {
1110
0
      if (*expr_crs + 12 >= expr_max) return 0;
1111
1112
      /* get previous start expression,
1113
       * can include alternate expressions */
1114
0
      expr_mrk = EXPR_PRV(expr_at);
1115
0
      if (EXPR_TYPE(expr_mrk) == PTN_START) return 0;
1116
0
      expr_sub_end = expr_mrk;
1117
0
      while (EXPR_TYPE(expr_mrk) != PTN_START) expr_mrk = EXPR_PRV(expr_mrk);
1118
0
      expr_sub_start = EXPR_NXT(expr_mrk);
1119
1120
      /* create first start expression */
1121
0
      expr_start = *expr_crs;
1122
0
      EXPR_TYPE(expr_start) = PTN_START;
1123
0
      EXPR_PRV(expr_start) = PTN_END;
1124
0
      EXPR_NXT(expr_start) = expr_sub_start;
1125
0
      *expr_crs += 3;
1126
1127
      /* create first end expression */
1128
0
      expr_end = *expr_crs;
1129
0
      EXPR_TYPE(expr_end) = PTN_END;
1130
0
      EXPR_PRV(expr_end) = expr_sub_end;
1131
0
      EXPR_NXT(expr_end) = expr_at;
1132
0
      *expr_crs += 3;
1133
1134
      /* relink previous expression before sub expression */
1135
0
      EXPR_DATA_0(expr_at) = expr_start;
1136
0
      EXPR_NXT(expr_mrk) = expr_at;
1137
0
      EXPR_PRV(expr_at) = expr_mrk;
1138
1139
      /* relink sub expression to start and end */
1140
0
      EXPR_PRV(expr_sub_start) = expr_start;
1141
0
      EXPR_NXT(expr_sub_end) = expr_end;
1142
1143
      /* get following PTN_END or PTN_ALTERNATE expression */
1144
0
      expr_mrk = EXPR_NXT(expr_at);
1145
0
      if (EXPR_TYPE(expr_mrk) == PTN_END || EXPR_TYPE(expr_mrk) == PTN_ALTERNATE)
1146
0
        return 0;
1147
0
      expr_sub_start = expr_mrk;
1148
0
      while (EXPR_TYPE(expr_mrk) != PTN_END && EXPR_TYPE(expr_mrk) != PTN_ALTERNATE)
1149
0
        expr_mrk = EXPR_NXT(expr_mrk);
1150
0
      expr_sub_end = EXPR_PRV(expr_mrk);
1151
1152
      /* create first start expression */
1153
0
      expr_start = *expr_crs;
1154
0
      EXPR_TYPE(expr_start) = PTN_START;
1155
0
      EXPR_PRV(expr_start) = PTN_END;
1156
0
      EXPR_NXT(expr_start) = expr_sub_start;
1157
0
      *expr_crs += 3;
1158
1159
      /* create first end expression */
1160
0
      expr_end = *expr_crs;
1161
0
      EXPR_TYPE(expr_end) = PTN_END;
1162
0
      EXPR_PRV(expr_end) = expr_sub_end;
1163
0
      EXPR_NXT(expr_end) = expr_at;
1164
0
      *expr_crs += 3;
1165
1166
      /* relink following expression before sub expression */
1167
0
      EXPR_DATA_1(expr_at) = expr_start;
1168
0
      EXPR_PRV(expr_mrk) = expr_at;
1169
0
      EXPR_NXT(expr_at) = expr_mrk;
1170
1171
      /* relink sub expression to start and end */
1172
0
      EXPR_PRV(expr_sub_start) = expr_start;
1173
0
      EXPR_NXT(expr_sub_end) = expr_end;
1174
1175
      /* check expressions were after alternate and got moved into
1176
       * a sub expression, previous expressions already checked */
1177
0
      if (!pattern_compile_3(expr_data, EXPR_DATA_1(expr_at), expr_max, expr_crs))
1178
0
        return 0;
1179
0
    }
1180
1181
56
    expr_at = EXPR_NXT(expr_at);
1182
56
  }
1183
1184
15
  return 1;
1185
15
}
1186
1187
int EXPORT_CALL
1188
_lou_pattern_compile(const widechar *input, const int input_max, widechar *expr_data,
1189
4
    const int expr_max, TranslationTableHeader *table, const FileInfo *nested) {
1190
4
  int input_crs;
1191
1192
4
  input_crs = 0;
1193
4
  expr_data[0] = 2;
1194
4
  expr_data[1] = 0;
1195
1196
4
  if (table == NULL || nested == NULL) return 0;
1197
1198
4
  if (!pattern_compile_1(input, input_max, &input_crs, expr_data, expr_max,
1199
4
        &expr_data[0], &expr_data[1], table, nested))
1200
0
    return 0;
1201
1202
  /* shift past the last end */
1203
4
  expr_data[0] += 3;
1204
1205
4
  if (!pattern_compile_2(expr_data, 2, expr_max, &expr_data[0])) return 0;
1206
1207
4
  if (!pattern_compile_3(expr_data, 2, expr_max, &expr_data[0])) return 0;
1208
1209
4
  return expr_data[0];
1210
4
}
1211
1212
////////////////////////////////////////////////////////////////////////////////
1213
1214
static void
1215
pattern_reverse_expression(widechar *expr_data, const int expr_start);
1216
1217
static void
1218
39
pattern_reverse_branch(widechar *expr_data, const int expr_at) {
1219
39
  widechar expr_swap;
1220
1221
39
  switch (EXPR_TYPE(expr_at)) {
1222
0
  case PTN_ALTERNATE:
1223
1224
0
    pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at));
1225
0
    expr_swap = EXPR_DATA_0(expr_at);
1226
0
    EXPR_DATA_0(expr_at) = EXPR_DATA_1(expr_at);
1227
0
    EXPR_DATA_1(expr_at) = expr_swap;
1228
1229
0
  case PTN_GROUP:
1230
10
  case PTN_NOT:
1231
10
  case PTN_ONE_MORE:
1232
11
  case PTN_ZERO_MORE:
1233
11
  case PTN_OPTIONAL:
1234
1235
11
    pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at));
1236
39
  }
1237
39
}
1238
1239
static void
1240
13
pattern_reverse_expression(widechar *expr_data, const int expr_start) {
1241
13
  widechar expr_end, expr_crs, expr_prv;
1242
1243
13
  expr_end = EXPR_NXT(expr_start);
1244
1245
  /* empty expression */
1246
13
  if (EXPR_TYPE(expr_end) == PTN_END) return;
1247
1248
  /* find end expression */
1249
52
  while (EXPR_TYPE(expr_end) != PTN_END) expr_end = EXPR_NXT(expr_end);
1250
1251
13
  expr_crs = EXPR_PRV(expr_end);
1252
13
  expr_prv = EXPR_PRV(expr_crs);
1253
1254
  /* relink expression before end expression */
1255
13
  EXPR_NXT(expr_start) = expr_crs;
1256
13
  EXPR_PRV(expr_crs) = expr_start;
1257
13
  EXPR_NXT(expr_crs) = expr_prv;
1258
1259
  /* reverse any branching expressions */
1260
13
  pattern_reverse_branch(expr_data, expr_crs);
1261
1262
39
  while (expr_prv != expr_start) {
1263
    /* shift current expression */
1264
26
    expr_crs = expr_prv;
1265
26
    expr_prv = EXPR_PRV(expr_prv);
1266
1267
    /* reverse any branching expressions */
1268
26
    pattern_reverse_branch(expr_data, expr_crs);
1269
1270
    /* relink current expression */
1271
26
    EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs);
1272
26
    EXPR_NXT(expr_crs) = expr_prv;
1273
26
  }
1274
1275
  /* relink expression after start expression */
1276
13
  EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs);
1277
13
  EXPR_NXT(expr_crs) = expr_end;
1278
13
  EXPR_PRV(expr_end) = expr_crs;
1279
13
}
1280
1281
void EXPORT_CALL
1282
2
_lou_pattern_reverse(widechar *expr_data) {
1283
2
  pattern_reverse_expression(expr_data, 2);
1284
2
}
1285
1286
////////////////////////////////////////////////////////////////////////////////
1287
1288
static int
1289
300
pattern_check_chars(const widechar input_char, const widechar *expr_data) {
1290
300
  int expr_cnt, i;
1291
1292
300
  expr_cnt = expr_data[0] + 1;
1293
1294
452
  for (i = 1; i < expr_cnt; i++)
1295
300
    if (input_char == expr_data[i]) break;
1296
1297
300
  if (i == expr_cnt) return 0;
1298
148
  return 1;
1299
300
}
1300
1301
static int
1302
pattern_check_attrs(const widechar input_char, const widechar *expr_data,
1303
300
    const TranslationTableHeader *table) {
1304
300
  int attrs;
1305
1306
300
  attrs = ((expr_data[0] << 16) | expr_data[1]) & ~(CTC_EndOfInput | CTC_EmpMatch);
1307
300
  if (!checkAttr(input_char, attrs, table)) return 0;
1308
0
  return 1;
1309
300
}
1310
1311
static int
1312
pattern_check_expression(const widechar *const input, int *input_crs,
1313
    const int input_minmax, const int input_dir, const widechar *const expr_data,
1314
    int (*hook)(const widechar input, const int data_len), widechar *hook_data,
1315
    const int hook_max, int expr_crs, int notOperator, int loop_crs, int *loop_cnts,
1316
22.7k
    const TranslationTableHeader *table) {
1317
22.7k
  int input_crs_prv, input_start, attrs, ret, i;
1318
22.7k
  const widechar *data;
1319
1320
22.7k
  data = NULL;
1321
1322
  /* save input_crs to know if loop consumed input */
1323
22.7k
  input_start = *input_crs;
1324
1325
22.7k
  CHECK_OUTPUT(START, 0, __LINE__, "check start")
1326
1327
114k
  while (!(EXPR_TYPE(expr_crs) == PTN_END && EXPR_TYPE(expr_crs) == PTN_END)) {
1328
    /* end of input expression */
1329
114k
    if (EXPR_TYPE(expr_crs) == PTN_END_OF_INPUT) {
1330
229
      if (*input_crs * input_dir >= input_minmax * input_dir) {
1331
77
        if (notOperator)
1332
0
          CHECK_OUTPUT(
1333
77
              RETURN, 0, __LINE__, "end of input failed:  no input and not")
1334
77
        else
1335
77
          CHECK_OUTPUT(RETURN, 1, __LINE__, "end of input passed:  no input")
1336
77
        return !notOperator;
1337
152
      } else {
1338
152
        if (notOperator)
1339
0
          CHECK_OUTPUT(
1340
152
              RETURN, 1, __LINE__, "end of input passed:  input and not")
1341
152
        else
1342
152
          CHECK_OUTPUT(RETURN, 0, __LINE__, "end of input failed:  input")
1343
152
        return notOperator;
1344
152
      }
1345
229
    }
1346
1347
    /* no more input */
1348
114k
    if (*input_crs * input_dir >= input_minmax * input_dir) {
1349
380
      switch (EXPR_TYPE(expr_crs)) {
1350
0
      case PTN_ATTRIBUTES:
1351
1352
0
        attrs = (EXPR_DATA_0(expr_crs) << 16);
1353
0
        if (attrs & CTC_EndOfInput) {
1354
0
          if (notOperator) {
1355
0
            CHECK_OUTPUT(RETURN, 0, __LINE__,
1356
0
                "attributes failed:  end of input attribute:  not")
1357
0
            return 0;
1358
0
          }
1359
0
          CHECK_OUTPUT(RETURN, 1, __LINE__,
1360
0
              "attributes passed:  end of input attribute")
1361
0
          return 1;
1362
0
        }
1363
0
        CHECK_OUTPUT(RETURN, 0, __LINE__,
1364
0
            "attributes failed:  no end of input attribute")
1365
0
        return 0;
1366
1367
76
      case PTN_ANY:
1368
76
      case PTN_CHARS:
1369
1370
76
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed:  no input")
1371
76
        return 0;
1372
380
      }
1373
1374
304
      CHECK_OUTPUT(SHOW, 0, __LINE__, "no input")
1375
304
    }
1376
1377
114k
    switch (EXPR_TYPE(expr_crs)) {
1378
1379
45.8k
    case PTN_START:
1380
1381
45.8k
      expr_crs = EXPR_NXT(expr_crs);
1382
45.8k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "start next")
1383
45.8k
      break;
1384
1385
0
    case PTN_GROUP:
1386
1387
0
      expr_crs = EXPR_DATA_0(expr_crs);
1388
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "group next")
1389
0
      break;
1390
1391
23.0k
    case PTN_NOT:
1392
1393
23.0k
      notOperator = !notOperator;
1394
23.0k
      expr_crs = EXPR_DATA_0(expr_crs);
1395
23.0k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "not next")
1396
23.0k
      break;
1397
1398
0
    case PTN_ONE_MORE:
1399
1400
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ start")
1401
1402
22.3k
    case PTN_ZERO_MORE:
1403
1404
      /* check if loop already started */
1405
22.3k
      if (expr_crs == loop_crs) {
1406
22.3k
        loop_cnts[EXPR_DATA_1(loop_crs)]++;
1407
22.3k
        CHECK_OUTPUT(SHOW, 0, __LINE__, "loop again")
1408
22.3k
      } else {
1409
        /* check if loop nested, wasn't running but has a count */
1410
76
        if (loop_cnts[EXPR_DATA_1(expr_crs)]) {
1411
0
          CHECK_OUTPUT(SHOW, 0, __LINE__, "loop already running")
1412
0
          goto loop_next;
1413
0
        }
1414
1415
        /* start loop */
1416
76
        loop_crs = expr_crs;
1417
76
        loop_cnts[EXPR_DATA_1(loop_crs)] = 1;
1418
76
        CHECK_OUTPUT(SHOW, 0, __LINE__, "loop start")
1419
76
      }
1420
1421
      /* start loop expression */
1422
22.3k
      input_crs_prv = *input_crs;
1423
22.3k
      ret = pattern_check_expression(input, input_crs, input_minmax, input_dir,
1424
22.3k
          expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1425
22.3k
          notOperator, loop_crs, loop_cnts, table);
1426
22.3k
      if (ret) {
1427
22.3k
        CHECK_OUTPUT(RETURN, 1, __LINE__, "loop passed")
1428
22.3k
        return 1;
1429
22.3k
      }
1430
76
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop failed")
1431
76
      *input_crs = input_crs_prv;
1432
1433
      /* check loop count */
1434
76
      loop_cnts[EXPR_DATA_1(loop_crs)]--;
1435
76
      if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE) {
1436
0
        if (loop_cnts[EXPR_DATA_1(loop_crs)] < 1) {
1437
0
          CHECK_OUTPUT(RETURN, 0, __LINE__, "loop+ failed")
1438
0
          return 0;
1439
0
        } else
1440
0
          CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ passed")
1441
0
      }
1442
1443
    /* continue after loop */
1444
76
    loop_next:
1445
76
      expr_crs = EXPR_NXT(expr_crs);
1446
76
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop next")
1447
76
      break;
1448
1449
0
    case PTN_OPTIONAL:
1450
1451
      /* save current state */
1452
0
      input_crs_prv = *input_crs;
1453
1454
      /* start optional expression */
1455
0
      CHECK_OUTPUT(CALL, 0, __LINE__, "option start")
1456
0
      if (pattern_check_expression(input, input_crs, input_minmax, input_dir,
1457
0
            expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1458
0
            notOperator, loop_crs, loop_cnts, table)) {
1459
0
        CHECK_OUTPUT(RETURN, 1, __LINE__, "option passed")
1460
0
        return 1;
1461
0
      }
1462
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "option failed")
1463
1464
      /* continue after optional expression */
1465
0
      *input_crs = input_crs_prv;
1466
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "no option start")
1467
0
      expr_crs = EXPR_NXT(expr_crs);
1468
0
      break;
1469
1470
0
    case PTN_ALTERNATE:
1471
1472
      /* save current state */
1473
0
      input_crs_prv = *input_crs;
1474
1475
      /* start first expression */
1476
0
      CHECK_OUTPUT(CALL, 0, __LINE__, "or 1 start")
1477
0
      if (pattern_check_expression(input, input_crs, input_minmax, input_dir,
1478
0
            expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1479
0
            notOperator, loop_crs, loop_cnts, table)) {
1480
0
        CHECK_OUTPUT(RETURN, 1, __LINE__, "or 1 passed")
1481
0
        return 1;
1482
0
      }
1483
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "or 1 failed")
1484
1485
      /* start second expression (no need to push) */
1486
0
      *input_crs = input_crs_prv;
1487
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "or 2 start")
1488
0
      expr_crs = EXPR_DATA_1(expr_crs);
1489
0
      break;
1490
1491
22.3k
    case PTN_ANY:
1492
1493
22.3k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "any")
1494
22.3k
      *input_crs += input_dir;
1495
22.3k
      expr_crs = EXPR_NXT(expr_crs);
1496
22.3k
      break;
1497
1498
300
    case PTN_ATTRIBUTES:
1499
1500
300
      ret = pattern_check_attrs(
1501
300
          input[*input_crs], EXPR_CONST_DATA(expr_crs), table);
1502
300
      if (ret && notOperator) {
1503
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed:  not");
1504
0
        return 0;
1505
0
      }
1506
300
      if (!ret && !notOperator) {
1507
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed");
1508
0
        return 0;
1509
0
      }
1510
300
      CHECK_OUTPUT(SHOW, 0, __LINE__, "attributes passed")
1511
300
      *input_crs += input_dir;
1512
300
      expr_crs = EXPR_NXT(expr_crs);
1513
300
      break;
1514
1515
300
    case PTN_CHARS:
1516
1517
300
      ret = pattern_check_chars(input[*input_crs], EXPR_CONST_DATA(expr_crs));
1518
300
      if (ret && notOperator) {
1519
148
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed:  not");
1520
148
        return 0;
1521
148
      }
1522
152
      if (!ret && !notOperator) {
1523
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed");
1524
0
        return 0;
1525
0
      }
1526
152
      CHECK_OUTPUT(SHOW, 0, __LINE__, "chars passed")
1527
152
      *input_crs += input_dir;
1528
152
      expr_crs = EXPR_NXT(expr_crs);
1529
152
      break;
1530
1531
0
    case PTN_HOOK:
1532
1533
0
      if (hook == NULL) {
1534
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed:  NULL");
1535
0
        return 0;
1536
0
      }
1537
1538
      /* copy expression data */
1539
0
      data = EXPR_CONST_DATA(expr_crs);
1540
0
      for (i = 0; i < data[0]; i++) hook_data[i] = data[i + 1];
1541
1542
      /* call hook function */
1543
0
      ret = hook(input[*input_crs], data[0]);
1544
0
      if (ret && notOperator) {
1545
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed:  not");
1546
0
        return 0;
1547
0
      }
1548
0
      if (!ret && !notOperator) {
1549
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed");
1550
0
        return 0;
1551
0
      }
1552
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "hook passed")
1553
0
      *input_crs += input_dir;
1554
0
      expr_crs = EXPR_NXT(expr_crs);
1555
0
      break;
1556
1557
0
    case PTN_END:
1558
0
      break;
1559
1560
0
    default:
1561
1562
0
      CHECK_OUTPUT(RETURN, 0, __LINE__, "unknown opcode")
1563
0
      return 0;
1564
114k
    }
1565
1566
    /* check end expression  */
1567
114k
    while (EXPR_TYPE(expr_crs) == PTN_END) {
1568
45.1k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "end")
1569
1570
      /* check for end of expressions */
1571
45.1k
      if (EXPR_NXT(expr_crs) == PTN_END) break;
1572
1573
45.1k
      expr_crs = EXPR_NXT(expr_crs);
1574
1575
      /* returning loop */
1576
45.1k
      if (EXPR_TYPE(expr_crs) == PTN_ZERO_MORE ||
1577
22.8k
          EXPR_TYPE(expr_crs) == PTN_ONE_MORE) {
1578
22.3k
        CHECK_OUTPUT(SHOW, 0, __LINE__, "end loop")
1579
1580
        /* check that loop consumed input */
1581
22.3k
        if (*input_crs == input_start) {
1582
0
          CHECK_OUTPUT(RETURN, 0, __LINE__, "loop failed:  did not consume")
1583
0
          return 0;
1584
0
        }
1585
1586
        /* loops do not continue to the next expression */
1587
22.3k
        break;
1588
22.3k
      }
1589
1590
      /* returning not */
1591
22.8k
      if (EXPR_TYPE(expr_crs) == PTN_NOT) notOperator = !notOperator;
1592
1593
22.8k
      expr_crs = EXPR_NXT(expr_crs);
1594
1595
22.8k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "end next")
1596
22.8k
    }
1597
1598
91.8k
    CHECK_OUTPUT(SHOW, 0, __LINE__, "check next")
1599
91.8k
  }
1600
1601
0
  CHECK_OUTPUT(RETURN, 1, __LINE__, "check passed:  end of expression");
1602
0
  return 1;
1603
22.7k
}
1604
1605
static int
1606
pattern_check_hook(const widechar *input, const int input_start, const int input_minmax,
1607
    const int input_dir, const widechar *expr_data,
1608
    int (*hook)(const widechar input, const int data_len), widechar *hook_data,
1609
377
    const int hook_max, const TranslationTableHeader *table) {
1610
377
  int input_crs, ret, *loop_cnts;
1611
1612
377
  input_crs = input_start;
1613
377
  loop_cnts = malloc(expr_data[1] * sizeof(int));
1614
377
  memset(loop_cnts, 0, expr_data[1] * sizeof(int));
1615
377
  ret = pattern_check_expression(input, &input_crs, input_minmax, input_dir, expr_data,
1616
377
      hook, hook_data, hook_max, 2, 0, 0, loop_cnts, table);
1617
377
  free(loop_cnts);
1618
377
  return ret;
1619
377
}
1620
1621
int EXPORT_CALL
1622
_lou_pattern_check(const widechar *input, const int input_start, const int input_minmax,
1623
    const int input_dir, const widechar *expr_data,
1624
377
    const TranslationTableHeader *table) {
1625
#ifdef CHECK_OUTPUT_DEFINED
1626
  pattern_output(expr_data, table);
1627
#endif
1628
377
  return pattern_check_hook(
1629
377
      input, input_start, input_minmax, input_dir, expr_data, NULL, NULL, 0, table);
1630
377
}
1631
1632
////////////////////////////////////////////////////////////////////////////////