Coverage Report

Created: 2025-12-14 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/liblouis/liblouis/pattern.c
Line
Count
Source
1
/* liblouis Braille Translation and Back-Translation Library
2
3
   Copyright (C) 2016 Mike Gray, American Printing House for the Blind
4
5
   This file is part of liblouis.
6
7
   liblouis is free software: you can redistribute it and/or modify it
8
   under the terms of the GNU Lesser General Public License as published
9
   by the Free Software Foundation, either version 2.1 of the License, or
10
   (at your option) any later version.
11
12
   liblouis is distributed in the hope that it will be useful, but
13
   WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
   Lesser General Public License for more details.
16
17
   You should have received a copy of the GNU Lesser General Public
18
   License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
19
*/
20
21
#include "config.h"
22
23
#include <stdlib.h>
24
#include <stdio.h>
25
#include <string.h>
26
#include <ctype.h>
27
28
#include "internal.h"
29
30
//#define CHECK_OUTPUT_DEFINED
31
32
/////
33
34
// TODO: these functions are static and copied several times
35
36
int translation_direction = 1;
37
38
static TranslationTableCharacter *
39
4.63k
findCharOrDots(widechar c, int m, const TranslationTableHeader *table) {
40
  /* Look up character or dot pattern in the appropriate
41
   * table. */
42
4.63k
  static TranslationTableCharacter noChar = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, 32, 0,
43
4.63k
    0 };
44
4.63k
  static TranslationTableCharacter noDots = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0,
45
4.63k
    LOU_DOTS, 0, 0 };
46
4.63k
  TranslationTableCharacter *notFound;
47
4.63k
  TranslationTableCharacter *character;
48
4.63k
  TranslationTableOffset bucket;
49
4.63k
  unsigned long int makeHash = _lou_charHash(c);
50
4.63k
  if (m == 0) {
51
4.63k
    bucket = table->characters[makeHash];
52
4.63k
    notFound = &noChar;
53
4.63k
  } else {
54
0
    bucket = table->dots[makeHash];
55
0
    notFound = &noDots;
56
0
  }
57
4.63k
  while (bucket) {
58
3.47k
    character = (TranslationTableCharacter *)&table->ruleArea[bucket];
59
3.47k
    if (character->value == c) return character;
60
0
    bucket = character->next;
61
0
  }
62
1.15k
  notFound->value = c;
63
1.15k
  return notFound;
64
4.63k
}
65
66
static int
67
checkAttr(const widechar c, const TranslationTableCharacterAttributes a,
68
4.63k
    const TranslationTableHeader *table) {
69
4.63k
  return (((findCharOrDots(c, translation_direction ? 0 : 1, table))->attributes & a)
70
4.63k
          ? 1
71
4.63k
          : 0);
72
4.63k
}
73
74
/////
75
76
/* Node types stored in the first cell of every compiled expression node.
 * PTN_END doubles as the "no link" value written into prv/nxt cells.
 */
enum pattern_type {
  PTN_ERROR = 0, /* placeholder written before a sub expression is compiled */

  PTN_START = 1, /* first node of a sequence */
  PTN_GROUP = 2, /* '(' ... ')' */
  PTN_NOT = 3,   /* '!' */

  PTN_ONE_MORE = 4,  /* '+' */
  PTN_ZERO_MORE = 5, /* '*' */
  PTN_OPTIONAL = 6,  /* '?' */

  PTN_ALTERNATE = 7, /* '|' */

  PTN_ANY = 8,           /* '.' */
  PTN_ATTRIBUTES = 9,    /* '%' */
  PTN_CHARS = 10,        /* '[' ... ']' or a single literal character */
  PTN_HOOK = 11,         /* '@' */
  PTN_END_OF_INPUT = 12, /* '^' or '$' */

  PTN_END = 0xffff, /* last node of a sequence */
};
97
98
8.56M
/* Cell accessors for a node that starts at index `at` of `buffer`:
 *   at + 0  node type
 *   at + 1  index of the previous node
 *   at + 2  index of the next node
 *   at + 3  first data cell, followed by further data cells
 */
#define EXPR_TYPE_IN(at, buffer) ((buffer)[(at)])
#define EXPR_PRV_IN(at, buffer) ((buffer)[(at) + 1])
#define EXPR_NXT_IN(at, buffer) ((buffer)[(at) + 2])
#define EXPR_DATA_0_IN(at, buffer) ((buffer)[(at) + 3])
#define EXPR_DATA_1_IN(at, buffer) ((buffer)[(at) + 4])
#define EXPR_DATA_2_IN(at, buffer) ((buffer)[(at) + 5])
#define EXPR_DATA_IN(at, buffer) ((widechar *)&(buffer)[(at) + 3])
#define EXPR_CONST_DATA_IN(at, buffer) ((const widechar *)&(buffer)[(at) + 3])

/* Same accessors, applied to whatever `expr_data` names at the point of use. */
#define EXPR_TYPE(at) EXPR_TYPE_IN((at), expr_data)
#define EXPR_PRV(at) EXPR_PRV_IN((at), expr_data)
#define EXPR_NXT(at) EXPR_NXT_IN((at), expr_data)
#define EXPR_DATA_0(at) EXPR_DATA_0_IN((at), expr_data)
#define EXPR_DATA_1(at) EXPR_DATA_1_IN((at), expr_data)
#define EXPR_DATA_2(at) EXPR_DATA_2_IN((at), expr_data)
#define EXPR_DATA(at) EXPR_DATA_IN((at), expr_data)
#define EXPR_CONST_DATA(at) EXPR_CONST_DATA_IN((at), expr_data)
115
116
/* CHECK_OUTPUT is a tracing hook.  Unless CHECK_OUTPUT_DEFINED is set it
 * expands to an empty block.  When it is set, DEBUG is forced on and the
 * macro forwards to do_output(), picking up input, input_crs, input_minmax,
 * input_dir, expr_data, expr_crs, notOperator, loop_crs and loop_cnts from
 * the scope in which it is used.
 */
#ifndef CHECK_OUTPUT_DEFINED

#define CHECK_OUTPUT(type, ret, line, msg) \
  { ; }

#else

#ifndef DEBUG
#define DEBUG
#endif

/* values for the `type` argument of CHECK_OUTPUT */
#define START 0
#define CALL 1
#define RETURN 2
#define SHOW 3

#define CHECK_OUTPUT(type, ret, line, msg)                                      \
  {                                                                           \
    do_output(type, ret, line, input[*input_crs], input_minmax, *input_crs, \
        input_dir, expr_data, expr_crs, notOperator, loop_crs, loop_cnts,   \
        msg);                                                               \
  }

#endif
140
141
struct expression {
142
  widechar type;
143
  widechar prv;
144
  widechar nxt;
145
  widechar data[1];
146
};
147
148
/* gdb won't know what this is unless it is actually used */
149
#ifdef DEBUG
150
static struct expression *expr_debug;
151
#endif
152
153
////////////////////////////////////////////////////////////////////////////////
154
155
static char spaces[] = "..............................";
156
static int space = 30;
157
158
static void
159
pattern_output_expression(
160
0
    const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) {
161
0
  int i;
162
0
163
0
  if (expr_crs == PTN_END) return;
164
0
165
0
  while (EXPR_TYPE(expr_crs) != PTN_END) {
166
0
    printf("%s%d", &spaces[space], expr_crs);
167
0
    if (expr_crs < 100) printf(" ");
168
0
    if (expr_crs < 10) printf(" ");
169
0
    for (i = 0; i < 13 - (30 - space); i++) printf(" ");
170
0
171
0
    switch (EXPR_TYPE(expr_crs)) {
172
0
    case PTN_START:
173
0
174
0
      printf("START\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
175
0
      break;
176
0
177
0
    case PTN_GROUP:
178
0
179
0
      printf("(    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
180
0
          EXPR_DATA_0(expr_crs));
181
0
      space--;
182
0
      if (space < 0) space = 0;
183
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
184
0
      space++;
185
0
      if (space > 30) space = 30;
186
0
      break;
187
0
188
0
    case PTN_NOT:
189
0
190
0
      printf("!    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
191
0
          EXPR_DATA_0(expr_crs));
192
0
      space--;
193
0
      if (space < 0) space = 0;
194
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
195
0
      space++;
196
0
      if (space > 30) space = 30;
197
0
      break;
198
0
199
0
    case PTN_ONE_MORE:
200
0
201
0
      printf("+    \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
202
0
          EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
203
0
      space--;
204
0
      if (space < 0) space = 0;
205
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
206
0
      space++;
207
0
      if (space > 30) space = 30;
208
0
      break;
209
0
210
0
    case PTN_ZERO_MORE:
211
0
212
0
      printf("*    \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
213
0
          EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
214
0
      space--;
215
0
      if (space < 0) space = 0;
216
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
217
0
      space++;
218
0
      if (space > 30) space = 30;
219
0
      break;
220
0
221
0
    case PTN_OPTIONAL:
222
0
223
0
      printf("?    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
224
0
          EXPR_DATA_0(expr_crs));
225
0
      space--;
226
0
      if (space < 0) space = 0;
227
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
228
0
      space++;
229
0
      if (space > 30) space = 30;
230
0
      break;
231
0
232
0
    case PTN_ALTERNATE:
233
0
234
0
      printf("|    \t%d\t%d\t-> %d\t-> %d\n", EXPR_PRV(expr_crs),
235
0
          EXPR_NXT(expr_crs), EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
236
0
      space--;
237
0
      if (space < 0) space = 0;
238
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
239
0
      pattern_output_expression(expr_data, EXPR_DATA_1(expr_crs), table);
240
0
      space++;
241
0
      if (space > 30) space = 30;
242
0
      break;
243
0
244
0
    case PTN_ANY:
245
0
246
0
      printf(".    \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
247
0
      break;
248
0
249
0
    case PTN_ATTRIBUTES:
250
0
251
0
      printf("%%    \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
252
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0");
253
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1");
254
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2");
255
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3");
256
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4");
257
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5");
258
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6");
259
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7");
260
0
      if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^");
261
0
      if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_");
262
0
      if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#");
263
0
      if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a");
264
0
      if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u");
265
0
      if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l");
266
0
      if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf(".");
267
0
      if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$");
268
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~");
269
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<");
270
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">");
271
0
      puts("");
272
0
      break;
273
0
274
0
    case PTN_CHARS:
275
0
276
0
      printf("[]   \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
277
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
278
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
279
0
      puts("");
280
0
      break;
281
0
282
0
    case PTN_HOOK:
283
0
284
0
      printf("@    \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
285
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
286
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
287
0
      puts("");
288
0
      break;
289
0
290
0
    case PTN_END_OF_INPUT:
291
0
292
0
      printf("^    \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
293
0
      break;
294
0
295
0
    default:
296
0
297
0
      printf("%d?    \t%d\t%d\n", EXPR_TYPE(expr_crs), EXPR_PRV(expr_crs),
298
0
          EXPR_NXT(expr_crs));
299
0
      break;
300
0
    }
301
0
302
0
    expr_crs = EXPR_NXT(expr_crs);
303
0
  }
304
0
305
0
  printf("%s%d", &spaces[space], expr_crs);
306
0
  if (expr_crs < 100) printf(" ");
307
0
  if (expr_crs < 10) printf(" ");
308
0
  for (i = 0; i < 13 - (30 - space); i++) printf(" ");
309
0
  printf("END\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
310
0
  fflush(stdout);
311
0
  return;
312
0
}
313
314
static void
315
0
pattern_output(const widechar *expr_data, const TranslationTableHeader *table) {
316
0
  printf("%d    \tlength\n", expr_data[0]);
317
0
  printf("%d    \tloops\n", expr_data[1]);
318
0
  if (expr_data[0] > 0 && expr_data[0] != PTN_END)
319
0
    pattern_output_expression(expr_data, 2, table);
320
0
}
321
322
static void
323
pattern_print_expression(
324
0
    const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) {
325
0
  int i;
326
0
327
0
  if (expr_crs == PTN_END) return;
328
0
329
0
  while (EXPR_TYPE(expr_crs) != PTN_END) {
330
0
    switch (EXPR_TYPE(expr_crs)) {
331
0
    case PTN_START:
332
0
      break;
333
0
334
0
    case PTN_GROUP:
335
0
336
0
      printf(" (");
337
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
338
0
      printf(") ");
339
0
      break;
340
0
341
0
    case PTN_NOT:
342
0
343
0
      printf("!");
344
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
345
0
      break;
346
0
347
0
    case PTN_ONE_MORE:
348
0
349
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
350
0
      printf("+");
351
0
      break;
352
0
353
0
    case PTN_ZERO_MORE:
354
0
355
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
356
0
      printf("*");
357
0
      break;
358
0
359
0
    case PTN_OPTIONAL:
360
0
361
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
362
0
      printf("?");
363
0
      break;
364
0
365
0
    case PTN_ALTERNATE:
366
0
367
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
368
0
      printf(" | ");
369
0
      pattern_print_expression(expr_data, EXPR_DATA_1(expr_crs), table);
370
0
      break;
371
0
372
0
    case PTN_ANY:
373
0
374
0
      printf(".");
375
0
      break;
376
0
377
0
    case PTN_ATTRIBUTES:
378
0
379
0
      printf("%%[");
380
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0");
381
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1");
382
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2");
383
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3");
384
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4");
385
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5");
386
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6");
387
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7");
388
0
      if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^");
389
0
      if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_");
390
0
      if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#");
391
0
      if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a");
392
0
      if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u");
393
0
      if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l");
394
0
      if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf(".");
395
0
      if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$");
396
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~");
397
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<");
398
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">");
399
0
      printf("]");
400
0
      break;
401
0
402
0
    case PTN_CHARS:
403
0
404
0
      if (EXPR_DATA_0(expr_crs) == 1)
405
0
        printf("%c", EXPR_DATA_1(expr_crs));
406
0
      else {
407
0
        printf("[");
408
0
        for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
409
0
          printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
410
0
        printf("]");
411
0
      }
412
0
      break;
413
0
414
0
    case PTN_HOOK:
415
0
416
0
      printf("@[");
417
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
418
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
419
0
      printf("]");
420
0
      break;
421
0
422
0
    case PTN_END_OF_INPUT:
423
0
424
0
      printf("^");
425
0
      break;
426
0
427
0
      // default:  printf("%d?\n", EXPR_TYPE(expr_crs));  break;
428
0
    }
429
0
430
0
    expr_crs = EXPR_NXT(expr_crs);
431
0
  }
432
0
433
0
  return;
434
0
}
435
436
static void
437
0
pattern_print(const widechar *expr_data, const TranslationTableHeader *table) {
438
0
  if (expr_data[0] > 0 && expr_data[0] != PTN_END)
439
0
    pattern_print_expression(expr_data, 2, table);
440
0
  puts("");
441
0
}
442
443
#ifdef CHECK_OUTPUT_DEFINED
444
445
/* Print one space for each power of ten from 100000 down to 10 that `value`
 * is below.
 */
static void
do_padd(const int value) {
  int limit;

  for (limit = 100000; limit >= 10; limit /= 10) {
    if (value < limit) printf(" ");
  }
}
453
454
/* Print one space if `value` is below 100 and another if it is below 10. */
static void
do_pad(const int value) {
  int limit;

  for (limit = 100; limit >= 10; limit /= 10) {
    if (value < limit) printf(" ");
  }
}
459
460
static void
461
do_output(const int type, const int ret, const int line,
462
463
    const int input, const int input_minmax, const int input_crs, const int input_dir,
464
    const widechar *expr_data, const int expr_crs, const int notOperator,
465
    const int loop_crs, const int *loop_cnts,
466
467
    const char *msg) {
468
  switch (type) {
469
  case START:
470
471
    space--;
472
    if (space < 0) space = 0;
473
    printf("|%s()  ", &spaces[space]);
474
    break;
475
476
  case CALL:
477
478
    printf("|%s>   ", &spaces[space]);
479
    break;
480
481
  case RETURN:
482
483
    printf("|%s<%d  ", &spaces[space], ret);
484
    space++;
485
    if (space > 31) space = 31;
486
    break;
487
488
  case SHOW:
489
490
    printf("|%s    ", &spaces[space]);
491
    break;
492
  }
493
494
  printf("%d ", line);
495
  do_padd(line);
496
497
  switch (expr_data[expr_crs]) {
498
  case PTN_ERROR:
499
    printf("# ");
500
    break;
501
  case PTN_START:
502
    printf("> ");
503
    break;
504
  case PTN_END_OF_INPUT:
505
    printf("^ ");
506
    break;
507
  case PTN_ALTERNATE:
508
    printf("| ");
509
    break;
510
  case PTN_OPTIONAL:
511
    printf("? ");
512
    break;
513
  case PTN_ONE_MORE:
514
    printf("+ ");
515
    break;
516
  case PTN_ZERO_MORE:
517
    printf("* ");
518
    break;
519
  case PTN_NOT:
520
    printf("! ");
521
    break;
522
  case PTN_GROUP:
523
    printf("( ");
524
    break;
525
  case PTN_ANY:
526
    printf(". ");
527
    break;
528
  case PTN_ATTRIBUTES:
529
    printf("%% ");
530
    break;
531
  case PTN_CHARS:
532
    printf("[ ");
533
    break;
534
  case PTN_HOOK:
535
    printf("@ ");
536
    break;
537
  case PTN_END:
538
    printf("< ");
539
    break;
540
  default:
541
    printf("  ");
542
    break;
543
  }
544
  printf("%d ", expr_crs);
545
  do_padd(expr_crs);
546
547
  if (input > 31 && input < 127)
548
    printf("%c ", input);
549
  else
550
    printf("_ ");
551
552
  if (input_crs * input_dir >= input_minmax * input_dir)
553
    printf("#   ");
554
  else {
555
    printf("%d ", input_crs);
556
    do_pad(input_crs);
557
  }
558
559
  if (input_dir > 0)
560
    printf("<");
561
  else
562
    printf(">");
563
  printf("%d ", input_minmax);
564
  do_pad(input_minmax);
565
566
  if (notOperator)
567
    printf("!   ");
568
  else
569
    printf("    ");
570
571
  if (loop_crs) {
572
    printf("%d ", loop_crs);
573
    do_pad(loop_crs);
574
    printf("%d ", loop_cnts[EXPR_DATA_1(loop_crs)]);
575
    do_pad(loop_cnts[EXPR_DATA_1(loop_crs)]);
576
  } else
577
    printf("-   -   ");
578
  if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE || EXPR_TYPE(expr_crs) == PTN_ZERO_MORE) {
579
    printf("%d ", loop_cnts[EXPR_DATA_1(expr_crs)]);
580
    do_pad(loop_cnts[EXPR_DATA_1(expr_crs)]);
581
  } else
582
    printf("-   ");
583
584
  if (msg) printf("%s", msg);
585
  puts("");
586
}
587
588
#endif
589
590
////////////////////////////////////////////////////////////////////////////////
591
592
static int
593
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
594
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
595
    TranslationTableHeader *table, const FileInfo *nested);
596
597
static int
598
pattern_compile_expression(const widechar *input, const int input_max, int *input_crs,
599
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
600
10.7k
    TranslationTableHeader *table, const FileInfo *nested) {
601
10.7k
  widechar *data;
602
10.7k
  int expr_start, expr_end, expr_sub, expr_crs_prv;
603
10.7k
  int input_end;
604
10.7k
  int attrs0, attrs1;
605
10.7k
  int set, esc, nest, i;
606
607
10.7k
  switch (input[*input_crs]) {
608
2
  case '(':
609
610
2
    if (*expr_crs + 10 >= expr_max) return 0;
611
612
2
    (*input_crs)++;
613
2
    if (*input_crs >= input_max) return 0;
614
615
    /* find closing parenthesis */
616
2
    nest = esc = 0;
617
22
    for (input_end = *input_crs; input_end < input_max; input_end++) {
618
22
      if (input[input_end] == '\\' && !esc) {
619
0
        esc = 1;
620
0
        continue;
621
0
      }
622
623
22
      if (input[input_end] == '(' && !esc)
624
1
        nest++;
625
21
      else if (input[input_end] == ')' && !esc) {
626
3
        if (nest)
627
1
          nest--;
628
2
        else
629
2
          break;
630
3
      }
631
632
20
      esc = 0;
633
20
    }
634
2
    if (input_end >= input_max) return 0;
635
636
2
    EXPR_TYPE(*expr_crs) = PTN_GROUP;
637
638
    /* compile sub expressions */
639
2
    expr_crs_prv = *expr_crs;
640
2
    *expr_crs += 4;
641
2
    EXPR_DATA_0(expr_crs_prv) = *expr_crs;
642
2
    expr_sub = *expr_crs;
643
2
    EXPR_TYPE(expr_sub) = PTN_ERROR;
644
2
    EXPR_PRV(expr_sub) = PTN_END;
645
2
    EXPR_NXT(expr_sub) = PTN_END;
646
2
    if (!pattern_compile_1(input, input_end, input_crs, expr_data, expr_max, expr_crs,
647
2
          loop_cnts, table, nested))
648
0
      return 0;
649
2
    (*input_crs)++;
650
651
    /* reset end expression */
652
2
    expr_end = *expr_crs;
653
2
    EXPR_NXT(expr_end) = expr_crs_prv;
654
655
2
    return *expr_crs += 3;
656
657
10
  case '!':
658
659
10
    if (*expr_crs + 10 >= expr_max) return 0;
660
661
10
    (*input_crs)++;
662
10
    EXPR_TYPE(*expr_crs) = PTN_NOT;
663
10
    expr_crs_prv = *expr_crs;
664
10
    *expr_crs += 4;
665
10
    EXPR_DATA_0(expr_crs_prv) = *expr_crs;
666
667
    /* create start expression */
668
10
    expr_start = *expr_crs;
669
10
    EXPR_TYPE(expr_start) = PTN_START;
670
10
    EXPR_PRV(expr_start) = PTN_END;
671
10
    *expr_crs += 3;
672
10
    EXPR_NXT(expr_start) = *expr_crs;
673
674
    /* compile sub expression */
675
10
    expr_sub = *expr_crs;
676
10
    EXPR_TYPE(expr_sub) = PTN_ERROR;
677
10
    EXPR_PRV(expr_sub) = expr_start;
678
10
    EXPR_NXT(expr_sub) = PTN_END;
679
680
10
    if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max,
681
10
          expr_crs, loop_cnts, table, nested))
682
0
      return 0;
683
684
10
    if (*expr_crs + 3 >= expr_max) return 0;
685
686
10
    EXPR_NXT(expr_sub) = *expr_crs;
687
688
    /* create end expression */
689
10
    expr_end = *expr_crs;
690
10
    EXPR_TYPE(expr_end) = PTN_END;
691
10
    EXPR_PRV(expr_end) = expr_sub;
692
10
    EXPR_NXT(expr_end) = expr_crs_prv;
693
694
10
    return *expr_crs += 3;
695
696
55
  case '+':
697
698
55
    if (*expr_crs + 5 >= expr_max) return 0;
699
55
    EXPR_TYPE(*expr_crs) = PTN_ONE_MORE;
700
55
    EXPR_DATA_1(*expr_crs) = (*loop_cnts)++;
701
55
    (*input_crs)++;
702
55
    return *expr_crs += 5;
703
704
3
  case '*':
705
706
3
    if (*expr_crs + 5 >= expr_max) return 0;
707
3
    EXPR_TYPE(*expr_crs) = PTN_ZERO_MORE;
708
3
    EXPR_DATA_1(*expr_crs) = (*loop_cnts)++;
709
3
    (*input_crs)++;
710
3
    return *expr_crs += 5;
711
712
55
  case '?':
713
714
55
    if (*expr_crs + 4 >= expr_max) return 0;
715
55
    EXPR_TYPE(*expr_crs) = PTN_OPTIONAL;
716
55
    (*input_crs)++;
717
55
    return *expr_crs += 4;
718
719
1
  case '|':
720
721
1
    if (*expr_crs + 5 >= expr_max) return 0;
722
1
    EXPR_TYPE(*expr_crs) = PTN_ALTERNATE;
723
1
    (*input_crs)++;
724
1
    return *expr_crs += 5;
725
726
3
  case '.':
727
728
3
    if (*expr_crs + 3 >= expr_max) return 0;
729
3
    EXPR_TYPE(*expr_crs) = PTN_ANY;
730
3
    (*input_crs)++;
731
3
    return *expr_crs += 3;
732
733
3
  case '%':
734
735
3
    if (*expr_crs + 5 >= expr_max) return 0;
736
737
3
    (*input_crs)++;
738
3
    if (*input_crs >= input_max) return 0;
739
740
    /* find closing bracket */
741
3
    if (input[*input_crs] == '[') {
742
1
      set = 1;
743
1
      (*input_crs)++;
744
1
      for (input_end = *input_crs; input_end < input_max; input_end++)
745
1
        if (input[input_end] == ']') break;
746
1
      if (input_end >= input_max) return 0;
747
2
    } else {
748
2
      set = 0;
749
2
      input_end = *input_crs + 1;
750
2
    }
751
752
3
    EXPR_TYPE(*expr_crs) = PTN_ATTRIBUTES;
753
754
3
    attrs0 = attrs1 = 0;
755
5
    for (; (*input_crs) < input_end; (*input_crs)++) {
756
2
      switch (input[*input_crs]) {
757
0
      case '_':
758
0
        attrs0 |= CTC_Space;
759
0
        break;
760
0
      case '#':
761
0
        attrs0 |= CTC_Digit;
762
0
        break;
763
0
      case 'a':
764
0
        attrs0 |= CTC_Letter;
765
0
        break;
766
2
      case 'u':
767
2
        attrs0 |= CTC_UpperCase;
768
2
        break;
769
0
      case 'l':
770
0
        attrs0 |= CTC_LowerCase;
771
0
        break;
772
0
      case '.':
773
0
        attrs0 |= CTC_Punctuation;
774
0
        break;
775
0
      case '$':
776
0
        attrs0 |= CTC_Sign;
777
0
        break;
778
0
      case 'm':
779
0
        attrs0 |= CTC_Math;
780
0
        break;
781
0
      case '~':
782
0
        attrs0 |= CTC_SeqDelimiter;
783
0
        break;
784
0
      case '<':
785
0
        attrs0 |= CTC_SeqBefore;
786
0
        break;
787
0
      case '>':
788
0
        attrs0 |= CTC_SeqAfter;
789
0
        break;
790
791
0
      case '0':
792
0
      case '1':
793
0
      case '2':
794
0
      case '3':
795
0
      case '4':
796
0
      case '5':
797
0
      case '6':
798
0
      case '7': {
799
0
        int k = input[*input_crs] - '0';
800
0
        TranslationTableCharacterAttributes a = table->numberedAttributes[k];
801
0
        if (!a) {
802
          // attribute not used before yet: assign it a value
803
0
          a = table->numberedAttributes[k] =
804
0
              table->nextNumberedCharacterClassAttribute;
805
0
          if (a > CTC_UserDefined8) {
806
0
            _lou_logMessage(LOU_LOG_ERROR,
807
0
                "%s:%d: error: Too many character attributes defined",
808
0
                nested->fileName, nested->lineNumber);
809
0
            return 0;
810
0
          }
811
0
          table->nextNumberedCharacterClassAttribute <<= 1;
812
0
        }
813
0
        attrs1 |= (a >> 16);
814
0
        break;
815
0
      }
816
0
      case '^':
817
0
        attrs1 |= (CTC_EndOfInput >> 16);
818
0
        break;
819
820
0
      default:
821
0
        return 0;
822
2
      }
823
2
    }
824
3
    EXPR_DATA_0(*expr_crs) = attrs1;
825
3
    EXPR_DATA_1(*expr_crs) = attrs0;
826
827
3
    if (set) (*input_crs)++;
828
3
    return *expr_crs += 5;
829
830
2
  case '[':
831
832
2
    (*input_crs)++;
833
2
    if (*input_crs >= input_max) return 0;
834
835
    /* find closing bracket */
836
2
    esc = 0;
837
18
    for (input_end = *input_crs; input_end < input_max; input_end++) {
838
18
      if (input[input_end] == '\\' && !esc) {
839
2
        esc = 1;
840
2
        continue;
841
2
      }
842
843
16
      if (input[input_end] == ']' && !esc) break;
844
14
      esc = 0;
845
14
    }
846
2
    if (input_end >= input_max) return 0;
847
848
2
    if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0;
849
850
2
    EXPR_TYPE(*expr_crs) = PTN_CHARS;
851
852
2
    esc = 0;
853
2
    data = EXPR_DATA(*expr_crs);
854
18
    for (i = 1; *input_crs < input_end; (*input_crs)++) {
855
16
      if (input[*input_crs] == '\\' && !esc) {
856
2
        esc = 1;
857
2
        continue;
858
2
      }
859
860
14
      esc = 0;
861
14
      data[i++] = (widechar)input[*input_crs];
862
14
    }
863
2
    data[0] = i - 1;
864
2
    (*input_crs)++;
865
2
    return *expr_crs += 4 + data[0];
866
867
2
  case '@':
868
869
2
    (*input_crs)++;
870
2
    if (*input_crs >= input_max) return 0;
871
872
    /* find closing bracket */
873
2
    if (input[*input_crs] == '[') {
874
0
      set = 1;
875
0
      (*input_crs)++;
876
0
      for (input_end = *input_crs; input_end < input_max; input_end++)
877
0
        if (input[input_end] == ']') break;
878
0
      if (input_end >= input_max) return 0;
879
2
    } else {
880
2
      set = 0;
881
2
      input_end = *input_crs + 1;
882
2
    }
883
884
2
    if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0;
885
886
2
    EXPR_TYPE(*expr_crs) = PTN_HOOK;
887
888
2
    esc = 0;
889
2
    data = EXPR_DATA(*expr_crs);
890
4
    for (i = 1; *input_crs < input_end; (*input_crs)++) {
891
2
      if (input[*input_crs] == '\\' && !esc) {
892
0
        esc = 1;
893
0
        continue;
894
0
      }
895
896
2
      esc = 0;
897
2
      data[i++] = (widechar)input[*input_crs];
898
2
    }
899
2
    data[0] = i - 1;
900
2
    if (set) (*input_crs)++;
901
2
    return *expr_crs += 4 + data[0];
902
903
19
  case '^':
904
31
  case '$':
905
906
31
    if (*expr_crs + 3 >= expr_max) return 0;
907
31
    EXPR_TYPE(*expr_crs) = PTN_END_OF_INPUT;
908
31
    (*input_crs)++;
909
31
    return *expr_crs += 3;
910
911
5
  case '\\':
912
913
5
    (*input_crs)++;
914
5
    if (*input_crs >= input_max) return 0;
915
916
10.6k
  default:
917
918
10.6k
    if (*expr_crs + 5 >= expr_max) return 0;
919
10.6k
    EXPR_TYPE(*expr_crs) = PTN_CHARS;
920
10.6k
    EXPR_DATA_0(*expr_crs) = 1;
921
10.6k
    EXPR_DATA_1(*expr_crs) = (widechar)input[*input_crs];
922
10.6k
    (*input_crs)++;
923
10.6k
    return *expr_crs += 5;
924
10.7k
  }
925
10.7k
}
926
927
static int
928
pattern_insert_alternate(const widechar *input, const int input_max, int *input_crs,
929
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
930
0
    int expr_insert, TranslationTableHeader *table, const FileInfo *nested) {
931
0
  int expr_group, expr_alt, expr_end;
932
0
933
0
  if (EXPR_TYPE(*expr_crs) == PTN_START) return 0;
934
0
935
0
  if (*expr_crs + 12 >= expr_max) return 0;
936
0
937
0
  /* setup alternate expression */
938
0
  expr_alt = *expr_crs;
939
0
  EXPR_TYPE(expr_alt) = PTN_ALTERNATE;
940
0
  EXPR_PRV(expr_alt) = PTN_END;
941
0
  EXPR_NXT(expr_alt) = PTN_END;
942
0
  *expr_crs += 5;
943
0
944
0
  /* setup group expression */
945
0
  expr_group = *expr_crs;
946
0
  EXPR_TYPE(expr_group) = PTN_GROUP;
947
0
  EXPR_PRV(expr_group) = PTN_END;
948
0
  EXPR_NXT(expr_group) = PTN_END;
949
0
  *expr_crs += 4;
950
0
  EXPR_DATA_0(expr_group) = *expr_crs;
951
0
952
0
  EXPR_TYPE(*expr_crs) = PTN_ERROR;
953
0
  EXPR_PRV(*expr_crs) = PTN_END;
954
0
  EXPR_NXT(*expr_crs) = PTN_END;
955
0
  if (!pattern_compile_1(input, input_max, input_crs, expr_data, expr_max, expr_crs,
956
0
        loop_cnts, table, nested))
957
0
    return 0;
958
0
  expr_end = *expr_crs;
959
0
  EXPR_NXT(expr_end) = expr_group;
960
0
961
0
  /* setup last end expression */
962
0
  if (*expr_crs + 3 >= expr_max) return 0;
963
0
  *expr_crs += 3;
964
0
  EXPR_TYPE(*expr_crs) = PTN_END;
965
0
  EXPR_NXT(*expr_crs) = PTN_END;
966
0
967
0
  /* replace insert expression with group expression using last end expression */
968
0
  EXPR_NXT(EXPR_PRV(expr_insert)) = expr_group;
969
0
  EXPR_PRV(expr_group) = EXPR_PRV(expr_insert);
970
0
971
0
  EXPR_NXT(expr_group) = *expr_crs;
972
0
  EXPR_PRV(*expr_crs) = expr_group;
973
0
974
0
  /* link alternate and insert expressions before group end expression */
975
0
  EXPR_NXT(EXPR_PRV(expr_end)) = expr_alt;
976
0
  EXPR_PRV(expr_alt) = EXPR_PRV(expr_end);
977
0
978
0
  EXPR_NXT(expr_alt) = expr_insert;
979
0
  EXPR_PRV(expr_insert) = expr_alt;
980
0
981
0
  EXPR_NXT(expr_insert) = expr_end;
982
0
  EXPR_PRV(expr_end) = expr_insert;
983
0
984
0
  return *expr_crs;
985
0
}
986
987
/* Compile all expression sequences, resolving character sets, attributes,
988
 * groups, nots, and hooks.  Note that unlike the other compile functions, on
989
 * returning the expr_crs is set to the last end expression, not after it.
990
 */
991
static int
992
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
993
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
994
32
    TranslationTableHeader *table, const FileInfo *nested) {
995
32
  int expr_crs_prv;
996
997
32
  if (*expr_crs + 6 >= expr_max) return 0;
998
999
32
  expr_crs_prv = *expr_crs;
1000
1001
  /* setup start expression */
1002
32
  EXPR_TYPE(*expr_crs) = PTN_START;
1003
32
  EXPR_PRV(*expr_crs) = PTN_END;
1004
32
  *expr_crs += 3;
1005
32
  EXPR_NXT(expr_crs_prv) = *expr_crs;
1006
1007
  /* setup end expression */
1008
32
  EXPR_TYPE(*expr_crs) = PTN_END;
1009
32
  EXPR_PRV(*expr_crs) = expr_crs_prv;
1010
32
  EXPR_NXT(*expr_crs) = PTN_END;
1011
1012
10.7k
  while (*input_crs < input_max) {
1013
10.7k
    expr_crs_prv = *expr_crs;
1014
10.7k
    if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max,
1015
10.7k
          expr_crs, loop_cnts, table, nested))
1016
0
      return 0;
1017
1018
    /* setup end expression */
1019
10.7k
    if (*expr_crs + 3 >= expr_max) return 0;
1020
10.7k
    EXPR_NXT(expr_crs_prv) = *expr_crs;
1021
10.7k
    EXPR_TYPE(*expr_crs) = PTN_END;
1022
10.7k
    EXPR_PRV(*expr_crs) = expr_crs_prv;
1023
10.7k
    EXPR_NXT(*expr_crs) = PTN_END;
1024
1025
    /* insert seqafterexpression before attributes of seqafterchars */
1026
    // if(EXPR_TYPE(expr_crs_prv) == PTN_ATTRIBUTES)
1027
    // if(EXPR_DATA_1(expr_crs_prv) & CTC_SeqAfter)
1028
    // {
1029
    //  i = 0;
1030
    //  pattern_insert_alternate(table->seqAfterExpression,
1031
    //    table->seqAfterExpressionLength, &i, expr_data, expr_max,
1032
    //    expr_crs, loop_cnts, expr_crs_prv);
1033
    // }
1034
10.7k
  }
1035
1036
32
  return *expr_crs;
1037
32
}
1038
1039
/* Resolve optional and loop expressions.
1040
 */
1041
static int
1042
pattern_compile_2(
1043
42
    widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) {
1044
42
  int expr_start, expr_end, expr_prv, expr_sub;
1045
1046
10.8k
  while (EXPR_TYPE(expr_at) != PTN_END) {
1047
10.8k
    if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT) {
1048
12
      if (!pattern_compile_2(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs))
1049
0
        return 0;
1050
12
    }
1051
1052
10.8k
    if (EXPR_TYPE(expr_at) == PTN_ZERO_MORE || EXPR_TYPE(expr_at) == PTN_ONE_MORE ||
1053
10.7k
        EXPR_TYPE(expr_at) == PTN_OPTIONAL) {
1054
113
      if (*expr_crs + 6 >= expr_max) return 0;
1055
1056
      /* get previous expressions, there must
1057
       * be at least something and a PTN_START */
1058
113
      expr_sub = EXPR_PRV(expr_at);
1059
113
      if (EXPR_TYPE(expr_sub) == PTN_START) return 0;
1060
113
      expr_prv = EXPR_PRV(expr_sub);
1061
1062
      /* create start expression */
1063
113
      expr_start = *expr_crs;
1064
113
      EXPR_TYPE(expr_start) = PTN_START;
1065
113
      EXPR_PRV(expr_start) = PTN_END;
1066
113
      EXPR_NXT(expr_start) = expr_sub;
1067
113
      *expr_crs += 3;
1068
1069
      /* create end expression */
1070
113
      expr_end = *expr_crs;
1071
113
      EXPR_TYPE(expr_end) = PTN_END;
1072
113
      EXPR_PRV(expr_end) = expr_sub;
1073
113
      EXPR_NXT(expr_end) = expr_at;
1074
113
      *expr_crs += 3;
1075
1076
      /* relink previous expression before sub expression */
1077
113
      EXPR_DATA_0(expr_at) = expr_start;
1078
113
      EXPR_NXT(expr_prv) = expr_at;
1079
113
      EXPR_PRV(expr_at) = expr_prv;
1080
1081
      /* relink sub expression to start and end */
1082
113
      EXPR_PRV(expr_sub) = expr_start;
1083
113
      EXPR_NXT(expr_sub) = expr_end;
1084
113
    }
1085
1086
10.8k
    expr_at = EXPR_NXT(expr_at);
1087
10.8k
  }
1088
1089
42
  return 1;
1090
42
}
1091
1092
/* Resolves alternative expressions.
1093
 */
1094
static int
1095
pattern_compile_3(
1096
156
    widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) {
1097
156
  int expr_mrk, expr_start, expr_end, expr_sub_start, expr_sub_end;
1098
1099
11.0k
  while (EXPR_TYPE(expr_at) != PTN_END) {
1100
10.9k
    if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT ||
1101
10.9k
        EXPR_TYPE(expr_at) == PTN_OPTIONAL ||
1102
10.8k
        EXPR_TYPE(expr_at) == PTN_ZERO_MORE ||
1103
10.8k
        EXPR_TYPE(expr_at) == PTN_ONE_MORE) {
1104
125
      if (!pattern_compile_3(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs))
1105
0
        return 0;
1106
125
    }
1107
1108
10.9k
    if (EXPR_TYPE(expr_at) == PTN_ALTERNATE) {
1109
1
      if (*expr_crs + 12 >= expr_max) return 0;
1110
1111
      /* get previous start expression,
1112
       * can include alternate expressions */
1113
1
      expr_mrk = EXPR_PRV(expr_at);
1114
1
      if (EXPR_TYPE(expr_mrk) == PTN_START) return 0;
1115
1
      expr_sub_end = expr_mrk;
1116
6
      while (EXPR_TYPE(expr_mrk) != PTN_START) expr_mrk = EXPR_PRV(expr_mrk);
1117
1
      expr_sub_start = EXPR_NXT(expr_mrk);
1118
1119
      /* create first start expression */
1120
1
      expr_start = *expr_crs;
1121
1
      EXPR_TYPE(expr_start) = PTN_START;
1122
1
      EXPR_PRV(expr_start) = PTN_END;
1123
1
      EXPR_NXT(expr_start) = expr_sub_start;
1124
1
      *expr_crs += 3;
1125
1126
      /* create first end expression */
1127
1
      expr_end = *expr_crs;
1128
1
      EXPR_TYPE(expr_end) = PTN_END;
1129
1
      EXPR_PRV(expr_end) = expr_sub_end;
1130
1
      EXPR_NXT(expr_end) = expr_at;
1131
1
      *expr_crs += 3;
1132
1133
      /* relink previous expression before sub expression */
1134
1
      EXPR_DATA_0(expr_at) = expr_start;
1135
1
      EXPR_NXT(expr_mrk) = expr_at;
1136
1
      EXPR_PRV(expr_at) = expr_mrk;
1137
1138
      /* relink sub expression to start and end */
1139
1
      EXPR_PRV(expr_sub_start) = expr_start;
1140
1
      EXPR_NXT(expr_sub_end) = expr_end;
1141
1142
      /* get following PTN_END or PTN_ALTERNATE expression */
1143
1
      expr_mrk = EXPR_NXT(expr_at);
1144
1
      if (EXPR_TYPE(expr_mrk) == PTN_END || EXPR_TYPE(expr_mrk) == PTN_ALTERNATE)
1145
0
        return 0;
1146
1
      expr_sub_start = expr_mrk;
1147
1.01k
      while (EXPR_TYPE(expr_mrk) != PTN_END && EXPR_TYPE(expr_mrk) != PTN_ALTERNATE)
1148
1.01k
        expr_mrk = EXPR_NXT(expr_mrk);
1149
1
      expr_sub_end = EXPR_PRV(expr_mrk);
1150
1151
      /* create first start expression */
1152
1
      expr_start = *expr_crs;
1153
1
      EXPR_TYPE(expr_start) = PTN_START;
1154
1
      EXPR_PRV(expr_start) = PTN_END;
1155
1
      EXPR_NXT(expr_start) = expr_sub_start;
1156
1
      *expr_crs += 3;
1157
1158
      /* create first end expression */
1159
1
      expr_end = *expr_crs;
1160
1
      EXPR_TYPE(expr_end) = PTN_END;
1161
1
      EXPR_PRV(expr_end) = expr_sub_end;
1162
1
      EXPR_NXT(expr_end) = expr_at;
1163
1
      *expr_crs += 3;
1164
1165
      /* relink following expression before sub expression */
1166
1
      EXPR_DATA_1(expr_at) = expr_start;
1167
1
      EXPR_PRV(expr_mrk) = expr_at;
1168
1
      EXPR_NXT(expr_at) = expr_mrk;
1169
1170
      /* relink sub expression to start and end */
1171
1
      EXPR_PRV(expr_sub_start) = expr_start;
1172
1
      EXPR_NXT(expr_sub_end) = expr_end;
1173
1174
      /* check expressions were after alternate and got moved into
1175
       * a sub expression, previous expressions already checked */
1176
1
      if (!pattern_compile_3(expr_data, EXPR_DATA_1(expr_at), expr_max, expr_crs))
1177
0
        return 0;
1178
1
    }
1179
1180
10.9k
    expr_at = EXPR_NXT(expr_at);
1181
10.9k
  }
1182
1183
156
  return 1;
1184
156
}
1185
1186
int EXPORT_CALL
1187
_lou_pattern_compile(const widechar *input, const int input_max, widechar *expr_data,
1188
30
    const int expr_max, TranslationTableHeader *table, const FileInfo *nested) {
1189
30
  int input_crs;
1190
1191
30
  input_crs = 0;
1192
30
  expr_data[0] = 2;
1193
30
  expr_data[1] = 0;
1194
1195
30
  if (!pattern_compile_1(input, input_max, &input_crs, expr_data, expr_max,
1196
30
        &expr_data[0], &expr_data[1], table, nested))
1197
0
    return 0;
1198
1199
  /* shift past the last end */
1200
30
  expr_data[0] += 3;
1201
1202
30
  if (!pattern_compile_2(expr_data, 2, expr_max, &expr_data[0])) return 0;
1203
1204
30
  if (!pattern_compile_3(expr_data, 2, expr_max, &expr_data[0])) return 0;
1205
1206
30
  return expr_data[0];
1207
30
}
1208
1209
////////////////////////////////////////////////////////////////////////////////
1210
1211
static void
1212
pattern_reverse_expression(widechar *expr_data, const int expr_start);
1213
1214
static void
1215
10.7k
pattern_reverse_branch(widechar *expr_data, const int expr_at) {
1216
10.7k
  widechar expr_swap;
1217
1218
10.7k
  switch (EXPR_TYPE(expr_at)) {
1219
1
  case PTN_ALTERNATE:
1220
1221
1
    pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at));
1222
1
    expr_swap = EXPR_DATA_0(expr_at);
1223
1
    EXPR_DATA_0(expr_at) = EXPR_DATA_1(expr_at);
1224
1
    EXPR_DATA_1(expr_at) = expr_swap;
1225
1226
3
  case PTN_GROUP:
1227
13
  case PTN_NOT:
1228
68
  case PTN_ONE_MORE:
1229
71
  case PTN_ZERO_MORE:
1230
126
  case PTN_OPTIONAL:
1231
1232
126
    pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at));
1233
10.7k
  }
1234
10.7k
}
1235
1236
static void
1237
142
pattern_reverse_expression(widechar *expr_data, const int expr_start) {
1238
142
  widechar expr_end, expr_crs, expr_prv;
1239
1240
142
  expr_end = EXPR_NXT(expr_start);
1241
1242
  /* empty expression */
1243
142
  if (EXPR_TYPE(expr_end) == PTN_END) return;
1244
1245
  /* find end expression */
1246
10.9k
  while (EXPR_TYPE(expr_end) != PTN_END) expr_end = EXPR_NXT(expr_end);
1247
1248
142
  expr_crs = EXPR_PRV(expr_end);
1249
142
  expr_prv = EXPR_PRV(expr_crs);
1250
1251
  /* relink expression before end expression */
1252
142
  EXPR_NXT(expr_start) = expr_crs;
1253
142
  EXPR_PRV(expr_crs) = expr_start;
1254
142
  EXPR_NXT(expr_crs) = expr_prv;
1255
1256
  /* reverse any branching expressions */
1257
142
  pattern_reverse_branch(expr_data, expr_crs);
1258
1259
10.7k
  while (expr_prv != expr_start) {
1260
    /* shift current expression */
1261
10.6k
    expr_crs = expr_prv;
1262
10.6k
    expr_prv = EXPR_PRV(expr_prv);
1263
1264
    /* reverse any branching expressions */
1265
10.6k
    pattern_reverse_branch(expr_data, expr_crs);
1266
1267
    /* relink current expression */
1268
10.6k
    EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs);
1269
10.6k
    EXPR_NXT(expr_crs) = expr_prv;
1270
10.6k
  }
1271
1272
  /* relink expression after start expression */
1273
142
  EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs);
1274
142
  EXPR_NXT(expr_crs) = expr_end;
1275
142
  EXPR_PRV(expr_end) = expr_crs;
1276
142
}
1277
1278
void EXPORT_CALL
1279
15
_lou_pattern_reverse(widechar *expr_data) {
1280
15
  pattern_reverse_expression(expr_data, 2);
1281
15
}
1282
1283
////////////////////////////////////////////////////////////////////////////////
1284
1285
static int
1286
36.6k
pattern_check_chars(const widechar input_char, const widechar *expr_data) {
1287
36.6k
  int expr_cnt, i;
1288
1289
36.6k
  expr_cnt = expr_data[0] + 1;
1290
1291
70.9k
  for (i = 1; i < expr_cnt; i++)
1292
36.6k
    if (input_char == expr_data[i]) break;
1293
1294
36.6k
  if (i == expr_cnt) return 0;
1295
2.31k
  return 1;
1296
36.6k
}
1297
1298
static int
1299
pattern_check_attrs(const widechar input_char, const widechar *expr_data,
1300
4.63k
    const TranslationTableHeader *table) {
1301
4.63k
  int attrs;
1302
1303
4.63k
  attrs = ((expr_data[0] << 16) | expr_data[1]) & ~(CTC_EndOfInput | CTC_EmpMatch);
1304
4.63k
  if (!checkAttr(input_char, attrs, table)) return 0;
1305
0
  return 1;
1306
4.63k
}
1307
1308
static int
1309
pattern_check_expression(const widechar *const input, int *input_crs,
1310
    const int input_minmax, const int input_dir, const widechar *const expr_data,
1311
    int (*hook)(const widechar input, const int data_len), widechar *hook_data,
1312
    const int hook_max, int expr_crs, int notOperator, int loop_crs, int *loop_cnts,
1313
344k
    const TranslationTableHeader *table) {
1314
344k
  int input_crs_prv, input_start, attrs, ret, i;
1315
344k
  const widechar *data;
1316
1317
344k
  data = NULL;
1318
1319
  /* save input_crs to know if loop consumed input */
1320
344k
  input_start = *input_crs;
1321
1322
344k
  CHECK_OUTPUT(START, 0, __LINE__, "check start")
1323
1324
812k
  while (!(EXPR_TYPE(expr_crs) == PTN_END && EXPR_TYPE(expr_crs) == PTN_END)) {
1325
    /* end of input expression */
1326
812k
    if (EXPR_TYPE(expr_crs) == PTN_END_OF_INPUT) {
1327
32.1k
      if (*input_crs * input_dir >= input_minmax * input_dir) {
1328
2
        if (notOperator)
1329
0
          CHECK_OUTPUT(
1330
2
              RETURN, 0, __LINE__, "end of input failed:  no input and not")
1331
2
        else
1332
2
          CHECK_OUTPUT(RETURN, 1, __LINE__, "end of input passed:  no input")
1333
2
        return !notOperator;
1334
32.1k
      } else {
1335
32.1k
        if (notOperator)
1336
0
          CHECK_OUTPUT(
1337
32.1k
              RETURN, 1, __LINE__, "end of input passed:  input and not")
1338
32.1k
        else
1339
32.1k
          CHECK_OUTPUT(RETURN, 0, __LINE__, "end of input failed:  input")
1340
32.1k
        return notOperator;
1341
32.1k
      }
1342
32.1k
    }
1343
1344
    /* no more input */
1345
779k
    if (*input_crs * input_dir >= input_minmax * input_dir) {
1346
72
      switch (EXPR_TYPE(expr_crs)) {
1347
0
      case PTN_ATTRIBUTES:
1348
1349
0
        attrs = (EXPR_DATA_0(expr_crs) << 16);
1350
0
        if (attrs & CTC_EndOfInput) {
1351
0
          if (notOperator) {
1352
0
            CHECK_OUTPUT(RETURN, 0, __LINE__,
1353
0
                "attributes failed:  end of input attribute:  not")
1354
0
            return 0;
1355
0
          }
1356
0
          CHECK_OUTPUT(RETURN, 1, __LINE__,
1357
0
              "attributes passed:  end of input attribute")
1358
0
          return 1;
1359
0
        }
1360
0
        CHECK_OUTPUT(RETURN, 0, __LINE__,
1361
0
            "attributes failed:  no end of input attribute")
1362
0
        return 0;
1363
1364
1
      case PTN_ANY:
1365
2
      case PTN_CHARS:
1366
1367
2
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed:  no input")
1368
2
        return 0;
1369
72
      }
1370
1371
70
      CHECK_OUTPUT(SHOW, 0, __LINE__, "no input")
1372
70
    }
1373
1374
779k
    switch (EXPR_TYPE(expr_crs)) {
1375
1376
355k
    case PTN_START:
1377
1378
355k
      expr_crs = EXPR_NXT(expr_crs);
1379
355k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "start next")
1380
355k
      break;
1381
1382
0
    case PTN_GROUP:
1383
1384
0
      expr_crs = EXPR_DATA_0(expr_crs);
1385
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "group next")
1386
0
      break;
1387
1388
10.4k
    case PTN_NOT:
1389
1390
10.4k
      notOperator = !notOperator;
1391
10.4k
      expr_crs = EXPR_DATA_0(expr_crs);
1392
10.4k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "not next")
1393
10.4k
      break;
1394
1395
0
    case PTN_ONE_MORE:
1396
1397
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ start")
1398
1399
30.9k
    case PTN_ZERO_MORE:
1400
1401
      /* check if loop already started */
1402
30.9k
      if (expr_crs == loop_crs) {
1403
0
        loop_cnts[EXPR_DATA_1(loop_crs)]++;
1404
0
        CHECK_OUTPUT(SHOW, 0, __LINE__, "loop again")
1405
30.9k
      } else {
1406
        /* check if loop nested, wasn't running but has a count */
1407
30.9k
        if (loop_cnts[EXPR_DATA_1(expr_crs)]) {
1408
0
          CHECK_OUTPUT(SHOW, 0, __LINE__, "loop already running")
1409
0
          goto loop_next;
1410
0
        }
1411
1412
        /* start loop */
1413
30.9k
        loop_crs = expr_crs;
1414
30.9k
        loop_cnts[EXPR_DATA_1(loop_crs)] = 1;
1415
30.9k
        CHECK_OUTPUT(SHOW, 0, __LINE__, "loop start")
1416
30.9k
      }
1417
1418
      /* start loop expression */
1419
30.9k
      input_crs_prv = *input_crs;
1420
30.9k
      ret = pattern_check_expression(input, input_crs, input_minmax, input_dir,
1421
30.9k
          expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1422
30.9k
          notOperator, loop_crs, loop_cnts, table);
1423
30.9k
      if (ret) {
1424
1
        CHECK_OUTPUT(RETURN, 1, __LINE__, "loop passed")
1425
1
        return 1;
1426
1
      }
1427
30.9k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop failed")
1428
30.9k
      *input_crs = input_crs_prv;
1429
1430
      /* check loop count */
1431
30.9k
      loop_cnts[EXPR_DATA_1(loop_crs)]--;
1432
30.9k
      if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE) {
1433
0
        if (loop_cnts[EXPR_DATA_1(loop_crs)] < 1) {
1434
0
          CHECK_OUTPUT(RETURN, 0, __LINE__, "loop+ failed")
1435
0
          return 0;
1436
0
        } else
1437
0
          CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ passed")
1438
0
      }
1439
1440
    /* continue after loop */
1441
30.9k
    loop_next:
1442
30.9k
      expr_crs = EXPR_NXT(expr_crs);
1443
30.9k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop next")
1444
30.9k
      break;
1445
1446
307k
    case PTN_OPTIONAL:
1447
1448
      /* save current state */
1449
307k
      input_crs_prv = *input_crs;
1450
1451
      /* start optional expression */
1452
307k
      CHECK_OUTPUT(CALL, 0, __LINE__, "option start")
1453
307k
      if (pattern_check_expression(input, input_crs, input_minmax, input_dir,
1454
307k
            expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1455
307k
            notOperator, loop_crs, loop_cnts, table)) {
1456
31
        CHECK_OUTPUT(RETURN, 1, __LINE__, "option passed")
1457
31
        return 1;
1458
31
      }
1459
307k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "option failed")
1460
1461
      /* continue after optional expression */
1462
307k
      *input_crs = input_crs_prv;
1463
307k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "no option start")
1464
307k
      expr_crs = EXPR_NXT(expr_crs);
1465
307k
      break;
1466
1467
0
    case PTN_ALTERNATE:
1468
1469
      /* save current state */
1470
0
      input_crs_prv = *input_crs;
1471
1472
      /* start first expression */
1473
0
      CHECK_OUTPUT(CALL, 0, __LINE__, "or 1 start")
1474
0
      if (pattern_check_expression(input, input_crs, input_minmax, input_dir,
1475
0
            expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1476
0
            notOperator, loop_crs, loop_cnts, table)) {
1477
0
        CHECK_OUTPUT(RETURN, 1, __LINE__, "or 1 passed")
1478
0
        return 1;
1479
0
      }
1480
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "or 1 failed")
1481
1482
      /* start second expression (no need to push) */
1483
0
      *input_crs = input_crs_prv;
1484
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "or 2 start")
1485
0
      expr_crs = EXPR_DATA_1(expr_crs);
1486
0
      break;
1487
1488
3.55k
    case PTN_ANY:
1489
1490
3.55k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "any")
1491
3.55k
      *input_crs += input_dir;
1492
3.55k
      expr_crs = EXPR_NXT(expr_crs);
1493
3.55k
      break;
1494
1495
4.63k
    case PTN_ATTRIBUTES:
1496
1497
4.63k
      ret = pattern_check_attrs(
1498
4.63k
          input[*input_crs], EXPR_CONST_DATA(expr_crs), table);
1499
4.63k
      if (ret && notOperator) {
1500
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed:  not");
1501
0
        return 0;
1502
0
      }
1503
4.63k
      if (!ret && !notOperator) {
1504
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed");
1505
0
        return 0;
1506
0
      }
1507
4.63k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "attributes passed")
1508
4.63k
      *input_crs += input_dir;
1509
4.63k
      expr_crs = EXPR_NXT(expr_crs);
1510
4.63k
      break;
1511
1512
36.6k
    case PTN_CHARS:
1513
1514
36.6k
      ret = pattern_check_chars(input[*input_crs], EXPR_CONST_DATA(expr_crs));
1515
36.6k
      if (ret && notOperator) {
1516
2.31k
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed:  not");
1517
2.31k
        return 0;
1518
2.31k
      }
1519
34.3k
      if (!ret && !notOperator) {
1520
32.0k
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed");
1521
32.0k
        return 0;
1522
32.0k
      }
1523
2.31k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "chars passed")
1524
2.31k
      *input_crs += input_dir;
1525
2.31k
      expr_crs = EXPR_NXT(expr_crs);
1526
2.31k
      break;
1527
1528
30.9k
    case PTN_HOOK:
1529
1530
30.9k
      if (hook == NULL) {
1531
30.9k
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed:  NULL");
1532
30.9k
        return 0;
1533
30.9k
      }
1534
1535
      /* copy expression data */
1536
0
      data = EXPR_CONST_DATA(expr_crs);
1537
0
      for (i = 0; i < data[0]; i++) hook_data[i] = data[i + 1];
1538
1539
      /* call hook function */
1540
0
      ret = hook(input[*input_crs], data[0]);
1541
0
      if (ret && notOperator) {
1542
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed:  not");
1543
0
        return 0;
1544
0
      }
1545
0
      if (!ret && !notOperator) {
1546
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed");
1547
0
        return 0;
1548
0
      }
1549
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "hook passed")
1550
0
      *input_crs += input_dir;
1551
0
      expr_crs = EXPR_NXT(expr_crs);
1552
0
      break;
1553
1554
0
    case PTN_END:
1555
0
      break;
1556
1557
0
    default:
1558
1559
0
      CHECK_OUTPUT(RETURN, 0, __LINE__, "unknown opcode")
1560
0
      return 0;
1561
779k
    }
1562
1563
    /* check end expression  */
1564
1.95M
    while (EXPR_TYPE(expr_crs) == PTN_END) {
1565
1.49M
      CHECK_OUTPUT(SHOW, 0, __LINE__, "end")
1566
1567
      /* check for end of expressions */
1568
1.49M
      if (EXPR_NXT(expr_crs) == PTN_END) break;
1569
1570
1.49M
      expr_crs = EXPR_NXT(expr_crs);
1571
1572
      /* returning loop */
1573
1.49M
      if (EXPR_TYPE(expr_crs) == PTN_ZERO_MORE ||
1574
1.24M
          EXPR_TYPE(expr_crs) == PTN_ONE_MORE) {
1575
247k
        CHECK_OUTPUT(SHOW, 0, __LINE__, "end loop")
1576
1577
        /* check that loop consumed input */
1578
247k
        if (*input_crs == input_start) {
1579
247k
          CHECK_OUTPUT(RETURN, 0, __LINE__, "loop failed:  did not consume")
1580
247k
          return 0;
1581
247k
        }
1582
1583
        /* loops do not continue to the next expression */
1584
0
        break;
1585
247k
      }
1586
1587
      /* returning not */
1588
1.24M
      if (EXPR_TYPE(expr_crs) == PTN_NOT) notOperator = !notOperator;
1589
1590
1.24M
      expr_crs = EXPR_NXT(expr_crs);
1591
1592
1.24M
      CHECK_OUTPUT(SHOW, 0, __LINE__, "end next")
1593
1.24M
    }
1594
1595
467k
    CHECK_OUTPUT(SHOW, 0, __LINE__, "check next")
1596
467k
  }
1597
1598
0
  CHECK_OUTPUT(RETURN, 1, __LINE__, "check passed:  end of expression");
1599
0
  return 1;
1600
344k
}
1601
1602
static int
1603
pattern_check_hook(const widechar *input, const int input_start, const int input_minmax,
1604
    const int input_dir, const widechar *expr_data,
1605
    int (*hook)(const widechar input, const int data_len), widechar *hook_data,
1606
5.87k
    const int hook_max, const TranslationTableHeader *table) {
1607
5.87k
  int input_crs, ret, *loop_cnts;
1608
1609
5.87k
  input_crs = input_start;
1610
5.87k
  loop_cnts = malloc(expr_data[1] * sizeof(int));
1611
5.87k
  memset(loop_cnts, 0, expr_data[1] * sizeof(int));
1612
5.87k
  ret = pattern_check_expression(input, &input_crs, input_minmax, input_dir, expr_data,
1613
5.87k
      hook, hook_data, hook_max, 2, 0, 0, loop_cnts, table);
1614
5.87k
  free(loop_cnts);
1615
5.87k
  return ret;
1616
5.87k
}
1617
1618
int EXPORT_CALL
1619
_lou_pattern_check(const widechar *input, const int input_start, const int input_minmax,
1620
    const int input_dir, const widechar *expr_data,
1621
5.87k
    const TranslationTableHeader *table) {
1622
#ifdef CHECK_OUTPUT_DEFINED
1623
  pattern_output(expr_data, table);
1624
#endif
1625
5.87k
  return pattern_check_hook(
1626
5.87k
      input, input_start, input_minmax, input_dir, expr_data, NULL, NULL, 0, table);
1627
5.87k
}
1628
1629
////////////////////////////////////////////////////////////////////////////////