Coverage Report

Created: 2026-01-17 07:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/liblouis/liblouis/pattern.c
Line
Count
Source
1
/* liblouis Braille Translation and Back-Translation Library
2
3
   Copyright (C) 2016 Mike Gray, American Printing House for the Blind
4
5
   This file is part of liblouis.
6
7
   liblouis is free software: you can redistribute it and/or modify it
8
   under the terms of the GNU Lesser General Public License as published
9
   by the Free Software Foundation, either version 2.1 of the License, or
10
   (at your option) any later version.
11
12
   liblouis is distributed in the hope that it will be useful, but
13
   WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
   Lesser General Public License for more details.
16
17
   You should have received a copy of the GNU Lesser General Public
18
   License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
19
*/
20
21
#include "config.h"
22
23
#include <stdlib.h>
24
#include <stdio.h>
25
#include <string.h>
26
#include <ctype.h>
27
28
#include "internal.h"
29
30
//#define CHECK_OUTPUT_DEFINED
31
32
/////
33
34
// TODO: these functions are static and copied several times
35
36
/* Lookup-direction flag read by checkAttr(): a non-zero value makes it look
 * characters up in the character table, zero makes it use the dots table. */
int translation_direction = 1;
37
38
static TranslationTableCharacter *
39
12.0k
findCharOrDots(widechar c, int m, const TranslationTableHeader *table) {
40
  /* Look up character or dot pattern in the appropriate
41
   * table. */
42
12.0k
  static TranslationTableCharacter noChar = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, 32, 0,
43
12.0k
    0 };
44
12.0k
  static TranslationTableCharacter noDots = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0,
45
12.0k
    LOU_DOTS, 0, 0 };
46
12.0k
  TranslationTableCharacter *notFound;
47
12.0k
  TranslationTableCharacter *character;
48
12.0k
  TranslationTableOffset bucket;
49
12.0k
  unsigned long int makeHash = _lou_charHash(c);
50
12.0k
  if (m == 0) {
51
0
    bucket = table->characters[makeHash];
52
0
    notFound = &noChar;
53
12.0k
  } else {
54
12.0k
    bucket = table->dots[makeHash];
55
12.0k
    notFound = &noDots;
56
12.0k
  }
57
12.0k
  while (bucket) {
58
8.87k
    character = (TranslationTableCharacter *)&table->ruleArea[bucket];
59
8.87k
    if (character->value == c) return character;
60
0
    bucket = character->next;
61
0
  }
62
3.17k
  notFound->value = c;
63
3.17k
  return notFound;
64
12.0k
}
65
66
static int
67
checkAttr(const widechar c, const TranslationTableCharacterAttributes a,
68
12.0k
    const TranslationTableHeader *table) {
69
12.0k
  return (((findCharOrDots(c, translation_direction ? 0 : 1, table))->attributes & a)
70
12.0k
          ? 1
71
12.0k
          : 0);
72
12.0k
}
73
74
/////
75
76
/* Node types stored in slot 0 of every compiled expression node. */
enum pattern_type {
  PTN_ERROR = 0, /* placeholder written before a sub-expression is compiled */

  PTN_START = 1, /* first node of a sequence */
  PTN_GROUP = 2, /* '(' ... ')' */
  PTN_NOT = 3,   /* '!' */

  PTN_ONE_MORE = 4,  /* '+' */
  PTN_ZERO_MORE = 5, /* '*' */
  PTN_OPTIONAL = 6,  /* '?' */

  PTN_ALTERNATE = 7, /* '|' */

  PTN_ANY = 8,           /* '.' */
  PTN_ATTRIBUTES = 9,    /* '%' */
  PTN_CHARS = 10,        /* '[' ... ']' or a literal character */
  PTN_HOOK = 11,         /* '@' */
  PTN_END_OF_INPUT = 12, /* '^' or '$' */

  /* last node of a sequence; also stored in prv/nxt slots as "no link" */
  PTN_END = 0xffff,
};
97
98
849M
/* Accessors for one expression node that starts at index `at` of a widechar
 * buffer: slot 0 holds the node type, slot 1 the index of the previous node,
 * slot 2 the index of the next node, and slots 3 and up hold node data.
 * Both macro arguments are parenthesized so that an expression such as
 * `base + offset` can be passed as the buffer. All value accessors expand to
 * lvalues. */
#define EXPR_TYPE_IN(at, buffer) ((buffer)[(at) + 0])
#define EXPR_PRV_IN(at, buffer) ((buffer)[(at) + 1])
#define EXPR_NXT_IN(at, buffer) ((buffer)[(at) + 2])
#define EXPR_DATA_0_IN(at, buffer) ((buffer)[(at) + 3])
#define EXPR_DATA_1_IN(at, buffer) ((buffer)[(at) + 4])
#define EXPR_DATA_2_IN(at, buffer) ((buffer)[(at) + 5])
#define EXPR_DATA_IN(at, buffer) ((widechar *)&(buffer)[(at) + 3])
#define EXPR_CONST_DATA_IN(at, buffer) ((const widechar *)&(buffer)[(at) + 3])

/* Shorthands that operate on the variable named `expr_data` in the current
 * scope. */
#define EXPR_TYPE(at) EXPR_TYPE_IN((at), expr_data)
#define EXPR_PRV(at) EXPR_PRV_IN((at), expr_data)
#define EXPR_NXT(at) EXPR_NXT_IN((at), expr_data)
#define EXPR_DATA_0(at) EXPR_DATA_0_IN((at), expr_data)
#define EXPR_DATA_1(at) EXPR_DATA_1_IN((at), expr_data)
#define EXPR_DATA_2(at) EXPR_DATA_2_IN((at), expr_data)
#define EXPR_DATA(at) EXPR_DATA_IN((at), expr_data)
#define EXPR_CONST_DATA(at) EXPR_CONST_DATA_IN((at), expr_data)
115
116
/* Optional tracing. When CHECK_OUTPUT_DEFINED is defined, DEBUG is forced on
 * and CHECK_OUTPUT() forwards to do_output() using the variables of the
 * enclosing function; otherwise CHECK_OUTPUT() expands to an empty block.
 * In both configurations the expansion is a complete brace-enclosed block. */
#if defined(CHECK_OUTPUT_DEFINED)

#if !defined(DEBUG)
#define DEBUG
#endif

/* values for the `type` argument of CHECK_OUTPUT() */
#define START 0
#define CALL 1
#define RETURN 2
#define SHOW 3

#define CHECK_OUTPUT(type, ret, line, msg)                                              \
  {                                                                                   \
    do_output(type, ret, line, input[*input_crs], input_minmax, *input_crs,         \
        input_dir, expr_data, expr_crs, notOperator, loop_crs, loop_cnts, msg); \
  }

#else /* tracing disabled */

#define CHECK_OUTPUT(type, ret, line, msg) \
  { ; }

#endif
140
141
struct expression {
142
  widechar type;
143
  widechar prv;
144
  widechar nxt;
145
  widechar data[1];
146
};
147
148
/* gdb won't know what this is unless it is actually used */
149
#ifdef DEBUG
150
static struct expression *expr_debug;
151
#endif
152
153
////////////////////////////////////////////////////////////////////////////////
154
155
/* Indentation source for the dump routines: printing &spaces[space] emits
 * (30 - space) dots. `space` starts at the terminating NUL, i.e. no dots. */
static char spaces[] = "..............................";
static int space = (int)(sizeof(spaces) - 1);
157
158
static void
159
pattern_output_expression(
160
0
    const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) {
161
0
  int i;
162
0
163
0
  if (expr_crs == PTN_END) return;
164
0
165
0
  while (EXPR_TYPE(expr_crs) != PTN_END) {
166
0
    printf("%s%d", &spaces[space], expr_crs);
167
0
    if (expr_crs < 100) printf(" ");
168
0
    if (expr_crs < 10) printf(" ");
169
0
    for (i = 0; i < 13 - (30 - space); i++) printf(" ");
170
0
171
0
    switch (EXPR_TYPE(expr_crs)) {
172
0
    case PTN_START:
173
0
174
0
      printf("START\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
175
0
      break;
176
0
177
0
    case PTN_GROUP:
178
0
179
0
      printf("(    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
180
0
          EXPR_DATA_0(expr_crs));
181
0
      space--;
182
0
      if (space < 0) space = 0;
183
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
184
0
      space++;
185
0
      if (space > 30) space = 30;
186
0
      break;
187
0
188
0
    case PTN_NOT:
189
0
190
0
      printf("!    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
191
0
          EXPR_DATA_0(expr_crs));
192
0
      space--;
193
0
      if (space < 0) space = 0;
194
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
195
0
      space++;
196
0
      if (space > 30) space = 30;
197
0
      break;
198
0
199
0
    case PTN_ONE_MORE:
200
0
201
0
      printf("+    \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
202
0
          EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
203
0
      space--;
204
0
      if (space < 0) space = 0;
205
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
206
0
      space++;
207
0
      if (space > 30) space = 30;
208
0
      break;
209
0
210
0
    case PTN_ZERO_MORE:
211
0
212
0
      printf("*    \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
213
0
          EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
214
0
      space--;
215
0
      if (space < 0) space = 0;
216
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
217
0
      space++;
218
0
      if (space > 30) space = 30;
219
0
      break;
220
0
221
0
    case PTN_OPTIONAL:
222
0
223
0
      printf("?    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
224
0
          EXPR_DATA_0(expr_crs));
225
0
      space--;
226
0
      if (space < 0) space = 0;
227
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
228
0
      space++;
229
0
      if (space > 30) space = 30;
230
0
      break;
231
0
232
0
    case PTN_ALTERNATE:
233
0
234
0
      printf("|    \t%d\t%d\t-> %d\t-> %d\n", EXPR_PRV(expr_crs),
235
0
          EXPR_NXT(expr_crs), EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
236
0
      space--;
237
0
      if (space < 0) space = 0;
238
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
239
0
      pattern_output_expression(expr_data, EXPR_DATA_1(expr_crs), table);
240
0
      space++;
241
0
      if (space > 30) space = 30;
242
0
      break;
243
0
244
0
    case PTN_ANY:
245
0
246
0
      printf(".    \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
247
0
      break;
248
0
249
0
    case PTN_ATTRIBUTES:
250
0
251
0
      printf("%%    \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
252
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0");
253
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1");
254
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2");
255
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3");
256
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4");
257
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5");
258
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6");
259
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7");
260
0
      if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^");
261
0
      if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_");
262
0
      if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#");
263
0
      if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a");
264
0
      if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u");
265
0
      if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l");
266
0
      if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf(".");
267
0
      if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$");
268
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~");
269
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<");
270
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">");
271
0
      puts("");
272
0
      break;
273
0
274
0
    case PTN_CHARS:
275
0
276
0
      printf("[]   \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
277
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
278
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
279
0
      puts("");
280
0
      break;
281
0
282
0
    case PTN_HOOK:
283
0
284
0
      printf("@    \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
285
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
286
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
287
0
      puts("");
288
0
      break;
289
0
290
0
    case PTN_END_OF_INPUT:
291
0
292
0
      printf("^    \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
293
0
      break;
294
0
295
0
    default:
296
0
297
0
      printf("%d?    \t%d\t%d\n", EXPR_TYPE(expr_crs), EXPR_PRV(expr_crs),
298
0
          EXPR_NXT(expr_crs));
299
0
      break;
300
0
    }
301
0
302
0
    expr_crs = EXPR_NXT(expr_crs);
303
0
  }
304
0
305
0
  printf("%s%d", &spaces[space], expr_crs);
306
0
  if (expr_crs < 100) printf(" ");
307
0
  if (expr_crs < 10) printf(" ");
308
0
  for (i = 0; i < 13 - (30 - space); i++) printf(" ");
309
0
  printf("END\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
310
0
  fflush(stdout);
311
0
  return;
312
0
}
313
314
static void
315
0
pattern_output(const widechar *expr_data, const TranslationTableHeader *table) {
316
0
  printf("%d    \tlength\n", expr_data[0]);
317
0
  printf("%d    \tloops\n", expr_data[1]);
318
0
  if (expr_data[0] > 0 && expr_data[0] != PTN_END)
319
0
    pattern_output_expression(expr_data, 2, table);
320
0
}
321
322
static void
323
pattern_print_expression(
324
0
    const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) {
325
0
  int i;
326
0
327
0
  if (expr_crs == PTN_END) return;
328
0
329
0
  while (EXPR_TYPE(expr_crs) != PTN_END) {
330
0
    switch (EXPR_TYPE(expr_crs)) {
331
0
    case PTN_START:
332
0
      break;
333
0
334
0
    case PTN_GROUP:
335
0
336
0
      printf(" (");
337
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
338
0
      printf(") ");
339
0
      break;
340
0
341
0
    case PTN_NOT:
342
0
343
0
      printf("!");
344
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
345
0
      break;
346
0
347
0
    case PTN_ONE_MORE:
348
0
349
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
350
0
      printf("+");
351
0
      break;
352
0
353
0
    case PTN_ZERO_MORE:
354
0
355
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
356
0
      printf("*");
357
0
      break;
358
0
359
0
    case PTN_OPTIONAL:
360
0
361
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
362
0
      printf("?");
363
0
      break;
364
0
365
0
    case PTN_ALTERNATE:
366
0
367
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
368
0
      printf(" | ");
369
0
      pattern_print_expression(expr_data, EXPR_DATA_1(expr_crs), table);
370
0
      break;
371
0
372
0
    case PTN_ANY:
373
0
374
0
      printf(".");
375
0
      break;
376
0
377
0
    case PTN_ATTRIBUTES:
378
0
379
0
      printf("%%[");
380
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0");
381
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1");
382
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2");
383
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3");
384
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4");
385
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5");
386
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6");
387
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7");
388
0
      if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^");
389
0
      if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_");
390
0
      if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#");
391
0
      if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a");
392
0
      if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u");
393
0
      if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l");
394
0
      if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf(".");
395
0
      if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$");
396
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~");
397
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<");
398
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">");
399
0
      printf("]");
400
0
      break;
401
0
402
0
    case PTN_CHARS:
403
0
404
0
      if (EXPR_DATA_0(expr_crs) == 1)
405
0
        printf("%c", EXPR_DATA_1(expr_crs));
406
0
      else {
407
0
        printf("[");
408
0
        for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
409
0
          printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
410
0
        printf("]");
411
0
      }
412
0
      break;
413
0
414
0
    case PTN_HOOK:
415
0
416
0
      printf("@[");
417
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
418
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
419
0
      printf("]");
420
0
      break;
421
0
422
0
    case PTN_END_OF_INPUT:
423
0
424
0
      printf("^");
425
0
      break;
426
0
427
0
      // default:  printf("%d?\n", EXPR_TYPE(expr_crs));  break;
428
0
    }
429
0
430
0
    expr_crs = EXPR_NXT(expr_crs);
431
0
  }
432
0
433
0
  return;
434
0
}
435
436
static void
437
0
pattern_print(const widechar *expr_data, const TranslationTableHeader *table) {
438
0
  if (expr_data[0] > 0 && expr_data[0] != PTN_END)
439
0
    pattern_print_expression(expr_data, 2, table);
440
0
  puts("");
441
0
}
442
443
#ifdef CHECK_OUTPUT_DEFINED
444
445
/* Print one blank for every power of ten from 100000 down to 10 that is
 * greater than `value` (five blanks for a one-digit value, none from 100000
 * upwards). */
static void
do_padd(const int value) {
  int limit;

  for (limit = 100000; limit >= 10; limit /= 10) {
    if (value < limit) printf(" ");
  }
}
453
454
/* Print one blank if `value` is below 100 and another if it is below 10. */
static void
do_pad(const int value) {
  int limit;

  for (limit = 100; limit >= 10; limit /= 10) {
    if (value < limit) printf(" ");
  }
}
459
460
static void
461
do_output(const int type, const int ret, const int line,
462
463
    const int input, const int input_minmax, const int input_crs, const int input_dir,
464
    const widechar *expr_data, const int expr_crs, const int notOperator,
465
    const int loop_crs, const int *loop_cnts,
466
467
    const char *msg) {
468
  switch (type) {
469
  case START:
470
471
    space--;
472
    if (space < 0) space = 0;
473
    printf("|%s()  ", &spaces[space]);
474
    break;
475
476
  case CALL:
477
478
    printf("|%s>   ", &spaces[space]);
479
    break;
480
481
  case RETURN:
482
483
    printf("|%s<%d  ", &spaces[space], ret);
484
    space++;
485
    if (space > 31) space = 31;
486
    break;
487
488
  case SHOW:
489
490
    printf("|%s    ", &spaces[space]);
491
    break;
492
  }
493
494
  printf("%d ", line);
495
  do_padd(line);
496
497
  switch (expr_data[expr_crs]) {
498
  case PTN_ERROR:
499
    printf("# ");
500
    break;
501
  case PTN_START:
502
    printf("> ");
503
    break;
504
  case PTN_END_OF_INPUT:
505
    printf("^ ");
506
    break;
507
  case PTN_ALTERNATE:
508
    printf("| ");
509
    break;
510
  case PTN_OPTIONAL:
511
    printf("? ");
512
    break;
513
  case PTN_ONE_MORE:
514
    printf("+ ");
515
    break;
516
  case PTN_ZERO_MORE:
517
    printf("* ");
518
    break;
519
  case PTN_NOT:
520
    printf("! ");
521
    break;
522
  case PTN_GROUP:
523
    printf("( ");
524
    break;
525
  case PTN_ANY:
526
    printf(". ");
527
    break;
528
  case PTN_ATTRIBUTES:
529
    printf("%% ");
530
    break;
531
  case PTN_CHARS:
532
    printf("[ ");
533
    break;
534
  case PTN_HOOK:
535
    printf("@ ");
536
    break;
537
  case PTN_END:
538
    printf("< ");
539
    break;
540
  default:
541
    printf("  ");
542
    break;
543
  }
544
  printf("%d ", expr_crs);
545
  do_padd(expr_crs);
546
547
  if (input > 31 && input < 127)
548
    printf("%c ", input);
549
  else
550
    printf("_ ");
551
552
  if (input_crs * input_dir >= input_minmax * input_dir)
553
    printf("#   ");
554
  else {
555
    printf("%d ", input_crs);
556
    do_pad(input_crs);
557
  }
558
559
  if (input_dir > 0)
560
    printf("<");
561
  else
562
    printf(">");
563
  printf("%d ", input_minmax);
564
  do_pad(input_minmax);
565
566
  if (notOperator)
567
    printf("!   ");
568
  else
569
    printf("    ");
570
571
  if (loop_crs) {
572
    printf("%d ", loop_crs);
573
    do_pad(loop_crs);
574
    printf("%d ", loop_cnts[EXPR_DATA_1(loop_crs)]);
575
    do_pad(loop_cnts[EXPR_DATA_1(loop_crs)]);
576
  } else
577
    printf("-   -   ");
578
  if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE || EXPR_TYPE(expr_crs) == PTN_ZERO_MORE) {
579
    printf("%d ", loop_cnts[EXPR_DATA_1(expr_crs)]);
580
    do_pad(loop_cnts[EXPR_DATA_1(expr_crs)]);
581
  } else
582
    printf("-   ");
583
584
  if (msg) printf("%s", msg);
585
  puts("");
586
}
587
588
#endif
589
590
////////////////////////////////////////////////////////////////////////////////
591
592
static int
593
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
594
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
595
    TranslationTableHeader *table, const FileInfo *nested);
596
597
static int
598
pattern_compile_expression(const widechar *input, const int input_max, int *input_crs,
599
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
600
2.24k
    TranslationTableHeader *table, const FileInfo *nested) {
601
2.24k
  widechar *data;
602
2.24k
  int expr_start, expr_end, expr_sub, expr_crs_prv;
603
2.24k
  int input_end;
604
2.24k
  int attrs0, attrs1;
605
2.24k
  int set, esc, nest, i;
606
607
2.24k
  switch (input[*input_crs]) {
608
8
  case '(':
609
610
8
    if (*expr_crs + 10 >= expr_max) return 0;
611
612
8
    (*input_crs)++;
613
8
    if (*input_crs >= input_max) return 0;
614
615
    /* find closing parenthesis */
616
8
    nest = esc = 0;
617
157
    for (input_end = *input_crs; input_end < input_max; input_end++) {
618
156
      if (input[input_end] == '\\' && !esc) {
619
0
        esc = 1;
620
0
        continue;
621
0
      }
622
623
156
      if (input[input_end] == '(' && !esc)
624
5
        nest++;
625
151
      else if (input[input_end] == ')' && !esc) {
626
12
        if (nest)
627
5
          nest--;
628
7
        else
629
7
          break;
630
12
      }
631
632
149
      esc = 0;
633
149
    }
634
8
    if (input_end >= input_max) return 0;
635
636
7
    EXPR_TYPE(*expr_crs) = PTN_GROUP;
637
638
    /* compile sub expressions */
639
7
    expr_crs_prv = *expr_crs;
640
7
    *expr_crs += 4;
641
7
    EXPR_DATA_0(expr_crs_prv) = *expr_crs;
642
7
    expr_sub = *expr_crs;
643
7
    EXPR_TYPE(expr_sub) = PTN_ERROR;
644
7
    EXPR_PRV(expr_sub) = PTN_END;
645
7
    EXPR_NXT(expr_sub) = PTN_END;
646
7
    if (!pattern_compile_1(input, input_end, input_crs, expr_data, expr_max, expr_crs,
647
7
          loop_cnts, table, nested))
648
0
      return 0;
649
7
    (*input_crs)++;
650
651
    /* reset end expression */
652
7
    expr_end = *expr_crs;
653
7
    EXPR_NXT(expr_end) = expr_crs_prv;
654
655
7
    return *expr_crs += 3;
656
657
16
  case '!':
658
659
16
    if (*expr_crs + 10 >= expr_max) return 0;
660
661
16
    (*input_crs)++;
662
16
    EXPR_TYPE(*expr_crs) = PTN_NOT;
663
16
    expr_crs_prv = *expr_crs;
664
16
    *expr_crs += 4;
665
16
    EXPR_DATA_0(expr_crs_prv) = *expr_crs;
666
667
    /* create start expression */
668
16
    expr_start = *expr_crs;
669
16
    EXPR_TYPE(expr_start) = PTN_START;
670
16
    EXPR_PRV(expr_start) = PTN_END;
671
16
    *expr_crs += 3;
672
16
    EXPR_NXT(expr_start) = *expr_crs;
673
674
    /* compile sub expression */
675
16
    expr_sub = *expr_crs;
676
16
    EXPR_TYPE(expr_sub) = PTN_ERROR;
677
16
    EXPR_PRV(expr_sub) = expr_start;
678
16
    EXPR_NXT(expr_sub) = PTN_END;
679
680
16
    if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max,
681
16
          expr_crs, loop_cnts, table, nested))
682
2
      return 0;
683
684
14
    if (*expr_crs + 3 >= expr_max) return 0;
685
686
14
    EXPR_NXT(expr_sub) = *expr_crs;
687
688
    /* create end expression */
689
14
    expr_end = *expr_crs;
690
14
    EXPR_TYPE(expr_end) = PTN_END;
691
14
    EXPR_PRV(expr_end) = expr_sub;
692
14
    EXPR_NXT(expr_end) = expr_crs_prv;
693
694
14
    return *expr_crs += 3;
695
696
2
  case '+':
697
698
2
    if (*expr_crs + 5 >= expr_max) return 0;
699
2
    EXPR_TYPE(*expr_crs) = PTN_ONE_MORE;
700
2
    EXPR_DATA_1(*expr_crs) = (*loop_cnts)++;
701
2
    (*input_crs)++;
702
2
    return *expr_crs += 5;
703
704
90
  case '*':
705
706
90
    if (*expr_crs + 5 >= expr_max) return 0;
707
90
    EXPR_TYPE(*expr_crs) = PTN_ZERO_MORE;
708
90
    EXPR_DATA_1(*expr_crs) = (*loop_cnts)++;
709
90
    (*input_crs)++;
710
90
    return *expr_crs += 5;
711
712
121
  case '?':
713
714
121
    if (*expr_crs + 4 >= expr_max) return 0;
715
121
    EXPR_TYPE(*expr_crs) = PTN_OPTIONAL;
716
121
    (*input_crs)++;
717
121
    return *expr_crs += 4;
718
719
22
  case '|':
720
721
22
    if (*expr_crs + 5 >= expr_max) return 0;
722
22
    EXPR_TYPE(*expr_crs) = PTN_ALTERNATE;
723
22
    (*input_crs)++;
724
22
    return *expr_crs += 5;
725
726
9
  case '.':
727
728
9
    if (*expr_crs + 3 >= expr_max) return 0;
729
9
    EXPR_TYPE(*expr_crs) = PTN_ANY;
730
9
    (*input_crs)++;
731
9
    return *expr_crs += 3;
732
733
15
  case '%':
734
735
15
    if (*expr_crs + 5 >= expr_max) return 0;
736
737
15
    (*input_crs)++;
738
15
    if (*input_crs >= input_max) return 0;
739
740
    /* find closing bracket */
741
14
    if (input[*input_crs] == '[') {
742
0
      set = 1;
743
0
      (*input_crs)++;
744
0
      for (input_end = *input_crs; input_end < input_max; input_end++)
745
0
        if (input[input_end] == ']') break;
746
0
      if (input_end >= input_max) return 0;
747
14
    } else {
748
14
      set = 0;
749
14
      input_end = *input_crs + 1;
750
14
    }
751
752
14
    EXPR_TYPE(*expr_crs) = PTN_ATTRIBUTES;
753
754
14
    attrs0 = attrs1 = 0;
755
27
    for (; (*input_crs) < input_end; (*input_crs)++) {
756
14
      switch (input[*input_crs]) {
757
4
      case '_':
758
4
        attrs0 |= CTC_Space;
759
4
        break;
760
0
      case '#':
761
0
        attrs0 |= CTC_Digit;
762
0
        break;
763
0
      case 'a':
764
0
        attrs0 |= CTC_Letter;
765
0
        break;
766
1
      case 'u':
767
1
        attrs0 |= CTC_UpperCase;
768
1
        break;
769
0
      case 'l':
770
0
        attrs0 |= CTC_LowerCase;
771
0
        break;
772
0
      case '.':
773
0
        attrs0 |= CTC_Punctuation;
774
0
        break;
775
0
      case '$':
776
0
        attrs0 |= CTC_Sign;
777
0
        break;
778
0
      case 'm':
779
0
        attrs0 |= CTC_Math;
780
0
        break;
781
0
      case '~':
782
0
        attrs0 |= CTC_SeqDelimiter;
783
0
        break;
784
0
      case '<':
785
0
        attrs0 |= CTC_SeqBefore;
786
0
        break;
787
0
      case '>':
788
0
        attrs0 |= CTC_SeqAfter;
789
0
        break;
790
791
2
      case '0':
792
3
      case '1':
793
3
      case '2':
794
3
      case '3':
795
5
      case '4':
796
5
      case '5':
797
5
      case '6':
798
5
      case '7': {
799
5
        int k = input[*input_crs] - '0';
800
5
        TranslationTableCharacterAttributes a = table->numberedAttributes[k];
801
5
        if (!a) {
802
          // attribute not used before yet: assign it a value
803
5
          a = table->numberedAttributes[k] =
804
5
              table->nextNumberedCharacterClassAttribute;
805
5
          if (a > CTC_UserDefined8) {
806
0
            _lou_logMessage(LOU_LOG_ERROR,
807
0
                "%s:%d: error: Too many character attributes defined",
808
0
                nested->fileName, nested->lineNumber);
809
0
            return 0;
810
0
          }
811
5
          table->nextNumberedCharacterClassAttribute <<= 1;
812
5
        }
813
5
        attrs1 |= (a >> 16);
814
5
        break;
815
5
      }
816
3
      case '^':
817
3
        attrs1 |= (CTC_EndOfInput >> 16);
818
3
        break;
819
820
1
      default:
821
1
        return 0;
822
14
      }
823
14
    }
824
13
    EXPR_DATA_0(*expr_crs) = attrs1;
825
13
    EXPR_DATA_1(*expr_crs) = attrs0;
826
827
13
    if (set) (*input_crs)++;
828
13
    return *expr_crs += 5;
829
830
5
  case '[':
831
832
5
    (*input_crs)++;
833
5
    if (*input_crs >= input_max) return 0;
834
835
    /* find closing bracket */
836
5
    esc = 0;
837
49
    for (input_end = *input_crs; input_end < input_max; input_end++) {
838
48
      if (input[input_end] == '\\' && !esc) {
839
0
        esc = 1;
840
0
        continue;
841
0
      }
842
843
48
      if (input[input_end] == ']' && !esc) break;
844
44
      esc = 0;
845
44
    }
846
5
    if (input_end >= input_max) return 0;
847
848
4
    if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0;
849
850
4
    EXPR_TYPE(*expr_crs) = PTN_CHARS;
851
852
4
    esc = 0;
853
4
    data = EXPR_DATA(*expr_crs);
854
15
    for (i = 1; *input_crs < input_end; (*input_crs)++) {
855
11
      if (input[*input_crs] == '\\' && !esc) {
856
0
        esc = 1;
857
0
        continue;
858
0
      }
859
860
11
      esc = 0;
861
11
      data[i++] = (widechar)input[*input_crs];
862
11
    }
863
4
    data[0] = i - 1;
864
4
    (*input_crs)++;
865
4
    return *expr_crs += 4 + data[0];
866
867
9
  case '@':
868
869
9
    (*input_crs)++;
870
9
    if (*input_crs >= input_max) return 0;
871
872
    /* find closing bracket */
873
9
    if (input[*input_crs] == '[') {
874
0
      set = 1;
875
0
      (*input_crs)++;
876
0
      for (input_end = *input_crs; input_end < input_max; input_end++)
877
0
        if (input[input_end] == ']') break;
878
0
      if (input_end >= input_max) return 0;
879
9
    } else {
880
9
      set = 0;
881
9
      input_end = *input_crs + 1;
882
9
    }
883
884
9
    if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0;
885
886
9
    EXPR_TYPE(*expr_crs) = PTN_HOOK;
887
888
9
    esc = 0;
889
9
    data = EXPR_DATA(*expr_crs);
890
18
    for (i = 1; *input_crs < input_end; (*input_crs)++) {
891
9
      if (input[*input_crs] == '\\' && !esc) {
892
0
        esc = 1;
893
0
        continue;
894
0
      }
895
896
9
      esc = 0;
897
9
      data[i++] = (widechar)input[*input_crs];
898
9
    }
899
9
    data[0] = i - 1;
900
9
    if (set) (*input_crs)++;
901
9
    return *expr_crs += 4 + data[0];
902
903
3
  case '^':
904
21
  case '$':
905
906
21
    if (*expr_crs + 3 >= expr_max) return 0;
907
21
    EXPR_TYPE(*expr_crs) = PTN_END_OF_INPUT;
908
21
    (*input_crs)++;
909
21
    return *expr_crs += 3;
910
911
6
  case '\\':
912
913
6
    (*input_crs)++;
914
6
    if (*input_crs >= input_max) return 0;
915
916
1.92k
  default:
917
918
1.92k
    if (*expr_crs + 5 >= expr_max) return 0;
919
1.92k
    EXPR_TYPE(*expr_crs) = PTN_CHARS;
920
1.92k
    EXPR_DATA_0(*expr_crs) = 1;
921
1.92k
    EXPR_DATA_1(*expr_crs) = (widechar)input[*input_crs];
922
1.92k
    (*input_crs)++;
923
1.92k
    return *expr_crs += 5;
924
2.24k
  }
925
2.24k
}
926
927
static int
928
pattern_insert_alternate(const widechar *input, const int input_max, int *input_crs,
929
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
930
0
    int expr_insert, TranslationTableHeader *table, const FileInfo *nested) {
931
0
  int expr_group, expr_alt, expr_end;
932
0
933
0
  if (EXPR_TYPE(*expr_crs) == PTN_START) return 0;
934
0
935
0
  if (*expr_crs + 12 >= expr_max) return 0;
936
0
937
0
  /* setup alternate expression */
938
0
  expr_alt = *expr_crs;
939
0
  EXPR_TYPE(expr_alt) = PTN_ALTERNATE;
940
0
  EXPR_PRV(expr_alt) = PTN_END;
941
0
  EXPR_NXT(expr_alt) = PTN_END;
942
0
  *expr_crs += 5;
943
0
944
0
  /* setup group expression */
945
0
  expr_group = *expr_crs;
946
0
  EXPR_TYPE(expr_group) = PTN_GROUP;
947
0
  EXPR_PRV(expr_group) = PTN_END;
948
0
  EXPR_NXT(expr_group) = PTN_END;
949
0
  *expr_crs += 4;
950
0
  EXPR_DATA_0(expr_group) = *expr_crs;
951
0
952
0
  EXPR_TYPE(*expr_crs) = PTN_ERROR;
953
0
  EXPR_PRV(*expr_crs) = PTN_END;
954
0
  EXPR_NXT(*expr_crs) = PTN_END;
955
0
  if (!pattern_compile_1(input, input_max, input_crs, expr_data, expr_max, expr_crs,
956
0
        loop_cnts, table, nested))
957
0
    return 0;
958
0
  expr_end = *expr_crs;
959
0
  EXPR_NXT(expr_end) = expr_group;
960
0
961
0
  /* setup last end expression */
962
0
  if (*expr_crs + 3 >= expr_max) return 0;
963
0
  *expr_crs += 3;
964
0
  EXPR_TYPE(*expr_crs) = PTN_END;
965
0
  EXPR_NXT(*expr_crs) = PTN_END;
966
0
967
0
  /* replace insert expression with group expression using last end expression */
968
0
  EXPR_NXT(EXPR_PRV(expr_insert)) = expr_group;
969
0
  EXPR_PRV(expr_group) = EXPR_PRV(expr_insert);
970
0
971
0
  EXPR_NXT(expr_group) = *expr_crs;
972
0
  EXPR_PRV(*expr_crs) = expr_group;
973
0
974
0
  /* link alternate and insert expressions before group end expression */
975
0
  EXPR_NXT(EXPR_PRV(expr_end)) = expr_alt;
976
0
  EXPR_PRV(expr_alt) = EXPR_PRV(expr_end);
977
0
978
0
  EXPR_NXT(expr_alt) = expr_insert;
979
0
  EXPR_PRV(expr_insert) = expr_alt;
980
0
981
0
  EXPR_NXT(expr_insert) = expr_end;
982
0
  EXPR_PRV(expr_end) = expr_insert;
983
0
984
0
  return *expr_crs;
985
0
}
986
987
/* Compile all expression sequences, resolving character sets, attributes,
988
 * groups, nots, and hooks.  Note that unlike the other compile functions, on
989
 * returning the expr_crs is set to the last end expression, not after it.
990
 */
991
static int
992
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
993
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
994
48
    TranslationTableHeader *table, const FileInfo *nested) {
995
48
  int expr_crs_prv;
996
997
48
  if (*expr_crs + 6 >= expr_max) return 0;
998
999
48
  expr_crs_prv = *expr_crs;
1000
1001
  /* setup start expression */
1002
48
  EXPR_TYPE(*expr_crs) = PTN_START;
1003
48
  EXPR_PRV(*expr_crs) = PTN_END;
1004
48
  *expr_crs += 3;
1005
48
  EXPR_NXT(expr_crs_prv) = *expr_crs;
1006
1007
  /* setup end expression */
1008
48
  EXPR_TYPE(*expr_crs) = PTN_END;
1009
48
  EXPR_PRV(*expr_crs) = expr_crs_prv;
1010
48
  EXPR_NXT(*expr_crs) = PTN_END;
1011
1012
2.27k
  while (*input_crs < input_max) {
1013
2.22k
    expr_crs_prv = *expr_crs;
1014
2.22k
    if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max,
1015
2.22k
          expr_crs, loop_cnts, table, nested))
1016
4
      return 0;
1017
1018
    /* setup end expression */
1019
2.22k
    if (*expr_crs + 3 >= expr_max) return 0;
1020
2.22k
    EXPR_NXT(expr_crs_prv) = *expr_crs;
1021
2.22k
    EXPR_TYPE(*expr_crs) = PTN_END;
1022
2.22k
    EXPR_PRV(*expr_crs) = expr_crs_prv;
1023
2.22k
    EXPR_NXT(*expr_crs) = PTN_END;
1024
1025
    /* insert seqafterexpression before attributes of seqafterchars */
1026
    // if(EXPR_TYPE(expr_crs_prv) == PTN_ATTRIBUTES)
1027
    // if(EXPR_DATA_1(expr_crs_prv) & CTC_SeqAfter)
1028
    // {
1029
    //  i = 0;
1030
    //  pattern_insert_alternate(table->seqAfterExpression,
1031
    //    table->seqAfterExpressionLength, &i, expr_data, expr_max,
1032
    //    expr_crs, loop_cnts, expr_crs_prv);
1033
    // }
1034
2.22k
  }
1035
1036
44
  return *expr_crs;
1037
48
}
1038
1039
/* Resolve optional and loop expressions.
1040
 */
1041
static int
1042
pattern_compile_2(
1043
55
    widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) {
1044
55
  int expr_start, expr_end, expr_prv, expr_sub;
1045
1046
1.15k
  while (EXPR_TYPE(expr_at) != PTN_END) {
1047
1.09k
    if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT) {
1048
18
      if (!pattern_compile_2(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs))
1049
0
        return 0;
1050
18
    }
1051
1052
1.09k
    if (EXPR_TYPE(expr_at) == PTN_ZERO_MORE || EXPR_TYPE(expr_at) == PTN_ONE_MORE ||
1053
1.06k
        EXPR_TYPE(expr_at) == PTN_OPTIONAL) {
1054
148
      if (*expr_crs + 6 >= expr_max) return 0;
1055
1056
      /* get previous expressions, there must
1057
       * be at least something and a PTN_START */
1058
148
      expr_sub = EXPR_PRV(expr_at);
1059
148
      if (EXPR_TYPE(expr_sub) == PTN_START) return 0;
1060
148
      expr_prv = EXPR_PRV(expr_sub);
1061
1062
      /* create start expression */
1063
148
      expr_start = *expr_crs;
1064
148
      EXPR_TYPE(expr_start) = PTN_START;
1065
148
      EXPR_PRV(expr_start) = PTN_END;
1066
148
      EXPR_NXT(expr_start) = expr_sub;
1067
148
      *expr_crs += 3;
1068
1069
      /* create end expression */
1070
148
      expr_end = *expr_crs;
1071
148
      EXPR_TYPE(expr_end) = PTN_END;
1072
148
      EXPR_PRV(expr_end) = expr_sub;
1073
148
      EXPR_NXT(expr_end) = expr_at;
1074
148
      *expr_crs += 3;
1075
1076
      /* relink previous expression before sub expression */
1077
148
      EXPR_DATA_0(expr_at) = expr_start;
1078
148
      EXPR_NXT(expr_prv) = expr_at;
1079
148
      EXPR_PRV(expr_at) = expr_prv;
1080
1081
      /* relink sub expression to start and end */
1082
148
      EXPR_PRV(expr_sub) = expr_start;
1083
148
      EXPR_NXT(expr_sub) = expr_end;
1084
148
    }
1085
1086
1.09k
    expr_at = EXPR_NXT(expr_at);
1087
1.09k
  }
1088
1089
55
  return 1;
1090
55
}
1091
1092
/* Resolves alternative expressions.
1093
 */
1094
static int
1095
pattern_compile_3(
1096
213
    widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) {
1097
213
  int expr_mrk, expr_start, expr_end, expr_sub_start, expr_sub_end;
1098
1099
1.46k
  while (EXPR_TYPE(expr_at) != PTN_END) {
1100
1.25k
    if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT ||
1101
1.23k
        EXPR_TYPE(expr_at) == PTN_OPTIONAL ||
1102
1.11k
        EXPR_TYPE(expr_at) == PTN_ZERO_MORE ||
1103
1.09k
        EXPR_TYPE(expr_at) == PTN_ONE_MORE) {
1104
166
      if (!pattern_compile_3(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs))
1105
0
        return 0;
1106
166
    }
1107
1108
1.25k
    if (EXPR_TYPE(expr_at) == PTN_ALTERNATE) {
1109
10
      if (*expr_crs + 12 >= expr_max) return 0;
1110
1111
      /* get previous start expression,
1112
       * can include alternate expressions */
1113
10
      expr_mrk = EXPR_PRV(expr_at);
1114
10
      if (EXPR_TYPE(expr_mrk) == PTN_START) return 0;
1115
10
      expr_sub_end = expr_mrk;
1116
128
      while (EXPR_TYPE(expr_mrk) != PTN_START) expr_mrk = EXPR_PRV(expr_mrk);
1117
10
      expr_sub_start = EXPR_NXT(expr_mrk);
1118
1119
      /* create first start expression */
1120
10
      expr_start = *expr_crs;
1121
10
      EXPR_TYPE(expr_start) = PTN_START;
1122
10
      EXPR_PRV(expr_start) = PTN_END;
1123
10
      EXPR_NXT(expr_start) = expr_sub_start;
1124
10
      *expr_crs += 3;
1125
1126
      /* create first end expression */
1127
10
      expr_end = *expr_crs;
1128
10
      EXPR_TYPE(expr_end) = PTN_END;
1129
10
      EXPR_PRV(expr_end) = expr_sub_end;
1130
10
      EXPR_NXT(expr_end) = expr_at;
1131
10
      *expr_crs += 3;
1132
1133
      /* relink previous expression before sub expression */
1134
10
      EXPR_DATA_0(expr_at) = expr_start;
1135
10
      EXPR_NXT(expr_mrk) = expr_at;
1136
10
      EXPR_PRV(expr_at) = expr_mrk;
1137
1138
      /* relink sub expression to start and end */
1139
10
      EXPR_PRV(expr_sub_start) = expr_start;
1140
10
      EXPR_NXT(expr_sub_end) = expr_end;
1141
1142
      /* get following PTN_END or PTN_ALTERNATE expression */
1143
10
      expr_mrk = EXPR_NXT(expr_at);
1144
10
      if (EXPR_TYPE(expr_mrk) == PTN_END || EXPR_TYPE(expr_mrk) == PTN_ALTERNATE)
1145
0
        return 0;
1146
10
      expr_sub_start = expr_mrk;
1147
49
      while (EXPR_TYPE(expr_mrk) != PTN_END && EXPR_TYPE(expr_mrk) != PTN_ALTERNATE)
1148
39
        expr_mrk = EXPR_NXT(expr_mrk);
1149
10
      expr_sub_end = EXPR_PRV(expr_mrk);
1150
1151
      /* create first start expression */
1152
10
      expr_start = *expr_crs;
1153
10
      EXPR_TYPE(expr_start) = PTN_START;
1154
10
      EXPR_PRV(expr_start) = PTN_END;
1155
10
      EXPR_NXT(expr_start) = expr_sub_start;
1156
10
      *expr_crs += 3;
1157
1158
      /* create first end expression */
1159
10
      expr_end = *expr_crs;
1160
10
      EXPR_TYPE(expr_end) = PTN_END;
1161
10
      EXPR_PRV(expr_end) = expr_sub_end;
1162
10
      EXPR_NXT(expr_end) = expr_at;
1163
10
      *expr_crs += 3;
1164
1165
      /* relink following expression before sub expression */
1166
10
      EXPR_DATA_1(expr_at) = expr_start;
1167
10
      EXPR_PRV(expr_mrk) = expr_at;
1168
10
      EXPR_NXT(expr_at) = expr_mrk;
1169
1170
      /* relink sub expression to start and end */
1171
10
      EXPR_PRV(expr_sub_start) = expr_start;
1172
10
      EXPR_NXT(expr_sub_end) = expr_end;
1173
1174
      /* check expressions were after alternate and got moved into
1175
       * a sub expression, previous expressions already checked */
1176
10
      if (!pattern_compile_3(expr_data, EXPR_DATA_1(expr_at), expr_max, expr_crs))
1177
0
        return 0;
1178
10
    }
1179
1180
1.25k
    expr_at = EXPR_NXT(expr_at);
1181
1.25k
  }
1182
1183
213
  return 1;
1184
213
}
1185
1186
int EXPORT_CALL
1187
_lou_pattern_compile(const widechar *input, const int input_max, widechar *expr_data,
1188
41
    const int expr_max, TranslationTableHeader *table, const FileInfo *nested) {
1189
41
  int input_crs;
1190
1191
41
  input_crs = 0;
1192
41
  expr_data[0] = 2;
1193
41
  expr_data[1] = 0;
1194
1195
41
  if (!pattern_compile_1(input, input_max, &input_crs, expr_data, expr_max,
1196
41
        &expr_data[0], &expr_data[1], table, nested))
1197
4
    return 0;
1198
1199
  /* shift past the last end */
1200
37
  expr_data[0] += 3;
1201
1202
37
  if (!pattern_compile_2(expr_data, 2, expr_max, &expr_data[0])) return 0;
1203
1204
37
  if (!pattern_compile_3(expr_data, 2, expr_max, &expr_data[0])) return 0;
1205
1206
37
  return expr_data[0];
1207
37
}
1208
1209
////////////////////////////////////////////////////////////////////////////////
1210
1211
static void
1212
pattern_reverse_expression(widechar *expr_data, const int expr_start);
1213
1214
static void
1215
609
pattern_reverse_branch(widechar *expr_data, const int expr_at) {
1216
609
  widechar expr_swap;
1217
1218
609
  switch (EXPR_TYPE(expr_at)) {
1219
9
  case PTN_ALTERNATE:
1220
1221
9
    pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at));
1222
9
    expr_swap = EXPR_DATA_0(expr_at);
1223
9
    EXPR_DATA_0(expr_at) = EXPR_DATA_1(expr_at);
1224
9
    EXPR_DATA_1(expr_at) = expr_swap;
1225
1226
9
  case PTN_GROUP:
1227
16
  case PTN_NOT:
1228
17
  case PTN_ONE_MORE:
1229
36
  case PTN_ZERO_MORE:
1230
151
  case PTN_OPTIONAL:
1231
1232
151
    pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at));
1233
609
  }
1234
609
}
1235
1236
static void
1237
179
pattern_reverse_expression(widechar *expr_data, const int expr_start) {
1238
179
  widechar expr_end, expr_crs, expr_prv;
1239
1240
179
  expr_end = EXPR_NXT(expr_start);
1241
1242
  /* empty expression */
1243
179
  if (EXPR_TYPE(expr_end) == PTN_END) return;
1244
1245
  /* find end expression */
1246
788
  while (EXPR_TYPE(expr_end) != PTN_END) expr_end = EXPR_NXT(expr_end);
1247
1248
179
  expr_crs = EXPR_PRV(expr_end);
1249
179
  expr_prv = EXPR_PRV(expr_crs);
1250
1251
  /* relink expression before end expression */
1252
179
  EXPR_NXT(expr_start) = expr_crs;
1253
179
  EXPR_PRV(expr_crs) = expr_start;
1254
179
  EXPR_NXT(expr_crs) = expr_prv;
1255
1256
  /* reverse any branching expressions */
1257
179
  pattern_reverse_branch(expr_data, expr_crs);
1258
1259
609
  while (expr_prv != expr_start) {
1260
    /* shift current expression */
1261
430
    expr_crs = expr_prv;
1262
430
    expr_prv = EXPR_PRV(expr_prv);
1263
1264
    /* reverse any branching expressions */
1265
430
    pattern_reverse_branch(expr_data, expr_crs);
1266
1267
    /* relink current expression */
1268
430
    EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs);
1269
430
    EXPR_NXT(expr_crs) = expr_prv;
1270
430
  }
1271
1272
  /* relink expression after start expression */
1273
179
  EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs);
1274
179
  EXPR_NXT(expr_crs) = expr_end;
1275
179
  EXPR_PRV(expr_end) = expr_crs;
1276
179
}
1277
1278
void EXPORT_CALL
1279
19
_lou_pattern_reverse(widechar *expr_data) {
1280
19
  pattern_reverse_expression(expr_data, 2);
1281
19
}
1282
1283
////////////////////////////////////////////////////////////////////////////////
1284
1285
static int
1286
22.9M
pattern_check_chars(const widechar input_char, const widechar *expr_data) {
1287
22.9M
  int expr_cnt, i;
1288
1289
22.9M
  expr_cnt = expr_data[0] + 1;
1290
1291
45.9M
  for (i = 1; i < expr_cnt; i++)
1292
22.9M
    if (input_char == expr_data[i]) break;
1293
1294
22.9M
  if (i == expr_cnt) return 0;
1295
0
  return 1;
1296
22.9M
}
1297
1298
static int
1299
pattern_check_attrs(const widechar input_char, const widechar *expr_data,
1300
12.0k
    const TranslationTableHeader *table) {
1301
12.0k
  int attrs;
1302
1303
12.0k
  attrs = ((expr_data[0] << 16) | expr_data[1]) & ~(CTC_EndOfInput | CTC_EmpMatch);
1304
12.0k
  if (!checkAttr(input_char, attrs, table)) return 0;
1305
3.40k
  return 1;
1306
12.0k
}
1307
1308
static int
1309
pattern_check_expression(const widechar *const input, int *input_crs,
1310
    const int input_minmax, const int input_dir, const widechar *const expr_data,
1311
    int (*hook)(const widechar input, const int data_len), widechar *hook_data,
1312
    const int hook_max, int expr_crs, int notOperator, int loop_crs, int *loop_cnts,
1313
25.6M
    const TranslationTableHeader *table) {
1314
25.6M
  int input_crs_prv, input_start, attrs, ret, i;
1315
25.6M
  const widechar *data;
1316
1317
25.6M
  data = NULL;
1318
1319
  /* save input_crs to know if loop consumed input */
1320
25.6M
  input_start = *input_crs;
1321
1322
25.6M
  CHECK_OUTPUT(START, 0, __LINE__, "check start")
1323
1324
77.0M
  while (!(EXPR_TYPE(expr_crs) == PTN_END && EXPR_TYPE(expr_crs) == PTN_END)) {
1325
    /* end of input expression */
1326
77.0M
    if (EXPR_TYPE(expr_crs) == PTN_END_OF_INPUT) {
1327
874k
      if (*input_crs * input_dir >= input_minmax * input_dir) {
1328
1
        if (notOperator)
1329
0
          CHECK_OUTPUT(
1330
1
              RETURN, 0, __LINE__, "end of input failed:  no input and not")
1331
1
        else
1332
1
          CHECK_OUTPUT(RETURN, 1, __LINE__, "end of input passed:  no input")
1333
1
        return !notOperator;
1334
874k
      } else {
1335
874k
        if (notOperator)
1336
0
          CHECK_OUTPUT(
1337
874k
              RETURN, 1, __LINE__, "end of input passed:  input and not")
1338
874k
        else
1339
874k
          CHECK_OUTPUT(RETURN, 0, __LINE__, "end of input failed:  input")
1340
874k
        return notOperator;
1341
874k
      }
1342
874k
    }
1343
1344
    /* no more input */
1345
76.1M
    if (*input_crs * input_dir >= input_minmax * input_dir) {
1346
195
      switch (EXPR_TYPE(expr_crs)) {
1347
6
      case PTN_ATTRIBUTES:
1348
1349
6
        attrs = (EXPR_DATA_0(expr_crs) << 16);
1350
6
        if (attrs & CTC_EndOfInput) {
1351
3
          if (notOperator) {
1352
0
            CHECK_OUTPUT(RETURN, 0, __LINE__,
1353
0
                "attributes failed:  end of input attribute:  not")
1354
0
            return 0;
1355
0
          }
1356
3
          CHECK_OUTPUT(RETURN, 1, __LINE__,
1357
3
              "attributes passed:  end of input attribute")
1358
3
          return 1;
1359
3
        }
1360
3
        CHECK_OUTPUT(RETURN, 0, __LINE__,
1361
3
            "attributes failed:  no end of input attribute")
1362
3
        return 0;
1363
1364
0
      case PTN_ANY:
1365
14
      case PTN_CHARS:
1366
1367
14
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed:  no input")
1368
14
        return 0;
1369
195
      }
1370
1371
175
      CHECK_OUTPUT(SHOW, 0, __LINE__, "no input")
1372
175
    }
1373
1374
76.1M
    switch (EXPR_TYPE(expr_crs)) {
1375
1376
25.7M
    case PTN_START:
1377
1378
25.7M
      expr_crs = EXPR_NXT(expr_crs);
1379
25.7M
      CHECK_OUTPUT(SHOW, 0, __LINE__, "start next")
1380
25.7M
      break;
1381
1382
25.8k
    case PTN_GROUP:
1383
1384
25.8k
      expr_crs = EXPR_DATA_0(expr_crs);
1385
25.8k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "group next")
1386
25.8k
      break;
1387
1388
8.59k
    case PTN_NOT:
1389
1390
8.59k
      notOperator = !notOperator;
1391
8.59k
      expr_crs = EXPR_DATA_0(expr_crs);
1392
8.59k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "not next")
1393
8.59k
      break;
1394
1395
0
    case PTN_ONE_MORE:
1396
1397
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ start")
1398
1399
5.25k
    case PTN_ZERO_MORE:
1400
1401
      /* check if loop already started */
1402
5.25k
      if (expr_crs == loop_crs) {
1403
3.40k
        loop_cnts[EXPR_DATA_1(loop_crs)]++;
1404
3.40k
        CHECK_OUTPUT(SHOW, 0, __LINE__, "loop again")
1405
3.40k
      } else {
1406
        /* check if loop nested, wasn't running but has a count */
1407
1.84k
        if (loop_cnts[EXPR_DATA_1(expr_crs)]) {
1408
0
          CHECK_OUTPUT(SHOW, 0, __LINE__, "loop already running")
1409
0
          goto loop_next;
1410
0
        }
1411
1412
        /* start loop */
1413
1.84k
        loop_crs = expr_crs;
1414
1.84k
        loop_cnts[EXPR_DATA_1(loop_crs)] = 1;
1415
1.84k
        CHECK_OUTPUT(SHOW, 0, __LINE__, "loop start")
1416
1.84k
      }
1417
1418
      /* start loop expression */
1419
5.25k
      input_crs_prv = *input_crs;
1420
5.25k
      ret = pattern_check_expression(input, input_crs, input_minmax, input_dir,
1421
5.25k
          expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1422
5.25k
          notOperator, loop_crs, loop_cnts, table);
1423
5.25k
      if (ret) {
1424
3.41k
        CHECK_OUTPUT(RETURN, 1, __LINE__, "loop passed")
1425
3.41k
        return 1;
1426
3.41k
      }
1427
1.84k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop failed")
1428
1.84k
      *input_crs = input_crs_prv;
1429
1430
      /* check loop count */
1431
1.84k
      loop_cnts[EXPR_DATA_1(loop_crs)]--;
1432
1.84k
      if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE) {
1433
0
        if (loop_cnts[EXPR_DATA_1(loop_crs)] < 1) {
1434
0
          CHECK_OUTPUT(RETURN, 0, __LINE__, "loop+ failed")
1435
0
          return 0;
1436
0
        } else
1437
0
          CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ passed")
1438
0
      }
1439
1440
    /* continue after loop */
1441
1.84k
    loop_next:
1442
1.84k
      expr_crs = EXPR_NXT(expr_crs);
1443
1.84k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop next")
1444
1.84k
      break;
1445
1446
25.6M
    case PTN_OPTIONAL:
1447
1448
      /* save current state */
1449
25.6M
      input_crs_prv = *input_crs;
1450
1451
      /* start optional expression */
1452
25.6M
      CHECK_OUTPUT(CALL, 0, __LINE__, "option start")
1453
25.6M
      if (pattern_check_expression(input, input_crs, input_minmax, input_dir,
1454
25.6M
            expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1455
25.6M
            notOperator, loop_crs, loop_cnts, table)) {
1456
91.0k
        CHECK_OUTPUT(RETURN, 1, __LINE__, "option passed")
1457
91.0k
        return 1;
1458
91.0k
      }
1459
25.5M
      CHECK_OUTPUT(SHOW, 0, __LINE__, "option failed")
1460
1461
      /* continue after optional expression */
1462
25.5M
      *input_crs = input_crs_prv;
1463
25.5M
      CHECK_OUTPUT(SHOW, 0, __LINE__, "no option start")
1464
25.5M
      expr_crs = EXPR_NXT(expr_crs);
1465
25.5M
      break;
1466
1467
23.3k
    case PTN_ALTERNATE:
1468
1469
      /* save current state */
1470
23.3k
      input_crs_prv = *input_crs;
1471
1472
      /* start first expression */
1473
23.3k
      CHECK_OUTPUT(CALL, 0, __LINE__, "or 1 start")
1474
23.3k
      if (pattern_check_expression(input, input_crs, input_minmax, input_dir,
1475
23.3k
            expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1476
23.3k
            notOperator, loop_crs, loop_cnts, table)) {
1477
11.6k
        CHECK_OUTPUT(RETURN, 1, __LINE__, "or 1 passed")
1478
11.6k
        return 1;
1479
11.6k
      }
1480
11.6k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "or 1 failed")
1481
1482
      /* start second expression (no need to push) */
1483
11.6k
      *input_crs = input_crs_prv;
1484
11.6k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "or 2 start")
1485
11.6k
      expr_crs = EXPR_DATA_1(expr_crs);
1486
11.6k
      break;
1487
1488
0
    case PTN_ANY:
1489
1490
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "any")
1491
0
      *input_crs += input_dir;
1492
0
      expr_crs = EXPR_NXT(expr_crs);
1493
0
      break;
1494
1495
12.0k
    case PTN_ATTRIBUTES:
1496
1497
12.0k
      ret = pattern_check_attrs(
1498
12.0k
          input[*input_crs], EXPR_CONST_DATA(expr_crs), table);
1499
12.0k
      if (ret && notOperator) {
1500
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed:  not");
1501
0
        return 0;
1502
0
      }
1503
12.0k
      if (!ret && !notOperator) {
1504
1.75k
        CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed");
1505
1.75k
        return 0;
1506
1.75k
      }
1507
10.2k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "attributes passed")
1508
10.2k
      *input_crs += input_dir;
1509
10.2k
      expr_crs = EXPR_NXT(expr_crs);
1510
10.2k
      break;
1511
1512
22.9M
    case PTN_CHARS:
1513
1514
22.9M
      ret = pattern_check_chars(input[*input_crs], EXPR_CONST_DATA(expr_crs));
1515
22.9M
      if (ret && notOperator) {
1516
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed:  not");
1517
0
        return 0;
1518
0
      }
1519
22.9M
      if (!ret && !notOperator) {
1520
22.9M
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed");
1521
22.9M
        return 0;
1522
22.9M
      }
1523
77.4k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "chars passed")
1524
77.4k
      *input_crs += input_dir;
1525
77.4k
      expr_crs = EXPR_NXT(expr_crs);
1526
77.4k
      break;
1527
1528
1.76M
    case PTN_HOOK:
1529
1530
1.76M
      if (hook == NULL) {
1531
1.76M
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed:  NULL");
1532
1.76M
        return 0;
1533
1.76M
      }
1534
1535
      /* copy expression data */
1536
0
      data = EXPR_CONST_DATA(expr_crs);
1537
0
      for (i = 0; i < data[0]; i++) hook_data[i] = data[i + 1];
1538
1539
      /* call hook function */
1540
0
      ret = hook(input[*input_crs], data[0]);
1541
0
      if (ret && notOperator) {
1542
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed:  not");
1543
0
        return 0;
1544
0
      }
1545
0
      if (!ret && !notOperator) {
1546
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed");
1547
0
        return 0;
1548
0
      }
1549
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "hook passed")
1550
0
      *input_crs += input_dir;
1551
0
      expr_crs = EXPR_NXT(expr_crs);
1552
0
      break;
1553
1554
0
    case PTN_END:
1555
0
      break;
1556
1557
0
    default:
1558
1559
0
      CHECK_OUTPUT(RETURN, 0, __LINE__, "unknown opcode")
1560
0
      return 0;
1561
76.1M
    }
1562
1563
    /* check end expression  */
1564
193M
    while (EXPR_TYPE(expr_crs) == PTN_END) {
1565
141M
      CHECK_OUTPUT(SHOW, 0, __LINE__, "end")
1566
1567
      /* check for end of expressions */
1568
141M
      if (EXPR_NXT(expr_crs) == PTN_END) break;
1569
1570
141M
      expr_crs = EXPR_NXT(expr_crs);
1571
1572
      /* returning loop */
1573
141M
      if (EXPR_TYPE(expr_crs) == PTN_ZERO_MORE ||
1574
141M
          EXPR_TYPE(expr_crs) == PTN_ONE_MORE) {
1575
3.48k
        CHECK_OUTPUT(SHOW, 0, __LINE__, "end loop")
1576
1577
        /* check that loop consumed input */
1578
3.48k
        if (*input_crs == input_start) {
1579
81
          CHECK_OUTPUT(RETURN, 0, __LINE__, "loop failed:  did not consume")
1580
81
          return 0;
1581
81
        }
1582
1583
        /* loops do not continue to the next expression */
1584
3.40k
        break;
1585
3.48k
      }
1586
1587
      /* returning not */
1588
141M
      if (EXPR_TYPE(expr_crs) == PTN_NOT) notOperator = !notOperator;
1589
1590
141M
      expr_crs = EXPR_NXT(expr_crs);
1591
1592
141M
      CHECK_OUTPUT(SHOW, 0, __LINE__, "end next")
1593
141M
    }
1594
1595
51.3M
    CHECK_OUTPUT(SHOW, 0, __LINE__, "check next")
1596
51.3M
  }
1597
1598
13.3k
  CHECK_OUTPUT(RETURN, 1, __LINE__, "check passed:  end of expression");
1599
13.3k
  return 1;
1600
25.6M
}
1601
1602
static int
1603
pattern_check_hook(const widechar *input, const int input_start, const int input_minmax,
1604
    const int input_dir, const widechar *expr_data,
1605
    int (*hook)(const widechar input, const int data_len), widechar *hook_data,
1606
40.9k
    const int hook_max, const TranslationTableHeader *table) {
1607
40.9k
  int input_crs, ret, *loop_cnts;
1608
1609
40.9k
  input_crs = input_start;
1610
40.9k
  loop_cnts = malloc(expr_data[1] * sizeof(int));
1611
40.9k
  memset(loop_cnts, 0, expr_data[1] * sizeof(int));
1612
40.9k
  ret = pattern_check_expression(input, &input_crs, input_minmax, input_dir, expr_data,
1613
40.9k
      hook, hook_data, hook_max, 2, 0, 0, loop_cnts, table);
1614
40.9k
  free(loop_cnts);
1615
40.9k
  return ret;
1616
40.9k
}
1617
1618
int EXPORT_CALL
1619
_lou_pattern_check(const widechar *input, const int input_start, const int input_minmax,
1620
    const int input_dir, const widechar *expr_data,
1621
40.9k
    const TranslationTableHeader *table) {
1622
#ifdef CHECK_OUTPUT_DEFINED
1623
  pattern_output(expr_data, table);
1624
#endif
1625
40.9k
  return pattern_check_hook(
1626
40.9k
      input, input_start, input_minmax, input_dir, expr_data, NULL, NULL, 0, table);
1627
40.9k
}
1628
1629
////////////////////////////////////////////////////////////////////////////////