Coverage Report

Created: 2026-01-09 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/liblouis/liblouis/pattern.c
Line
Count
Source
1
/* liblouis Braille Translation and Back-Translation Library
2
3
   Copyright (C) 2016 Mike Gray, American Printing House for the Blind
4
5
   This file is part of liblouis.
6
7
   liblouis is free software: you can redistribute it and/or modify it
8
   under the terms of the GNU Lesser General Public License as published
9
   by the Free Software Foundation, either version 2.1 of the License, or
10
   (at your option) any later version.
11
12
   liblouis is distributed in the hope that it will be useful, but
13
   WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
   Lesser General Public License for more details.
16
17
   You should have received a copy of the GNU Lesser General Public
18
   License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
19
*/
20
21
#include "config.h"
22
23
#include <stdlib.h>
24
#include <stdio.h>
25
#include <string.h>
26
#include <ctype.h>
27
28
#include "internal.h"
29
30
//#define CHECK_OUTPUT_DEFINED
31
32
/////
33
34
// TODO: these functions are static and copied several times
35
36
/* Selects which lookup table checkAttr() consults: when non-zero it looks the
 * character up in table->characters, when zero in table->dots.  The variable
 * has external linkage (it is not static).
 * NOTE(review): presumably set by the translator before pattern matching to
 * distinguish forward from backward translation -- confirm against callers. */
int translation_direction = 1;
37
38
static TranslationTableCharacter *
39
288k
findCharOrDots(widechar c, int m, const TranslationTableHeader *table) {
40
  /* Look up character or dot pattern in the appropriate
41
   * table. */
42
288k
  static TranslationTableCharacter noChar = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, 32, 0,
43
288k
    0 };
44
288k
  static TranslationTableCharacter noDots = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0,
45
288k
    LOU_DOTS, 0, 0 };
46
288k
  TranslationTableCharacter *notFound;
47
288k
  TranslationTableCharacter *character;
48
288k
  TranslationTableOffset bucket;
49
288k
  unsigned long int makeHash = _lou_charHash(c);
50
288k
  if (m == 0) {
51
288k
    bucket = table->characters[makeHash];
52
288k
    notFound = &noChar;
53
288k
  } else {
54
0
    bucket = table->dots[makeHash];
55
0
    notFound = &noDots;
56
0
  }
57
288k
  while (bucket) {
58
77.0k
    character = (TranslationTableCharacter *)&table->ruleArea[bucket];
59
77.0k
    if (character->value == c) return character;
60
0
    bucket = character->next;
61
0
  }
62
211k
  notFound->value = c;
63
211k
  return notFound;
64
288k
}
65
66
static int
67
checkAttr(const widechar c, const TranslationTableCharacterAttributes a,
68
288k
    const TranslationTableHeader *table) {
69
288k
  return (((findCharOrDots(c, translation_direction ? 0 : 1, table))->attributes & a)
70
288k
          ? 1
71
288k
          : 0);
72
288k
}
73
74
/////
75
76
/* Node types stored in the first slot of every compiled expression node.
 * The symbols in the comments are the pattern characters that
 * pattern_compile_expression() turns into each type. */
enum pattern_type {
  PTN_ERROR = 0, /* placeholder written before a sub expression is compiled */

  PTN_START = 1, /* first node of an expression sequence */
  PTN_GROUP = 2, /* ( ... ) */
  PTN_NOT = 3,   /* ! */

  PTN_ONE_MORE = 4,  /* + */
  PTN_ZERO_MORE = 5, /* * */
  PTN_OPTIONAL = 6,  /* ? */

  PTN_ALTERNATE = 7, /* | */

  PTN_ANY = 8,           /* . */
  PTN_ATTRIBUTES = 9,    /* %x or %[...] */
  PTN_CHARS = 10,        /* [...] or a single literal character */
  PTN_HOOK = 11,         /* @x or @[...] */
  PTN_END_OF_INPUT = 12, /* ^ or $ */

  PTN_END = 0xffff, /* last node of a sequence; also used as "no link" */
};
97
98
31.7M
/* Accessors for one expression node inside a compiled pattern buffer.
 * `at` is the index of the node's first slot; the slots are laid out as
 *   +0 type, +1 previous node, +2 next node, +3.. node specific data.
 * Both macro arguments are parenthesised so that an expression such as
 * `base + 1` can be passed as the buffer. */
#define EXPR_TYPE_IN(at, buffer) ((buffer)[(at) + 0])
#define EXPR_PRV_IN(at, buffer) ((buffer)[(at) + 1])
#define EXPR_NXT_IN(at, buffer) ((buffer)[(at) + 2])
#define EXPR_DATA_0_IN(at, buffer) ((buffer)[(at) + 3])
#define EXPR_DATA_1_IN(at, buffer) ((buffer)[(at) + 4])
#define EXPR_DATA_2_IN(at, buffer) ((buffer)[(at) + 5])
#define EXPR_DATA_IN(at, buffer) ((widechar *)&(buffer)[(at) + 3])
#define EXPR_CONST_DATA_IN(at, buffer) ((const widechar *)&(buffer)[(at) + 3])

/* Same accessors, applied to a variable named `expr_data` that must be in
 * scope where the macro is expanded. */
#define EXPR_TYPE(at) EXPR_TYPE_IN((at), expr_data)
#define EXPR_PRV(at) EXPR_PRV_IN((at), expr_data)
#define EXPR_NXT(at) EXPR_NXT_IN((at), expr_data)
#define EXPR_DATA_0(at) EXPR_DATA_0_IN((at), expr_data)
#define EXPR_DATA_1(at) EXPR_DATA_1_IN((at), expr_data)
#define EXPR_DATA_2(at) EXPR_DATA_2_IN((at), expr_data)
#define EXPR_DATA(at) EXPR_DATA_IN((at), expr_data)
#define EXPR_CONST_DATA(at) EXPR_CONST_DATA_IN((at), expr_data)
115
116
/* CHECK_OUTPUT(type, ret, line, msg) is a tracing hook.  In both variants it
 * expands to a braced block rather than an expression.
 *
 * With CHECK_OUTPUT_DEFINED it calls do_output() and reads the following
 * names from the scope in which it is expanded: input, input_crs,
 * input_minmax, input_dir, expr_data, expr_crs, notOperator, loop_crs and
 * loop_cnts.  Otherwise it expands to an empty block. */
#if defined(CHECK_OUTPUT_DEFINED)

/* tracing implies DEBUG */
#if !defined(DEBUG)
#define DEBUG
#endif

/* values for the `type` argument of CHECK_OUTPUT / do_output */
#define START 0
#define CALL 1
#define RETURN 2
#define SHOW 3

#define CHECK_OUTPUT(type, ret, line, msg)                                   \
  {                                                                          \
    do_output(type, ret, line, input[*input_crs], input_minmax, *input_crs, \
        input_dir, expr_data, expr_crs, notOperator, loop_crs, loop_cnts,   \
        msg);                                                                \
  }

#else /* tracing disabled */

#define CHECK_OUTPUT(type, ret, line, msg) \
  { ; }

#endif
140
141
/* Struct view of one expression node.  The field order matches the slot
 * offsets used by the EXPR_* macros: type at +0, prv at +1, nxt at +2 and the
 * node specific data starting at +3.  The code visible in this file reads and
 * writes nodes through the EXPR_* macros, not through this struct. */
struct expression {
142
  widechar type;
143
  widechar prv;
144
  widechar nxt;
145
  widechar data[1];
146
};
147
148
/* gdb won't know what this is unless it is actually used */
149
#ifdef DEBUG
150
static struct expression *expr_debug;
151
#endif
152
153
////////////////////////////////////////////////////////////////////////////////
154
155
static char spaces[] = "..............................";
156
static int space = 30;
157
158
static void
159
pattern_output_expression(
160
0
    const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) {
161
0
  int i;
162
0
163
0
  if (expr_crs == PTN_END) return;
164
0
165
0
  while (EXPR_TYPE(expr_crs) != PTN_END) {
166
0
    printf("%s%d", &spaces[space], expr_crs);
167
0
    if (expr_crs < 100) printf(" ");
168
0
    if (expr_crs < 10) printf(" ");
169
0
    for (i = 0; i < 13 - (30 - space); i++) printf(" ");
170
0
171
0
    switch (EXPR_TYPE(expr_crs)) {
172
0
    case PTN_START:
173
0
174
0
      printf("START\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
175
0
      break;
176
0
177
0
    case PTN_GROUP:
178
0
179
0
      printf("(    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
180
0
          EXPR_DATA_0(expr_crs));
181
0
      space--;
182
0
      if (space < 0) space = 0;
183
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
184
0
      space++;
185
0
      if (space > 30) space = 30;
186
0
      break;
187
0
188
0
    case PTN_NOT:
189
0
190
0
      printf("!    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
191
0
          EXPR_DATA_0(expr_crs));
192
0
      space--;
193
0
      if (space < 0) space = 0;
194
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
195
0
      space++;
196
0
      if (space > 30) space = 30;
197
0
      break;
198
0
199
0
    case PTN_ONE_MORE:
200
0
201
0
      printf("+    \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
202
0
          EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
203
0
      space--;
204
0
      if (space < 0) space = 0;
205
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
206
0
      space++;
207
0
      if (space > 30) space = 30;
208
0
      break;
209
0
210
0
    case PTN_ZERO_MORE:
211
0
212
0
      printf("*    \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
213
0
          EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
214
0
      space--;
215
0
      if (space < 0) space = 0;
216
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
217
0
      space++;
218
0
      if (space > 30) space = 30;
219
0
      break;
220
0
221
0
    case PTN_OPTIONAL:
222
0
223
0
      printf("?    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
224
0
          EXPR_DATA_0(expr_crs));
225
0
      space--;
226
0
      if (space < 0) space = 0;
227
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
228
0
      space++;
229
0
      if (space > 30) space = 30;
230
0
      break;
231
0
232
0
    case PTN_ALTERNATE:
233
0
234
0
      printf("|    \t%d\t%d\t-> %d\t-> %d\n", EXPR_PRV(expr_crs),
235
0
          EXPR_NXT(expr_crs), EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
236
0
      space--;
237
0
      if (space < 0) space = 0;
238
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
239
0
      pattern_output_expression(expr_data, EXPR_DATA_1(expr_crs), table);
240
0
      space++;
241
0
      if (space > 30) space = 30;
242
0
      break;
243
0
244
0
    case PTN_ANY:
245
0
246
0
      printf(".    \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
247
0
      break;
248
0
249
0
    case PTN_ATTRIBUTES:
250
0
251
0
      printf("%%    \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
252
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0");
253
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1");
254
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2");
255
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3");
256
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4");
257
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5");
258
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6");
259
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7");
260
0
      if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^");
261
0
      if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_");
262
0
      if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#");
263
0
      if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a");
264
0
      if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u");
265
0
      if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l");
266
0
      if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf(".");
267
0
      if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$");
268
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~");
269
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<");
270
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">");
271
0
      puts("");
272
0
      break;
273
0
274
0
    case PTN_CHARS:
275
0
276
0
      printf("[]   \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
277
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
278
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
279
0
      puts("");
280
0
      break;
281
0
282
0
    case PTN_HOOK:
283
0
284
0
      printf("@    \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
285
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
286
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
287
0
      puts("");
288
0
      break;
289
0
290
0
    case PTN_END_OF_INPUT:
291
0
292
0
      printf("^    \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
293
0
      break;
294
0
295
0
    default:
296
0
297
0
      printf("%d?    \t%d\t%d\n", EXPR_TYPE(expr_crs), EXPR_PRV(expr_crs),
298
0
          EXPR_NXT(expr_crs));
299
0
      break;
300
0
    }
301
0
302
0
    expr_crs = EXPR_NXT(expr_crs);
303
0
  }
304
0
305
0
  printf("%s%d", &spaces[space], expr_crs);
306
0
  if (expr_crs < 100) printf(" ");
307
0
  if (expr_crs < 10) printf(" ");
308
0
  for (i = 0; i < 13 - (30 - space); i++) printf(" ");
309
0
  printf("END\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
310
0
  fflush(stdout);
311
0
  return;
312
0
}
313
314
static void
315
0
pattern_output(const widechar *expr_data, const TranslationTableHeader *table) {
316
0
  printf("%d    \tlength\n", expr_data[0]);
317
0
  printf("%d    \tloops\n", expr_data[1]);
318
0
  if (expr_data[0] > 0 && expr_data[0] != PTN_END)
319
0
    pattern_output_expression(expr_data, 2, table);
320
0
}
321
322
static void
323
pattern_print_expression(
324
0
    const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) {
325
0
  int i;
326
0
327
0
  if (expr_crs == PTN_END) return;
328
0
329
0
  while (EXPR_TYPE(expr_crs) != PTN_END) {
330
0
    switch (EXPR_TYPE(expr_crs)) {
331
0
    case PTN_START:
332
0
      break;
333
0
334
0
    case PTN_GROUP:
335
0
336
0
      printf(" (");
337
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
338
0
      printf(") ");
339
0
      break;
340
0
341
0
    case PTN_NOT:
342
0
343
0
      printf("!");
344
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
345
0
      break;
346
0
347
0
    case PTN_ONE_MORE:
348
0
349
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
350
0
      printf("+");
351
0
      break;
352
0
353
0
    case PTN_ZERO_MORE:
354
0
355
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
356
0
      printf("*");
357
0
      break;
358
0
359
0
    case PTN_OPTIONAL:
360
0
361
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
362
0
      printf("?");
363
0
      break;
364
0
365
0
    case PTN_ALTERNATE:
366
0
367
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
368
0
      printf(" | ");
369
0
      pattern_print_expression(expr_data, EXPR_DATA_1(expr_crs), table);
370
0
      break;
371
0
372
0
    case PTN_ANY:
373
0
374
0
      printf(".");
375
0
      break;
376
0
377
0
    case PTN_ATTRIBUTES:
378
0
379
0
      printf("%%[");
380
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0");
381
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1");
382
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2");
383
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3");
384
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4");
385
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5");
386
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6");
387
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7");
388
0
      if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^");
389
0
      if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_");
390
0
      if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#");
391
0
      if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a");
392
0
      if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u");
393
0
      if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l");
394
0
      if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf(".");
395
0
      if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$");
396
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~");
397
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<");
398
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">");
399
0
      printf("]");
400
0
      break;
401
0
402
0
    case PTN_CHARS:
403
0
404
0
      if (EXPR_DATA_0(expr_crs) == 1)
405
0
        printf("%c", EXPR_DATA_1(expr_crs));
406
0
      else {
407
0
        printf("[");
408
0
        for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
409
0
          printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
410
0
        printf("]");
411
0
      }
412
0
      break;
413
0
414
0
    case PTN_HOOK:
415
0
416
0
      printf("@[");
417
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
418
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
419
0
      printf("]");
420
0
      break;
421
0
422
0
    case PTN_END_OF_INPUT:
423
0
424
0
      printf("^");
425
0
      break;
426
0
427
0
      // default:  printf("%d?\n", EXPR_TYPE(expr_crs));  break;
428
0
    }
429
0
430
0
    expr_crs = EXPR_NXT(expr_crs);
431
0
  }
432
0
433
0
  return;
434
0
}
435
436
static void
437
0
pattern_print(const widechar *expr_data, const TranslationTableHeader *table) {
438
0
  if (expr_data[0] > 0 && expr_data[0] != PTN_END)
439
0
    pattern_print_expression(expr_data, 2, table);
440
0
  puts("");
441
0
}
442
443
#ifdef CHECK_OUTPUT_DEFINED
444
445
/* Print one space for every threshold (100000, 10000, 1000, 100, 10) that
 * `value` is below, padding trace columns that hold up to six digits. */
static void
do_padd(const int value) {
  static const int limits[] = { 100000, 10000, 1000, 100, 10 };
  int k;

  for (k = 0; k < (int)(sizeof(limits) / sizeof(limits[0])); k++) {
    if (value < limits[k]) printf(" ");
  }
}
453
454
/* Print one space if `value` is below 100 and another one if it is below 10,
 * padding trace columns that hold up to three digits. */
static void
do_pad(const int value) {
  int pad = (value < 100) + (value < 10);

  while (pad-- > 0) printf(" ");
}
459
460
static void
461
do_output(const int type, const int ret, const int line,
462
463
    const int input, const int input_minmax, const int input_crs, const int input_dir,
464
    const widechar *expr_data, const int expr_crs, const int notOperator,
465
    const int loop_crs, const int *loop_cnts,
466
467
    const char *msg) {
468
  switch (type) {
469
  case START:
470
471
    space--;
472
    if (space < 0) space = 0;
473
    printf("|%s()  ", &spaces[space]);
474
    break;
475
476
  case CALL:
477
478
    printf("|%s>   ", &spaces[space]);
479
    break;
480
481
  case RETURN:
482
483
    printf("|%s<%d  ", &spaces[space], ret);
484
    space++;
485
    if (space > 31) space = 31;
486
    break;
487
488
  case SHOW:
489
490
    printf("|%s    ", &spaces[space]);
491
    break;
492
  }
493
494
  printf("%d ", line);
495
  do_padd(line);
496
497
  switch (expr_data[expr_crs]) {
498
  case PTN_ERROR:
499
    printf("# ");
500
    break;
501
  case PTN_START:
502
    printf("> ");
503
    break;
504
  case PTN_END_OF_INPUT:
505
    printf("^ ");
506
    break;
507
  case PTN_ALTERNATE:
508
    printf("| ");
509
    break;
510
  case PTN_OPTIONAL:
511
    printf("? ");
512
    break;
513
  case PTN_ONE_MORE:
514
    printf("+ ");
515
    break;
516
  case PTN_ZERO_MORE:
517
    printf("* ");
518
    break;
519
  case PTN_NOT:
520
    printf("! ");
521
    break;
522
  case PTN_GROUP:
523
    printf("( ");
524
    break;
525
  case PTN_ANY:
526
    printf(". ");
527
    break;
528
  case PTN_ATTRIBUTES:
529
    printf("%% ");
530
    break;
531
  case PTN_CHARS:
532
    printf("[ ");
533
    break;
534
  case PTN_HOOK:
535
    printf("@ ");
536
    break;
537
  case PTN_END:
538
    printf("< ");
539
    break;
540
  default:
541
    printf("  ");
542
    break;
543
  }
544
  printf("%d ", expr_crs);
545
  do_padd(expr_crs);
546
547
  if (input > 31 && input < 127)
548
    printf("%c ", input);
549
  else
550
    printf("_ ");
551
552
  if (input_crs * input_dir >= input_minmax * input_dir)
553
    printf("#   ");
554
  else {
555
    printf("%d ", input_crs);
556
    do_pad(input_crs);
557
  }
558
559
  if (input_dir > 0)
560
    printf("<");
561
  else
562
    printf(">");
563
  printf("%d ", input_minmax);
564
  do_pad(input_minmax);
565
566
  if (notOperator)
567
    printf("!   ");
568
  else
569
    printf("    ");
570
571
  if (loop_crs) {
572
    printf("%d ", loop_crs);
573
    do_pad(loop_crs);
574
    printf("%d ", loop_cnts[EXPR_DATA_1(loop_crs)]);
575
    do_pad(loop_cnts[EXPR_DATA_1(loop_crs)]);
576
  } else
577
    printf("-   -   ");
578
  if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE || EXPR_TYPE(expr_crs) == PTN_ZERO_MORE) {
579
    printf("%d ", loop_cnts[EXPR_DATA_1(expr_crs)]);
580
    do_pad(loop_cnts[EXPR_DATA_1(expr_crs)]);
581
  } else
582
    printf("-   ");
583
584
  if (msg) printf("%s", msg);
585
  puts("");
586
}
587
588
#endif
589
590
////////////////////////////////////////////////////////////////////////////////
591
592
static int
593
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
594
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
595
    TranslationTableHeader *table, const FileInfo *nested);
596
597
static int
598
pattern_compile_expression(const widechar *input, const int input_max, int *input_crs,
599
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
600
6.19k
    TranslationTableHeader *table, const FileInfo *nested) {
601
6.19k
  widechar *data;
602
6.19k
  int expr_start, expr_end, expr_sub, expr_crs_prv;
603
6.19k
  int input_end;
604
6.19k
  int attrs0, attrs1;
605
6.19k
  int set, esc, nest, i;
606
607
6.19k
  switch (input[*input_crs]) {
608
0
  case '(':
609
610
0
    if (*expr_crs + 10 >= expr_max) return 0;
611
612
0
    (*input_crs)++;
613
0
    if (*input_crs >= input_max) return 0;
614
615
    /* find closing parenthesis */
616
0
    nest = esc = 0;
617
0
    for (input_end = *input_crs; input_end < input_max; input_end++) {
618
0
      if (input[input_end] == '\\' && !esc) {
619
0
        esc = 1;
620
0
        continue;
621
0
      }
622
623
0
      if (input[input_end] == '(' && !esc)
624
0
        nest++;
625
0
      else if (input[input_end] == ')' && !esc) {
626
0
        if (nest)
627
0
          nest--;
628
0
        else
629
0
          break;
630
0
      }
631
632
0
      esc = 0;
633
0
    }
634
0
    if (input_end >= input_max) return 0;
635
636
0
    EXPR_TYPE(*expr_crs) = PTN_GROUP;
637
638
    /* compile sub expressions */
639
0
    expr_crs_prv = *expr_crs;
640
0
    *expr_crs += 4;
641
0
    EXPR_DATA_0(expr_crs_prv) = *expr_crs;
642
0
    expr_sub = *expr_crs;
643
0
    EXPR_TYPE(expr_sub) = PTN_ERROR;
644
0
    EXPR_PRV(expr_sub) = PTN_END;
645
0
    EXPR_NXT(expr_sub) = PTN_END;
646
0
    if (!pattern_compile_1(input, input_end, input_crs, expr_data, expr_max, expr_crs,
647
0
          loop_cnts, table, nested))
648
0
      return 0;
649
0
    (*input_crs)++;
650
651
    /* reset end expression */
652
0
    expr_end = *expr_crs;
653
0
    EXPR_NXT(expr_end) = expr_crs_prv;
654
655
0
    return *expr_crs += 3;
656
657
39
  case '!':
658
659
39
    if (*expr_crs + 10 >= expr_max) return 0;
660
661
39
    (*input_crs)++;
662
39
    EXPR_TYPE(*expr_crs) = PTN_NOT;
663
39
    expr_crs_prv = *expr_crs;
664
39
    *expr_crs += 4;
665
39
    EXPR_DATA_0(expr_crs_prv) = *expr_crs;
666
667
    /* create start expression */
668
39
    expr_start = *expr_crs;
669
39
    EXPR_TYPE(expr_start) = PTN_START;
670
39
    EXPR_PRV(expr_start) = PTN_END;
671
39
    *expr_crs += 3;
672
39
    EXPR_NXT(expr_start) = *expr_crs;
673
674
    /* compile sub expression */
675
39
    expr_sub = *expr_crs;
676
39
    EXPR_TYPE(expr_sub) = PTN_ERROR;
677
39
    EXPR_PRV(expr_sub) = expr_start;
678
39
    EXPR_NXT(expr_sub) = PTN_END;
679
680
39
    if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max,
681
39
          expr_crs, loop_cnts, table, nested))
682
0
      return 0;
683
684
39
    if (*expr_crs + 3 >= expr_max) return 0;
685
686
39
    EXPR_NXT(expr_sub) = *expr_crs;
687
688
    /* create end expression */
689
39
    expr_end = *expr_crs;
690
39
    EXPR_TYPE(expr_end) = PTN_END;
691
39
    EXPR_PRV(expr_end) = expr_sub;
692
39
    EXPR_NXT(expr_end) = expr_crs_prv;
693
694
39
    return *expr_crs += 3;
695
696
3
  case '+':
697
698
3
    if (*expr_crs + 5 >= expr_max) return 0;
699
3
    EXPR_TYPE(*expr_crs) = PTN_ONE_MORE;
700
3
    EXPR_DATA_1(*expr_crs) = (*loop_cnts)++;
701
3
    (*input_crs)++;
702
3
    return *expr_crs += 5;
703
704
60
  case '*':
705
706
60
    if (*expr_crs + 5 >= expr_max) return 0;
707
60
    EXPR_TYPE(*expr_crs) = PTN_ZERO_MORE;
708
60
    EXPR_DATA_1(*expr_crs) = (*loop_cnts)++;
709
60
    (*input_crs)++;
710
60
    return *expr_crs += 5;
711
712
22
  case '?':
713
714
22
    if (*expr_crs + 4 >= expr_max) return 0;
715
22
    EXPR_TYPE(*expr_crs) = PTN_OPTIONAL;
716
22
    (*input_crs)++;
717
22
    return *expr_crs += 4;
718
719
101
  case '|':
720
721
101
    if (*expr_crs + 5 >= expr_max) return 0;
722
101
    EXPR_TYPE(*expr_crs) = PTN_ALTERNATE;
723
101
    (*input_crs)++;
724
101
    return *expr_crs += 5;
725
726
145
  case '.':
727
728
145
    if (*expr_crs + 3 >= expr_max) return 0;
729
145
    EXPR_TYPE(*expr_crs) = PTN_ANY;
730
145
    (*input_crs)++;
731
145
    return *expr_crs += 3;
732
733
468
  case '%':
734
735
468
    if (*expr_crs + 5 >= expr_max) return 0;
736
737
468
    (*input_crs)++;
738
468
    if (*input_crs >= input_max) return 0;
739
740
    /* find closing bracket */
741
468
    if (input[*input_crs] == '[') {
742
184
      set = 1;
743
184
      (*input_crs)++;
744
736
      for (input_end = *input_crs; input_end < input_max; input_end++)
745
736
        if (input[input_end] == ']') break;
746
184
      if (input_end >= input_max) return 0;
747
284
    } else {
748
284
      set = 0;
749
284
      input_end = *input_crs + 1;
750
284
    }
751
752
468
    EXPR_TYPE(*expr_crs) = PTN_ATTRIBUTES;
753
754
468
    attrs0 = attrs1 = 0;
755
1.30k
    for (; (*input_crs) < input_end; (*input_crs)++) {
756
836
      switch (input[*input_crs]) {
757
0
      case '_':
758
0
        attrs0 |= CTC_Space;
759
0
        break;
760
0
      case '#':
761
0
        attrs0 |= CTC_Digit;
762
0
        break;
763
4
      case 'a':
764
4
        attrs0 |= CTC_Letter;
765
4
        break;
766
0
      case 'u':
767
0
        attrs0 |= CTC_UpperCase;
768
0
        break;
769
0
      case 'l':
770
0
        attrs0 |= CTC_LowerCase;
771
0
        break;
772
138
      case '.':
773
138
        attrs0 |= CTC_Punctuation;
774
138
        break;
775
0
      case '$':
776
0
        attrs0 |= CTC_Sign;
777
0
        break;
778
0
      case 'm':
779
0
        attrs0 |= CTC_Math;
780
0
        break;
781
0
      case '~':
782
0
        attrs0 |= CTC_SeqDelimiter;
783
0
        break;
784
0
      case '<':
785
0
        attrs0 |= CTC_SeqBefore;
786
0
        break;
787
0
      case '>':
788
0
        attrs0 |= CTC_SeqAfter;
789
0
        break;
790
791
138
      case '0':
792
138
      case '1':
793
690
      case '2':
794
690
      case '3':
795
690
      case '4':
796
690
      case '5':
797
690
      case '6':
798
690
      case '7': {
799
690
        int k = input[*input_crs] - '0';
800
690
        TranslationTableCharacterAttributes a = table->numberedAttributes[k];
801
690
        if (!a) {
802
          // attribute not used before yet: assign it a value
803
690
          a = table->numberedAttributes[k] =
804
690
              table->nextNumberedCharacterClassAttribute;
805
690
          if (a > CTC_UserDefined8) {
806
0
            _lou_logMessage(LOU_LOG_ERROR,
807
0
                "%s:%d: error: Too many character attributes defined",
808
0
                nested->fileName, nested->lineNumber);
809
0
            return 0;
810
0
          }
811
690
          table->nextNumberedCharacterClassAttribute <<= 1;
812
690
        }
813
690
        attrs1 |= (a >> 16);
814
690
        break;
815
690
      }
816
4
      case '^':
817
4
        attrs1 |= (CTC_EndOfInput >> 16);
818
4
        break;
819
820
0
      default:
821
0
        return 0;
822
836
      }
823
836
    }
824
468
    EXPR_DATA_0(*expr_crs) = attrs1;
825
468
    EXPR_DATA_1(*expr_crs) = attrs0;
826
827
468
    if (set) (*input_crs)++;
828
468
    return *expr_crs += 5;
829
830
460
  case '[':
831
832
460
    (*input_crs)++;
833
460
    if (*input_crs >= input_max) return 0;
834
835
    /* find closing bracket */
836
460
    esc = 0;
837
598
    for (input_end = *input_crs; input_end < input_max; input_end++) {
838
598
      if (input[input_end] == '\\' && !esc) {
839
0
        esc = 1;
840
0
        continue;
841
0
      }
842
843
598
      if (input[input_end] == ']' && !esc) break;
844
138
      esc = 0;
845
138
    }
846
460
    if (input_end >= input_max) return 0;
847
848
460
    if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0;
849
850
460
    EXPR_TYPE(*expr_crs) = PTN_CHARS;
851
852
460
    esc = 0;
853
460
    data = EXPR_DATA(*expr_crs);
854
598
    for (i = 1; *input_crs < input_end; (*input_crs)++) {
855
138
      if (input[*input_crs] == '\\' && !esc) {
856
0
        esc = 1;
857
0
        continue;
858
0
      }
859
860
138
      esc = 0;
861
138
      data[i++] = (widechar)input[*input_crs];
862
138
    }
863
460
    data[0] = i - 1;
864
460
    (*input_crs)++;
865
460
    return *expr_crs += 4 + data[0];
866
867
1
  case '@':
868
869
1
    (*input_crs)++;
870
1
    if (*input_crs >= input_max) return 0;
871
872
    /* find closing bracket */
873
1
    if (input[*input_crs] == '[') {
874
0
      set = 1;
875
0
      (*input_crs)++;
876
0
      for (input_end = *input_crs; input_end < input_max; input_end++)
877
0
        if (input[input_end] == ']') break;
878
0
      if (input_end >= input_max) return 0;
879
1
    } else {
880
1
      set = 0;
881
1
      input_end = *input_crs + 1;
882
1
    }
883
884
1
    if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0;
885
886
1
    EXPR_TYPE(*expr_crs) = PTN_HOOK;
887
888
1
    esc = 0;
889
1
    data = EXPR_DATA(*expr_crs);
890
2
    for (i = 1; *input_crs < input_end; (*input_crs)++) {
891
1
      if (input[*input_crs] == '\\' && !esc) {
892
0
        esc = 1;
893
0
        continue;
894
0
      }
895
896
1
      esc = 0;
897
1
      data[i++] = (widechar)input[*input_crs];
898
1
    }
899
1
    data[0] = i - 1;
900
1
    if (set) (*input_crs)++;
901
1
    return *expr_crs += 4 + data[0];
902
903
0
  case '^':
904
2
  case '$':
905
906
2
    if (*expr_crs + 3 >= expr_max) return 0;
907
2
    EXPR_TYPE(*expr_crs) = PTN_END_OF_INPUT;
908
2
    (*input_crs)++;
909
2
    return *expr_crs += 3;
910
911
328
  case '\\':
912
913
328
    (*input_crs)++;
914
328
    if (*input_crs >= input_max) return 0;
915
916
4.89k
  default:
917
918
4.89k
    if (*expr_crs + 5 >= expr_max) return 0;
919
4.89k
    EXPR_TYPE(*expr_crs) = PTN_CHARS;
920
4.89k
    EXPR_DATA_0(*expr_crs) = 1;
921
4.89k
    EXPR_DATA_1(*expr_crs) = (widechar)input[*input_crs];
922
4.89k
    (*input_crs)++;
923
4.89k
    return *expr_crs += 5;
924
6.19k
  }
925
6.19k
}
926
927
static int
928
pattern_insert_alternate(const widechar *input, const int input_max, int *input_crs,
929
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
930
0
    int expr_insert, TranslationTableHeader *table, const FileInfo *nested) {
931
0
  int expr_group, expr_alt, expr_end;
932
0
933
0
  if (EXPR_TYPE(*expr_crs) == PTN_START) return 0;
934
0
935
0
  if (*expr_crs + 12 >= expr_max) return 0;
936
0
937
0
  /* setup alternate expression */
938
0
  expr_alt = *expr_crs;
939
0
  EXPR_TYPE(expr_alt) = PTN_ALTERNATE;
940
0
  EXPR_PRV(expr_alt) = PTN_END;
941
0
  EXPR_NXT(expr_alt) = PTN_END;
942
0
  *expr_crs += 5;
943
0
944
0
  /* setup group expression */
945
0
  expr_group = *expr_crs;
946
0
  EXPR_TYPE(expr_group) = PTN_GROUP;
947
0
  EXPR_PRV(expr_group) = PTN_END;
948
0
  EXPR_NXT(expr_group) = PTN_END;
949
0
  *expr_crs += 4;
950
0
  EXPR_DATA_0(expr_group) = *expr_crs;
951
0
952
0
  EXPR_TYPE(*expr_crs) = PTN_ERROR;
953
0
  EXPR_PRV(*expr_crs) = PTN_END;
954
0
  EXPR_NXT(*expr_crs) = PTN_END;
955
0
  if (!pattern_compile_1(input, input_max, input_crs, expr_data, expr_max, expr_crs,
956
0
        loop_cnts, table, nested))
957
0
    return 0;
958
0
  expr_end = *expr_crs;
959
0
  EXPR_NXT(expr_end) = expr_group;
960
0
961
0
  /* setup last end expression */
962
0
  if (*expr_crs + 3 >= expr_max) return 0;
963
0
  *expr_crs += 3;
964
0
  EXPR_TYPE(*expr_crs) = PTN_END;
965
0
  EXPR_NXT(*expr_crs) = PTN_END;
966
0
967
0
  /* replace insert expression with group expression using last end expression */
968
0
  EXPR_NXT(EXPR_PRV(expr_insert)) = expr_group;
969
0
  EXPR_PRV(expr_group) = EXPR_PRV(expr_insert);
970
0
971
0
  EXPR_NXT(expr_group) = *expr_crs;
972
0
  EXPR_PRV(*expr_crs) = expr_group;
973
0
974
0
  /* link alternate and insert expressions before group end expression */
975
0
  EXPR_NXT(EXPR_PRV(expr_end)) = expr_alt;
976
0
  EXPR_PRV(expr_alt) = EXPR_PRV(expr_end);
977
0
978
0
  EXPR_NXT(expr_alt) = expr_insert;
979
0
  EXPR_PRV(expr_insert) = expr_alt;
980
0
981
0
  EXPR_NXT(expr_insert) = expr_end;
982
0
  EXPR_PRV(expr_end) = expr_insert;
983
0
984
0
  return *expr_crs;
985
0
}
986
987
/* Compile all expression sequences, resolving character sets, attributes,
988
 * groups, nots, and hooks.  Note that unlike the other compile functions, on
989
 * returning the expr_crs is set to the last end expression, not after it.
990
 */
991
static int
992
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
993
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
994
104
    TranslationTableHeader *table, const FileInfo *nested) {
995
104
  int expr_crs_prv;
996
997
104
  if (*expr_crs + 6 >= expr_max) return 0;
998
999
104
  expr_crs_prv = *expr_crs;
1000
1001
  /* setup start expression */
1002
104
  EXPR_TYPE(*expr_crs) = PTN_START;
1003
104
  EXPR_PRV(*expr_crs) = PTN_END;
1004
104
  *expr_crs += 3;
1005
104
  EXPR_NXT(expr_crs_prv) = *expr_crs;
1006
1007
  /* setup end expression */
1008
104
  EXPR_TYPE(*expr_crs) = PTN_END;
1009
104
  EXPR_PRV(*expr_crs) = expr_crs_prv;
1010
104
  EXPR_NXT(*expr_crs) = PTN_END;
1011
1012
6.26k
  while (*input_crs < input_max) {
1013
6.15k
    expr_crs_prv = *expr_crs;
1014
6.15k
    if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max,
1015
6.15k
          expr_crs, loop_cnts, table, nested))
1016
0
      return 0;
1017
1018
    /* setup end expression */
1019
6.15k
    if (*expr_crs + 3 >= expr_max) return 0;
1020
6.15k
    EXPR_NXT(expr_crs_prv) = *expr_crs;
1021
6.15k
    EXPR_TYPE(*expr_crs) = PTN_END;
1022
6.15k
    EXPR_PRV(*expr_crs) = expr_crs_prv;
1023
6.15k
    EXPR_NXT(*expr_crs) = PTN_END;
1024
1025
    /* insert seqafterexpression before attributes of seqafterchars */
1026
    // if(EXPR_TYPE(expr_crs_prv) == PTN_ATTRIBUTES)
1027
    // if(EXPR_DATA_1(expr_crs_prv) & CTC_SeqAfter)
1028
    // {
1029
    //  i = 0;
1030
    //  pattern_insert_alternate(table->seqAfterExpression,
1031
    //    table->seqAfterExpressionLength, &i, expr_data, expr_max,
1032
    //    expr_crs, loop_cnts, expr_crs_prv);
1033
    // }
1034
6.15k
  }
1035
1036
104
  return *expr_crs;
1037
104
}
1038
1039
/* Resolve optional and loop expressions.
1040
 */
1041
static int
1042
pattern_compile_2(
1043
143
    widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) {
1044
143
  int expr_start, expr_end, expr_prv, expr_sub;
1045
1046
6.48k
  while (EXPR_TYPE(expr_at) != PTN_END) {
1047
6.34k
    if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT) {
1048
39
      if (!pattern_compile_2(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs))
1049
0
        return 0;
1050
39
    }
1051
1052
6.34k
    if (EXPR_TYPE(expr_at) == PTN_ZERO_MORE || EXPR_TYPE(expr_at) == PTN_ONE_MORE ||
1053
6.27k
        EXPR_TYPE(expr_at) == PTN_OPTIONAL) {
1054
85
      if (*expr_crs + 6 >= expr_max) return 0;
1055
1056
      /* get previous expressions, there must
1057
       * be at least something and a PTN_START */
1058
85
      expr_sub = EXPR_PRV(expr_at);
1059
85
      if (EXPR_TYPE(expr_sub) == PTN_START) return 0;
1060
85
      expr_prv = EXPR_PRV(expr_sub);
1061
1062
      /* create start expression */
1063
85
      expr_start = *expr_crs;
1064
85
      EXPR_TYPE(expr_start) = PTN_START;
1065
85
      EXPR_PRV(expr_start) = PTN_END;
1066
85
      EXPR_NXT(expr_start) = expr_sub;
1067
85
      *expr_crs += 3;
1068
1069
      /* create end expression */
1070
85
      expr_end = *expr_crs;
1071
85
      EXPR_TYPE(expr_end) = PTN_END;
1072
85
      EXPR_PRV(expr_end) = expr_sub;
1073
85
      EXPR_NXT(expr_end) = expr_at;
1074
85
      *expr_crs += 3;
1075
1076
      /* relink previous expression before sub expression */
1077
85
      EXPR_DATA_0(expr_at) = expr_start;
1078
85
      EXPR_NXT(expr_prv) = expr_at;
1079
85
      EXPR_PRV(expr_at) = expr_prv;
1080
1081
      /* relink sub expression to start and end */
1082
85
      EXPR_PRV(expr_sub) = expr_start;
1083
85
      EXPR_NXT(expr_sub) = expr_end;
1084
85
    }
1085
1086
6.34k
    expr_at = EXPR_NXT(expr_at);
1087
6.34k
  }
1088
1089
143
  return 1;
1090
143
}
1091
1092
/* Resolves alternative expressions.
1093
 */
1094
static int
1095
pattern_compile_3(
1096
329
    widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) {
1097
329
  int expr_mrk, expr_start, expr_end, expr_sub_start, expr_sub_end;
1098
1099
6.85k
  while (EXPR_TYPE(expr_at) != PTN_END) {
1100
6.52k
    if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT ||
1101
6.48k
        EXPR_TYPE(expr_at) == PTN_OPTIONAL ||
1102
6.46k
        EXPR_TYPE(expr_at) == PTN_ZERO_MORE ||
1103
6.40k
        EXPR_TYPE(expr_at) == PTN_ONE_MORE) {
1104
124
      if (!pattern_compile_3(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs))
1105
0
        return 0;
1106
124
    }
1107
1108
6.52k
    if (EXPR_TYPE(expr_at) == PTN_ALTERNATE) {
1109
101
      if (*expr_crs + 12 >= expr_max) return 0;
1110
1111
      /* get previous start expression,
1112
       * can include alternate expressions */
1113
101
      expr_mrk = EXPR_PRV(expr_at);
1114
101
      if (EXPR_TYPE(expr_mrk) == PTN_START) return 0;
1115
101
      expr_sub_end = expr_mrk;
1116
839
      while (EXPR_TYPE(expr_mrk) != PTN_START) expr_mrk = EXPR_PRV(expr_mrk);
1117
101
      expr_sub_start = EXPR_NXT(expr_mrk);
1118
1119
      /* create first start expression */
1120
101
      expr_start = *expr_crs;
1121
101
      EXPR_TYPE(expr_start) = PTN_START;
1122
101
      EXPR_PRV(expr_start) = PTN_END;
1123
101
      EXPR_NXT(expr_start) = expr_sub_start;
1124
101
      *expr_crs += 3;
1125
1126
      /* create first end expression */
1127
101
      expr_end = *expr_crs;
1128
101
      EXPR_TYPE(expr_end) = PTN_END;
1129
101
      EXPR_PRV(expr_end) = expr_sub_end;
1130
101
      EXPR_NXT(expr_end) = expr_at;
1131
101
      *expr_crs += 3;
1132
1133
      /* relink previous expression before sub expression */
1134
101
      EXPR_DATA_0(expr_at) = expr_start;
1135
101
      EXPR_NXT(expr_mrk) = expr_at;
1136
101
      EXPR_PRV(expr_at) = expr_mrk;
1137
1138
      /* relink sub expression to start and end */
1139
101
      EXPR_PRV(expr_sub_start) = expr_start;
1140
101
      EXPR_NXT(expr_sub_end) = expr_end;
1141
1142
      /* get following PTN_END or PTN_ALTERNATE expression */
1143
101
      expr_mrk = EXPR_NXT(expr_at);
1144
101
      if (EXPR_TYPE(expr_mrk) == PTN_END || EXPR_TYPE(expr_mrk) == PTN_ALTERNATE)
1145
0
        return 0;
1146
101
      expr_sub_start = expr_mrk;
1147
5.20k
      while (EXPR_TYPE(expr_mrk) != PTN_END && EXPR_TYPE(expr_mrk) != PTN_ALTERNATE)
1148
5.10k
        expr_mrk = EXPR_NXT(expr_mrk);
1149
101
      expr_sub_end = EXPR_PRV(expr_mrk);
1150
1151
      /* create first start expression */
1152
101
      expr_start = *expr_crs;
1153
101
      EXPR_TYPE(expr_start) = PTN_START;
1154
101
      EXPR_PRV(expr_start) = PTN_END;
1155
101
      EXPR_NXT(expr_start) = expr_sub_start;
1156
101
      *expr_crs += 3;
1157
1158
      /* create first end expression */
1159
101
      expr_end = *expr_crs;
1160
101
      EXPR_TYPE(expr_end) = PTN_END;
1161
101
      EXPR_PRV(expr_end) = expr_sub_end;
1162
101
      EXPR_NXT(expr_end) = expr_at;
1163
101
      *expr_crs += 3;
1164
1165
      /* relink following expression before sub expression */
1166
101
      EXPR_DATA_1(expr_at) = expr_start;
1167
101
      EXPR_PRV(expr_mrk) = expr_at;
1168
101
      EXPR_NXT(expr_at) = expr_mrk;
1169
1170
      /* relink sub expression to start and end */
1171
101
      EXPR_PRV(expr_sub_start) = expr_start;
1172
101
      EXPR_NXT(expr_sub_end) = expr_end;
1173
1174
      /* check expressions were after alternate and got moved into
1175
       * a sub expression, previous expressions already checked */
1176
101
      if (!pattern_compile_3(expr_data, EXPR_DATA_1(expr_at), expr_max, expr_crs))
1177
0
        return 0;
1178
101
    }
1179
1180
6.52k
    expr_at = EXPR_NXT(expr_at);
1181
6.52k
  }
1182
1183
329
  return 1;
1184
329
}
1185
1186
int EXPORT_CALL
1187
_lou_pattern_compile(const widechar *input, const int input_max, widechar *expr_data,
1188
104
    const int expr_max, TranslationTableHeader *table, const FileInfo *nested) {
1189
104
  int input_crs;
1190
1191
104
  input_crs = 0;
1192
104
  expr_data[0] = 2;
1193
104
  expr_data[1] = 0;
1194
1195
104
  if (!pattern_compile_1(input, input_max, &input_crs, expr_data, expr_max,
1196
104
        &expr_data[0], &expr_data[1], table, nested))
1197
0
    return 0;
1198
1199
  /* shift past the last end */
1200
104
  expr_data[0] += 3;
1201
1202
104
  if (!pattern_compile_2(expr_data, 2, expr_max, &expr_data[0])) return 0;
1203
1204
104
  if (!pattern_compile_3(expr_data, 2, expr_max, &expr_data[0])) return 0;
1205
1206
104
  return expr_data[0];
1207
104
}
1208
1209
////////////////////////////////////////////////////////////////////////////////
1210
1211
static void
1212
pattern_reverse_expression(widechar *expr_data, const int expr_start);
1213
1214
static void
1215
6.02k
pattern_reverse_branch(widechar *expr_data, const int expr_at) {
1216
6.02k
  widechar expr_swap;
1217
1218
6.02k
  switch (EXPR_TYPE(expr_at)) {
1219
101
  case PTN_ALTERNATE:
1220
1221
101
    pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at));
1222
101
    expr_swap = EXPR_DATA_0(expr_at);
1223
101
    EXPR_DATA_0(expr_at) = EXPR_DATA_1(expr_at);
1224
101
    EXPR_DATA_1(expr_at) = expr_swap;
1225
1226
101
  case PTN_GROUP:
1227
139
  case PTN_NOT:
1228
142
  case PTN_ONE_MORE:
1229
202
  case PTN_ZERO_MORE:
1230
224
  case PTN_OPTIONAL:
1231
1232
224
    pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at));
1233
6.02k
  }
1234
6.02k
}
1235
1236
static void
1237
377
pattern_reverse_expression(widechar *expr_data, const int expr_start) {
1238
377
  widechar expr_end, expr_crs, expr_prv;
1239
1240
377
  expr_end = EXPR_NXT(expr_start);
1241
1242
  /* empty expression */
1243
377
  if (EXPR_TYPE(expr_end) == PTN_END) return;
1244
1245
  /* find end expression */
1246
6.40k
  while (EXPR_TYPE(expr_end) != PTN_END) expr_end = EXPR_NXT(expr_end);
1247
1248
377
  expr_crs = EXPR_PRV(expr_end);
1249
377
  expr_prv = EXPR_PRV(expr_crs);
1250
1251
  /* relink expression before end expression */
1252
377
  EXPR_NXT(expr_start) = expr_crs;
1253
377
  EXPR_PRV(expr_crs) = expr_start;
1254
377
  EXPR_NXT(expr_crs) = expr_prv;
1255
1256
  /* reverse any branching expressions */
1257
377
  pattern_reverse_branch(expr_data, expr_crs);
1258
1259
6.02k
  while (expr_prv != expr_start) {
1260
    /* shift current expression */
1261
5.65k
    expr_crs = expr_prv;
1262
5.65k
    expr_prv = EXPR_PRV(expr_prv);
1263
1264
    /* reverse any branching expressions */
1265
5.65k
    pattern_reverse_branch(expr_data, expr_crs);
1266
1267
    /* relink current expression */
1268
5.65k
    EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs);
1269
5.65k
    EXPR_NXT(expr_crs) = expr_prv;
1270
5.65k
  }
1271
1272
  /* relink expression after start expression */
1273
377
  EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs);
1274
377
  EXPR_NXT(expr_crs) = expr_end;
1275
377
  EXPR_PRV(expr_end) = expr_crs;
1276
377
}
1277
1278
void EXPORT_CALL
1279
52
_lou_pattern_reverse(widechar *expr_data) {
1280
52
  pattern_reverse_expression(expr_data, 2);
1281
52
}
1282
1283
////////////////////////////////////////////////////////////////////////////////
1284
1285
static int
1286
898k
pattern_check_chars(const widechar input_char, const widechar *expr_data) {
1287
898k
  int expr_cnt, i;
1288
1289
898k
  expr_cnt = expr_data[0] + 1;
1290
1291
1.79M
  for (i = 1; i < expr_cnt; i++)
1292
898k
    if (input_char == expr_data[i]) break;
1293
1294
898k
  if (i == expr_cnt) return 0;
1295
696
  return 1;
1296
898k
}
1297
1298
static int
1299
pattern_check_attrs(const widechar input_char, const widechar *expr_data,
1300
288k
    const TranslationTableHeader *table) {
1301
288k
  int attrs;
1302
1303
288k
  attrs = ((expr_data[0] << 16) | expr_data[1]) & ~(CTC_EndOfInput | CTC_EmpMatch);
1304
288k
  if (!checkAttr(input_char, attrs, table)) return 0;
1305
0
  return 1;
1306
288k
}
1307
1308
static int
1309
pattern_check_expression(const widechar *const input, int *input_crs,
1310
    const int input_minmax, const int input_dir, const widechar *const expr_data,
1311
    int (*hook)(const widechar input, const int data_len), widechar *hook_data,
1312
    const int hook_max, int expr_crs, int notOperator, int loop_crs, int *loop_cnts,
1313
1.24M
    const TranslationTableHeader *table) {
1314
1.24M
  int input_crs_prv, input_start, attrs, ret, i;
1315
1.24M
  const widechar *data;
1316
1317
1.24M
  data = NULL;
1318
1319
  /* save input_crs to know if loop consumed input */
1320
1.24M
  input_start = *input_crs;
1321
1322
1.24M
  CHECK_OUTPUT(START, 0, __LINE__, "check start")
1323
1324
6.68M
  while (!(EXPR_TYPE(expr_crs) == PTN_END && EXPR_TYPE(expr_crs) == PTN_END)) {
1325
    /* end of input expression */
1326
6.68M
    if (EXPR_TYPE(expr_crs) == PTN_END_OF_INPUT) {
1327
0
      if (*input_crs * input_dir >= input_minmax * input_dir) {
1328
0
        if (notOperator)
1329
0
          CHECK_OUTPUT(
1330
0
              RETURN, 0, __LINE__, "end of input failed:  no input and not")
1331
0
        else
1332
0
          CHECK_OUTPUT(RETURN, 1, __LINE__, "end of input passed:  no input")
1333
0
        return !notOperator;
1334
0
      } else {
1335
0
        if (notOperator)
1336
0
          CHECK_OUTPUT(
1337
0
              RETURN, 1, __LINE__, "end of input passed:  input and not")
1338
0
        else
1339
0
          CHECK_OUTPUT(RETURN, 0, __LINE__, "end of input failed:  input")
1340
0
        return notOperator;
1341
0
      }
1342
0
    }
1343
1344
    /* no more input */
1345
6.68M
    if (*input_crs * input_dir >= input_minmax * input_dir) {
1346
37.8k
      switch (EXPR_TYPE(expr_crs)) {
1347
1.67k
      case PTN_ATTRIBUTES:
1348
1349
1.67k
        attrs = (EXPR_DATA_0(expr_crs) << 16);
1350
1.67k
        if (attrs & CTC_EndOfInput) {
1351
1.67k
          if (notOperator) {
1352
1.46k
            CHECK_OUTPUT(RETURN, 0, __LINE__,
1353
1.46k
                "attributes failed:  end of input attribute:  not")
1354
1.46k
            return 0;
1355
1.46k
          }
1356
203
          CHECK_OUTPUT(RETURN, 1, __LINE__,
1357
203
              "attributes passed:  end of input attribute")
1358
203
          return 1;
1359
1.67k
        }
1360
0
        CHECK_OUTPUT(RETURN, 0, __LINE__,
1361
0
            "attributes failed:  no end of input attribute")
1362
0
        return 0;
1363
1364
207
      case PTN_ANY:
1365
4.88k
      case PTN_CHARS:
1366
1367
4.88k
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed:  no input")
1368
4.88k
        return 0;
1369
37.8k
      }
1370
1371
31.3k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "no input")
1372
31.3k
    }
1373
1374
6.68M
    switch (EXPR_TYPE(expr_crs)) {
1375
1376
1.57M
    case PTN_START:
1377
1378
1.57M
      expr_crs = EXPR_NXT(expr_crs);
1379
1.57M
      CHECK_OUTPUT(SHOW, 0, __LINE__, "start next")
1380
1.57M
      break;
1381
1382
0
    case PTN_GROUP:
1383
1384
0
      expr_crs = EXPR_DATA_0(expr_crs);
1385
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "group next")
1386
0
      break;
1387
1388
321k
    case PTN_NOT:
1389
1390
321k
      notOperator = !notOperator;
1391
321k
      expr_crs = EXPR_DATA_0(expr_crs);
1392
321k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "not next")
1393
321k
      break;
1394
1395
31.7k
    case PTN_ONE_MORE:
1396
1397
31.7k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ start")
1398
1399
2.68M
    case PTN_ZERO_MORE:
1400
1401
      /* check if loop already started */
1402
2.68M
      if (expr_crs == loop_crs) {
1403
362k
        loop_cnts[EXPR_DATA_1(loop_crs)]++;
1404
362k
        CHECK_OUTPUT(SHOW, 0, __LINE__, "loop again")
1405
2.32M
      } else {
1406
        /* check if loop nested, wasn't running but has a count */
1407
2.32M
        if (loop_cnts[EXPR_DATA_1(expr_crs)]) {
1408
2.31M
          CHECK_OUTPUT(SHOW, 0, __LINE__, "loop already running")
1409
2.31M
          goto loop_next;
1410
2.31M
        }
1411
1412
        /* start loop */
1413
15.5k
        loop_crs = expr_crs;
1414
15.5k
        loop_cnts[EXPR_DATA_1(loop_crs)] = 1;
1415
15.5k
        CHECK_OUTPUT(SHOW, 0, __LINE__, "loop start")
1416
15.5k
      }
1417
1418
      /* start loop expression */
1419
378k
      input_crs_prv = *input_crs;
1420
378k
      ret = pattern_check_expression(input, input_crs, input_minmax, input_dir,
1421
378k
          expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1422
378k
          notOperator, loop_crs, loop_cnts, table);
1423
378k
      if (ret) {
1424
73.5k
        CHECK_OUTPUT(RETURN, 1, __LINE__, "loop passed")
1425
73.5k
        return 1;
1426
73.5k
      }
1427
304k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop failed")
1428
304k
      *input_crs = input_crs_prv;
1429
1430
      /* check loop count */
1431
304k
      loop_cnts[EXPR_DATA_1(loop_crs)]--;
1432
304k
      if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE) {
1433
65
        if (loop_cnts[EXPR_DATA_1(loop_crs)] < 1) {
1434
0
          CHECK_OUTPUT(RETURN, 0, __LINE__, "loop+ failed")
1435
0
          return 0;
1436
0
        } else
1437
65
          CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ passed")
1438
65
      }
1439
1440
    /* continue after loop */
1441
2.61M
    loop_next:
1442
2.61M
      expr_crs = EXPR_NXT(expr_crs);
1443
2.61M
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop next")
1444
2.61M
      break;
1445
1446
870k
    case PTN_OPTIONAL:
1447
1448
      /* save current state */
1449
870k
      input_crs_prv = *input_crs;
1450
1451
      /* start optional expression */
1452
870k
      CHECK_OUTPUT(CALL, 0, __LINE__, "option start")
1453
870k
      if (pattern_check_expression(input, input_crs, input_minmax, input_dir,
1454
870k
            expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1455
870k
            notOperator, loop_crs, loop_cnts, table)) {
1456
268
        CHECK_OUTPUT(RETURN, 1, __LINE__, "option passed")
1457
268
        return 1;
1458
268
      }
1459
870k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "option failed")
1460
1461
      /* continue after optional expression */
1462
870k
      *input_crs = input_crs_prv;
1463
870k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "no option start")
1464
870k
      expr_crs = EXPR_NXT(expr_crs);
1465
870k
      break;
1466
1467
213
    case PTN_ALTERNATE:
1468
1469
      /* save current state */
1470
213
      input_crs_prv = *input_crs;
1471
1472
      /* start first expression */
1473
213
      CHECK_OUTPUT(CALL, 0, __LINE__, "or 1 start")
1474
213
      if (pattern_check_expression(input, input_crs, input_minmax, input_dir,
1475
213
            expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1476
213
            notOperator, loop_crs, loop_cnts, table)) {
1477
203
        CHECK_OUTPUT(RETURN, 1, __LINE__, "or 1 passed")
1478
203
        return 1;
1479
203
      }
1480
10
      CHECK_OUTPUT(SHOW, 0, __LINE__, "or 1 failed")
1481
1482
      /* start second expression (no need to push) */
1483
10
      *input_crs = input_crs_prv;
1484
10
      CHECK_OUTPUT(SHOW, 0, __LINE__, "or 2 start")
1485
10
      expr_crs = EXPR_DATA_1(expr_crs);
1486
10
      break;
1487
1488
42.4k
    case PTN_ANY:
1489
1490
42.4k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "any")
1491
42.4k
      *input_crs += input_dir;
1492
42.4k
      expr_crs = EXPR_NXT(expr_crs);
1493
42.4k
      break;
1494
1495
288k
    case PTN_ATTRIBUTES:
1496
1497
288k
      ret = pattern_check_attrs(
1498
288k
          input[*input_crs], EXPR_CONST_DATA(expr_crs), table);
1499
288k
      if (ret && notOperator) {
1500
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed:  not");
1501
0
        return 0;
1502
0
      }
1503
288k
      if (!ret && !notOperator) {
1504
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed");
1505
0
        return 0;
1506
0
      }
1507
288k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "attributes passed")
1508
288k
      *input_crs += input_dir;
1509
288k
      expr_crs = EXPR_NXT(expr_crs);
1510
288k
      break;
1511
1512
898k
    case PTN_CHARS:
1513
1514
898k
      ret = pattern_check_chars(input[*input_crs], EXPR_CONST_DATA(expr_crs));
1515
898k
      if (ret && notOperator) {
1516
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed:  not");
1517
0
        return 0;
1518
0
      }
1519
898k
      if (!ret && !notOperator) {
1520
866k
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed");
1521
866k
        return 0;
1522
866k
      }
1523
32.4k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "chars passed")
1524
32.4k
      *input_crs += input_dir;
1525
32.4k
      expr_crs = EXPR_NXT(expr_crs);
1526
32.4k
      break;
1527
1528
0
    case PTN_HOOK:
1529
1530
0
      if (hook == NULL) {
1531
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed:  NULL");
1532
0
        return 0;
1533
0
      }
1534
1535
      /* copy expression data */
1536
0
      data = EXPR_CONST_DATA(expr_crs);
1537
0
      for (i = 0; i < data[0]; i++) hook_data[i] = data[i + 1];
1538
1539
      /* call hook function */
1540
0
      ret = hook(input[*input_crs], data[0]);
1541
0
      if (ret && notOperator) {
1542
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed:  not");
1543
0
        return 0;
1544
0
      }
1545
0
      if (!ret && !notOperator) {
1546
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed");
1547
0
        return 0;
1548
0
      }
1549
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "hook passed")
1550
0
      *input_crs += input_dir;
1551
0
      expr_crs = EXPR_NXT(expr_crs);
1552
0
      break;
1553
1554
0
    case PTN_END:
1555
0
      break;
1556
1557
0
    default:
1558
1559
0
      CHECK_OUTPUT(RETURN, 0, __LINE__, "unknown opcode")
1560
0
      return 0;
1561
6.68M
    }
1562
1563
    /* check end expression  */
1564
6.35M
    while (EXPR_TYPE(expr_crs) == PTN_END) {
1565
3.58M
      CHECK_OUTPUT(SHOW, 0, __LINE__, "end")
1566
1567
      /* check for end of expressions */
1568
3.58M
      if (EXPR_NXT(expr_crs) == PTN_END) break;
1569
1570
3.58M
      expr_crs = EXPR_NXT(expr_crs);
1571
1572
      /* returning loop */
1573
3.58M
      if (EXPR_TYPE(expr_crs) == PTN_ZERO_MORE ||
1574
2.97M
          EXPR_TYPE(expr_crs) == PTN_ONE_MORE) {
1575
2.97M
        CHECK_OUTPUT(SHOW, 0, __LINE__, "end loop")
1576
1577
        /* check that loop consumed input */
1578
2.97M
        if (*input_crs == input_start) {
1579
302k
          CHECK_OUTPUT(RETURN, 0, __LINE__, "loop failed:  did not consume")
1580
302k
          return 0;
1581
302k
        }
1582
1583
        /* loops do not continue to the next expression */
1584
2.67M
        break;
1585
2.97M
      }
1586
1587
      /* returning not */
1588
609k
      if (EXPR_TYPE(expr_crs) == PTN_NOT) notOperator = !notOperator;
1589
1590
609k
      expr_crs = EXPR_NXT(expr_crs);
1591
1592
609k
      CHECK_OUTPUT(SHOW, 0, __LINE__, "end next")
1593
609k
    }
1594
1595
5.43M
    CHECK_OUTPUT(SHOW, 0, __LINE__, "check next")
1596
5.43M
  }
1597
1598
65
  CHECK_OUTPUT(RETURN, 1, __LINE__, "check passed:  end of expression");
1599
65
  return 1;
1600
1.24M
}
1601
1602
static int
1603
pattern_check_hook(const widechar *input, const int input_start, const int input_minmax,
1604
    const int input_dir, const widechar *expr_data,
1605
    int (*hook)(const widechar input, const int data_len), widechar *hook_data,
1606
541
    const int hook_max, const TranslationTableHeader *table) {
1607
541
  int input_crs, ret, *loop_cnts;
1608
1609
541
  input_crs = input_start;
1610
541
  loop_cnts = malloc(expr_data[1] * sizeof(int));
1611
541
  memset(loop_cnts, 0, expr_data[1] * sizeof(int));
1612
541
  ret = pattern_check_expression(input, &input_crs, input_minmax, input_dir, expr_data,
1613
541
      hook, hook_data, hook_max, 2, 0, 0, loop_cnts, table);
1614
541
  free(loop_cnts);
1615
541
  return ret;
1616
541
}
1617
1618
int EXPORT_CALL
1619
_lou_pattern_check(const widechar *input, const int input_start, const int input_minmax,
1620
    const int input_dir, const widechar *expr_data,
1621
541
    const TranslationTableHeader *table) {
1622
#ifdef CHECK_OUTPUT_DEFINED
1623
  pattern_output(expr_data, table);
1624
#endif
1625
541
  return pattern_check_hook(
1626
541
      input, input_start, input_minmax, input_dir, expr_data, NULL, NULL, 0, table);
1627
541
}
1628
1629
////////////////////////////////////////////////////////////////////////////////