Coverage Report

Created: 2025-07-18 06:54

/src/liblouis/liblouis/pattern.c
Line
Count
Source (jump to first uncovered line)
1
/* liblouis Braille Translation and Back-Translation Library
2
3
   Copyright (C) 2016 Mike Gray, American Printing House for the Blind
4
5
   This file is part of liblouis.
6
7
   liblouis is free software: you can redistribute it and/or modify it
8
   under the terms of the GNU Lesser General Public License as published
9
   by the Free Software Foundation, either version 2.1 of the License, or
10
   (at your option) any later version.
11
12
   liblouis is distributed in the hope that it will be useful, but
13
   WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
   Lesser General Public License for more details.
16
17
   You should have received a copy of the GNU Lesser General Public
18
   License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
19
*/
20
21
#include <config.h>
22
23
#include <stdlib.h>
24
#include <stdio.h>
25
#include <string.h>
26
#include <ctype.h>
27
28
#include "internal.h"
29
30
//#define CHECK_OUTPUT_DEFINED
31
32
/////
33
34
// TODO: these functions are static and copied several times
35
36
/* Selects which hash table checkAttr() consults: a non-zero value makes it
 * look the value up in table->characters, zero makes it look it up in
 * table->dots. */
int translation_direction = 1;
37
38
static TranslationTableCharacter *
39
0
findCharOrDots(widechar c, int m, const TranslationTableHeader *table) {
40
  /* Look up character or dot pattern in the appropriate
41
   * table. */
42
0
  static TranslationTableCharacter noChar = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0, 32, 0,
43
0
    0 };
44
0
  static TranslationTableCharacter noDots = { NULL, -1, 0, 0, 0, CTC_Space, 0, 0,
45
0
    LOU_DOTS, 0, 0 };
46
0
  TranslationTableCharacter *notFound;
47
0
  TranslationTableCharacter *character;
48
0
  TranslationTableOffset bucket;
49
0
  unsigned long int makeHash = _lou_charHash(c);
50
0
  if (m == 0) {
51
0
    bucket = table->characters[makeHash];
52
0
    notFound = &noChar;
53
0
  } else {
54
0
    bucket = table->dots[makeHash];
55
0
    notFound = &noDots;
56
0
  }
57
0
  while (bucket) {
58
0
    character = (TranslationTableCharacter *)&table->ruleArea[bucket];
59
0
    if (character->value == c) return character;
60
0
    bucket = character->next;
61
0
  }
62
0
  notFound->value = c;
63
0
  return notFound;
64
0
}
65
66
static int
67
checkAttr(const widechar c, const TranslationTableCharacterAttributes a,
68
0
    const TranslationTableHeader *table) {
69
0
  return (((findCharOrDots(c, translation_direction ? 0 : 1, table))->attributes & a)
70
0
          ? 1
71
0
          : 0);
72
0
}
73
74
/////
75
76
/* Type tag stored in the first cell of every compiled expression. */
enum pattern_type {
  PTN_ERROR = 0, /* placeholder written before a sub expression is compiled */

  PTN_START = 1, /* head of an expression sequence */
  PTN_GROUP = 2, /* compiled from '(' ... ')' */
  PTN_NOT = 3,   /* compiled from '!' */

  PTN_ONE_MORE = 4,  /* compiled from '+' */
  PTN_ZERO_MORE = 5, /* compiled from '*' */
  PTN_OPTIONAL = 6,  /* compiled from '?' */

  PTN_ALTERNATE = 7, /* compiled from '|' */

  PTN_ANY = 8,           /* compiled from '.' */
  PTN_ATTRIBUTES = 9,    /* compiled from '%' */
  PTN_CHARS = 10,        /* compiled from '[' ... ']' or a literal character */
  PTN_HOOK = 11,         /* compiled from '@' */
  PTN_END_OF_INPUT = 12, /* compiled from '^' or '$' */

  /* tail of an expression sequence; also stored in prv/nxt as "no link" */
  PTN_END = 0xffff,
};
97
98
0
/* Accessors for one compiled expression stored in a flat cell buffer.
 * `at` is the index of the expression's first cell:
 *   at + 0  type tag
 *   at + 1  index of the previous expression
 *   at + 2  index of the next expression
 *   at + 3  first data cell (further data cells follow)
 * Both macro arguments are parenthesized so that any expression, not just a
 * plain identifier, can be passed as the buffer.  The element accessors are
 * lvalues. */
#define EXPR_TYPE_IN(at, buffer) ((buffer)[(at) + 0])
#define EXPR_PRV_IN(at, buffer) ((buffer)[(at) + 1])
#define EXPR_NXT_IN(at, buffer) ((buffer)[(at) + 2])
#define EXPR_DATA_0_IN(at, buffer) ((buffer)[(at) + 3])
#define EXPR_DATA_1_IN(at, buffer) ((buffer)[(at) + 4])
#define EXPR_DATA_2_IN(at, buffer) ((buffer)[(at) + 5])
#define EXPR_DATA_IN(at, buffer) ((widechar *)&(buffer)[(at) + 3])
#define EXPR_CONST_DATA_IN(at, buffer) ((const widechar *)&(buffer)[(at) + 3])

/* Same accessors, applied to a buffer named `expr_data` that must be in
 * scope at the point of use. */
#define EXPR_TYPE(at) EXPR_TYPE_IN((at), expr_data)
#define EXPR_PRV(at) EXPR_PRV_IN((at), expr_data)
#define EXPR_NXT(at) EXPR_NXT_IN((at), expr_data)
#define EXPR_DATA_0(at) EXPR_DATA_0_IN((at), expr_data)
#define EXPR_DATA_1(at) EXPR_DATA_1_IN((at), expr_data)
#define EXPR_DATA_2(at) EXPR_DATA_2_IN((at), expr_data)
#define EXPR_DATA(at) EXPR_DATA_IN((at), expr_data)
#define EXPR_CONST_DATA(at) EXPR_CONST_DATA_IN((at), expr_data)
115
116
/* Optional tracing.  When CHECK_OUTPUT_DEFINED is set, CHECK_OUTPUT() expands
 * to a do_output() call that, besides its own four arguments, picks up
 * input, input_crs, input_minmax, input_dir, expr_data, expr_crs,
 * notOperator, loop_crs and loop_cnts from the scope it is expanded in.
 * Otherwise it expands to an empty block. */
#if defined(CHECK_OUTPUT_DEFINED)

#if !defined(DEBUG)
#define DEBUG
#endif

/* values for the `type` argument of CHECK_OUTPUT */
#define START 0
#define CALL 1
#define RETURN 2
#define SHOW 3

#define CHECK_OUTPUT(type, ret, line, msg)                                    \
  {                                                                           \
    do_output(type, ret, line, input[*input_crs], input_minmax, *input_crs,   \
        input_dir, expr_data, expr_crs, notOperator, loop_crs, loop_cnts,     \
        msg);                                                                 \
  }

#else

#define CHECK_OUTPUT(type, ret, line, msg) \
  { ; }

#endif
140
141
struct expression {
142
  widechar type;
143
  widechar prv;
144
  widechar nxt;
145
  widechar data[1];
146
};
147
148
/* gdb won't know what this is unless it is actually used */
149
#ifdef DEBUG
150
static struct expression *expr_debug;
151
#endif
152
153
////////////////////////////////////////////////////////////////////////////////
154
155
static char spaces[] = "..............................";
156
static int space = 30;
157
158
static void
159
pattern_output_expression(
160
0
    const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) {
161
0
  int i;
162
0
163
0
  if (expr_crs == PTN_END) return;
164
0
165
0
  while (EXPR_TYPE(expr_crs) != PTN_END) {
166
0
    printf("%s%d", &spaces[space], expr_crs);
167
0
    if (expr_crs < 100) printf(" ");
168
0
    if (expr_crs < 10) printf(" ");
169
0
    for (i = 0; i < 13 - (30 - space); i++) printf(" ");
170
0
171
0
    switch (EXPR_TYPE(expr_crs)) {
172
0
    case PTN_START:
173
0
174
0
      printf("START\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
175
0
      break;
176
0
177
0
    case PTN_GROUP:
178
0
179
0
      printf("(    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
180
0
          EXPR_DATA_0(expr_crs));
181
0
      space--;
182
0
      if (space < 0) space = 0;
183
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
184
0
      space++;
185
0
      if (space > 30) space = 30;
186
0
      break;
187
0
188
0
    case PTN_NOT:
189
0
190
0
      printf("!    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
191
0
          EXPR_DATA_0(expr_crs));
192
0
      space--;
193
0
      if (space < 0) space = 0;
194
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
195
0
      space++;
196
0
      if (space > 30) space = 30;
197
0
      break;
198
0
199
0
    case PTN_ONE_MORE:
200
0
201
0
      printf("+    \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
202
0
          EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
203
0
      space--;
204
0
      if (space < 0) space = 0;
205
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
206
0
      space++;
207
0
      if (space > 30) space = 30;
208
0
      break;
209
0
210
0
    case PTN_ZERO_MORE:
211
0
212
0
      printf("*    \t%d\t%d\t-> %d\t#%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
213
0
          EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
214
0
      space--;
215
0
      if (space < 0) space = 0;
216
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
217
0
      space++;
218
0
      if (space > 30) space = 30;
219
0
      break;
220
0
221
0
    case PTN_OPTIONAL:
222
0
223
0
      printf("?    \t%d\t%d\t-> %d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs),
224
0
          EXPR_DATA_0(expr_crs));
225
0
      space--;
226
0
      if (space < 0) space = 0;
227
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
228
0
      space++;
229
0
      if (space > 30) space = 30;
230
0
      break;
231
0
232
0
    case PTN_ALTERNATE:
233
0
234
0
      printf("|    \t%d\t%d\t-> %d\t-> %d\n", EXPR_PRV(expr_crs),
235
0
          EXPR_NXT(expr_crs), EXPR_DATA_0(expr_crs), EXPR_DATA_1(expr_crs));
236
0
      space--;
237
0
      if (space < 0) space = 0;
238
0
      pattern_output_expression(expr_data, EXPR_DATA_0(expr_crs), table);
239
0
      pattern_output_expression(expr_data, EXPR_DATA_1(expr_crs), table);
240
0
      space++;
241
0
      if (space > 30) space = 30;
242
0
      break;
243
0
244
0
    case PTN_ANY:
245
0
246
0
      printf(".    \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
247
0
      break;
248
0
249
0
    case PTN_ATTRIBUTES:
250
0
251
0
      printf("%%    \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
252
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0");
253
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1");
254
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2");
255
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3");
256
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4");
257
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5");
258
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6");
259
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7");
260
0
      if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^");
261
0
      if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_");
262
0
      if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#");
263
0
      if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a");
264
0
      if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u");
265
0
      if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l");
266
0
      if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf(".");
267
0
      if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$");
268
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~");
269
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<");
270
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">");
271
0
      puts("");
272
0
      break;
273
0
274
0
    case PTN_CHARS:
275
0
276
0
      printf("[]   \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
277
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
278
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
279
0
      puts("");
280
0
      break;
281
0
282
0
    case PTN_HOOK:
283
0
284
0
      printf("@    \t%d\t%d\t", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
285
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
286
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
287
0
      puts("");
288
0
      break;
289
0
290
0
    case PTN_END_OF_INPUT:
291
0
292
0
      printf("^    \t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
293
0
      break;
294
0
295
0
    default:
296
0
297
0
      printf("%d?    \t%d\t%d\n", EXPR_TYPE(expr_crs), EXPR_PRV(expr_crs),
298
0
          EXPR_NXT(expr_crs));
299
0
      break;
300
0
    }
301
0
302
0
    expr_crs = EXPR_NXT(expr_crs);
303
0
  }
304
0
305
0
  printf("%s%d", &spaces[space], expr_crs);
306
0
  if (expr_crs < 100) printf(" ");
307
0
  if (expr_crs < 10) printf(" ");
308
0
  for (i = 0; i < 13 - (30 - space); i++) printf(" ");
309
0
  printf("END\t%d\t%d\n", EXPR_PRV(expr_crs), EXPR_NXT(expr_crs));
310
0
  fflush(stdout);
311
0
  return;
312
0
}
313
314
static void
315
0
pattern_output(const widechar *expr_data, const TranslationTableHeader *table) {
316
0
  printf("%d    \tlength\n", expr_data[0]);
317
0
  printf("%d    \tloops\n", expr_data[1]);
318
0
  if (expr_data[0] > 0 && expr_data[0] != PTN_END)
319
0
    pattern_output_expression(expr_data, 2, table);
320
0
}
321
322
static void
323
pattern_print_expression(
324
0
    const widechar *expr_data, int expr_crs, const TranslationTableHeader *table) {
325
0
  int i;
326
0
327
0
  if (expr_crs == PTN_END) return;
328
0
329
0
  while (EXPR_TYPE(expr_crs) != PTN_END) {
330
0
    switch (EXPR_TYPE(expr_crs)) {
331
0
    case PTN_START:
332
0
      break;
333
0
334
0
    case PTN_GROUP:
335
0
336
0
      printf(" (");
337
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
338
0
      printf(") ");
339
0
      break;
340
0
341
0
    case PTN_NOT:
342
0
343
0
      printf("!");
344
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
345
0
      break;
346
0
347
0
    case PTN_ONE_MORE:
348
0
349
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
350
0
      printf("+");
351
0
      break;
352
0
353
0
    case PTN_ZERO_MORE:
354
0
355
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
356
0
      printf("*");
357
0
      break;
358
0
359
0
    case PTN_OPTIONAL:
360
0
361
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
362
0
      printf("?");
363
0
      break;
364
0
365
0
    case PTN_ALTERNATE:
366
0
367
0
      pattern_print_expression(expr_data, EXPR_DATA_0(expr_crs), table);
368
0
      printf(" | ");
369
0
      pattern_print_expression(expr_data, EXPR_DATA_1(expr_crs), table);
370
0
      break;
371
0
372
0
    case PTN_ANY:
373
0
374
0
      printf(".");
375
0
      break;
376
0
377
0
    case PTN_ATTRIBUTES:
378
0
379
0
      printf("%%[");
380
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[0] >> 16)) printf("0");
381
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[1] >> 16)) printf("1");
382
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[2] >> 16)) printf("2");
383
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[3] >> 16)) printf("3");
384
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[4] >> 16)) printf("4");
385
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[5] >> 16)) printf("5");
386
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[6] >> 16)) printf("6");
387
0
      if (EXPR_DATA_0(expr_crs) & (table->numberedAttributes[7] >> 16)) printf("7");
388
0
      if (EXPR_DATA_0(expr_crs) & (CTC_EndOfInput >> 16)) printf("^");
389
0
      if (EXPR_DATA_1(expr_crs) & CTC_Space) printf("_");
390
0
      if (EXPR_DATA_1(expr_crs) & CTC_Digit) printf("#");
391
0
      if (EXPR_DATA_1(expr_crs) & CTC_Letter) printf("a");
392
0
      if (EXPR_DATA_1(expr_crs) & CTC_UpperCase) printf("u");
393
0
      if (EXPR_DATA_1(expr_crs) & CTC_LowerCase) printf("l");
394
0
      if (EXPR_DATA_1(expr_crs) & CTC_Punctuation) printf(".");
395
0
      if (EXPR_DATA_1(expr_crs) & CTC_Sign) printf("$");
396
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqDelimiter) printf("~");
397
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqBefore) printf("<");
398
0
      if (EXPR_DATA_1(expr_crs) & CTC_SeqAfter) printf(">");
399
0
      printf("]");
400
0
      break;
401
0
402
0
    case PTN_CHARS:
403
0
404
0
      if (EXPR_DATA_0(expr_crs) == 1)
405
0
        printf("%c", EXPR_DATA_1(expr_crs));
406
0
      else {
407
0
        printf("[");
408
0
        for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
409
0
          printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
410
0
        printf("]");
411
0
      }
412
0
      break;
413
0
414
0
    case PTN_HOOK:
415
0
416
0
      printf("@[");
417
0
      for (i = 0; i < EXPR_DATA_0(expr_crs); i++)
418
0
        printf("%c", EXPR_CONST_DATA(expr_crs)[i + 1]);
419
0
      printf("]");
420
0
      break;
421
0
422
0
    case PTN_END_OF_INPUT:
423
0
424
0
      printf("^");
425
0
      break;
426
0
427
0
      // default:  printf("%d?\n", EXPR_TYPE(expr_crs));  break;
428
0
    }
429
0
430
0
    expr_crs = EXPR_NXT(expr_crs);
431
0
  }
432
0
433
0
  return;
434
0
}
435
436
static void
437
0
pattern_print(const widechar *expr_data, const TranslationTableHeader *table) {
438
0
  if (expr_data[0] > 0 && expr_data[0] != PTN_END)
439
0
    pattern_print_expression(expr_data, 2, table);
440
0
  puts("");
441
0
}
442
443
#ifdef CHECK_OUTPUT_DEFINED
444
445
/* Print one space for each of the limits 100000, 10000, 1000, 100 and 10
 * that value is below. */
static void
do_padd(const int value) {
  int limit;

  for (limit = 100000; limit >= 10; limit /= 10)
    if (value < limit) printf(" ");
}
453
454
/* Print one space for each of the limits 100 and 10 that value is below. */
static void
do_pad(const int value) {
  int limit;

  for (limit = 100; limit >= 10; limit /= 10)
    if (value < limit) printf(" ");
}
459
460
static void
461
do_output(const int type, const int ret, const int line,
462
463
    const int input, const int input_minmax, const int input_crs, const int input_dir,
464
    const widechar *expr_data, const int expr_crs, const int notOperator,
465
    const int loop_crs, const int *loop_cnts,
466
467
    const char *msg) {
468
  switch (type) {
469
  case START:
470
471
    space--;
472
    if (space < 0) space = 0;
473
    printf("|%s()  ", &spaces[space]);
474
    break;
475
476
  case CALL:
477
478
    printf("|%s>   ", &spaces[space]);
479
    break;
480
481
  case RETURN:
482
483
    printf("|%s<%d  ", &spaces[space], ret);
484
    space++;
485
    if (space > 31) space = 31;
486
    break;
487
488
  case SHOW:
489
490
    printf("|%s    ", &spaces[space]);
491
    break;
492
  }
493
494
  printf("%d ", line);
495
  do_padd(line);
496
497
  switch (expr_data[expr_crs]) {
498
  case PTN_ERROR:
499
    printf("# ");
500
    break;
501
  case PTN_START:
502
    printf("> ");
503
    break;
504
  case PTN_END_OF_INPUT:
505
    printf("^ ");
506
    break;
507
  case PTN_ALTERNATE:
508
    printf("| ");
509
    break;
510
  case PTN_OPTIONAL:
511
    printf("? ");
512
    break;
513
  case PTN_ONE_MORE:
514
    printf("+ ");
515
    break;
516
  case PTN_ZERO_MORE:
517
    printf("* ");
518
    break;
519
  case PTN_NOT:
520
    printf("! ");
521
    break;
522
  case PTN_GROUP:
523
    printf("( ");
524
    break;
525
  case PTN_ANY:
526
    printf(". ");
527
    break;
528
  case PTN_ATTRIBUTES:
529
    printf("%% ");
530
    break;
531
  case PTN_CHARS:
532
    printf("[ ");
533
    break;
534
  case PTN_HOOK:
535
    printf("@ ");
536
    break;
537
  case PTN_END:
538
    printf("< ");
539
    break;
540
  default:
541
    printf("  ");
542
    break;
543
  }
544
  printf("%d ", expr_crs);
545
  do_padd(expr_crs);
546
547
  if (input > 31 && input < 127)
548
    printf("%c ", input);
549
  else
550
    printf("_ ");
551
552
  if (input_crs * input_dir >= input_minmax * input_dir)
553
    printf("#   ");
554
  else {
555
    printf("%d ", input_crs);
556
    do_pad(input_crs);
557
  }
558
559
  if (input_dir > 0)
560
    printf("<");
561
  else
562
    printf(">");
563
  printf("%d ", input_minmax);
564
  do_pad(input_minmax);
565
566
  if (notOperator)
567
    printf("!   ");
568
  else
569
    printf("    ");
570
571
  if (loop_crs) {
572
    printf("%d ", loop_crs);
573
    do_pad(loop_crs);
574
    printf("%d ", loop_cnts[EXPR_DATA_1(loop_crs)]);
575
    do_pad(loop_cnts[EXPR_DATA_1(loop_crs)]);
576
  } else
577
    printf("-   -   ");
578
  if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE || EXPR_TYPE(expr_crs) == PTN_ZERO_MORE) {
579
    printf("%d ", loop_cnts[EXPR_DATA_1(expr_crs)]);
580
    do_pad(loop_cnts[EXPR_DATA_1(expr_crs)]);
581
  } else
582
    printf("-   ");
583
584
  if (msg) printf("%s", msg);
585
  puts("");
586
}
587
588
#endif
589
590
////////////////////////////////////////////////////////////////////////////////
591
592
static int
593
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
594
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
595
    TranslationTableHeader *table, const FileInfo *nested);
596
597
static int
598
pattern_compile_expression(const widechar *input, const int input_max, int *input_crs,
599
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
600
0
    TranslationTableHeader *table, const FileInfo *nested) {
601
0
  widechar *data;
602
0
  int expr_start, expr_end, expr_sub, expr_crs_prv;
603
0
  int input_end;
604
0
  int attrs0, attrs1;
605
0
  int set, esc, nest, i;
606
607
0
  switch (input[*input_crs]) {
608
0
  case '(':
609
610
0
    if (*expr_crs + 10 >= expr_max) return 0;
611
612
0
    (*input_crs)++;
613
0
    if (*input_crs >= input_max) return 0;
614
615
    /* find closing parenthesis */
616
0
    nest = esc = 0;
617
0
    for (input_end = *input_crs; input_end < input_max; input_end++) {
618
0
      if (input[input_end] == '\\' && !esc) {
619
0
        esc = 1;
620
0
        continue;
621
0
      }
622
623
0
      if (input[input_end] == '(' && !esc)
624
0
        nest++;
625
0
      else if (input[input_end] == ')' && !esc) {
626
0
        if (nest)
627
0
          nest--;
628
0
        else
629
0
          break;
630
0
      }
631
632
0
      esc = 0;
633
0
    }
634
0
    if (input_end >= input_max) return 0;
635
636
0
    EXPR_TYPE(*expr_crs) = PTN_GROUP;
637
638
    /* compile sub expressions */
639
0
    expr_crs_prv = *expr_crs;
640
0
    *expr_crs += 4;
641
0
    EXPR_DATA_0(expr_crs_prv) = *expr_crs;
642
0
    expr_sub = *expr_crs;
643
0
    EXPR_TYPE(expr_sub) = PTN_ERROR;
644
0
    EXPR_PRV(expr_sub) = PTN_END;
645
0
    EXPR_NXT(expr_sub) = PTN_END;
646
0
    if (!pattern_compile_1(input, input_end, input_crs, expr_data, expr_max, expr_crs,
647
0
          loop_cnts, table, nested))
648
0
      return 0;
649
0
    (*input_crs)++;
650
651
    /* reset end expression */
652
0
    expr_end = *expr_crs;
653
0
    EXPR_NXT(expr_end) = expr_crs_prv;
654
655
0
    return *expr_crs += 3;
656
657
0
  case '!':
658
659
0
    if (*expr_crs + 10 >= expr_max) return 0;
660
661
0
    (*input_crs)++;
662
0
    EXPR_TYPE(*expr_crs) = PTN_NOT;
663
0
    expr_crs_prv = *expr_crs;
664
0
    *expr_crs += 4;
665
0
    EXPR_DATA_0(expr_crs_prv) = *expr_crs;
666
667
    /* create start expression */
668
0
    expr_start = *expr_crs;
669
0
    EXPR_TYPE(expr_start) = PTN_START;
670
0
    EXPR_PRV(expr_start) = PTN_END;
671
0
    *expr_crs += 3;
672
0
    EXPR_NXT(expr_start) = *expr_crs;
673
674
    /* compile sub expression */
675
0
    expr_sub = *expr_crs;
676
0
    EXPR_TYPE(expr_sub) = PTN_ERROR;
677
0
    EXPR_PRV(expr_sub) = expr_start;
678
0
    EXPR_NXT(expr_sub) = PTN_END;
679
680
0
    if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max,
681
0
          expr_crs, loop_cnts, table, nested))
682
0
      return 0;
683
684
0
    if (*expr_crs + 3 >= expr_max) return 0;
685
686
0
    EXPR_NXT(expr_sub) = *expr_crs;
687
688
    /* create end expression */
689
0
    expr_end = *expr_crs;
690
0
    EXPR_TYPE(expr_end) = PTN_END;
691
0
    EXPR_PRV(expr_end) = expr_sub;
692
0
    EXPR_NXT(expr_end) = expr_crs_prv;
693
694
0
    return *expr_crs += 3;
695
696
0
  case '+':
697
698
0
    if (*expr_crs + 5 >= expr_max) return 0;
699
0
    EXPR_TYPE(*expr_crs) = PTN_ONE_MORE;
700
0
    EXPR_DATA_1(*expr_crs) = (*loop_cnts)++;
701
0
    (*input_crs)++;
702
0
    return *expr_crs += 5;
703
704
0
  case '*':
705
706
0
    if (*expr_crs + 5 >= expr_max) return 0;
707
0
    EXPR_TYPE(*expr_crs) = PTN_ZERO_MORE;
708
0
    EXPR_DATA_1(*expr_crs) = (*loop_cnts)++;
709
0
    (*input_crs)++;
710
0
    return *expr_crs += 5;
711
712
0
  case '?':
713
714
0
    if (*expr_crs + 4 >= expr_max) return 0;
715
0
    EXPR_TYPE(*expr_crs) = PTN_OPTIONAL;
716
0
    (*input_crs)++;
717
0
    return *expr_crs += 4;
718
719
0
  case '|':
720
721
0
    if (*expr_crs + 5 >= expr_max) return 0;
722
0
    EXPR_TYPE(*expr_crs) = PTN_ALTERNATE;
723
0
    (*input_crs)++;
724
0
    return *expr_crs += 5;
725
726
0
  case '.':
727
728
0
    if (*expr_crs + 3 >= expr_max) return 0;
729
0
    EXPR_TYPE(*expr_crs) = PTN_ANY;
730
0
    (*input_crs)++;
731
0
    return *expr_crs += 3;
732
733
0
  case '%':
734
735
0
    if (*expr_crs + 5 >= expr_max) return 0;
736
737
0
    (*input_crs)++;
738
0
    if (*input_crs >= input_max) return 0;
739
740
    /* find closing bracket */
741
0
    if (input[*input_crs] == '[') {
742
0
      set = 1;
743
0
      (*input_crs)++;
744
0
      for (input_end = *input_crs; input_end < input_max; input_end++)
745
0
        if (input[input_end] == ']') break;
746
0
      if (input_end >= input_max) return 0;
747
0
    } else {
748
0
      set = 0;
749
0
      input_end = *input_crs + 1;
750
0
    }
751
752
0
    EXPR_TYPE(*expr_crs) = PTN_ATTRIBUTES;
753
754
0
    attrs0 = attrs1 = 0;
755
0
    for (; (*input_crs) < input_end; (*input_crs)++) {
756
0
      switch (input[*input_crs]) {
757
0
      case '_':
758
0
        attrs0 |= CTC_Space;
759
0
        break;
760
0
      case '#':
761
0
        attrs0 |= CTC_Digit;
762
0
        break;
763
0
      case 'a':
764
0
        attrs0 |= CTC_Letter;
765
0
        break;
766
0
      case 'u':
767
0
        attrs0 |= CTC_UpperCase;
768
0
        break;
769
0
      case 'l':
770
0
        attrs0 |= CTC_LowerCase;
771
0
        break;
772
0
      case '.':
773
0
        attrs0 |= CTC_Punctuation;
774
0
        break;
775
0
      case '$':
776
0
        attrs0 |= CTC_Sign;
777
0
        break;
778
0
      case '~':
779
0
        attrs0 |= CTC_SeqDelimiter;
780
0
        break;
781
0
      case '<':
782
0
        attrs0 |= CTC_SeqBefore;
783
0
        break;
784
0
      case '>':
785
0
        attrs0 |= CTC_SeqAfter;
786
0
        break;
787
788
0
      case '0':
789
0
      case '1':
790
0
      case '2':
791
0
      case '3':
792
0
      case '4':
793
0
      case '5':
794
0
      case '6':
795
0
      case '7': {
796
0
        int k = input[*input_crs] - '0';
797
0
        TranslationTableCharacterAttributes a = table->numberedAttributes[k];
798
0
        if (!a) {
799
          // attribute not used before yet: assign it a value
800
0
          a = table->numberedAttributes[k] =
801
0
              table->nextNumberedCharacterClassAttribute;
802
0
          if (a > CTC_UserDefined8) {
803
0
            _lou_logMessage(LOU_LOG_ERROR,
804
0
                "%s:%d: error: Too many character attributes defined",
805
0
                nested->fileName, nested->lineNumber);
806
0
            return 0;
807
0
          }
808
0
          table->nextNumberedCharacterClassAttribute <<= 1;
809
0
        }
810
0
        attrs1 |= (a >> 16);
811
0
        break;
812
0
      }
813
0
      case '^':
814
0
        attrs1 |= (CTC_EndOfInput >> 16);
815
0
        break;
816
817
0
      default:
818
0
        return 0;
819
0
      }
820
0
    }
821
0
    EXPR_DATA_0(*expr_crs) = attrs1;
822
0
    EXPR_DATA_1(*expr_crs) = attrs0;
823
824
0
    if (set) (*input_crs)++;
825
0
    return *expr_crs += 5;
826
827
0
  case '[':
828
829
0
    (*input_crs)++;
830
0
    if (*input_crs >= input_max) return 0;
831
832
    /* find closing bracket */
833
0
    esc = 0;
834
0
    for (input_end = *input_crs; input_end < input_max; input_end++) {
835
0
      if (input[input_end] == '\\' && !esc) {
836
0
        esc = 1;
837
0
        continue;
838
0
      }
839
840
0
      if (input[input_end] == ']' && !esc) break;
841
0
      esc = 0;
842
0
    }
843
0
    if (input_end >= input_max) return 0;
844
845
0
    if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0;
846
847
0
    EXPR_TYPE(*expr_crs) = PTN_CHARS;
848
849
0
    esc = 0;
850
0
    data = EXPR_DATA(*expr_crs);
851
0
    for (i = 1; *input_crs < input_end; (*input_crs)++) {
852
0
      if (input[*input_crs] == '\\' && !esc) {
853
0
        esc = 1;
854
0
        continue;
855
0
      }
856
857
0
      esc = 0;
858
0
      data[i++] = (widechar)input[*input_crs];
859
0
    }
860
0
    data[0] = i - 1;
861
0
    (*input_crs)++;
862
0
    return *expr_crs += 4 + data[0];
863
864
0
  case '@':
865
866
0
    (*input_crs)++;
867
0
    if (*input_crs >= input_max) return 0;
868
869
    /* find closing bracket */
870
0
    if (input[*input_crs] == '[') {
871
0
      set = 1;
872
0
      (*input_crs)++;
873
0
      for (input_end = *input_crs; input_end < input_max; input_end++)
874
0
        if (input[input_end] == ']') break;
875
0
      if (input_end >= input_max) return 0;
876
0
    } else {
877
0
      set = 0;
878
0
      input_end = *input_crs + 1;
879
0
    }
880
881
0
    if (*expr_crs + 4 + (input_end - *input_crs) >= expr_max) return 0;
882
883
0
    EXPR_TYPE(*expr_crs) = PTN_HOOK;
884
885
0
    esc = 0;
886
0
    data = EXPR_DATA(*expr_crs);
887
0
    for (i = 1; *input_crs < input_end; (*input_crs)++) {
888
0
      if (input[*input_crs] == '\\' && !esc) {
889
0
        esc = 1;
890
0
        continue;
891
0
      }
892
893
0
      esc = 0;
894
0
      data[i++] = (widechar)input[*input_crs];
895
0
    }
896
0
    data[0] = i - 1;
897
0
    if (set) (*input_crs)++;
898
0
    return *expr_crs += 4 + data[0];
899
900
0
  case '^':
901
0
  case '$':
902
903
0
    if (*expr_crs + 3 >= expr_max) return 0;
904
0
    EXPR_TYPE(*expr_crs) = PTN_END_OF_INPUT;
905
0
    (*input_crs)++;
906
0
    return *expr_crs += 3;
907
908
0
  case '\\':
909
910
0
    (*input_crs)++;
911
0
    if (*input_crs >= input_max) return 0;
912
913
0
  default:
914
915
0
    if (*expr_crs + 5 >= expr_max) return 0;
916
0
    EXPR_TYPE(*expr_crs) = PTN_CHARS;
917
0
    EXPR_DATA_0(*expr_crs) = 1;
918
0
    EXPR_DATA_1(*expr_crs) = (widechar)input[*input_crs];
919
0
    (*input_crs)++;
920
0
    return *expr_crs += 5;
921
0
  }
922
0
}
923
924
static int
925
pattern_insert_alternate(const widechar *input, const int input_max, int *input_crs,
926
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
927
0
    int expr_insert, TranslationTableHeader *table, const FileInfo *nested) {
928
0
  int expr_group, expr_alt, expr_end;
929
0
930
0
  if (EXPR_TYPE(*expr_crs) == PTN_START) return 0;
931
0
932
0
  if (*expr_crs + 12 >= expr_max) return 0;
933
0
934
0
  /* setup alternate expression */
935
0
  expr_alt = *expr_crs;
936
0
  EXPR_TYPE(expr_alt) = PTN_ALTERNATE;
937
0
  EXPR_PRV(expr_alt) = PTN_END;
938
0
  EXPR_NXT(expr_alt) = PTN_END;
939
0
  *expr_crs += 5;
940
0
941
0
  /* setup group expression */
942
0
  expr_group = *expr_crs;
943
0
  EXPR_TYPE(expr_group) = PTN_GROUP;
944
0
  EXPR_PRV(expr_group) = PTN_END;
945
0
  EXPR_NXT(expr_group) = PTN_END;
946
0
  *expr_crs += 4;
947
0
  EXPR_DATA_0(expr_group) = *expr_crs;
948
0
949
0
  EXPR_TYPE(*expr_crs) = PTN_ERROR;
950
0
  EXPR_PRV(*expr_crs) = PTN_END;
951
0
  EXPR_NXT(*expr_crs) = PTN_END;
952
0
  if (!pattern_compile_1(input, input_max, input_crs, expr_data, expr_max, expr_crs,
953
0
        loop_cnts, table, nested))
954
0
    return 0;
955
0
  expr_end = *expr_crs;
956
0
  EXPR_NXT(expr_end) = expr_group;
957
0
958
0
  /* setup last end expression */
959
0
  if (*expr_crs + 3 >= expr_max) return 0;
960
0
  *expr_crs += 3;
961
0
  EXPR_TYPE(*expr_crs) = PTN_END;
962
0
  EXPR_NXT(*expr_crs) = PTN_END;
963
0
964
0
  /* replace insert expression with group expression using last end expression */
965
0
  EXPR_NXT(EXPR_PRV(expr_insert)) = expr_group;
966
0
  EXPR_PRV(expr_group) = EXPR_PRV(expr_insert);
967
0
968
0
  EXPR_NXT(expr_group) = *expr_crs;
969
0
  EXPR_PRV(*expr_crs) = expr_group;
970
0
971
0
  /* link alternate and insert expressions before group end expression */
972
0
  EXPR_NXT(EXPR_PRV(expr_end)) = expr_alt;
973
0
  EXPR_PRV(expr_alt) = EXPR_PRV(expr_end);
974
0
975
0
  EXPR_NXT(expr_alt) = expr_insert;
976
0
  EXPR_PRV(expr_insert) = expr_alt;
977
0
978
0
  EXPR_NXT(expr_insert) = expr_end;
979
0
  EXPR_PRV(expr_end) = expr_insert;
980
0
981
0
  return *expr_crs;
982
0
}
983
984
/* Compile all expression sequences, resolving character sets, attributes,
985
 * groups, nots, and hooks.  Note that unlike the other compile functions, on
986
 * returning the expr_crs is set to the last end expression, not after it.
987
 */
988
static int
989
pattern_compile_1(const widechar *input, const int input_max, int *input_crs,
990
    widechar *expr_data, const int expr_max, widechar *expr_crs, widechar *loop_cnts,
991
0
    TranslationTableHeader *table, const FileInfo *nested) {
992
0
  int expr_crs_prv;
993
994
0
  if (*expr_crs + 6 >= expr_max) return 0;
995
996
0
  expr_crs_prv = *expr_crs;
997
998
  /* setup start expression */
999
0
  EXPR_TYPE(*expr_crs) = PTN_START;
1000
0
  EXPR_PRV(*expr_crs) = PTN_END;
1001
0
  *expr_crs += 3;
1002
0
  EXPR_NXT(expr_crs_prv) = *expr_crs;
1003
1004
  /* setup end expression */
1005
0
  EXPR_TYPE(*expr_crs) = PTN_END;
1006
0
  EXPR_PRV(*expr_crs) = expr_crs_prv;
1007
0
  EXPR_NXT(*expr_crs) = PTN_END;
1008
1009
0
  while (*input_crs < input_max) {
1010
0
    expr_crs_prv = *expr_crs;
1011
0
    if (!pattern_compile_expression(input, input_max, input_crs, expr_data, expr_max,
1012
0
          expr_crs, loop_cnts, table, nested))
1013
0
      return 0;
1014
1015
    /* setup end expression */
1016
0
    if (*expr_crs + 3 >= expr_max) return 0;
1017
0
    EXPR_NXT(expr_crs_prv) = *expr_crs;
1018
0
    EXPR_TYPE(*expr_crs) = PTN_END;
1019
0
    EXPR_PRV(*expr_crs) = expr_crs_prv;
1020
0
    EXPR_NXT(*expr_crs) = PTN_END;
1021
1022
    /* insert seqafterexpression before attributes of seqafterchars */
1023
    // if(EXPR_TYPE(expr_crs_prv) == PTN_ATTRIBUTES)
1024
    // if(EXPR_DATA_1(expr_crs_prv) & CTC_SeqAfter)
1025
    // {
1026
    //  i = 0;
1027
    //  pattern_insert_alternate(table->seqAfterExpression,
1028
    //    table->seqAfterExpressionLength, &i, expr_data, expr_max,
1029
    //    expr_crs, loop_cnts, expr_crs_prv);
1030
    // }
1031
0
  }
1032
1033
0
  return *expr_crs;
1034
0
}
1035
1036
/* Resolve optional and loop expressions.
1037
 */
1038
static int
1039
pattern_compile_2(
1040
0
    widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) {
1041
0
  int expr_start, expr_end, expr_prv, expr_sub;
1042
1043
0
  while (EXPR_TYPE(expr_at) != PTN_END) {
1044
0
    if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT) {
1045
0
      if (!pattern_compile_2(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs))
1046
0
        return 0;
1047
0
    }
1048
1049
0
    if (EXPR_TYPE(expr_at) == PTN_ZERO_MORE || EXPR_TYPE(expr_at) == PTN_ONE_MORE ||
1050
0
        EXPR_TYPE(expr_at) == PTN_OPTIONAL) {
1051
0
      if (*expr_crs + 6 >= expr_max) return 0;
1052
1053
      /* get previous expressions, there must
1054
       * be at least something and a PTN_START */
1055
0
      expr_sub = EXPR_PRV(expr_at);
1056
0
      if (EXPR_TYPE(expr_sub) == PTN_START) return 0;
1057
0
      expr_prv = EXPR_PRV(expr_sub);
1058
1059
      /* create start expression */
1060
0
      expr_start = *expr_crs;
1061
0
      EXPR_TYPE(expr_start) = PTN_START;
1062
0
      EXPR_PRV(expr_start) = PTN_END;
1063
0
      EXPR_NXT(expr_start) = expr_sub;
1064
0
      *expr_crs += 3;
1065
1066
      /* create end expression */
1067
0
      expr_end = *expr_crs;
1068
0
      EXPR_TYPE(expr_end) = PTN_END;
1069
0
      EXPR_PRV(expr_end) = expr_sub;
1070
0
      EXPR_NXT(expr_end) = expr_at;
1071
0
      *expr_crs += 3;
1072
1073
      /* relink previous expression before sub expression */
1074
0
      EXPR_DATA_0(expr_at) = expr_start;
1075
0
      EXPR_NXT(expr_prv) = expr_at;
1076
0
      EXPR_PRV(expr_at) = expr_prv;
1077
1078
      /* relink sub expression to start and end */
1079
0
      EXPR_PRV(expr_sub) = expr_start;
1080
0
      EXPR_NXT(expr_sub) = expr_end;
1081
0
    }
1082
1083
0
    expr_at = EXPR_NXT(expr_at);
1084
0
  }
1085
1086
0
  return 1;
1087
0
}
1088
1089
/* Resolves alternative expressions.
1090
 */
1091
static int
1092
pattern_compile_3(
1093
0
    widechar *expr_data, int expr_at, const int expr_max, widechar *expr_crs) {
1094
0
  int expr_mrk, expr_start, expr_end, expr_sub_start, expr_sub_end;
1095
1096
0
  while (EXPR_TYPE(expr_at) != PTN_END) {
1097
0
    if (EXPR_TYPE(expr_at) == PTN_GROUP || EXPR_TYPE(expr_at) == PTN_NOT ||
1098
0
        EXPR_TYPE(expr_at) == PTN_OPTIONAL ||
1099
0
        EXPR_TYPE(expr_at) == PTN_ZERO_MORE ||
1100
0
        EXPR_TYPE(expr_at) == PTN_ONE_MORE) {
1101
0
      if (!pattern_compile_3(expr_data, EXPR_DATA_0(expr_at), expr_max, expr_crs))
1102
0
        return 0;
1103
0
    }
1104
1105
0
    if (EXPR_TYPE(expr_at) == PTN_ALTERNATE) {
1106
0
      if (*expr_crs + 12 >= expr_max) return 0;
1107
1108
      /* get previous start expression,
1109
       * can include alternate expressions */
1110
0
      expr_mrk = EXPR_PRV(expr_at);
1111
0
      if (EXPR_TYPE(expr_mrk) == PTN_START) return 0;
1112
0
      expr_sub_end = expr_mrk;
1113
0
      while (EXPR_TYPE(expr_mrk) != PTN_START) expr_mrk = EXPR_PRV(expr_mrk);
1114
0
      expr_sub_start = EXPR_NXT(expr_mrk);
1115
1116
      /* create first start expression */
1117
0
      expr_start = *expr_crs;
1118
0
      EXPR_TYPE(expr_start) = PTN_START;
1119
0
      EXPR_PRV(expr_start) = PTN_END;
1120
0
      EXPR_NXT(expr_start) = expr_sub_start;
1121
0
      *expr_crs += 3;
1122
1123
      /* create first end expression */
1124
0
      expr_end = *expr_crs;
1125
0
      EXPR_TYPE(expr_end) = PTN_END;
1126
0
      EXPR_PRV(expr_end) = expr_sub_end;
1127
0
      EXPR_NXT(expr_end) = expr_at;
1128
0
      *expr_crs += 3;
1129
1130
      /* relink previous expression before sub expression */
1131
0
      EXPR_DATA_0(expr_at) = expr_start;
1132
0
      EXPR_NXT(expr_mrk) = expr_at;
1133
0
      EXPR_PRV(expr_at) = expr_mrk;
1134
1135
      /* relink sub expression to start and end */
1136
0
      EXPR_PRV(expr_sub_start) = expr_start;
1137
0
      EXPR_NXT(expr_sub_end) = expr_end;
1138
1139
      /* get following PTN_END or PTN_ALTERNATE expression */
1140
0
      expr_mrk = EXPR_NXT(expr_at);
1141
0
      if (EXPR_TYPE(expr_mrk) == PTN_END || EXPR_TYPE(expr_mrk) == PTN_ALTERNATE)
1142
0
        return 0;
1143
0
      expr_sub_start = expr_mrk;
1144
0
      while (EXPR_TYPE(expr_mrk) != PTN_END && EXPR_TYPE(expr_mrk) != PTN_ALTERNATE)
1145
0
        expr_mrk = EXPR_NXT(expr_mrk);
1146
0
      expr_sub_end = EXPR_PRV(expr_mrk);
1147
1148
      /* create first start expression */
1149
0
      expr_start = *expr_crs;
1150
0
      EXPR_TYPE(expr_start) = PTN_START;
1151
0
      EXPR_PRV(expr_start) = PTN_END;
1152
0
      EXPR_NXT(expr_start) = expr_sub_start;
1153
0
      *expr_crs += 3;
1154
1155
      /* create first end expression */
1156
0
      expr_end = *expr_crs;
1157
0
      EXPR_TYPE(expr_end) = PTN_END;
1158
0
      EXPR_PRV(expr_end) = expr_sub_end;
1159
0
      EXPR_NXT(expr_end) = expr_at;
1160
0
      *expr_crs += 3;
1161
1162
      /* relink following expression before sub expression */
1163
0
      EXPR_DATA_1(expr_at) = expr_start;
1164
0
      EXPR_PRV(expr_mrk) = expr_at;
1165
0
      EXPR_NXT(expr_at) = expr_mrk;
1166
1167
      /* relink sub expression to start and end */
1168
0
      EXPR_PRV(expr_sub_start) = expr_start;
1169
0
      EXPR_NXT(expr_sub_end) = expr_end;
1170
1171
      /* check expressions were after alternate and got moved into
1172
       * a sub expression, previous expressions already checked */
1173
0
      if (!pattern_compile_3(expr_data, EXPR_DATA_1(expr_at), expr_max, expr_crs))
1174
0
        return 0;
1175
0
    }
1176
1177
0
    expr_at = EXPR_NXT(expr_at);
1178
0
  }
1179
1180
0
  return 1;
1181
0
}
1182
1183
int EXPORT_CALL
1184
_lou_pattern_compile(const widechar *input, const int input_max, widechar *expr_data,
1185
0
    const int expr_max, TranslationTableHeader *table, const FileInfo *nested) {
1186
0
  int input_crs;
1187
1188
0
  input_crs = 0;
1189
0
  expr_data[0] = 2;
1190
0
  expr_data[1] = 0;
1191
1192
0
  if (!pattern_compile_1(input, input_max, &input_crs, expr_data, expr_max,
1193
0
        &expr_data[0], &expr_data[1], table, nested))
1194
0
    return 0;
1195
1196
  /* shift past the last end */
1197
0
  expr_data[0] += 3;
1198
1199
0
  if (!pattern_compile_2(expr_data, 2, expr_max, &expr_data[0])) return 0;
1200
1201
0
  if (!pattern_compile_3(expr_data, 2, expr_max, &expr_data[0])) return 0;
1202
1203
0
  return expr_data[0];
1204
0
}
1205
1206
////////////////////////////////////////////////////////////////////////////////
1207
1208
static void
1209
pattern_reverse_expression(widechar *expr_data, const int expr_start);
1210
1211
static void
1212
0
pattern_reverse_branch(widechar *expr_data, const int expr_at) {
1213
0
  widechar expr_swap;
1214
1215
0
  switch (EXPR_TYPE(expr_at)) {
1216
0
  case PTN_ALTERNATE:
1217
1218
0
    pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at));
1219
0
    expr_swap = EXPR_DATA_0(expr_at);
1220
0
    EXPR_DATA_0(expr_at) = EXPR_DATA_1(expr_at);
1221
0
    EXPR_DATA_1(expr_at) = expr_swap;
1222
1223
0
  case PTN_GROUP:
1224
0
  case PTN_NOT:
1225
0
  case PTN_ONE_MORE:
1226
0
  case PTN_ZERO_MORE:
1227
0
  case PTN_OPTIONAL:
1228
1229
0
    pattern_reverse_expression(expr_data, EXPR_DATA_0(expr_at));
1230
0
  }
1231
0
}
1232
1233
static void
1234
0
pattern_reverse_expression(widechar *expr_data, const int expr_start) {
1235
0
  widechar expr_end, expr_crs, expr_prv;
1236
1237
0
  expr_end = EXPR_NXT(expr_start);
1238
1239
  /* empty expression */
1240
0
  if (EXPR_TYPE(expr_end) == PTN_END) return;
1241
1242
  /* find end expression */
1243
0
  while (EXPR_TYPE(expr_end) != PTN_END) expr_end = EXPR_NXT(expr_end);
1244
1245
0
  expr_crs = EXPR_PRV(expr_end);
1246
0
  expr_prv = EXPR_PRV(expr_crs);
1247
1248
  /* relink expression before end expression */
1249
0
  EXPR_NXT(expr_start) = expr_crs;
1250
0
  EXPR_PRV(expr_crs) = expr_start;
1251
0
  EXPR_NXT(expr_crs) = expr_prv;
1252
1253
  /* reverse any branching expressions */
1254
0
  pattern_reverse_branch(expr_data, expr_crs);
1255
1256
0
  while (expr_prv != expr_start) {
1257
    /* shift current expression */
1258
0
    expr_crs = expr_prv;
1259
0
    expr_prv = EXPR_PRV(expr_prv);
1260
1261
    /* reverse any branching expressions */
1262
0
    pattern_reverse_branch(expr_data, expr_crs);
1263
1264
    /* relink current expression */
1265
0
    EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs);
1266
0
    EXPR_NXT(expr_crs) = expr_prv;
1267
0
  }
1268
1269
  /* relink expression after start expression */
1270
0
  EXPR_PRV(expr_crs) = EXPR_NXT(expr_crs);
1271
0
  EXPR_NXT(expr_crs) = expr_end;
1272
0
  EXPR_PRV(expr_end) = expr_crs;
1273
0
}
1274
1275
void EXPORT_CALL
1276
0
_lou_pattern_reverse(widechar *expr_data) {
1277
0
  pattern_reverse_expression(expr_data, 2);
1278
0
}
1279
1280
////////////////////////////////////////////////////////////////////////////////
1281
1282
static int
1283
0
pattern_check_chars(const widechar input_char, const widechar *expr_data) {
1284
0
  int expr_cnt, i;
1285
1286
0
  expr_cnt = expr_data[0] + 1;
1287
1288
0
  for (i = 1; i < expr_cnt; i++)
1289
0
    if (input_char == expr_data[i]) break;
1290
1291
0
  if (i == expr_cnt) return 0;
1292
0
  return 1;
1293
0
}
1294
1295
static int
1296
pattern_check_attrs(const widechar input_char, const widechar *expr_data,
1297
0
    const TranslationTableHeader *table) {
1298
0
  int attrs;
1299
1300
0
  attrs = ((expr_data[0] << 16) | expr_data[1]) & ~(CTC_EndOfInput | CTC_EmpMatch);
1301
0
  if (!checkAttr(input_char, attrs, table)) return 0;
1302
0
  return 1;
1303
0
}
1304
1305
static int
1306
pattern_check_expression(const widechar *const input, int *input_crs,
1307
    const int input_minmax, const int input_dir, const widechar *const expr_data,
1308
    int (*hook)(const widechar input, const int data_len), widechar *hook_data,
1309
    const int hook_max, int expr_crs, int notOperator, int loop_crs, int *loop_cnts,
1310
0
    const TranslationTableHeader *table) {
1311
0
  int input_crs_prv, input_start, attrs, ret, i;
1312
0
  const widechar *data;
1313
1314
0
  data = NULL;
1315
1316
  /* save input_crs to know if loop consumed input */
1317
0
  input_start = *input_crs;
1318
1319
0
  CHECK_OUTPUT(START, 0, __LINE__, "check start")
1320
1321
0
  while (!(EXPR_TYPE(expr_crs) == PTN_END && EXPR_TYPE(expr_crs) == PTN_END)) {
1322
    /* end of input expression */
1323
0
    if (EXPR_TYPE(expr_crs) == PTN_END_OF_INPUT) {
1324
0
      if (*input_crs * input_dir >= input_minmax * input_dir) {
1325
0
        if (notOperator)
1326
0
          CHECK_OUTPUT(
1327
0
              RETURN, 0, __LINE__, "end of input failed:  no input and not")
1328
0
        else
1329
0
          CHECK_OUTPUT(RETURN, 1, __LINE__, "end of input passed:  no input")
1330
0
        return !notOperator;
1331
0
      } else {
1332
0
        if (notOperator)
1333
0
          CHECK_OUTPUT(
1334
0
              RETURN, 1, __LINE__, "end of input passed:  input and not")
1335
0
        else
1336
0
          CHECK_OUTPUT(RETURN, 0, __LINE__, "end of input failed:  input")
1337
0
        return notOperator;
1338
0
      }
1339
0
    }
1340
1341
    /* no more input */
1342
0
    if (*input_crs * input_dir >= input_minmax * input_dir) {
1343
0
      switch (EXPR_TYPE(expr_crs)) {
1344
0
      case PTN_ATTRIBUTES:
1345
1346
0
        attrs = (EXPR_DATA_0(expr_crs) << 16);
1347
0
        if (attrs & CTC_EndOfInput) {
1348
0
          if (notOperator) {
1349
0
            CHECK_OUTPUT(RETURN, 0, __LINE__,
1350
0
                "attributes failed:  end of input attribute:  not")
1351
0
            return 0;
1352
0
          }
1353
0
          CHECK_OUTPUT(RETURN, 1, __LINE__,
1354
0
              "attributes passed:  end of input attribute")
1355
0
          return 1;
1356
0
        }
1357
0
        CHECK_OUTPUT(RETURN, 0, __LINE__,
1358
0
            "attributes failed:  no end of input attribute")
1359
0
        return 0;
1360
1361
0
      case PTN_ANY:
1362
0
      case PTN_CHARS:
1363
1364
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed:  no input")
1365
0
        return 0;
1366
0
      }
1367
1368
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "no input")
1369
0
    }
1370
1371
0
    switch (EXPR_TYPE(expr_crs)) {
1372
1373
0
    case PTN_START:
1374
1375
0
      expr_crs = EXPR_NXT(expr_crs);
1376
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "start next")
1377
0
      break;
1378
1379
0
    case PTN_GROUP:
1380
1381
0
      expr_crs = EXPR_DATA_0(expr_crs);
1382
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "group next")
1383
0
      break;
1384
1385
0
    case PTN_NOT:
1386
1387
0
      notOperator = !notOperator;
1388
0
      expr_crs = EXPR_DATA_0(expr_crs);
1389
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "not next")
1390
0
      break;
1391
1392
0
    case PTN_ONE_MORE:
1393
1394
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ start")
1395
1396
0
    case PTN_ZERO_MORE:
1397
1398
      /* check if loop already started */
1399
0
      if (expr_crs == loop_crs) {
1400
0
        loop_cnts[EXPR_DATA_1(loop_crs)]++;
1401
0
        CHECK_OUTPUT(SHOW, 0, __LINE__, "loop again")
1402
0
      } else {
1403
        /* check if loop nested, wasn't running but has a count */
1404
0
        if (loop_cnts[EXPR_DATA_1(expr_crs)]) {
1405
0
          CHECK_OUTPUT(SHOW, 0, __LINE__, "loop already running")
1406
0
          goto loop_next;
1407
0
        }
1408
1409
        /* start loop */
1410
0
        loop_crs = expr_crs;
1411
0
        loop_cnts[EXPR_DATA_1(loop_crs)] = 1;
1412
0
        CHECK_OUTPUT(SHOW, 0, __LINE__, "loop start")
1413
0
      }
1414
1415
      /* start loop expression */
1416
0
      input_crs_prv = *input_crs;
1417
0
      ret = pattern_check_expression(input, input_crs, input_minmax, input_dir,
1418
0
          expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1419
0
          notOperator, loop_crs, loop_cnts, table);
1420
0
      if (ret) {
1421
0
        CHECK_OUTPUT(RETURN, 1, __LINE__, "loop passed")
1422
0
        return 1;
1423
0
      }
1424
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop failed")
1425
0
      *input_crs = input_crs_prv;
1426
1427
      /* check loop count */
1428
0
      loop_cnts[EXPR_DATA_1(loop_crs)]--;
1429
0
      if (EXPR_TYPE(expr_crs) == PTN_ONE_MORE) {
1430
0
        if (loop_cnts[EXPR_DATA_1(loop_crs)] < 1) {
1431
0
          CHECK_OUTPUT(RETURN, 0, __LINE__, "loop+ failed")
1432
0
          return 0;
1433
0
        } else
1434
0
          CHECK_OUTPUT(SHOW, 0, __LINE__, "loop+ passed")
1435
0
      }
1436
1437
    /* continue after loop */
1438
0
    loop_next:
1439
0
      expr_crs = EXPR_NXT(expr_crs);
1440
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "loop next")
1441
0
      break;
1442
1443
0
    case PTN_OPTIONAL:
1444
1445
      /* save current state */
1446
0
      input_crs_prv = *input_crs;
1447
1448
      /* start optional expression */
1449
0
      CHECK_OUTPUT(CALL, 0, __LINE__, "option start")
1450
0
      if (pattern_check_expression(input, input_crs, input_minmax, input_dir,
1451
0
            expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1452
0
            notOperator, loop_crs, loop_cnts, table)) {
1453
0
        CHECK_OUTPUT(RETURN, 1, __LINE__, "option passed")
1454
0
        return 1;
1455
0
      }
1456
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "option failed")
1457
1458
      /* continue after optional expression */
1459
0
      *input_crs = input_crs_prv;
1460
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "no option start")
1461
0
      expr_crs = EXPR_NXT(expr_crs);
1462
0
      break;
1463
1464
0
    case PTN_ALTERNATE:
1465
1466
      /* save current state */
1467
0
      input_crs_prv = *input_crs;
1468
1469
      /* start first expression */
1470
0
      CHECK_OUTPUT(CALL, 0, __LINE__, "or 1 start")
1471
0
      if (pattern_check_expression(input, input_crs, input_minmax, input_dir,
1472
0
            expr_data, hook, hook_data, hook_max, EXPR_DATA_0(expr_crs),
1473
0
            notOperator, loop_crs, loop_cnts, table)) {
1474
0
        CHECK_OUTPUT(RETURN, 1, __LINE__, "or 1 passed")
1475
0
        return 1;
1476
0
      }
1477
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "or 1 failed")
1478
1479
      /* start second expression (no need to push) */
1480
0
      *input_crs = input_crs_prv;
1481
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "or 2 start")
1482
0
      expr_crs = EXPR_DATA_1(expr_crs);
1483
0
      break;
1484
1485
0
    case PTN_ANY:
1486
1487
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "any")
1488
0
      *input_crs += input_dir;
1489
0
      expr_crs = EXPR_NXT(expr_crs);
1490
0
      break;
1491
1492
0
    case PTN_ATTRIBUTES:
1493
1494
0
      ret = pattern_check_attrs(
1495
0
          input[*input_crs], EXPR_CONST_DATA(expr_crs), table);
1496
0
      if (ret && notOperator) {
1497
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed:  not");
1498
0
        return 0;
1499
0
      }
1500
0
      if (!ret && !notOperator) {
1501
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "attributes failed");
1502
0
        return 0;
1503
0
      }
1504
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "attributes passed")
1505
0
      *input_crs += input_dir;
1506
0
      expr_crs = EXPR_NXT(expr_crs);
1507
0
      break;
1508
1509
0
    case PTN_CHARS:
1510
1511
0
      ret = pattern_check_chars(input[*input_crs], EXPR_CONST_DATA(expr_crs));
1512
0
      if (ret && notOperator) {
1513
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed:  not");
1514
0
        return 0;
1515
0
      }
1516
0
      if (!ret && !notOperator) {
1517
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "chars failed");
1518
0
        return 0;
1519
0
      }
1520
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "chars passed")
1521
0
      *input_crs += input_dir;
1522
0
      expr_crs = EXPR_NXT(expr_crs);
1523
0
      break;
1524
1525
0
    case PTN_HOOK:
1526
1527
0
      if (hook == NULL) {
1528
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed:  NULL");
1529
0
        return 0;
1530
0
      }
1531
1532
      /* copy expression data */
1533
0
      data = EXPR_CONST_DATA(expr_crs);
1534
0
      for (i = 0; i < data[0]; i++) hook_data[i] = data[i + 1];
1535
1536
      /* call hook function */
1537
0
      ret = hook(input[*input_crs], data[0]);
1538
0
      if (ret && notOperator) {
1539
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed:  not");
1540
0
        return 0;
1541
0
      }
1542
0
      if (!ret && !notOperator) {
1543
0
        CHECK_OUTPUT(RETURN, 0, __LINE__, "hook failed");
1544
0
        return 0;
1545
0
      }
1546
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "hook passed")
1547
0
      *input_crs += input_dir;
1548
0
      expr_crs = EXPR_NXT(expr_crs);
1549
0
      break;
1550
1551
0
    case PTN_END:
1552
0
      break;
1553
1554
0
    default:
1555
1556
0
      CHECK_OUTPUT(RETURN, 0, __LINE__, "unknown opcode")
1557
0
      return 0;
1558
0
    }
1559
1560
    /* check end expression  */
1561
0
    while (EXPR_TYPE(expr_crs) == PTN_END) {
1562
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "end")
1563
1564
      /* check for end of expressions */
1565
0
      if (EXPR_NXT(expr_crs) == PTN_END) break;
1566
1567
0
      expr_crs = EXPR_NXT(expr_crs);
1568
1569
      /* returning loop */
1570
0
      if (EXPR_TYPE(expr_crs) == PTN_ZERO_MORE ||
1571
0
          EXPR_TYPE(expr_crs) == PTN_ONE_MORE) {
1572
0
        CHECK_OUTPUT(SHOW, 0, __LINE__, "end loop")
1573
1574
        /* check that loop consumed input */
1575
0
        if (*input_crs == input_start) {
1576
0
          CHECK_OUTPUT(RETURN, 0, __LINE__, "loop failed:  did not consume")
1577
0
          return 0;
1578
0
        }
1579
1580
        /* loops do not continue to the next expression */
1581
0
        break;
1582
0
      }
1583
1584
      /* returning not */
1585
0
      if (EXPR_TYPE(expr_crs) == PTN_NOT) notOperator = !notOperator;
1586
1587
0
      expr_crs = EXPR_NXT(expr_crs);
1588
1589
0
      CHECK_OUTPUT(SHOW, 0, __LINE__, "end next")
1590
0
    }
1591
1592
0
    CHECK_OUTPUT(SHOW, 0, __LINE__, "check next")
1593
0
  }
1594
1595
0
  CHECK_OUTPUT(RETURN, 1, __LINE__, "check passed:  end of expression");
1596
0
  return 1;
1597
0
}
1598
1599
static int
1600
pattern_check_hook(const widechar *input, const int input_start, const int input_minmax,
1601
    const int input_dir, const widechar *expr_data,
1602
    int (*hook)(const widechar input, const int data_len), widechar *hook_data,
1603
0
    const int hook_max, const TranslationTableHeader *table) {
1604
0
  int input_crs, ret, *loop_cnts;
1605
1606
0
  input_crs = input_start;
1607
0
  loop_cnts = malloc(expr_data[1] * sizeof(int));
1608
0
  memset(loop_cnts, 0, expr_data[1] * sizeof(int));
1609
0
  ret = pattern_check_expression(input, &input_crs, input_minmax, input_dir, expr_data,
1610
0
      hook, hook_data, hook_max, 2, 0, 0, loop_cnts, table);
1611
0
  free(loop_cnts);
1612
0
  return ret;
1613
0
}
1614
1615
int EXPORT_CALL
1616
_lou_pattern_check(const widechar *input, const int input_start, const int input_minmax,
1617
    const int input_dir, const widechar *expr_data,
1618
0
    const TranslationTableHeader *table) {
1619
#ifdef CHECK_OUTPUT_DEFINED
1620
  pattern_output(expr_data, table);
1621
#endif
1622
0
  return pattern_check_hook(
1623
0
      input, input_start, input_minmax, input_dir, expr_data, NULL, NULL, 0, table);
1624
0
}
1625
1626
////////////////////////////////////////////////////////////////////////////////