Coverage Report

Created: 2026-03-12 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/flex/src/nfa.c
Line
Count
Source
1
/* nfa - NFA construction routines */
2
3
/*  Copyright (c) 1990 The Regents of the University of California. */
4
/*  All rights reserved. */
5
6
/*  This code is derived from software contributed to Berkeley by */
7
/*  Vern Paxson. */
8
9
/*  The United States Government has rights in this work pursuant */
10
/*  to contract no. DE-AC03-76SF00098 between the United States */
11
/*  Department of Energy and the University of California. */
12
13
/*  This file is part of flex. */
14
15
/*  Redistribution and use in source and binary forms, with or without */
16
/*  modification, are permitted provided that the following conditions */
17
/*  are met: */
18
19
/*  1. Redistributions of source code must retain the above copyright */
20
/*     notice, this list of conditions and the following disclaimer. */
21
/*  2. Redistributions in binary form must reproduce the above copyright */
22
/*     notice, this list of conditions and the following disclaimer in the */
23
/*     documentation and/or other materials provided with the distribution. */
24
25
/*  Neither the name of the University nor the names of its contributors */
26
/*  may be used to endorse or promote products derived from this software */
27
/*  without specific prior written permission. */
28
29
/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32
/*  PURPOSE. */
33
34
#include "flexdef.h"
35
36
37
/* declare functions that have forward references */
38
39
int dupmachine(int);
40
void  mkxtion(int, int);
41
42
43
/* add_accept - add an accepting state to a machine
44
 *
45
 * accepting_number becomes mach's accepting number.
46
 */
47
48
void    add_accept (int mach, int accepting_number)
49
307
{
50
  /* Hang the accepting number off an epsilon state.  if it is associated
51
   * with a state that has a non-epsilon out-transition, then the state
52
   * will accept BEFORE it makes that transition, i.e., one character
53
   * too soon.
54
   */
55
56
307
  if (transchar[finalst[mach]] == SYM_EPSILON)
57
0
    accptnum[finalst[mach]] = accepting_number;
58
59
307
  else {
60
307
    int     astate = mkstate (SYM_EPSILON);
61
62
307
    accptnum[astate] = accepting_number;
63
307
    (void) link_machines (mach, astate);
64
307
  }
65
307
}
66
67
68
/* copysingl - make a given number of copies of a singleton machine
69
 *
70
 * synopsis
71
 *
72
 *   newsng = copysingl( singl, num );
73
 *
74
 *     newsng - a new singleton composed of num copies of singl
75
 *     singl  - a singleton machine
76
 *     num    - the number of copies of singl to be present in newsng
77
 */
78
79
int     copysingl (int singl, int num)
80
0
{
81
0
  int     copy, i;
82
83
0
  copy = mkstate (SYM_EPSILON);
84
85
0
  for (i = 1; i <= num; ++i)
86
0
    copy = link_machines (copy, dupmachine (singl));
87
88
0
  return copy;
89
0
}
90
91
92
/* dumpnfa - debugging routine to write out an nfa */
93
94
void    dumpnfa (int state1)
95
0
{
96
0
  int     sym, tsp1, tsp2, anum, ns;
97
98
0
  fprintf (stderr,
99
0
     _
100
0
     ("\n\n********** beginning dump of nfa with start state %d\n"),
101
0
     state1);
102
103
  /* We probably should loop starting at firstst[state1] and going to
104
   * lastst[state1], but they're not maintained properly when we "or"
105
   * all of the rules together.  So we use our knowledge that the machine
106
   * starts at state 1 and ends at lastnfa.
107
   */
108
109
  /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
110
0
  for (ns = 1; ns <= lastnfa; ++ns) {
111
0
    fprintf (stderr, _("state # %4d\t"), ns);
112
113
0
    sym = transchar[ns];
114
0
    tsp1 = trans1[ns];
115
0
    tsp2 = trans2[ns];
116
0
    anum = accptnum[ns];
117
118
0
    fprintf (stderr, "%3d:  %4d, %4d", sym, tsp1, tsp2);
119
120
0
    if (anum != NIL)
121
0
      fprintf (stderr, "  [%d]", anum);
122
123
0
    fprintf (stderr, "\n");
124
0
  }
125
126
0
  fprintf (stderr, _("********** end of dump\n"));
127
0
}
128
129
130
/* dupmachine - make a duplicate of a given machine
131
 *
132
 * synopsis
133
 *
134
 *   copy = dupmachine( mach );
135
 *
136
 *     copy - holds duplicate of mach
137
 *     mach - machine to be duplicated
138
 *
139
 * note that the copy of mach is NOT an exact duplicate; rather, all the
140
 * transition states values are adjusted so that the copy is self-contained,
141
 * as the original should have been.
142
 *
143
 * also note that the original MUST be contiguous, with its low and high
144
 * states accessible by the arrays firstst and lastst
145
 */
146
147
int     dupmachine (int mach)
148
0
{
149
0
  int     i, init, state_offset;
150
0
  int     state = 0;
151
0
  int     last = lastst[mach];
152
153
0
  for (i = firstst[mach]; i <= last; ++i) {
154
0
    state = mkstate (transchar[i]);
155
156
0
    if (trans1[i] != NO_TRANSITION) {
157
0
      mkxtion (finalst[state], trans1[i] + state - i);
158
159
0
      if (transchar[i] == SYM_EPSILON &&
160
0
          trans2[i] != NO_TRANSITION)
161
0
          mkxtion (finalst[state],
162
0
             trans2[i] + state - i);
163
0
    }
164
165
0
    accptnum[state] = accptnum[i];
166
0
  }
167
168
0
  if (state == 0)
169
0
    flexfatal (_("empty machine in dupmachine()"));
170
171
0
  state_offset = state - i + 1;
172
173
0
  init = mach + state_offset;
174
0
  firstst[init] = firstst[mach] + state_offset;
175
0
  finalst[init] = finalst[mach] + state_offset;
176
0
  lastst[init] = lastst[mach] + state_offset;
177
178
0
  return init;
179
0
}
180
181
182
/* finish_rule - finish up the processing for a rule
183
 *
184
 * An accepting number is added to the given machine.  If variable_trail_rule
185
 * is true then the rule has trailing context and both the head and trail
186
 * are variable size.  Otherwise if headcnt or trailcnt is non-zero then
187
 * the machine recognizes a pattern with trailing context and headcnt is
188
 * the number of characters in the matched part of the pattern, or zero
189
 * if the matched part has variable length.  trailcnt is the number of
190
 * trailing context characters in the pattern, or zero if the trailing
191
 * context has variable length.
192
 */
193
194
void    finish_rule (int mach, bool variable_trail_rule, int headcnt, int trailcnt,
195
         int pcont_act)
196
307
{
197
307
  char    action_text[MAXLINE];
198
199
307
  add_accept (mach, num_rules);
200
201
  /* We did this in new_rule(), but it often gets the wrong
202
   * number because we do it before we start parsing the current rule.
203
   */
204
307
  rule_linenum[num_rules] = linenum;
205
206
  /* If this is a continued action, then the line-number has already
207
   * been updated, giving us the wrong number.
208
   */
209
307
  if (continued_action)
210
0
    --rule_linenum[num_rules];
211
212
213
  /* If the previous rule was continued action, then we inherit the
214
   * previous newline flag, possibly overriding the current one.
215
   */
216
307
  if (pcont_act && rule_has_nl[num_rules - 1])
217
0
    rule_has_nl[num_rules] = true;
218
219
307
  snprintf (action_text, sizeof(action_text), "M4_HOOK_NORMAL_STATE_CASE_ARM(%d)\n", num_rules);
220
307
  add_action (action_text);
221
307
  if (rule_has_nl[num_rules]) {
222
0
    snprintf (action_text, sizeof(action_text), "M4_HOOK_COMMENT_OPEN rule %d can match eol M4_HOOK_COMMENT_CLOSE\n",
223
0
       num_rules);
224
0
    add_action (action_text);
225
0
  }
226
227
228
307
  if (variable_trail_rule) {
229
0
    rule_type[num_rules] = RULE_VARIABLE;
230
231
0
    if (env.performance_hint > 0)
232
0
      fprintf (stderr,
233
0
         _
234
0
         ("Variable trailing context rule at line %d\n"),
235
0
         rule_linenum[num_rules]);
236
237
0
    variable_trailing_context_rules = true;
238
0
  }
239
240
307
  else {
241
307
    rule_type[num_rules] = RULE_NORMAL;
242
243
307
    if (headcnt > 0 || trailcnt > 0) {
244
      /* Do trailing context magic to not match the trailing
245
       * characters.
246
       */
247
0
      add_action ("M4_HOOK_RELEASE_YYTEXT\n");
248
249
0
      if (headcnt > 0) {
250
0
        if (rule_has_nl[num_rules]) {
251
0
          snprintf (action_text, sizeof(action_text),
252
0
            "M4_HOOK_LINE_FORWARD(%d)\n", headcnt);
253
0
          add_action (action_text);
254
0
        }
255
0
        snprintf (action_text, sizeof(action_text), "M4_HOOK_CHAR_FORWARD(%d)\n",
256
0
           headcnt);
257
0
        add_action (action_text);
258
0
      }
259
260
0
      else {
261
0
        if (rule_has_nl[num_rules]) {
262
0
          snprintf (action_text, sizeof(action_text),
263
0
             "M4_HOOK_LINE_REWIND(%d)\n", trailcnt);
264
0
          add_action (action_text);
265
0
        }
266
267
0
        snprintf (action_text, sizeof(action_text), "M4_HOOK_CHAR_REWIND(%d)\n",
268
0
           trailcnt);
269
0
        add_action (action_text);
270
0
      }
271
272
0
      add_action
273
0
        ("M4_HOOK_TAKE_YYTEXT\n");
274
0
    }
275
307
  }
276
277
  /* Okay, in the action code at this point yytext and yyleng have
278
   * their proper final values for this rule, so here's the point
279
   * to do any user action.  But don't do it for continued actions,
280
   * as that'll result in multiple rule-setup calls.
281
   */
282
307
  if (!continued_action)
283
307
    add_action ("M4_HOOK_SET_RULE_SETUP\n");
284
285
307
  line_directive_out(NULL, infilename, linenum);
286
307
        add_action("[[");
287
307
}
288
289
290
/* link_machines - connect two machines together
291
 *
292
 * synopsis
293
 *
294
 *   new = link_machines( first, last );
295
 *
296
 *     new    - a machine constructed by connecting first to last
297
 *     first  - the machine whose successor is to be last
298
 *     last   - the machine whose predecessor is to be first
299
 *
300
 * note: this routine concatenates the machine first with the machine
301
 *  last to produce a machine new which will pattern-match first first
302
 *  and then last, and will fail if either of the sub-patterns fails.
303
 *  FIRST is set to new by the operation.  last is unmolested.
304
 */
305
306
int     link_machines (int first, int last)
307
339
{
308
339
  if (first == NIL)
309
0
    return last;
310
311
339
  else if (last == NIL)
312
0
    return first;
313
314
339
  else {
315
339
    mkxtion (finalst[first], last);
316
339
    finalst[first] = finalst[last];
317
339
    lastst[first] = MAX (lastst[first], lastst[last]);
318
339
    firstst[first] = MIN (firstst[first], firstst[last]);
319
320
339
    return first;
321
339
  }
322
339
}
323
324
325
/* mark_beginning_as_normal - mark each "beginning" state in a machine
326
 *                            as being a "normal" (i.e., not trailing context-
327
 *                            associated) states
328
 *
329
 * The "beginning" states are the epsilon closure of the first state
330
 */
331
332
void    mark_beginning_as_normal (int mach)
333
0
{
334
0
  switch (state_type[mach]) {
335
0
  case STATE_NORMAL:
336
    /* Oh, we've already visited here. */
337
0
    return;
338
339
0
  case STATE_TRAILING_CONTEXT:
340
0
    state_type[mach] = STATE_NORMAL;
341
342
0
    if (transchar[mach] == SYM_EPSILON) {
343
0
      if (trans1[mach] != NO_TRANSITION)
344
0
        mark_beginning_as_normal (trans1[mach]);
345
346
0
      if (trans2[mach] != NO_TRANSITION)
347
0
        mark_beginning_as_normal (trans2[mach]);
348
0
    }
349
0
    break;
350
351
0
  default:
352
0
    flexerror (_
353
0
         ("bad state type in mark_beginning_as_normal()"));
354
0
    break;
355
0
  }
356
0
}
357
358
359
/* mkbranch - make a machine that branches to two machines
360
 *
361
 * synopsis
362
 *
363
 *   branch = mkbranch( first, second );
364
 *
365
 *     branch - a machine which matches either first's pattern or second's
366
 *     first, second - machines whose patterns are to be or'ed (the | operator)
367
 *
368
 * Note that first and second are NEITHER destroyed by the operation.  Also,
369
 * the resulting machine CANNOT be used with any other "mk" operation except
370
 * more mkbranch's.  Compare with mkor()
371
 */
372
373
int     mkbranch (int first, int second)
374
381
{
375
381
  int     eps;
376
377
381
  if (first == NO_TRANSITION)
378
0
    return second;
379
380
381
  else if (second == NO_TRANSITION)
381
0
    return first;
382
383
381
  eps = mkstate (SYM_EPSILON);
384
385
381
  mkxtion (eps, first);
386
381
  mkxtion (eps, second);
387
388
381
  return eps;
389
381
}
390
391
392
/* mkclos - convert a machine into a closure
393
 *
394
 * synopsis
395
 *   new = mkclos( state );
396
 *
397
 * new - a new state which matches the closure of "state"
398
 */
399
400
int     mkclos (int state)
401
2
{
402
2
  return mkopt (mkposcl (state));
403
2
}
404
405
406
/* mkopt - make a machine optional
407
 *
408
 * synopsis
409
 *
410
 *   new = mkopt( mach );
411
 *
412
 *     new  - a machine which optionally matches whatever mach matched
413
 *     mach - the machine to make optional
414
 *
415
 * notes:
416
 *     1. mach must be the last machine created
417
 *     2. mach is destroyed by the call
418
 */
419
420
int     mkopt (int mach)
421
2
{
422
2
  int     eps;
423
424
2
  if (!SUPER_FREE_EPSILON (finalst[mach])) {
425
2
    eps = mkstate (SYM_EPSILON);
426
2
    mach = link_machines (mach, eps);
427
2
  }
428
429
  /* Can't skimp on the following if FREE_EPSILON(mach) is true because
430
   * some state interior to "mach" might point back to the beginning
431
   * for a closure.
432
   */
433
2
  eps = mkstate (SYM_EPSILON);
434
2
  mach = link_machines (eps, mach);
435
436
2
  mkxtion (mach, finalst[mach]);
437
438
2
  return mach;
439
2
}
440
441
442
/* mkor - make a machine that matches either one of two machines
443
 *
444
 * synopsis
445
 *
446
 *   new = mkor( first, second );
447
 *
448
 *     new - a machine which matches either first's pattern or second's
449
 *     first, second - machines whose patterns are to be or'ed (the | operator)
450
 *
451
 * note that first and second are both destroyed by the operation
452
 * the code is rather convoluted because an attempt is made to minimize
453
 * the number of epsilon states needed
454
 */
455
456
int     mkor (int first, int second)
457
0
{
458
0
  int     eps, orend;
459
460
0
  if (first == NIL)
461
0
    return second;
462
463
0
  else if (second == NIL)
464
0
    return first;
465
466
0
  else {
467
    /* See comment in mkopt() about why we can't use the first
468
     * state of "first" or "second" if they satisfy "FREE_EPSILON".
469
     */
470
0
    eps = mkstate (SYM_EPSILON);
471
472
0
    first = link_machines (eps, first);
473
474
0
    mkxtion (first, second);
475
476
0
    if (SUPER_FREE_EPSILON (finalst[first]) &&
477
0
        accptnum[finalst[first]] == NIL) {
478
0
      orend = finalst[first];
479
0
      mkxtion (finalst[second], orend);
480
0
    }
481
482
0
    else if (SUPER_FREE_EPSILON (finalst[second]) &&
483
0
       accptnum[finalst[second]] == NIL) {
484
0
      orend = finalst[second];
485
0
      mkxtion (finalst[first], orend);
486
0
    }
487
488
0
    else {
489
0
      eps = mkstate (SYM_EPSILON);
490
491
0
      first = link_machines (first, eps);
492
0
      orend = finalst[first];
493
494
0
      mkxtion (finalst[second], orend);
495
0
    }
496
0
  }
497
498
0
  firstst[first] = MIN(firstst[first], firstst[second]);
499
500
0
  finalst[first] = orend;
501
0
  return first;
502
0
}
503
504
505
/* mkposcl - convert a machine into a positive closure
506
 *
507
 * synopsis
508
 *   new = mkposcl( state );
509
 *
510
 *    new - a machine matching the positive closure of "state"
511
 */
512
513
int     mkposcl (int state)
514
2
{
515
2
  int     eps;
516
517
2
  if (SUPER_FREE_EPSILON (finalst[state])) {
518
1
    mkxtion (finalst[state], state);
519
1
    return state;
520
1
  }
521
522
1
  else {
523
1
    eps = mkstate (SYM_EPSILON);
524
1
    mkxtion (eps, state);
525
1
    return link_machines (state, eps);
526
1
  }
527
2
}
528
529
530
/* mkrep - make a replicated machine
531
 *
532
 * synopsis
533
 *   new = mkrep( mach, lb, ub );
534
 *
535
 *    new - a machine that matches whatever "mach" matched from "lb"
536
 *          number of times to "ub" number of times
537
 *
538
 * note
539
 *   if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
540
 */
541
542
int     mkrep (int mach, int lb, int ub)
543
0
{
544
0
  int     base_mach, tail, copy, i;
545
546
0
  base_mach = copysingl (mach, lb - 1);
547
548
0
  if (ub == INFINITE_REPEAT) {
549
0
    copy = dupmachine (mach);
550
0
    mach = link_machines (mach,
551
0
              link_machines (base_mach,
552
0
                 mkclos (copy)));
553
0
  }
554
555
0
  else {
556
0
    tail = mkstate (SYM_EPSILON);
557
558
0
    for (i = lb; i < ub; ++i) {
559
0
      copy = dupmachine (mach);
560
0
      tail = mkopt (link_machines (copy, tail));
561
0
    }
562
563
0
    mach =
564
0
      link_machines (mach,
565
0
               link_machines (base_mach, tail));
566
0
  }
567
568
0
  return mach;
569
0
}
570
571
572
/* mkstate - create a state with a transition on a given symbol
573
 *
574
 * synopsis
575
 *
576
 *   state = mkstate( sym );
577
 *
578
 *     state - a new state matching sym
579
 *     sym   - the symbol the new state is to have an out-transition on
580
 *
581
 * note that this routine makes new states in ascending order through the
582
 * state array (and increments LASTNFA accordingly).  The routine DUPMACHINE
583
 * relies on machines being made in ascending order and that they are
584
 * CONTIGUOUS.  Change it and you will have to rewrite DUPMACHINE (kludge
585
 * that it admittedly is)
586
 */
587
588
int     mkstate (int sym)
589
3.42k
{
590
3.42k
  if (++lastnfa >= current_mns) {
591
0
    if ((current_mns += MNS_INCREMENT) >= maximum_mns)
592
0
      lerr(_
593
0
        ("input rules are too complicated (>= %d NFA states)"),
594
0
current_mns);
595
596
0
    ++num_reallocs;
597
598
0
    firstst = reallocate_integer_array (firstst, current_mns);
599
0
    lastst = reallocate_integer_array (lastst, current_mns);
600
0
    finalst = reallocate_integer_array (finalst, current_mns);
601
0
    transchar =
602
0
      reallocate_integer_array (transchar, current_mns);
603
0
    trans1 = reallocate_integer_array (trans1, current_mns);
604
0
    trans2 = reallocate_integer_array (trans2, current_mns);
605
0
    accptnum =
606
0
      reallocate_integer_array (accptnum, current_mns);
607
0
    assoc_rule =
608
0
      reallocate_integer_array (assoc_rule, current_mns);
609
0
    state_type =
610
0
      reallocate_integer_array (state_type, current_mns);
611
0
  }
612
613
3.42k
  firstst[lastnfa] = lastnfa;
614
3.42k
  finalst[lastnfa] = lastnfa;
615
3.42k
  lastst[lastnfa] = lastnfa;
616
3.42k
  transchar[lastnfa] = sym;
617
3.42k
  trans1[lastnfa] = NO_TRANSITION;
618
3.42k
  trans2[lastnfa] = NO_TRANSITION;
619
3.42k
  accptnum[lastnfa] = NIL;
620
3.42k
  assoc_rule[lastnfa] = num_rules;
621
3.42k
  state_type[lastnfa] = current_state_type;
622
623
  /* Fix up equivalence classes base on this transition.  Note that any
624
   * character which has its own transition gets its own equivalence
625
   * class.  Thus only characters which are only in character classes
626
   * have a chance at being in the same equivalence class.  E.g. "a|b"
627
   * puts 'a' and 'b' into two different equivalence classes.  "[ab]"
628
   * puts them in the same equivalence class (barring other differences
629
   * elsewhere in the input).
630
   */
631
632
3.42k
  if (sym < 0) {
633
    /* We don't have to update the equivalence classes since
634
     * that was already done when the ccl was created for the
635
     * first time.
636
     */
637
306
  }
638
639
3.11k
  else if (sym == SYM_EPSILON)
640
3.08k
    ++numeps;
641
642
28
  else {
643
28
    check_char (sym);
644
645
28
    if (ctrl.useecs)
646
      /* Map NUL's to csize. */
647
0
      mkechar (sym ? sym : ctrl.csize, nextecm, ecgroup);
648
28
  }
649
650
3.42k
  return lastnfa;
651
3.42k
}
652
653
654
/* mkxtion - make a transition from one state to another
655
 *
656
 * synopsis
657
 *
658
 *   mkxtion( statefrom, stateto );
659
 *
660
 *     statefrom - the state from which the transition is to be made
661
 *     stateto   - the state to which the transition is to be made
662
 */
663
664
void    mkxtion (int statefrom, int stateto)
665
1.10k
{
666
1.10k
  if (trans1[statefrom] == NO_TRANSITION)
667
720
    trans1[statefrom] = stateto;
668
669
385
  else if ((transchar[statefrom] != SYM_EPSILON) ||
670
385
     (trans2[statefrom] != NO_TRANSITION))
671
0
    flexfatal (_("found too many transitions in mkxtion()"));
672
673
385
  else {     /* second out-transition for an epsilon state */
674
385
    ++eps2;
675
385
    trans2[statefrom] = stateto;
676
385
  }
677
1.10k
}
678
679
/* new_rule - initialize for a new rule */
680
681
void    new_rule (void)
682
307
{
683
307
  if (++num_rules >= current_max_rules) {
684
0
    ++num_reallocs;
685
0
    current_max_rules += MAX_RULES_INCREMENT;
686
0
    rule_type = reallocate_integer_array (rule_type,
687
0
                  current_max_rules);
688
0
    rule_linenum = reallocate_integer_array (rule_linenum,
689
0
               current_max_rules);
690
0
    rule_useful = reallocate_array(rule_useful,
691
0
          current_max_rules, sizeof(char));
692
0
    rule_has_nl = reallocate_array(rule_has_nl,
693
0
          current_max_rules, sizeof(char));
694
0
  }
695
696
307
  if (num_rules > MAX_RULE)
697
0
    lerr (_("too many rules (> %d)!"), MAX_RULE);
698
699
307
  rule_linenum[num_rules] = linenum;
700
307
  rule_useful[num_rules] = false;
701
  rule_has_nl[num_rules] = false;
702
307
}