/src/flex/src/nfa.c

Source
/* nfa - NFA construction routines */

/*  Copyright (c) 1990 The Regents of the University of California. */
/*  All rights reserved. */

/*  This code is derived from software contributed to Berkeley by */
/*  Vern Paxson. */

/*  The United States Government has rights in this work pursuant */
/*  to contract no. DE-AC03-76SF00098 between the United States */
/*  Department of Energy and the University of California. */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */

#include "flexdef.h"


/* declare functions that have forward references */

int dupmachine(int);
void  mkxtion(int, int);


/* add_accept - add an accepting state to a machine
 *
 * accepting_number becomes mach's accepting number.
 */

void    add_accept (int mach, int accepting_number)
{
  /* Hang the accepting number off an epsilon state.  if it is associated
   * with a state that has a non-epsilon out-transition, then the state
   * will accept BEFORE it makes that transition, i.e., one character
   * too soon.
   */

  if (transchar[finalst[mach]] == SYM_EPSILON)
    accptnum[finalst[mach]] = accepting_number;

  else {
    int     astate = mkstate (SYM_EPSILON);

    accptnum[astate] = accepting_number;
    (void) link_machines (mach, astate);
  }
}


/* copysingl - make a given number of copies of a singleton machine
 *
 * synopsis
 *
 *   newsng = copysingl( singl, num );
 *
 *     newsng - a new singleton composed of num copies of singl
 *     singl  - a singleton machine
 *     num    - the number of copies of singl to be present in newsng
 */

int     copysingl (int singl, int num)
{
  int     copy, i;

  copy = mkstate (SYM_EPSILON);

  for (i = 1; i <= num; ++i)
    copy = link_machines (copy, dupmachine (singl));

  return copy;
}


/* dumpnfa - debugging routine to write out an nfa */

void    dumpnfa (int state1)
{
  int     sym, tsp1, tsp2, anum, ns;

  fprintf (stderr,
     _
     ("\n\n********** beginning dump of nfa with start state %d\n"),
     state1);

  /* We probably should loop starting at firstst[state1] and going to
   * lastst[state1], but they're not maintained properly when we "or"
   * all of the rules together.  So we use our knowledge that the machine
   * starts at state 1 and ends at lastnfa.
   */

  /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
  for (ns = 1; ns <= lastnfa; ++ns) {
    fprintf (stderr, _("state # %4d\t"), ns);

    sym = transchar[ns];
    tsp1 = trans1[ns];
    tsp2 = trans2[ns];
    anum = accptnum[ns];

    fprintf (stderr, "%3d:  %4d, %4d", sym, tsp1, tsp2);

    if (anum != NIL)
      fprintf (stderr, "  [%d]", anum);

    fprintf (stderr, "\n");
  }

  fprintf (stderr, _("********** end of dump\n"));
}


/* dupmachine - make a duplicate of a given machine
 *
 * synopsis
 *
 *   copy = dupmachine( mach );
 *
 *     copy - holds duplicate of mach
 *     mach - machine to be duplicated
 *
 * note that the copy of mach is NOT an exact duplicate; rather, all the
 * transition states values are adjusted so that the copy is self-contained,
 * as the original should have been.
 *
 * also note that the original MUST be contiguous, with its low and high
 * states accessible by the arrays firstst and lastst
 */

int     dupmachine (int mach)
{
  int     i, init, state_offset;
  int     state = 0;
  int     last = lastst[mach];

  for (i = firstst[mach]; i <= last; ++i) {
    state = mkstate (transchar[i]);

    if (trans1[i] != NO_TRANSITION) {
      mkxtion (finalst[state], trans1[i] + state - i);

      if (transchar[i] == SYM_EPSILON &&
          trans2[i] != NO_TRANSITION)
          mkxtion (finalst[state],
             trans2[i] + state - i);
    }

    accptnum[state] = accptnum[i];
  }

  if (state == 0)
    flexfatal (_("empty machine in dupmachine()"));

  state_offset = state - i + 1;

  init = mach + state_offset;
  firstst[init] = firstst[mach] + state_offset;
  finalst[init] = finalst[mach] + state_offset;
  lastst[init] = lastst[mach] + state_offset;

  return init;
}


/* finish_rule - finish up the processing for a rule
 *
 * An accepting number is added to the given machine.  If variable_trail_rule
 * is true then the rule has trailing context and both the head and trail
 * are variable size.  Otherwise if headcnt or trailcnt is non-zero then
 * the machine recognizes a pattern with trailing context and headcnt is
 * the number of characters in the matched part of the pattern, or zero
 * if the matched part has variable length.  trailcnt is the number of
 * trailing context characters in the pattern, or zero if the trailing
 * context has variable length.
 */

void    finish_rule (int mach, bool variable_trail_rule, int headcnt, int trailcnt,
         int pcont_act)
{
  char    action_text[MAXLINE];

  add_accept (mach, num_rules);

  /* We did this in new_rule(), but it often gets the wrong
   * number because we do it before we start parsing the current rule.
   */
  rule_linenum[num_rules] = linenum;

  /* If this is a continued action, then the line-number has already
   * been updated, giving us the wrong number.
   */
  if (continued_action)
    --rule_linenum[num_rules];


  /* If the previous rule was continued action, then we inherit the
   * previous newline flag, possibly overriding the current one.
   */
  if (pcont_act && rule_has_nl[num_rules - 1])
    rule_has_nl[num_rules] = true;

  snprintf (action_text, sizeof(action_text), "M4_HOOK_NORMAL_STATE_CASE_ARM(%d)\n", num_rules);
  add_action (action_text);
  if (rule_has_nl[num_rules]) {
    snprintf (action_text, sizeof(action_text), "M4_HOOK_COMMENT_OPEN rule %d can match eol M4_HOOK_COMMENT_CLOSE\n",
       num_rules);
    add_action (action_text);
  }


  if (variable_trail_rule) {
    rule_type[num_rules] = RULE_VARIABLE;

    if (env.performance_hint > 0)
      fprintf (stderr,
         _
         ("Variable trailing context rule at line %d\n"),
         rule_linenum[num_rules]);

    variable_trailing_context_rules = true;
  }

  else {
    rule_type[num_rules] = RULE_NORMAL;

    if (headcnt > 0 || trailcnt > 0) {
      /* Do trailing context magic to not match the trailing
       * characters.
       */
      add_action ("M4_HOOK_RELEASE_YYTEXT\n");

      if (headcnt > 0) {
        if (rule_has_nl[num_rules]) {
          snprintf (action_text, sizeof(action_text),
            "M4_HOOK_LINE_FORWARD(%d)\n", headcnt);
          add_action (action_text);
        }
        snprintf (action_text, sizeof(action_text), "M4_HOOK_CHAR_FORWARD(%d)\n",
           headcnt);
        add_action (action_text);
      }

      else {
        if (rule_has_nl[num_rules]) {
          snprintf (action_text, sizeof(action_text),
             "M4_HOOK_LINE_REWIND(%d)\n", trailcnt);
          add_action (action_text);
        }

        snprintf (action_text, sizeof(action_text), "M4_HOOK_CHAR_REWIND(%d)\n",
           trailcnt);
        add_action (action_text);
      }

      add_action
        ("M4_HOOK_TAKE_YYTEXT\n");
    }
  }

  /* Okay, in the action code at this point yytext and yyleng have
   * their proper final values for this rule, so here's the point
   * to do any user action.  But don't do it for continued actions,
   * as that'll result in multiple rule-setup calls.
   */
  if (!continued_action)
    add_action ("M4_HOOK_SET_RULE_SETUP\n");

  line_directive_out(NULL, infilename, linenum);
        add_action("[[");
}


/* link_machines - connect two machines together
 *
 * synopsis
 *
 *   new = link_machines( first, last );
 *
 *     new    - a machine constructed by connecting first to last
 *     first  - the machine whose successor is to be last
 *     last   - the machine whose predecessor is to be first
 *
 * note: this routine concatenates the machine first with the machine
 *  last to produce a machine new which will pattern-match first first
 *  and then last, and will fail if either of the sub-patterns fails.
 *  FIRST is set to new by the operation.  last is unmolested.
 */

int     link_machines (int first, int last)
{
  if (first == NIL)
    return last;

  else if (last == NIL)
    return first;

  else {
    mkxtion (finalst[first], last);
    finalst[first] = finalst[last];
    lastst[first] = MAX (lastst[first], lastst[last]);
    firstst[first] = MIN (firstst[first], firstst[last]);

    return first;
  }
}


/* mark_beginning_as_normal - mark each "beginning" state in a machine
 *                            as being a "normal" (i.e., not trailing context-
 *                            associated) states
 *
 * The "beginning" states are the epsilon closure of the first state
 */

void    mark_beginning_as_normal (int mach)
{
  switch (state_type[mach]) {
  case STATE_NORMAL:
    /* Oh, we've already visited here. */
    return;

  case STATE_TRAILING_CONTEXT:
    state_type[mach] = STATE_NORMAL;

    if (transchar[mach] == SYM_EPSILON) {
      if (trans1[mach] != NO_TRANSITION)
        mark_beginning_as_normal (trans1[mach]);

      if (trans2[mach] != NO_TRANSITION)
        mark_beginning_as_normal (trans2[mach]);
    }
    break;

  default:
    flexerror (_
         ("bad state type in mark_beginning_as_normal()"));
    break;
  }
}


/* mkbranch - make a machine that branches to two machines
 *
 * synopsis
 *
 *   branch = mkbranch( first, second );
 *
 *     branch - a machine which matches either first's pattern or second's
 *     first, second - machines whose patterns are to be or'ed (the | operator)
 *
 * Note that first and second are NEITHER destroyed by the operation.  Also,
 * the resulting machine CANNOT be used with any other "mk" operation except
 * more mkbranch's.  Compare with mkor()
 */

int     mkbranch (int first, int second)
{
  int     eps;

  if (first == NO_TRANSITION)
    return second;

  else if (second == NO_TRANSITION)
    return first;

  eps = mkstate (SYM_EPSILON);

  mkxtion (eps, first);
  mkxtion (eps, second);

  return eps;
}


/* mkclos - convert a machine into a closure
 *
 * synopsis
 *   new = mkclos( state );
 *
 * new - a new state which matches the closure of "state"
 */

int     mkclos (int state)
{
  return mkopt (mkposcl (state));
}


/* mkopt - make a machine optional
 *
 * synopsis
 *
 *   new = mkopt( mach );
 *
 *     new  - a machine which optionally matches whatever mach matched
 *     mach - the machine to make optional
 *
 * notes:
 *     1. mach must be the last machine created
 *     2. mach is destroyed by the call
 */

int     mkopt (int mach)
{
  int     eps;

  if (!SUPER_FREE_EPSILON (finalst[mach])) {
    eps = mkstate (SYM_EPSILON);
    mach = link_machines (mach, eps);
  }

  /* Can't skimp on the following if FREE_EPSILON(mach) is true because
   * some state interior to "mach" might point back to the beginning
   * for a closure.
   */
  eps = mkstate (SYM_EPSILON);
  mach = link_machines (eps, mach);

  mkxtion (mach, finalst[mach]);

  return mach;
}


/* mkor - make a machine that matches either one of two machines
 *
 * synopsis
 *
 *   new = mkor( first, second );
 *
 *     new - a machine which matches either first's pattern or second's
 *     first, second - machines whose patterns are to be or'ed (the | operator)
 *
 * note that first and second are both destroyed by the operation
 * the code is rather convoluted because an attempt is made to minimize
 * the number of epsilon states needed
 */

int     mkor (int first, int second)
{
  int     eps, orend;

  if (first == NIL)
    return second;

  else if (second == NIL)
    return first;

  else {
    /* See comment in mkopt() about why we can't use the first
     * state of "first" or "second" if they satisfy "FREE_EPSILON".
     */
    eps = mkstate (SYM_EPSILON);

    first = link_machines (eps, first);

    mkxtion (first, second);

    if (SUPER_FREE_EPSILON (finalst[first]) &&
        accptnum[finalst[first]] == NIL) {
      orend = finalst[first];
      mkxtion (finalst[second], orend);
    }

    else if (SUPER_FREE_EPSILON (finalst[second]) &&
       accptnum[finalst[second]] == NIL) {
      orend = finalst[second];
      mkxtion (finalst[first], orend);
    }

    else {
      eps = mkstate (SYM_EPSILON);

      first = link_machines (first, eps);
      orend = finalst[first];

      mkxtion (finalst[second], orend);
    }
  }

  firstst[first] = MIN(firstst[first], firstst[second]);

  finalst[first] = orend;
  return first;
}


/* mkposcl - convert a machine into a positive closure
 *
 * synopsis
 *   new = mkposcl( state );
 *
 *    new - a machine matching the positive closure of "state"
 */

int     mkposcl (int state)
{
  int     eps;

  if (SUPER_FREE_EPSILON (finalst[state])) {
    mkxtion (finalst[state], state);
    return state;
  }

  else {
    eps = mkstate (SYM_EPSILON);
    mkxtion (eps, state);
    return link_machines (state, eps);
  }
}


/* mkrep - make a replicated machine
 *
 * synopsis
 *   new = mkrep( mach, lb, ub );
 *
 *    new - a machine that matches whatever "mach" matched from "lb"
 *          number of times to "ub" number of times
 *
 * note
 *   if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
 */

int     mkrep (int mach, int lb, int ub)
{
  int     base_mach, tail, copy, i;

  base_mach = copysingl (mach, lb - 1);

  if (ub == INFINITE_REPEAT) {
    copy = dupmachine (mach);
    mach = link_machines (mach,
              link_machines (base_mach,
                 mkclos (copy)));
  }

  else {
    tail = mkstate (SYM_EPSILON);

    for (i = lb; i < ub; ++i) {
      copy = dupmachine (mach);
      tail = mkopt (link_machines (copy, tail));
    }

    mach =
      link_machines (mach,
               link_machines (base_mach, tail));
  }

  return mach;
}


/* mkstate - create a state with a transition on a given symbol
 *
 * synopsis
 *
 *   state = mkstate( sym );
 *
 *     state - a new state matching sym
 *     sym   - the symbol the new state is to have an out-transition on
 *
 * note that this routine makes new states in ascending order through the
 * state array (and increments LASTNFA accordingly).  The routine DUPMACHINE
 * relies on machines being made in ascending order and that they are
 * CONTIGUOUS.  Change it and you will have to rewrite DUPMACHINE (kludge
 * that it admittedly is)
 */

int     mkstate (int sym)
{
  if (++lastnfa >= current_mns) {
    if ((current_mns += MNS_INCREMENT) >= maximum_mns)
      lerr(_
        ("input rules are too complicated (>= %d NFA states)"),
current_mns);

    ++num_reallocs;

    firstst = reallocate_integer_array (firstst, current_mns);
    lastst = reallocate_integer_array (lastst, current_mns);
    finalst = reallocate_integer_array (finalst, current_mns);
    transchar =
      reallocate_integer_array (transchar, current_mns);
    trans1 = reallocate_integer_array (trans1, current_mns);
    trans2 = reallocate_integer_array (trans2, current_mns);
    accptnum =
      reallocate_integer_array (accptnum, current_mns);
    assoc_rule =
      reallocate_integer_array (assoc_rule, current_mns);
    state_type =
      reallocate_integer_array (state_type, current_mns);
  }

  firstst[lastnfa] = lastnfa;
  finalst[lastnfa] = lastnfa;
  lastst[lastnfa] = lastnfa;
  transchar[lastnfa] = sym;
  trans1[lastnfa] = NO_TRANSITION;
  trans2[lastnfa] = NO_TRANSITION;
  accptnum[lastnfa] = NIL;
  assoc_rule[lastnfa] = num_rules;
  state_type[lastnfa] = current_state_type;

  /* Fix up equivalence classes base on this transition.  Note that any
   * character which has its own transition gets its own equivalence
   * class.  Thus only characters which are only in character classes
   * have a chance at being in the same equivalence class.  E.g. "a|b"
   * puts 'a' and 'b' into two different equivalence classes.  "[ab]"
   * puts them in the same equivalence class (barring other differences
   * elsewhere in the input).
   */

  if (sym < 0) {
    /* We don't have to update the equivalence classes since
     * that was already done when the ccl was created for the
     * first time.
     */
  }

  else if (sym == SYM_EPSILON)
    ++numeps;

  else {
    check_char (sym);

    if (ctrl.useecs)
      /* Map NUL's to csize. */
      mkechar (sym ? sym : ctrl.csize, nextecm, ecgroup);
  }

  return lastnfa;
}


/* mkxtion - make a transition from one state to another
 *
 * synopsis
 *
 *   mkxtion( statefrom, stateto );
 *
 *     statefrom - the state from which the transition is to be made
 *     stateto   - the state to which the transition is to be made
 */

void    mkxtion (int statefrom, int stateto)
{
  if (trans1[statefrom] == NO_TRANSITION)
    trans1[statefrom] = stateto;

  else if ((transchar[statefrom] != SYM_EPSILON) ||
     (trans2[statefrom] != NO_TRANSITION))
    flexfatal (_("found too many transitions in mkxtion()"));

  else {     /* second out-transition for an epsilon state */
    ++eps2;
    trans2[statefrom] = stateto;
  }
}

/* new_rule - initialize for a new rule */

void    new_rule (void)
{
  if (++num_rules >= current_max_rules) {
    ++num_reallocs;
    current_max_rules += MAX_RULES_INCREMENT;
    rule_type = reallocate_integer_array (rule_type,
                  current_max_rules);
    rule_linenum = reallocate_integer_array (rule_linenum,
               current_max_rules);
    rule_useful = reallocate_array(rule_useful,
          current_max_rules, sizeof(char));
    rule_has_nl = reallocate_array(rule_has_nl,
          current_max_rules, sizeof(char));
  }

  if (num_rules > MAX_RULE)
    lerr (_("too many rules (> %d)!"), MAX_RULE);

  rule_linenum[num_rules] = linenum;
  rule_useful[num_rules] = false;
  rule_has_nl[num_rules] = false;
}

Coverage Report

Created: 2026-03-12 06:54

Line	Count	Source
1		/* nfa - NFA construction routines */
2
3		/* Copyright (c) 1990 The Regents of the University of California. */
4		/* All rights reserved. */
5
6		/* This code is derived from software contributed to Berkeley by */
7		/* Vern Paxson. */
8
9		/* The United States Government has rights in this work pursuant */
10		/* to contract no. DE-AC03-76SF00098 between the United States */
11		/* Department of Energy and the University of California. */
12
13		/* This file is part of flex. */
14
15		/* Redistribution and use in source and binary forms, with or without */
16		/* modification, are permitted provided that the following conditions */
17		/* are met: */
18
19		/* 1. Redistributions of source code must retain the above copyright */
20		/* notice, this list of conditions and the following disclaimer. */
21		/* 2. Redistributions in binary form must reproduce the above copyright */
22		/* notice, this list of conditions and the following disclaimer in the */
23		/* documentation and/or other materials provided with the distribution. */
24
25		/* Neither the name of the University nor the names of its contributors */
26		/* may be used to endorse or promote products derived from this software */
27		/* without specific prior written permission. */
28
29		/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30		/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31		/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32		/* PURPOSE. */
33
34		#include "flexdef.h"
35
36
37		/* declare functions that have forward references */
38
39		int dupmachine(int);
40		void mkxtion(int, int);
41
42
43		/* add_accept - add an accepting state to a machine
44		*
45		* accepting_number becomes mach's accepting number.
46		*/
47
48		void add_accept (int mach, int accepting_number)
49	307	{
50		/* Hang the accepting number off an epsilon state. if it is associated
51		* with a state that has a non-epsilon out-transition, then the state
52		* will accept BEFORE it makes that transition, i.e., one character
53		* too soon.
54		*/
55
56	307	if (transchar[finalst[mach]] == SYM_EPSILON)
57	0	accptnum[finalst[mach]] = accepting_number;
58
59	307	else {
60	307	int astate = mkstate (SYM_EPSILON);
61
62	307	accptnum[astate] = accepting_number;
63	307	(void) link_machines (mach, astate);
64	307	}
65	307	}
66
67
68		/* copysingl - make a given number of copies of a singleton machine
69		*
70		* synopsis
71		*
72		* newsng = copysingl( singl, num );
73		*
74		* newsng - a new singleton composed of num copies of singl
75		* singl - a singleton machine
76		* num - the number of copies of singl to be present in newsng
77		*/
78
79		int copysingl (int singl, int num)
80	0	{
81	0	int copy, i;
82
83	0	copy = mkstate (SYM_EPSILON);
84
85	0	for (i = 1; i <= num; ++i)
86	0	copy = link_machines (copy, dupmachine (singl));
87
88	0	return copy;
89	0	}
90
91
92		/* dumpnfa - debugging routine to write out an nfa */
93
94		void dumpnfa (int state1)
95	0	{
96	0	int sym, tsp1, tsp2, anum, ns;
97
98	0	fprintf (stderr,
99	0	_
100	0	("\n\n********** beginning dump of nfa with start state %d\n"),
101	0	state1);
102
103		/* We probably should loop starting at firstst[state1] and going to
104		* lastst[state1], but they're not maintained properly when we "or"
105		* all of the rules together. So we use our knowledge that the machine
106		* starts at state 1 and ends at lastnfa.
107		*/
108
109		/* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
110	0	for (ns = 1; ns <= lastnfa; ++ns) {
111	0	fprintf (stderr, _("state # %4d\t"), ns);
112
113	0	sym = transchar[ns];
114	0	tsp1 = trans1[ns];
115	0	tsp2 = trans2[ns];
116	0	anum = accptnum[ns];
117
118	0	fprintf (stderr, "%3d: %4d, %4d", sym, tsp1, tsp2);
119
120	0	if (anum != NIL)
121	0	fprintf (stderr, " [%d]", anum);
122
123	0	fprintf (stderr, "\n");
124	0	}
125
126	0	fprintf (stderr, _("********** end of dump\n"));
127	0	}
128
129
130		/* dupmachine - make a duplicate of a given machine
131		*
132		* synopsis
133		*
134		* copy = dupmachine( mach );
135		*
136		* copy - holds duplicate of mach
137		* mach - machine to be duplicated
138		*
139		* note that the copy of mach is NOT an exact duplicate; rather, all the
140		* transition states values are adjusted so that the copy is self-contained,
141		* as the original should have been.
142		*
143		* also note that the original MUST be contiguous, with its low and high
144		* states accessible by the arrays firstst and lastst
145		*/
146
147		int dupmachine (int mach)
148	0	{
149	0	int i, init, state_offset;
150	0	int state = 0;
151	0	int last = lastst[mach];
152
153	0	for (i = firstst[mach]; i <= last; ++i) {
154	0	state = mkstate (transchar[i]);
155
156	0	if (trans1[i] != NO_TRANSITION) {
157	0	mkxtion (finalst[state], trans1[i] + state - i);
158
159	0	if (transchar[i] == SYM_EPSILON &&
160	0	trans2[i] != NO_TRANSITION)
161	0	mkxtion (finalst[state],
162	0	trans2[i] + state - i);
163	0	}
164
165	0	accptnum[state] = accptnum[i];
166	0	}
167
168	0	if (state == 0)
169	0	flexfatal (_("empty machine in dupmachine()"));
170
171	0	state_offset = state - i + 1;
172
173	0	init = mach + state_offset;
174	0	firstst[init] = firstst[mach] + state_offset;
175	0	finalst[init] = finalst[mach] + state_offset;
176	0	lastst[init] = lastst[mach] + state_offset;
177
178	0	return init;
179	0	}
180
181
182		/* finish_rule - finish up the processing for a rule
183		*
184		* An accepting number is added to the given machine. If variable_trail_rule
185		* is true then the rule has trailing context and both the head and trail
186		* are variable size. Otherwise if headcnt or trailcnt is non-zero then
187		* the machine recognizes a pattern with trailing context and headcnt is
188		* the number of characters in the matched part of the pattern, or zero
189		* if the matched part has variable length. trailcnt is the number of
190		* trailing context characters in the pattern, or zero if the trailing
191		* context has variable length.
192		*/
193
194		void finish_rule (int mach, bool variable_trail_rule, int headcnt, int trailcnt,
195		int pcont_act)
196	307	{
197	307	char action_text[MAXLINE];
198
199	307	add_accept (mach, num_rules);
200
201		/* We did this in new_rule(), but it often gets the wrong
202		* number because we do it before we start parsing the current rule.
203		*/
204	307	rule_linenum[num_rules] = linenum;
205
206		/* If this is a continued action, then the line-number has already
207		* been updated, giving us the wrong number.
208		*/
209	307	if (continued_action)
210	0	--rule_linenum[num_rules];
211
212
213		/* If the previous rule was continued action, then we inherit the
214		* previous newline flag, possibly overriding the current one.
215		*/
216	307	if (pcont_act && rule_has_nl[num_rules - 1])
217	0	rule_has_nl[num_rules] = true;
218
219	307	snprintf (action_text, sizeof(action_text), "M4_HOOK_NORMAL_STATE_CASE_ARM(%d)\n", num_rules);
220	307	add_action (action_text);
221	307	if (rule_has_nl[num_rules]) {
222	0	snprintf (action_text, sizeof(action_text), "M4_HOOK_COMMENT_OPEN rule %d can match eol M4_HOOK_COMMENT_CLOSE\n",
223	0	num_rules);
224	0	add_action (action_text);
225	0	}
226
227
228	307	if (variable_trail_rule) {
229	0	rule_type[num_rules] = RULE_VARIABLE;
230
231	0	if (env.performance_hint > 0)
232	0	fprintf (stderr,
233	0	_
234	0	("Variable trailing context rule at line %d\n"),
235	0	rule_linenum[num_rules]);
236
237	0	variable_trailing_context_rules = true;
238	0	}
239
240	307	else {
241	307	rule_type[num_rules] = RULE_NORMAL;
242
243	307	if (headcnt > 0 \|\| trailcnt > 0) {
244		/* Do trailing context magic to not match the trailing
245		* characters.
246		*/
247	0	add_action ("M4_HOOK_RELEASE_YYTEXT\n");
248
249	0	if (headcnt > 0) {
250	0	if (rule_has_nl[num_rules]) {
251	0	snprintf (action_text, sizeof(action_text),
252	0	"M4_HOOK_LINE_FORWARD(%d)\n", headcnt);
253	0	add_action (action_text);
254	0	}
255	0	snprintf (action_text, sizeof(action_text), "M4_HOOK_CHAR_FORWARD(%d)\n",
256	0	headcnt);
257	0	add_action (action_text);
258	0	}
259
260	0	else {
261	0	if (rule_has_nl[num_rules]) {
262	0	snprintf (action_text, sizeof(action_text),
263	0	"M4_HOOK_LINE_REWIND(%d)\n", trailcnt);
264	0	add_action (action_text);
265	0	}
266
267	0	snprintf (action_text, sizeof(action_text), "M4_HOOK_CHAR_REWIND(%d)\n",
268	0	trailcnt);
269	0	add_action (action_text);
270	0	}
271
272	0	add_action
273	0	("M4_HOOK_TAKE_YYTEXT\n");
274	0	}
275	307	}
276
277		/* Okay, in the action code at this point yytext and yyleng have
278		* their proper final values for this rule, so here's the point
279		* to do any user action. But don't do it for continued actions,
280		* as that'll result in multiple rule-setup calls.
281		*/
282	307	if (!continued_action)
283	307	add_action ("M4_HOOK_SET_RULE_SETUP\n");
284
285	307	line_directive_out(NULL, infilename, linenum);
286	307	add_action("[[");
287	307	}
288
289
290		/* link_machines - connect two machines together
291		*
292		* synopsis
293		*
294		* new = link_machines( first, last );
295		*
296		* new - a machine constructed by connecting first to last
297		* first - the machine whose successor is to be last
298		* last - the machine whose predecessor is to be first
299		*
300		* note: this routine concatenates the machine first with the machine
301		* last to produce a machine new which will pattern-match first first
302		* and then last, and will fail if either of the sub-patterns fails.
303		* FIRST is set to new by the operation. last is unmolested.
304		*/
305
306		int link_machines (int first, int last)
307	339	{
308	339	if (first == NIL)
309	0	return last;
310
311	339	else if (last == NIL)
312	0	return first;
313
314	339	else {
315	339	mkxtion (finalst[first], last);
316	339	finalst[first] = finalst[last];
317	339	lastst[first] = MAX (lastst[first], lastst[last]);
318	339	firstst[first] = MIN (firstst[first], firstst[last]);
319
320	339	return first;
321	339	}
322	339	}
323
324
325		/* mark_beginning_as_normal - mark each "beginning" state in a machine
326		* as being a "normal" (i.e., not trailing context-
327		* associated) states
328		*
329		* The "beginning" states are the epsilon closure of the first state
330		*/
331
332		void mark_beginning_as_normal (int mach)
333	0	{
334	0	switch (state_type[mach]) {
335	0	case STATE_NORMAL:
336		/* Oh, we've already visited here. */
337	0	return;
338
339	0	case STATE_TRAILING_CONTEXT:
340	0	state_type[mach] = STATE_NORMAL;
341
342	0	if (transchar[mach] == SYM_EPSILON) {
343	0	if (trans1[mach] != NO_TRANSITION)
344	0	mark_beginning_as_normal (trans1[mach]);
345
346	0	if (trans2[mach] != NO_TRANSITION)
347	0	mark_beginning_as_normal (trans2[mach]);
348	0	}
349	0	break;
350
351	0	default:
352	0	flexerror (_
353	0	("bad state type in mark_beginning_as_normal()"));
354	0	break;
355	0	}
356	0	}
357
358
359		/* mkbranch - make a machine that branches to two machines
360		*
361		* synopsis
362		*
363		* branch = mkbranch( first, second );
364		*
365		* branch - a machine which matches either first's pattern or second's
366		* first, second - machines whose patterns are to be or'ed (the \| operator)
367		*
368		* Note that first and second are NEITHER destroyed by the operation. Also,
369		* the resulting machine CANNOT be used with any other "mk" operation except
370		* more mkbranch's. Compare with mkor()
371		*/
372
373		int mkbranch (int first, int second)
374	381	{
375	381	int eps;
376
377	381	if (first == NO_TRANSITION)
378	0	return second;
379
380	381	else if (second == NO_TRANSITION)
381	0	return first;
382
383	381	eps = mkstate (SYM_EPSILON);
384
385	381	mkxtion (eps, first);
386	381	mkxtion (eps, second);
387
388	381	return eps;
389	381	}
390
391
392		/* mkclos - convert a machine into a closure
393		*
394		* synopsis
395		* new = mkclos( state );
396		*
397		* new - a new state which matches the closure of "state"
398		*/
399
400		int mkclos (int state)
401	2	{
402	2	return mkopt (mkposcl (state));
403	2	}
404
405
406		/* mkopt - make a machine optional
407		*
408		* synopsis
409		*
410		* new = mkopt( mach );
411		*
412		* new - a machine which optionally matches whatever mach matched
413		* mach - the machine to make optional
414		*
415		* notes:
416		* 1. mach must be the last machine created
417		* 2. mach is destroyed by the call
418		*/
419
420		int mkopt (int mach)
421	2	{
422	2	int eps;
423
424	2	if (!SUPER_FREE_EPSILON (finalst[mach])) {
425	2	eps = mkstate (SYM_EPSILON);
426	2	mach = link_machines (mach, eps);
427	2	}
428
429		/* Can't skimp on the following if FREE_EPSILON(mach) is true because
430		* some state interior to "mach" might point back to the beginning
431		* for a closure.
432		*/
433	2	eps = mkstate (SYM_EPSILON);
434	2	mach = link_machines (eps, mach);
435
436	2	mkxtion (mach, finalst[mach]);
437
438	2	return mach;
439	2	}
440
441
442		/* mkor - make a machine that matches either one of two machines
443		*
444		* synopsis
445		*
446		* new = mkor( first, second );
447		*
448		* new - a machine which matches either first's pattern or second's
449		* first, second - machines whose patterns are to be or'ed (the \| operator)
450		*
451		* note that first and second are both destroyed by the operation
452		* the code is rather convoluted because an attempt is made to minimize
453		* the number of epsilon states needed
454		*/
455
456		int mkor (int first, int second)
457	0	{
458	0	int eps, orend;
459
460	0	if (first == NIL)
461	0	return second;
462
463	0	else if (second == NIL)
464	0	return first;
465
466	0	else {
467		/* See comment in mkopt() about why we can't use the first
468		* state of "first" or "second" if they satisfy "FREE_EPSILON".
469		*/
470	0	eps = mkstate (SYM_EPSILON);
471
472	0	first = link_machines (eps, first);
473
474	0	mkxtion (first, second);
475
476	0	if (SUPER_FREE_EPSILON (finalst[first]) &&
477	0	accptnum[finalst[first]] == NIL) {
478	0	orend = finalst[first];
479	0	mkxtion (finalst[second], orend);
480	0	}
481
482	0	else if (SUPER_FREE_EPSILON (finalst[second]) &&
483	0	accptnum[finalst[second]] == NIL) {
484	0	orend = finalst[second];
485	0	mkxtion (finalst[first], orend);
486	0	}
487
488	0	else {
489	0	eps = mkstate (SYM_EPSILON);
490
491	0	first = link_machines (first, eps);
492	0	orend = finalst[first];
493
494	0	mkxtion (finalst[second], orend);
495	0	}
496	0	}
497
498	0	firstst[first] = MIN(firstst[first], firstst[second]);
499
500	0	finalst[first] = orend;
501	0	return first;
502	0	}
503
504
505		/* mkposcl - convert a machine into a positive closure
506		*
507		* synopsis
508		* new = mkposcl( state );
509		*
510		* new - a machine matching the positive closure of "state"
511		*/
512
513		int mkposcl (int state)
514	2	{
515	2	int eps;
516
517	2	if (SUPER_FREE_EPSILON (finalst[state])) {
518	1	mkxtion (finalst[state], state);
519	1	return state;
520	1	}
521
522	1	else {
523	1	eps = mkstate (SYM_EPSILON);
524	1	mkxtion (eps, state);
525	1	return link_machines (state, eps);
526	1	}
527	2	}
528
529
530		/* mkrep - make a replicated machine
531		*
532		* synopsis
533		* new = mkrep( mach, lb, ub );
534		*
535		* new - a machine that matches whatever "mach" matched from "lb"
536		* number of times to "ub" number of times
537		*
538		* note
539		* if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
540		*/
541
542		int mkrep (int mach, int lb, int ub)
543	0	{
544	0	int base_mach, tail, copy, i;
545
546	0	base_mach = copysingl (mach, lb - 1);
547
548	0	if (ub == INFINITE_REPEAT) {
549	0	copy = dupmachine (mach);
550	0	mach = link_machines (mach,
551	0	link_machines (base_mach,
552	0	mkclos (copy)));
553	0	}
554
555	0	else {
556	0	tail = mkstate (SYM_EPSILON);
557
558	0	for (i = lb; i < ub; ++i) {
559	0	copy = dupmachine (mach);
560	0	tail = mkopt (link_machines (copy, tail));
561	0	}
562
563	0	mach =
564	0	link_machines (mach,
565	0	link_machines (base_mach, tail));
566	0	}
567
568	0	return mach;
569	0	}
570
571
572		/* mkstate - create a state with a transition on a given symbol
573		*
574		* synopsis
575		*
576		* state = mkstate( sym );
577		*
578		* state - a new state matching sym
579		* sym - the symbol the new state is to have an out-transition on
580		*
581		* note that this routine makes new states in ascending order through the
582		* state array (and increments LASTNFA accordingly). The routine DUPMACHINE
583		* relies on machines being made in ascending order and that they are
584		* CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
585		* that it admittedly is)
586		*/
587
588		int mkstate (int sym)
589	3.42k	{
590	3.42k	if (++lastnfa >= current_mns) {
591	0	if ((current_mns += MNS_INCREMENT) >= maximum_mns)
592	0	lerr(_
593	0	("input rules are too complicated (>= %d NFA states)"),
594	0	current_mns);
595
596	0	++num_reallocs;
597
598	0	firstst = reallocate_integer_array (firstst, current_mns);
599	0	lastst = reallocate_integer_array (lastst, current_mns);
600	0	finalst = reallocate_integer_array (finalst, current_mns);
601	0	transchar =
602	0	reallocate_integer_array (transchar, current_mns);
603	0	trans1 = reallocate_integer_array (trans1, current_mns);
604	0	trans2 = reallocate_integer_array (trans2, current_mns);
605	0	accptnum =
606	0	reallocate_integer_array (accptnum, current_mns);
607	0	assoc_rule =
608	0	reallocate_integer_array (assoc_rule, current_mns);
609	0	state_type =
610	0	reallocate_integer_array (state_type, current_mns);
611	0	}
612
613	3.42k	firstst[lastnfa] = lastnfa;
614	3.42k	finalst[lastnfa] = lastnfa;
615	3.42k	lastst[lastnfa] = lastnfa;
616	3.42k	transchar[lastnfa] = sym;
617	3.42k	trans1[lastnfa] = NO_TRANSITION;
618	3.42k	trans2[lastnfa] = NO_TRANSITION;
619	3.42k	accptnum[lastnfa] = NIL;
620	3.42k	assoc_rule[lastnfa] = num_rules;
621	3.42k	state_type[lastnfa] = current_state_type;
622
623		/* Fix up equivalence classes base on this transition. Note that any
624		* character which has its own transition gets its own equivalence
625		* class. Thus only characters which are only in character classes
626		* have a chance at being in the same equivalence class. E.g. "a\|b"
627		* puts 'a' and 'b' into two different equivalence classes. "[ab]"
628		* puts them in the same equivalence class (barring other differences
629		* elsewhere in the input).
630		*/
631
632	3.42k	if (sym < 0) {
633		/* We don't have to update the equivalence classes since
634		* that was already done when the ccl was created for the
635		* first time.
636		*/
637	306	}
638
639	3.11k	else if (sym == SYM_EPSILON)
640	3.08k	++numeps;
641
642	28	else {
643	28	check_char (sym);
644
645	28	if (ctrl.useecs)
646		/* Map NUL's to csize. */
647	0	mkechar (sym ? sym : ctrl.csize, nextecm, ecgroup);
648	28	}
649
650	3.42k	return lastnfa;
651	3.42k	}
652
653
654		/* mkxtion - make a transition from one state to another
655		*
656		* synopsis
657		*
658		* mkxtion( statefrom, stateto );
659		*
660		* statefrom - the state from which the transition is to be made
661		* stateto - the state to which the transition is to be made
662		*/
663
664		void mkxtion (int statefrom, int stateto)
665	1.10k	{
666	1.10k	if (trans1[statefrom] == NO_TRANSITION)
667	720	trans1[statefrom] = stateto;
668
669	385	else if ((transchar[statefrom] != SYM_EPSILON) \|\|
670	385	(trans2[statefrom] != NO_TRANSITION))
671	0	flexfatal (_("found too many transitions in mkxtion()"));
672
673	385	else { /* second out-transition for an epsilon state */
674	385	++eps2;
675	385	trans2[statefrom] = stateto;
676	385	}
677	1.10k	}
678
679		/* new_rule - initialize for a new rule */
680
681		void new_rule (void)
682	307	{
683	307	if (++num_rules >= current_max_rules) {
684	0	++num_reallocs;
685	0	current_max_rules += MAX_RULES_INCREMENT;
686	0	rule_type = reallocate_integer_array (rule_type,
687	0	current_max_rules);
688	0	rule_linenum = reallocate_integer_array (rule_linenum,
689	0	current_max_rules);
690	0	rule_useful = reallocate_array(rule_useful,
691	0	current_max_rules, sizeof(char));
692	0	rule_has_nl = reallocate_array(rule_has_nl,
693	0	current_max_rules, sizeof(char));
694	0	}
695
696	307	if (num_rules > MAX_RULE)
697	0	lerr (_("too many rules (> %d)!"), MAX_RULE);
698
699	307	rule_linenum[num_rules] = linenum;
700	307	rule_useful[num_rules] = false;
701		rule_has_nl[num_rules] = false;
702	307	}