Line | Count | Source |
1 | | /* dfa - DFA construction routines */ |
2 | | |
3 | | /* Copyright (c) 1990 The Regents of the University of California. */ |
4 | | /* All rights reserved. */ |
5 | | |
6 | | /* This code is derived from software contributed to Berkeley by */ |
7 | | /* Vern Paxson. */ |
8 | | |
9 | | /* The United States Government has rights in this work pursuant */ |
10 | | /* to contract no. DE-AC03-76SF00098 between the United States */ |
11 | | /* Department of Energy and the University of California. */ |
12 | | |
13 | | /* Redistribution and use in source and binary forms, with or without */ |
14 | | /* modification, are permitted provided that the following conditions */ |
15 | | /* are met: */ |
16 | | |
17 | | /* 1. Redistributions of source code must retain the above copyright */ |
18 | | /* notice, this list of conditions and the following disclaimer. */ |
19 | | /* 2. Redistributions in binary form must reproduce the above copyright */ |
20 | | /* notice, this list of conditions and the following disclaimer in the */ |
21 | | /* documentation and/or other materials provided with the distribution. */ |
22 | | |
23 | | /* Neither the name of the University nor the names of its contributors */ |
24 | | /* may be used to endorse or promote products derived from this software */ |
25 | | /* without specific prior written permission. */ |
26 | | |
27 | | /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ |
28 | | /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ |
29 | | /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ |
30 | | /* PURPOSE. */ |
31 | | |
32 | | #include "flexdef.h" |
33 | | #include "tables.h" |
34 | | |
35 | | /* declare functions that have forward references */ |
36 | | |
37 | | static void dump_associated_rules(FILE *, int); |
38 | | static void dump_transitions(FILE *, int[]); |
39 | | static int snstods(int[], int, int[], int, int, int *); |
40 | | static void sympartition(int[], int, int[], int[]); |
41 | | static int symfollowset(int[], int, int, int[]); |
42 | | |
43 | | |
44 | | /* check_for_backing_up - check a DFA state for backing up |
45 | | * |
46 | | * synopsis |
47 | | * void check_for_backing_up( int ds, int state[numecs] ); |
48 | | * |
49 | | * ds is the number of the state to check and state[] is its out-transitions, |
50 | | * indexed by equivalence class. |
51 | | */ |
52 | | |
53 | | static void check_for_backing_up(int ds, int state[]) |
54 | 0 | { |
55 | 0 | if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) { /* state is non-accepting */ |
56 | 0 | ++num_backing_up; |
57 | |
|
58 | 0 | if (env.backing_up_report) { |
59 | 0 | fprintf (ctrl.backing_up_file, |
60 | 0 | _("State #%d is non-accepting -\n"), ds); |
61 | | |
62 | | /* identify the state */ |
63 | 0 | dump_associated_rules (ctrl.backing_up_file, ds); |
64 | | |
65 | | /* Now identify it further using the out- and |
66 | | * jam-transitions. |
67 | | */ |
68 | 0 | dump_transitions (ctrl.backing_up_file, state); |
69 | |
|
70 | 0 | putc ('\n', ctrl.backing_up_file); |
71 | 0 | } |
72 | 0 | } |
73 | 0 | } |
74 | | |
75 | | |
76 | | /* check_trailing_context - check to see if NFA state set constitutes |
77 | | * "dangerous" trailing context |
78 | | * |
79 | | * synopsis |
80 | | * void check_trailing_context( int nfa_states[num_states+1], int num_states, |
81 | | * int accset[nacc+1], int nacc ); |
82 | | * |
83 | | * NOTES |
84 | | * Trailing context is "dangerous" if both the head and the trailing |
85 | | * part are of variable size \and/ there's a DFA state which contains |
86 | | * both an accepting state for the head part of the rule and NFA states |
87 | | * which occur after the beginning of the trailing context. |
88 | | * |
89 | | * When such a rule is matched, it's impossible to tell if having been |
90 | | * in the DFA state indicates the beginning of the trailing context or |
91 | | * further-along scanning of the pattern. In these cases, a warning |
92 | | * message is issued. |
93 | | * |
94 | | * nfa_states[1 .. num_states] is the list of NFA states in the DFA. |
95 | | * accset[1 .. nacc] is the list of accepting numbers for the DFA state. |
96 | | */ |
97 | | |
98 | | static void check_trailing_context(int *nfa_states, int num_states, int *accset, int nacc) |
99 | 0 | { |
100 | 0 | int i, j; |
101 | |
|
102 | 0 | for (i = 1; i <= num_states; ++i) { |
103 | 0 | int ns = nfa_states[i]; |
104 | 0 | int type = state_type[ns]; |
105 | 0 | int ar = assoc_rule[ns]; |
106 | |
|
107 | 0 | if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) { /* do nothing */ |
108 | 0 | } |
109 | | |
110 | 0 | else if (type == STATE_TRAILING_CONTEXT) { |
111 | | /* Potential trouble. Scan set of accepting numbers |
112 | | * for the one marking the end of the "head". We |
113 | | * assume that this looping will be fairly cheap |
114 | | * since it's rare that an accepting number set |
115 | | * is large. |
116 | | */ |
117 | 0 | for (j = 1; j <= nacc; ++j) |
118 | 0 | if (accset[j] & YY_TRAILING_HEAD_MASK) { |
119 | 0 | line_warning (_ |
120 | 0 | ("dangerous trailing context"), |
121 | 0 | rule_linenum[ar]); |
122 | 0 | return; |
123 | 0 | } |
124 | 0 | } |
125 | 0 | } |
126 | 0 | } |
127 | | |
128 | | |
129 | | /* dump_associated_rules - list the rules associated with a DFA state |
130 | | * |
131 | | * Goes through the set of NFA states associated with the DFA and |
132 | | * extracts the first MAX_ASSOC_RULES unique rules, sorts them, |
133 | | * and writes a report to the given file. |
134 | | */ |
135 | | |
136 | | static void dump_associated_rules(FILE *file, int ds) |
137 | 0 | { |
138 | 0 | int i, j; |
139 | 0 | int num_associated_rules = 0; |
140 | 0 | int rule_set[MAX_ASSOC_RULES + 1]; |
141 | 0 | int *dset = dss[ds]; |
142 | 0 | int size = dfasiz[ds]; |
143 | |
|
144 | 0 | for (i = 1; i <= size; ++i) { |
145 | 0 | int rule_num = rule_linenum[assoc_rule[dset[i]]]; |
146 | |
|
147 | 0 | for (j = 1; j <= num_associated_rules; ++j) |
148 | 0 | if (rule_num == rule_set[j]) |
149 | 0 | break; |
150 | |
|
151 | 0 | if (j > num_associated_rules) { /* new rule */ |
152 | 0 | if (num_associated_rules < MAX_ASSOC_RULES) |
153 | 0 | rule_set[++num_associated_rules] = |
154 | 0 | rule_num; |
155 | 0 | } |
156 | 0 | } |
157 | |
|
158 | 0 | qsort (&rule_set [1], (size_t) num_associated_rules, sizeof (rule_set [1]), intcmp); |
159 | |
|
160 | 0 | fprintf (file, _(" associated rule line numbers:")); |
161 | |
|
162 | 0 | for (i = 1; i <= num_associated_rules; ++i) { |
163 | 0 | if (i % 8 == 1) |
164 | 0 | putc ('\n', file); |
165 | |
|
166 | 0 | fprintf (file, "\t%d", rule_set[i]); |
167 | 0 | } |
168 | |
|
169 | 0 | putc ('\n', file); |
170 | 0 | } |
171 | | |
172 | | |
173 | | /* dump_transitions - list the transitions associated with a DFA state |
174 | | * |
175 | | * synopsis |
176 | | * dump_transitions( FILE *file, int state[numecs] ); |
177 | | * |
178 | | * Goes through the set of out-transitions and lists them in human-readable |
179 | | * form (i.e., not as equivalence classes); also lists jam transitions |
180 | | * (i.e., all those which are not out-transitions, plus EOF). The dump |
181 | | * is done to the given file. |
182 | | */ |
183 | | |
184 | | static void dump_transitions(FILE *file, int state[]) |
185 | 0 | { |
186 | 0 | int i, ec; |
187 | 0 | int out_char_set[CSIZE]; |
188 | |
|
189 | 0 | for (i = 0; i < ctrl.csize; ++i) { |
190 | 0 | ec = ABS (ecgroup[i]); |
191 | 0 | out_char_set[i] = state[ec]; |
192 | 0 | } |
193 | |
|
194 | 0 | fprintf (file, _(" out-transitions: ")); |
195 | |
|
196 | 0 | list_character_set (file, out_char_set); |
197 | | |
198 | | /* now invert the members of the set to get the jam transitions */ |
199 | 0 | for (i = 0; i < ctrl.csize; ++i) |
200 | 0 | out_char_set[i] = !out_char_set[i]; |
201 | |
|
202 | 0 | fprintf (file, _("\n jam-transitions: EOF ")); |
203 | |
|
204 | 0 | list_character_set (file, out_char_set); |
205 | |
|
206 | 0 | putc ('\n', file); |
207 | 0 | } |
208 | | |
209 | | |
210 | | /* epsclosure - construct the epsilon closure of a set of ndfa states |
211 | | * |
212 | | * synopsis |
213 | | * int *epsclosure( int t[num_states], int *numstates_addr, |
214 | | * int accset[num_rules+1], int *nacc_addr, |
215 | | * int *hashval_addr ); |
216 | | * |
217 | | * NOTES |
218 | | * The epsilon closure is the set of all states reachable by an arbitrary |
219 | | * number of epsilon transitions, which themselves do not have epsilon |
220 | | * transitions going out, unioned with the set of states which have non-null |
221 | | * accepting numbers. t is an array of size numstates of nfa state numbers. |
222 | | * Upon return, t holds the epsilon closure and *numstates_addr is updated. |
223 | | * accset holds a list of the accepting numbers, and the size of accset is |
224 | | * given by *nacc_addr. t may be subjected to reallocation if it is not |
225 | | * large enough to hold the epsilon closure. |
226 | | * |
227 | | * hashval is the hash value for the dfa corresponding to the state set. |
228 | | */ |
229 | | |
230 | | static int *epsclosure(int *t, int *ns_addr, int accset[], int *nacc_addr, int *hv_addr) |
231 | 0 | { |
232 | 0 | int stkpos, ns, tsp; |
233 | 0 | int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; |
234 | 0 | int stkend, nstate; |
235 | 0 | static int did_stk_init = false, *stk; |
236 | |
|
237 | 0 | #define MARK_STATE(state) \ |
238 | 0 | do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0) |
239 | |
|
240 | 0 | #define IS_MARKED(state) (trans1[state] < 0) |
241 | |
|
242 | 0 | #define UNMARK_STATE(state) \ |
243 | 0 | do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0) |
244 | |
|
245 | 0 | #define CHECK_ACCEPT(state) \ |
246 | 0 | do{ \ |
247 | 0 | nfaccnum = accptnum[state]; \ |
248 | 0 | if ( nfaccnum != NIL ) \ |
249 | 0 | accset[++nacc] = nfaccnum; \ |
250 | 0 | }while(0) |
251 | |
|
252 | 0 | #define DO_REALLOCATION() \ |
253 | 0 | do { \ |
254 | 0 | current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ |
255 | 0 | ++num_reallocs; \ |
256 | 0 | t = reallocate_integer_array( t, current_max_dfa_size ); \ |
257 | 0 | stk = reallocate_integer_array( stk, current_max_dfa_size ); \ |
258 | 0 | }while(0) \ |
259 | 0 |
|
260 | 0 | #define PUT_ON_STACK(state) \ |
261 | 0 | do { \ |
262 | 0 | if ( ++stkend >= current_max_dfa_size ) \ |
263 | 0 | DO_REALLOCATION(); \ |
264 | 0 | stk[stkend] = state; \ |
265 | 0 | MARK_STATE(state); \ |
266 | 0 | }while(0) |
267 | |
|
268 | 0 | #define ADD_STATE(state) \ |
269 | 0 | do { \ |
270 | 0 | if ( ++numstates >= current_max_dfa_size ) \ |
271 | 0 | DO_REALLOCATION(); \ |
272 | 0 | t[numstates] = state; \ |
273 | 0 | hashval += state; \ |
274 | 0 | }while(0) |
275 | |
|
276 | 0 | #define STACK_STATE(state) \ |
277 | 0 | do { \ |
278 | 0 | PUT_ON_STACK(state); \ |
279 | 0 | CHECK_ACCEPT(state); \ |
280 | 0 | if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ |
281 | 0 | ADD_STATE(state); \ |
282 | 0 | }while(0) |
283 | | |
284 | |
|
285 | 0 | if (!did_stk_init) { |
286 | 0 | stk = allocate_integer_array (current_max_dfa_size); |
287 | 0 | did_stk_init = true; |
288 | 0 | } |
289 | |
|
290 | 0 | nacc = stkend = hashval = 0; |
291 | |
|
292 | 0 | for (nstate = 1; nstate <= numstates; ++nstate) { |
293 | 0 | ns = t[nstate]; |
294 | | |
295 | | /* The state could be marked if we've already pushed it onto |
296 | | * the stack. |
297 | | */ |
298 | 0 | if (!IS_MARKED (ns)) { |
299 | 0 | PUT_ON_STACK (ns); |
300 | 0 | CHECK_ACCEPT (ns); |
301 | 0 | hashval += ns; |
302 | 0 | } |
303 | 0 | } |
304 | |
|
305 | 0 | for (stkpos = 1; stkpos <= stkend; ++stkpos) { |
306 | 0 | ns = stk[stkpos]; |
307 | 0 | transsym = transchar[ns]; |
308 | |
|
309 | 0 | if (transsym == SYM_EPSILON) { |
310 | 0 | tsp = trans1[ns] + MARKER_DIFFERENCE; |
311 | |
|
312 | 0 | if (tsp != NO_TRANSITION) { |
313 | 0 | if (!IS_MARKED (tsp)) |
314 | 0 | STACK_STATE (tsp); |
315 | |
|
316 | 0 | tsp = trans2[ns]; |
317 | |
|
318 | 0 | if (tsp != NO_TRANSITION |
319 | 0 | && !IS_MARKED (tsp)) |
320 | 0 | STACK_STATE (tsp); |
321 | 0 | } |
322 | 0 | } |
323 | 0 | } |
324 | | |
325 | | /* Clear out "visit" markers. */ |
326 | |
|
327 | 0 | for (stkpos = 1; stkpos <= stkend; ++stkpos) { |
328 | 0 | if (IS_MARKED (stk[stkpos])) |
329 | 0 | UNMARK_STATE (stk[stkpos]); |
330 | 0 | else |
331 | 0 | flexfatal (_ |
332 | 0 | ("consistency check failed in epsclosure()")); |
333 | 0 | } |
334 | |
|
335 | 0 | *ns_addr = numstates; |
336 | 0 | *hv_addr = hashval; |
337 | 0 | *nacc_addr = nacc; |
338 | |
|
339 | 0 | return t; |
340 | 0 | } |
341 | | |
342 | | |
343 | | /* increase_max_dfas - increase the maximum number of DFAs */ |
344 | | |
345 | | void increase_max_dfas (void) |
346 | 0 | { |
347 | 0 | current_max_dfas += MAX_DFAS_INCREMENT; |
348 | |
|
349 | 0 | ++num_reallocs; |
350 | |
|
351 | 0 | base = reallocate_integer_array (base, current_max_dfas); |
352 | 0 | def = reallocate_integer_array (def, current_max_dfas); |
353 | 0 | dfasiz = reallocate_integer_array (dfasiz, current_max_dfas); |
354 | 0 | accsiz = reallocate_integer_array (accsiz, current_max_dfas); |
355 | 0 | dhash = reallocate_integer_array (dhash, current_max_dfas); |
356 | 0 | dss = reallocate_int_ptr_array (dss, current_max_dfas); |
357 | 0 | dfaacc = reallocate_array(dfaacc, current_max_dfas, |
358 | 0 | sizeof(union dfaacc_union)); |
359 | |
|
360 | 0 | if (nultrans) |
361 | 0 | nultrans = |
362 | 0 | reallocate_integer_array (nultrans, |
363 | 0 | current_max_dfas); |
364 | 0 | } |
365 | | |
366 | | |
367 | | /* ntod - convert an ndfa to a dfa |
368 | | * |
369 | | * Creates the dfa corresponding to the ndfa we've constructed. The |
370 | | * dfa starts out in state #1. |
371 | | * |
372 | | * Return the amount of space, in bytes, allocated for the next table. |
373 | | * In some modes this can be zero. |
374 | | */ |
375 | | |
376 | | size_t ntod (void) |
377 | 0 | { |
378 | 0 | int *accset, ds, nacc, newds; |
379 | 0 | int sym, hashval, numstates, dsize; |
380 | 0 | int num_full_table_rows=0; /* used only for -f */ |
381 | 0 | int *nset, *dset; |
382 | 0 | int targptr, totaltrans, i, comstate, comfreq, targ; |
383 | 0 | int symlist[CSIZE + 1]; |
384 | 0 | int num_start_states; |
385 | 0 | int todo_head, todo_next; |
386 | |
|
387 | 0 | struct yytbl_data *yynxt_tbl = 0; |
388 | 0 | flex_int32_t *yynxt_data = 0, yynxt_curr = 0; |
389 | | |
390 | | /* Note that the following are indexed by *equivalence classes* |
391 | | * and not by characters. Since equivalence classes are indexed |
392 | | * beginning with 1, even if the scanner accepts NUL's, this |
393 | | * means that (since every character is potentially in its own |
394 | | * equivalence class) these arrays must have room for indices |
395 | | * from 1 to CSIZE, so their size must be CSIZE + 1. |
396 | | */ |
397 | 0 | int duplist[CSIZE + 1], state[CSIZE + 1]; |
398 | 0 | int targfreq[CSIZE + 1] = {0}, targstate[CSIZE + 1]; |
399 | | |
400 | | /* accset needs to be large enough to hold all of the rules present |
401 | | * in the input, *plus* their YY_TRAILING_HEAD_MASK variants. |
402 | | */ |
403 | 0 | accset = allocate_integer_array ((num_rules + 1) * 2); |
404 | 0 | nset = allocate_integer_array (current_max_dfa_size); |
405 | | |
406 | | /* The "todo" queue is represented by the head, which is the DFA |
407 | | * state currently being processed, and the "next", which is the |
408 | | * next DFA state number available (not in use). We depend on the |
409 | | * fact that snstods() returns DFA's \in increasing order/, and thus |
410 | | * need only know the bounds of the dfas to be processed. |
411 | | */ |
412 | 0 | todo_head = todo_next = 0; |
413 | |
|
414 | 0 | for (i = 0; i <= ctrl.csize; ++i) { |
415 | 0 | duplist[i] = NIL; |
416 | 0 | symlist[i] = false; |
417 | 0 | } |
418 | |
|
419 | 0 | for (i = 0; i <= num_rules; ++i) |
420 | 0 | accset[i] = NIL; |
421 | |
|
422 | 0 | if (env.trace) { |
423 | 0 | dumpnfa (scset[1]); |
424 | 0 | fputs (_("\n\nDFA Dump:\n\n"), stderr); |
425 | 0 | } |
426 | |
|
427 | 0 | inittbl (); |
428 | | |
429 | | /* Check to see whether we should build a separate table for |
430 | | * transitions on NUL characters. We don't do this for full-speed |
431 | | * (-F) scanners, since for them we don't have a simple state |
432 | | * number lying around with which to index the table. We also |
433 | | * don't bother doing it for scanners unless (1) NUL is in its own |
434 | | * equivalence class (indicated by a positive value of |
435 | | * ecgroup[NUL]), (2) NUL's equivalence class is the last |
436 | | * equivalence class, and (3) the number of equivalence classes is |
437 | | * the same as the number of characters. This latter case comes |
438 | | * about when useecs is false or when it's true but every character |
439 | | * still manages to land in its own class (unlikely, but it's |
440 | | * cheap to check for). If all these things are true then the |
441 | | * character code needed to represent NUL's equivalence class for |
442 | | * indexing the tables is going to take one more bit than the |
443 | | * number of characters, and therefore we won't be assured of |
444 | | * being able to fit it into a YY_CHAR variable. This rules out |
445 | | * storing the transitions in a compressed table, since the code |
446 | | * for interpreting them uses a YY_CHAR variable (perhaps it |
447 | | * should just use an integer, though; this is worth pondering ... |
448 | | * ###). |
449 | | * |
450 | | * Finally, for full tables, we want the number of entries in the |
451 | | * table to be a power of two so the array references go fast (it |
452 | | * will just take a shift to compute the major index). If |
453 | | * encoding NUL's transitions in the table will spoil this, we |
454 | | * give it its own table (note that this will be the case if we're |
455 | | * not using equivalence classes). |
456 | | */ |
457 | | |
458 | | /* Note that the test for ecgroup[0] == numecs below accomplishes |
459 | | * both (1) and (2) above |
460 | | * |
461 | | * New way: we will only use NUL table for fulltbl, because the |
462 | | * scanner will use an integer instead of YY_CHAR as noted above |
463 | | */ |
464 | 0 | if (ctrl.fulltbl && ecgroup[0] == numecs && is_power_of_2(numecs)) |
465 | 0 | nultrans = allocate_integer_array (current_max_dfas); |
466 | |
|
467 | 0 | if (ctrl.fullspd) { |
468 | 0 | for (i = 0; i <= numecs; ++i) |
469 | 0 | state[i] = 0; |
470 | |
|
471 | 0 | place_state (state, 0, 0); |
472 | 0 | dfaacc[0].dfaacc_state = 0; |
473 | 0 | } |
474 | | |
475 | 0 | else if (ctrl.fulltbl) { |
476 | 0 | if (nultrans) |
477 | | /* We won't be including NUL's transitions in the |
478 | | * table, so build it for entries from 0 .. numecs - 1. |
479 | | */ |
480 | 0 | num_full_table_rows = numecs; |
481 | | |
482 | 0 | else |
483 | | /* Take into account the fact that we'll be including |
484 | | * the NUL entries in the transition table. Build it |
485 | | * from 0 .. numecs. |
486 | | */ |
487 | 0 | num_full_table_rows = numecs + 1; |
488 | | |
489 | | /* Begin generating yy_nxt[][] |
490 | | * This spans the entire LONG function. |
491 | | * This table is tricky because we don't know how big it will be. |
492 | | * So we'll have to realloc() on the way... |
493 | | * we'll wait until we can calculate yynxt_tbl->td_hilen. |
494 | | */ |
495 | 0 | yynxt_tbl = calloc(1, sizeof (struct yytbl_data)); |
496 | | |
497 | 0 | yytbl_data_init (yynxt_tbl, YYTD_ID_NXT); |
498 | 0 | yynxt_tbl->td_hilen = 1; |
499 | 0 | yynxt_tbl->td_lolen = (flex_uint32_t) num_full_table_rows; |
500 | 0 | yynxt_tbl->td_data = yynxt_data = |
501 | 0 | calloc(yynxt_tbl->td_lolen * |
502 | 0 | yynxt_tbl->td_hilen, |
503 | 0 | sizeof (flex_int32_t)); |
504 | 0 | yynxt_curr = 0; |
505 | |
|
506 | 0 | struct packtype_t *ptype = optimize_pack(0); |
507 | | /* Note: Used when ctrl.fulltbl is on. Alternately defined elsewhere */ |
508 | 0 | out_str ("m4_define([[M4_HOOK_NXT_TYPE]], [[%s]])", ptype->name); |
509 | 0 | out_dec ("m4_define([[M4_HOOK_NXT_ROWS]], [[%d]])", num_full_table_rows); |
510 | 0 | outn ("m4_define([[M4_HOOK_NXT_BODY]], [[m4_dnl"); |
511 | 0 | outn ("M4_HOOK_TABLE_OPENER"); |
512 | 0 | if (gentables) |
513 | 0 | outn ("M4_HOOK_TABLE_OPENER"); |
514 | | |
515 | | /* Generate 0 entries for state #0. */ |
516 | 0 | for (i = 0; i < num_full_table_rows; ++i) { |
517 | 0 | mk2data (0); |
518 | 0 | yynxt_data[yynxt_curr++] = 0; |
519 | 0 | } |
520 | |
|
521 | 0 | if (gentables) { |
522 | 0 | dataflush (); |
523 | 0 | outn ("M4_HOOK_TABLE_CONTINUE"); |
524 | 0 | } |
525 | 0 | } |
526 | | |
527 | | /* Create the first states. */ |
528 | |
|
529 | 0 | num_start_states = lastsc * 2; |
530 | |
|
531 | 0 | for (i = 1; i <= num_start_states; ++i) { |
532 | 0 | numstates = 1; |
533 | | |
534 | | /* For each start condition, make one state for the case when |
535 | | * we're at the beginning of the line (the '^' operator) and |
536 | | * one for the case when we're not. |
537 | | */ |
538 | 0 | if (i % 2 == 1) |
539 | 0 | nset[numstates] = scset[(i / 2) + 1]; |
540 | 0 | else |
541 | 0 | nset[numstates] = |
542 | 0 | mkbranch (scbol[i / 2], scset[i / 2]); |
543 | |
|
544 | 0 | nset = epsclosure (nset, &numstates, accset, &nacc, |
545 | 0 | &hashval); |
546 | |
|
547 | 0 | if (snstods (nset, numstates, accset, nacc, hashval, &ds)) { |
548 | 0 | numas += nacc; |
549 | 0 | totnst += numstates; |
550 | 0 | ++todo_next; |
551 | |
|
552 | 0 | if (variable_trailing_context_rules && nacc > 0) |
553 | 0 | check_trailing_context (nset, numstates, |
554 | 0 | accset, nacc); |
555 | 0 | } |
556 | 0 | } |
557 | |
|
558 | 0 | if (!ctrl.fullspd) { |
559 | 0 | if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state)) |
560 | 0 | flexfatal (_ |
561 | 0 | ("could not create unique end-of-buffer state")); |
562 | |
|
563 | 0 | ++numas; |
564 | 0 | ++num_start_states; |
565 | 0 | ++todo_next; |
566 | 0 | } |
567 | | |
568 | |
|
569 | 0 | while (todo_head < todo_next) { |
570 | 0 | targptr = 0; |
571 | 0 | totaltrans = 0; |
572 | |
|
573 | 0 | for (i = 1; i <= numecs; ++i) |
574 | 0 | state[i] = 0; |
575 | |
|
576 | 0 | ds = ++todo_head; |
577 | |
|
578 | 0 | dset = dss[ds]; |
579 | 0 | dsize = dfasiz[ds]; |
580 | |
|
581 | 0 | if (env.trace) |
582 | 0 | fprintf (stderr, _("state # %d:\n"), ds); |
583 | |
|
584 | 0 | sympartition (dset, dsize, symlist, duplist); |
585 | |
|
586 | 0 | for (sym = 1; sym <= numecs; ++sym) { |
587 | 0 | if (symlist[sym]) { |
588 | 0 | symlist[sym] = 0; |
589 | |
|
590 | 0 | if (duplist[sym] == NIL) { |
591 | | /* Symbol has unique out-transitions. */ |
592 | 0 | numstates = |
593 | 0 | symfollowset (dset, dsize, |
594 | 0 | sym, nset); |
595 | 0 | nset = epsclosure (nset, |
596 | 0 | &numstates, |
597 | 0 | accset, &nacc, |
598 | 0 | &hashval); |
599 | |
|
600 | 0 | if (snstods |
601 | 0 | (nset, numstates, accset, nacc, |
602 | 0 | hashval, &newds)) { |
603 | 0 | totnst = totnst + |
604 | 0 | numstates; |
605 | 0 | ++todo_next; |
606 | 0 | numas += nacc; |
607 | |
|
608 | 0 | if (variable_trailing_context_rules && nacc > 0) |
609 | 0 | check_trailing_context |
610 | 0 | (nset, |
611 | 0 | numstates, |
612 | 0 | accset, |
613 | 0 | nacc); |
614 | 0 | } |
615 | |
|
616 | 0 | state[sym] = newds; |
617 | |
|
618 | 0 | if (env.trace) |
619 | 0 | fprintf (stderr, |
620 | 0 | "\t%d\t%d\n", sym, |
621 | 0 | newds); |
622 | |
|
623 | 0 | targfreq[++targptr] = 1; |
624 | 0 | targstate[targptr] = newds; |
625 | 0 | ++numuniq; |
626 | 0 | } |
627 | | |
628 | 0 | else { |
629 | | /* sym's equivalence class has the same |
630 | | * transitions as duplist(sym)'s |
631 | | * equivalence class. |
632 | | */ |
633 | 0 | targ = state[duplist[sym]]; |
634 | 0 | state[sym] = targ; |
635 | |
|
636 | 0 | if (env.trace) |
637 | 0 | fprintf (stderr, |
638 | 0 | "\t%d\t%d\n", sym, |
639 | 0 | targ); |
640 | | |
641 | | /* Update frequency count for |
642 | | * destination state. |
643 | | */ |
644 | |
|
645 | 0 | i = 0; |
646 | 0 | while (targstate[++i] != targ) ; |
647 | |
|
648 | 0 | ++targfreq[i]; |
649 | 0 | ++numdup; |
650 | 0 | } |
651 | |
|
652 | 0 | ++totaltrans; |
653 | 0 | duplist[sym] = NIL; |
654 | 0 | } |
655 | 0 | } |
656 | | |
657 | |
|
658 | 0 | numsnpairs += totaltrans; |
659 | |
|
660 | 0 | if (ds > num_start_states) |
661 | 0 | check_for_backing_up (ds, state); |
662 | |
|
663 | 0 | if (nultrans) { |
664 | 0 | nultrans[ds] = state[NUL_ec]; |
665 | 0 | state[NUL_ec] = 0; /* remove transition */ |
666 | 0 | } |
667 | |
|
668 | 0 | if (ctrl.fulltbl) { |
669 | | |
670 | | /* Each time we hit here, it's another td_hilen, so we realloc. */ |
671 | 0 | yynxt_tbl->td_hilen++; |
672 | 0 | yynxt_tbl->td_data = yynxt_data = |
673 | 0 | realloc (yynxt_data, |
674 | 0 | yynxt_tbl->td_hilen * |
675 | 0 | yynxt_tbl->td_lolen * |
676 | 0 | sizeof (flex_int32_t)); |
677 | 0 | if (gentables) |
678 | 0 | outn ("M4_HOOK_TABLE_OPENER"); |
679 | | |
680 | | /* Supply array's 0-element. */ |
681 | 0 | if (ds == end_of_buffer_state) { |
682 | 0 | mk2data (-end_of_buffer_state); |
683 | 0 | yynxt_data[yynxt_curr++] = |
684 | 0 | -end_of_buffer_state; |
685 | 0 | } |
686 | 0 | else { |
687 | 0 | mk2data (end_of_buffer_state); |
688 | 0 | yynxt_data[yynxt_curr++] = |
689 | 0 | end_of_buffer_state; |
690 | 0 | } |
691 | |
|
692 | 0 | for (i = 1; i < num_full_table_rows; ++i) { |
693 | | /* Jams are marked by negative of state |
694 | | * number. |
695 | | */ |
696 | 0 | mk2data (state[i] ? state[i] : -ds); |
697 | 0 | yynxt_data[yynxt_curr++] = |
698 | 0 | state[i] ? state[i] : -ds; |
699 | 0 | } |
700 | |
|
701 | 0 | if (gentables) { |
702 | 0 | dataflush (); |
703 | 0 | outn ("M4_HOOK_TABLE_CONTINUE"); |
704 | 0 | } |
705 | 0 | } |
706 | | |
707 | 0 | else if (ctrl.fullspd) |
708 | 0 | place_state (state, ds, totaltrans); |
709 | | |
710 | 0 | else if (ds == end_of_buffer_state) |
711 | | /* Special case this state to make sure it does what |
712 | | * it's supposed to, i.e., jam on end-of-buffer. |
713 | | */ |
714 | 0 | stack1 (ds, 0, 0, JAMSTATE); |
715 | | |
716 | 0 | else { /* normal, compressed state */ |
717 | | |
718 | | /* Determine which destination state is the most |
719 | | * common, and how many transitions to it there are. |
720 | | */ |
721 | |
|
722 | 0 | comfreq = 0; |
723 | 0 | comstate = 0; |
724 | |
|
725 | 0 | for (i = 1; i <= targptr; ++i) |
726 | 0 | if (targfreq[i] > comfreq) { |
727 | 0 | comfreq = targfreq[i]; |
728 | 0 | comstate = targstate[i]; |
729 | 0 | } |
730 | |
|
731 | 0 | bldtbl (state, ds, totaltrans, comstate, comfreq); |
732 | 0 | } |
733 | 0 | } |
734 | |
|
735 | 0 | if (ctrl.fulltbl) { |
736 | 0 | dataend ("M4_HOOK_TABLE_CLOSER"); |
737 | 0 | outn("/* body */]])"); |
738 | 0 | if (tablesext) { |
739 | 0 | yytbl_data_compress (yynxt_tbl); |
740 | 0 | if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0) |
741 | 0 | flexerror (_ |
742 | 0 | ("Could not write yynxt_tbl[][]")); |
743 | 0 | } |
744 | 0 | if (yynxt_tbl) { |
745 | 0 | yytbl_data_destroy (yynxt_tbl); |
746 | 0 | yynxt_tbl = 0; |
747 | 0 | } |
748 | 0 | } |
749 | | |
750 | 0 | else if (!ctrl.fullspd) { |
751 | 0 | cmptmps (); /* create compressed template entries */ |
752 | | |
753 | | /* Create tables for all the states with only one |
754 | | * out-transition. |
755 | | */ |
756 | 0 | while (onesp > 0) { |
757 | 0 | mk1tbl (onestate[onesp], onesym[onesp], |
758 | 0 | onenext[onesp], onedef[onesp]); |
759 | 0 | --onesp; |
760 | 0 | } |
761 | |
|
762 | 0 | mkdeftbl (); |
763 | 0 | } |
764 | | |
765 | |
|
766 | 0 | free(accset); |
767 | 0 | free(nset); |
768 | |
|
769 | 0 | return (yynxt_tbl != NULL) ? (yynxt_tbl->td_hilen * sizeof(int32_t)) : 0; |
770 | 0 | } |
771 | | |
772 | | |
773 | | /* snstods - converts a set of ndfa states into a dfa state |
774 | | * |
775 | | * synopsis |
776 | | * is_new_state = snstods( int sns[numstates], int numstates, |
777 | | * int accset[num_rules+1], int nacc, |
778 | | * int hashval, int *newds_addr ); |
779 | | * |
780 | | * On return, the dfa state number is in newds. |
781 | | */ |
782 | | |
783 | | static int snstods(int sns[], int numstates, int accset[], int nacc, int hashval, int *newds_addr) |
784 | 0 | { |
785 | 0 | int didsort = 0; |
786 | 0 | int i, j; |
787 | 0 | int newds, *oldsns; |
788 | |
|
789 | 0 | for (i = 1; i <= lastdfa; ++i) |
790 | 0 | if (hashval == dhash[i]) { |
791 | 0 | if (numstates == dfasiz[i]) { |
792 | 0 | oldsns = dss[i]; |
793 | |
|
794 | 0 | if (!didsort) { |
795 | | /* We sort the states in sns so we |
796 | | * can compare it to oldsns quickly. |
797 | | */ |
798 | 0 | qsort (&sns [1], (size_t) numstates, sizeof (sns [1]), intcmp); |
799 | 0 | didsort = 1; |
800 | 0 | } |
801 | |
|
802 | 0 | for (j = 1; j <= numstates; ++j) |
803 | 0 | if (sns[j] != oldsns[j]) |
804 | 0 | break; |
805 | |
|
806 | 0 | if (j > numstates) { |
807 | 0 | ++dfaeql; |
808 | 0 | *newds_addr = i; |
809 | 0 | return 0; |
810 | 0 | } |
811 | | |
812 | 0 | ++hshcol; |
813 | 0 | } |
814 | | |
815 | 0 | else |
816 | 0 | ++hshsave; |
817 | 0 | } |
818 | | |
819 | | /* Make a new dfa. */ |
820 | | |
821 | 0 | if (++lastdfa >= current_max_dfas) |
822 | 0 | increase_max_dfas (); |
823 | |
|
824 | 0 | newds = lastdfa; |
825 | |
|
826 | 0 | dss[newds] = allocate_integer_array (numstates + 1); |
827 | | |
828 | | /* If we haven't already sorted the states in sns, we do so now, |
829 | | * so that future comparisons with it can be made quickly. |
830 | | */ |
831 | |
|
832 | 0 | if (!didsort) |
833 | 0 | qsort (&sns [1], (size_t) numstates, sizeof (sns [1]), intcmp); |
834 | |
|
835 | 0 | for (i = 1; i <= numstates; ++i) |
836 | 0 | dss[newds][i] = sns[i]; |
837 | |
|
838 | 0 | dfasiz[newds] = numstates; |
839 | 0 | dhash[newds] = hashval; |
840 | |
|
841 | 0 | if (nacc == 0) { |
842 | 0 | if (reject) |
843 | 0 | dfaacc[newds].dfaacc_set = NULL; |
844 | 0 | else |
845 | 0 | dfaacc[newds].dfaacc_state = 0; |
846 | |
|
847 | 0 | accsiz[newds] = 0; |
848 | 0 | } |
849 | | |
850 | 0 | else if (reject) { |
851 | | /* We sort the accepting set in increasing order so the |
852 | | * disambiguating rule that the first rule listed is considered |
853 | | * match in the event of ties will work. |
854 | | */ |
855 | |
|
856 | 0 | qsort (&accset [1], (size_t) nacc, sizeof (accset [1]), intcmp); |
857 | |
|
858 | 0 | dfaacc[newds].dfaacc_set = |
859 | 0 | allocate_integer_array (nacc + 1); |
860 | | |
861 | | /* Save the accepting set for later */ |
862 | 0 | for (i = 1; i <= nacc; ++i) { |
863 | 0 | dfaacc[newds].dfaacc_set[i] = accset[i]; |
864 | |
|
865 | 0 | if (accset[i] <= num_rules) |
866 | | /* Who knows, perhaps a REJECT can yield |
867 | | * this rule. |
868 | | */ |
869 | 0 | rule_useful[accset[i]] = true; |
870 | 0 | } |
871 | |
|
872 | 0 | accsiz[newds] = nacc; |
873 | 0 | } |
874 | | |
875 | 0 | else { |
876 | | /* Find lowest numbered rule so the disambiguating rule |
877 | | * will work. |
878 | | */ |
879 | 0 | j = num_rules + 1; |
880 | |
|
881 | 0 | for (i = 1; i <= nacc; ++i) |
882 | 0 | if (accset[i] < j) |
883 | 0 | j = accset[i]; |
884 | |
|
885 | 0 | dfaacc[newds].dfaacc_state = j; |
886 | |
|
887 | 0 | if (j <= num_rules) |
888 | 0 | rule_useful[j] = true; |
889 | 0 | } |
890 | |
|
891 | 0 | *newds_addr = newds; |
892 | |
|
893 | 0 | return 1; |
894 | 0 | } |
895 | | |
896 | | |
897 | | /* symfollowset - follow the symbol transitions one step |
898 | | * |
899 | | * synopsis |
900 | | * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, |
901 | | * int transsym, int nset[current_max_dfa_size] ); |
902 | | */ |
903 | | |
904 | | static int symfollowset(int ds[], int dsize, int transsym, int nset[]) |
905 | 0 | { |
906 | 0 | int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; |
907 | |
|
908 | 0 | numstates = 0; |
909 | |
|
910 | 0 | for (i = 1; i <= dsize; ++i) { /* for each nfa state ns in the state set of ds */ |
911 | 0 | ns = ds[i]; |
912 | 0 | sym = transchar[ns]; |
913 | 0 | tsp = trans1[ns]; |
914 | |
|
915 | 0 | if (sym < 0) { /* it's a character class */ |
916 | 0 | sym = -sym; |
917 | 0 | ccllist = cclmap[sym]; |
918 | 0 | lenccl = ccllen[sym]; |
919 | |
|
920 | 0 | if (cclng[sym]) { |
921 | 0 | for (j = 0; j < lenccl; ++j) { |
922 | | /* Loop through negated character |
923 | | * class. |
924 | | */ |
925 | 0 | ch = ccltbl[ccllist + j]; |
926 | |
|
927 | 0 | if (ch == 0) |
928 | 0 | ch = NUL_ec; |
929 | |
|
930 | 0 | if (ch > transsym) |
931 | | /* Transsym isn't in negated |
932 | | * ccl. |
933 | | */ |
934 | 0 | break; |
935 | | |
936 | 0 | else if (ch == transsym) |
937 | | /* next 2 */ |
938 | 0 | goto bottom; |
939 | 0 | } |
940 | | |
941 | | /* Didn't find transsym in ccl. */ |
942 | 0 | nset[++numstates] = tsp; |
943 | 0 | } |
944 | | |
945 | 0 | else |
946 | 0 | for (j = 0; j < lenccl; ++j) { |
947 | 0 | ch = ccltbl[ccllist + j]; |
948 | |
|
949 | 0 | if (ch == 0) |
950 | 0 | ch = NUL_ec; |
951 | |
|
952 | 0 | if (ch > transsym) |
953 | 0 | break; |
954 | 0 | else if (ch == transsym) { |
955 | 0 | nset[++numstates] = tsp; |
956 | 0 | break; |
957 | 0 | } |
958 | 0 | } |
959 | 0 | } |
960 | | |
961 | 0 | else if (sym == SYM_EPSILON) { /* do nothing */ |
962 | 0 | } |
963 | | |
964 | 0 | else if (ABS (ecgroup[sym]) == transsym) |
965 | 0 | nset[++numstates] = tsp; |
966 | | |
967 | 0 | bottom:; |
968 | 0 | } |
969 | | |
970 | 0 | return numstates; |
971 | 0 | } |
972 | | |
973 | | |
974 | | /* sympartition - partition characters with same out-transitions |
975 | | * |
976 | | * synopsis |
977 | | * sympartition( int ds[current_max_dfa_size], int numstates, |
978 | | * int symlist[numecs], int duplist[numecs] ); |
979 | | */ |
980 | | |
981 | | static void sympartition(int ds[], int numstates, int symlist[], int duplist[]) |
982 | 0 | { |
983 | 0 | int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; |
984 | | |
985 | | /* Partitioning is done by creating equivalence classes for those |
986 | | * characters which have out-transitions from the given state. Thus |
987 | | * we are really creating equivalence classes of equivalence classes. |
988 | | */ |
989 | |
|
990 | 0 | for (i = 1; i <= numecs; ++i) { /* initialize equivalence class list */ |
991 | 0 | duplist[i] = i - 1; |
992 | 0 | dupfwd[i] = i + 1; |
993 | 0 | } |
994 | |
|
995 | 0 | duplist[1] = NIL; |
996 | 0 | dupfwd[numecs] = NIL; |
997 | |
|
998 | 0 | for (i = 1; i <= numstates; ++i) { |
999 | 0 | ns = ds[i]; |
1000 | 0 | tch = transchar[ns]; |
1001 | |
|
1002 | 0 | if (tch != SYM_EPSILON) { |
1003 | 0 | if (tch < -lastccl || tch >= ctrl.csize) { |
1004 | 0 | flexfatal (_ |
1005 | 0 | ("bad transition character detected in sympartition()")); |
1006 | 0 | } |
1007 | |
|
1008 | 0 | if (tch >= 0) { /* character transition */ |
1009 | 0 | int ec = ecgroup[tch]; |
1010 | |
|
1011 | 0 | mkechar (ec, dupfwd, duplist); |
1012 | 0 | symlist[ec] = 1; |
1013 | 0 | } |
1014 | | |
1015 | 0 | else { /* character class */ |
1016 | 0 | tch = -tch; |
1017 | |
|
1018 | 0 | lenccl = ccllen[tch]; |
1019 | 0 | cclp = cclmap[tch]; |
1020 | 0 | mkeccl (ccltbl + cclp, lenccl, dupfwd, |
1021 | 0 | duplist, numecs, NUL_ec); |
1022 | |
|
1023 | 0 | if (cclng[tch]) { |
1024 | 0 | j = 0; |
1025 | |
|
1026 | 0 | for (k = 0; k < lenccl; ++k) { |
1027 | 0 | ich = ccltbl[cclp + k]; |
1028 | |
|
1029 | 0 | if (ich == 0) |
1030 | 0 | ich = NUL_ec; |
1031 | |
|
1032 | 0 | for (++j; j < ich; ++j) |
1033 | 0 | symlist[j] = 1; |
1034 | 0 | } |
1035 | |
|
1036 | 0 | for (++j; j <= numecs; ++j) |
1037 | 0 | symlist[j] = 1; |
1038 | 0 | } |
1039 | | |
1040 | 0 | else |
1041 | 0 | for (k = 0; k < lenccl; ++k) { |
1042 | 0 | ich = ccltbl[cclp + k]; |
1043 | |
|
1044 | 0 | if (ich == 0) |
1045 | 0 | ich = NUL_ec; |
1046 | |
|
1047 | 0 | symlist[ich] = 1; |
1048 | 0 | } |
1049 | 0 | } |
1050 | 0 | } |
1051 | 0 | } |
1052 | 0 | } |