Line  | Count  | Source  | 
1  |  | /*  | 
2  |  |  * regexp.c: generic and extensible Regular Expression engine  | 
3  |  |  *  | 
4  |  |  * Designed primarily to compile regexps for the variety of  | 
5  |  |  * validation/schema mechanisms now available in XML-related  | 
6  |  |  * specifications. These include:  | 
7  |  |  *    - XML-1.0 DTD validation  | 
8  |  |  *    - XML Schemas structure part 1  | 
9  |  |  *    - XML Schemas Datatypes part 2 especially Appendix F  | 
10  |  |  *    - RELAX-NG/TREX i.e. the counter proposal  | 
11  |  |  *  | 
12  |  |  * See Copyright for the status of this software.  | 
13  |  |  *  | 
14  |  |  * Author: Daniel Veillard  | 
15  |  |  */  | 
16  |  |  | 
17  |  | #define IN_LIBXML  | 
18  |  | #include "libxml.h"  | 
19  |  |  | 
20  |  | #ifdef LIBXML_REGEXP_ENABLED  | 
21  |  |  | 
22  |  | #include <stdio.h>  | 
23  |  | #include <string.h>  | 
24  |  | #include <limits.h>  | 
25  |  |  | 
26  |  | #include <libxml/tree.h>  | 
27  |  | #include <libxml/parserInternals.h>  | 
28  |  | #include <libxml/xmlregexp.h>  | 
29  |  | #include <libxml/xmlautomata.h>  | 
30  |  |  | 
31  |  | #include "private/error.h"  | 
32  |  | #include "private/memory.h"  | 
33  |  | #include "private/regexp.h"  | 
34  |  |  | 
35  |  | #ifndef SIZE_MAX  | 
36  |  | #define SIZE_MAX ((size_t) -1)  | 
37  |  | #endif  | 
38  |  |  | 
39  |  | /* #define DEBUG_REGEXP */  | 
40  |  |  | 
41  | 0  | #define MAX_PUSH 10000000  | 
42  |  |  | 
43  |  | #ifdef ERROR  /* drop any ERROR macro left over from earlier headers */  | 
44  |  | #undef ERROR  | 
45  |  | #endif  | 
46  |  | #define ERROR(str)              \  | 
47  | 0  |     ctxt->error = XML_REGEXP_COMPILE_ERROR;       \  | 
48  | 0  |     xmlRegexpErrCompile(ctxt, str);  /* expands to TWO statements and needs a local `ctxt` */  | 
49  | 0  | #define NEXT ctxt->cur++  /* advance the parse cursor by one xmlChar */  | 
50  | 0  | #define CUR (*(ctxt->cur))  /* xmlChar at the parse cursor */  | 
51  | 0  | #define NXT(index) (ctxt->cur[index])  /* xmlChar at cursor + index, no bounds check */  | 
52  |  |  | 
53  | 0  | #define NEXTL(l) ctxt->cur += l;  /* advance by l xmlChars; note the expansion already ends in ';' */  | 
54  | 0  | #define XML_REG_STRING_SEPARATOR '|'  | 
55  |  | /*  | 
56  |  |  * Need PREV to check on a '-' within a Character Group. May only be used  | 
57  |  |  * when it's guaranteed that cur is not at the beginning of ctxt->string!  | 
58  |  |  */  | 
59  | 0  | #define PREV (ctxt->cur[-1])  | 
60  |  |  | 
61  |  | /************************************************************************  | 
62  |  |  *                  *  | 
63  |  |  *      Unicode support         *  | 
64  |  |  *                  *  | 
65  |  |  ************************************************************************/  | 
66  |  |  | 
67  |  | typedef struct { /* one entry of a name -> range-group table searched by xmlUnicodeLookup */ | 
68  |  |     const char *rangename;  /* lookup key, compared with strcmp(); tables must be sorted on it */  | 
69  |  |     const xmlChRangeGroup group;  /* the ranges returned (by address) for this name */  | 
70  |  | } xmlUnicodeRange;  | 
71  |  |  | 
72  |  | #include "codegen/unicode.inc"  | 
73  |  |  | 
74  |  | /**  | 
75  |  |  * binary table lookup for user-supplied name  | 
76  |  |  *  | 
77  |  |  * @param sptr  a table of xmlUnicodeRange structs  | 
78  |  |  * @param numentries  number of table entries  | 
79  |  |  * @param tname  name to be found  | 
80  |  |  * @returns pointer to range function if found, otherwise NULL  | 
81  |  |  */  | 
82  |  | static const xmlChRangeGroup *  | 
83  |  | xmlUnicodeLookup(const xmlUnicodeRange *sptr, int numentries,  | 
84  | 0  |                  const char *tname) { | 
85  | 0  |     int low, high, mid, cmp;  | 
86  |  | 
  | 
87  | 0  |     if (tname == NULL) return(NULL);  | 
88  |  |  | 
89  | 0  |     low = 0;  | 
90  | 0  |     high = numentries - 1;  | 
91  | 0  |     while (low <= high) { | 
92  | 0  |   mid = (low + high) / 2;  | 
93  | 0  |   cmp = strcmp(tname, sptr[mid].rangename);  | 
94  | 0  |   if (cmp == 0)  | 
95  | 0  |       return (&sptr[mid].group);  | 
96  | 0  |   if (cmp < 0)  | 
97  | 0  |       high = mid - 1;  | 
98  | 0  |   else  | 
99  | 0  |       low = mid + 1;  | 
100  | 0  |     }  | 
101  | 0  |     return (NULL);  | 
102  | 0  | }  | 
103  |  |  | 
104  |  | /**  | 
105  |  |  * Check whether the character is part of the UCS Block  | 
106  |  |  *  | 
107  |  |  * @param code  UCS code point  | 
108  |  |  * @param block  UCS block name  | 
109  |  |  * @returns 1 if true, 0 if false and -1 on unknown block  | 
110  |  |  */  | 
111  |  | static int  | 
112  | 0  | xmlUCSIsBlock(int code, const char *block) { | 
113  | 0  |     const xmlChRangeGroup *group;  | 
114  |  | 
  | 
115  | 0  |     group = xmlUnicodeLookup(xmlUnicodeBlocks,  | 
116  | 0  |             sizeof(xmlUnicodeBlocks) / sizeof(xmlUnicodeBlocks[0]), block);  | 
117  | 0  |     if (group == NULL)  | 
118  | 0  |   return (-1);  | 
119  | 0  |     return (xmlCharInRange(code, group));  | 
120  | 0  | }  | 
121  |  |  | 
122  |  | /************************************************************************  | 
123  |  |  *                  *  | 
124  |  |  *      Datatypes and structures      *  | 
125  |  |  *                  *  | 
126  |  |  ************************************************************************/  | 
127  |  |  | 
128  |  | /*  | 
129  |  |  * Kinds of atoms and ranges. Note: the order of the enums below is significant, do not shuffle  | 
130  |  |  */  | 
131  |  | typedef enum { | 
132  |  |     XML_REGEXP_EPSILON = 1,  | 
133  |  |     XML_REGEXP_CHARVAL,  | 
134  |  |     XML_REGEXP_RANGES,  | 
135  |  |     XML_REGEXP_SUBREG,  /* used for () sub regexps */  | 
136  |  |     XML_REGEXP_STRING,  /* atom->valuep holds the string (see xmlRegEpxFromParse) */  | 
137  |  |     XML_REGEXP_ANYCHAR, /* . */  | 
138  |  |     XML_REGEXP_ANYSPACE, /* \s */  | 
139  |  |     XML_REGEXP_NOTSPACE, /* \S */  | 
140  |  |     XML_REGEXP_INITNAME, /* \l */  | 
141  |  |     XML_REGEXP_NOTINITNAME, /* \L */  | 
142  |  |     XML_REGEXP_NAMECHAR, /* \c */  | 
143  |  |     XML_REGEXP_NOTNAMECHAR, /* \C */  | 
144  |  |     XML_REGEXP_DECIMAL, /* \d */  | 
145  |  |     XML_REGEXP_NOTDECIMAL, /* \D */  | 
146  |  |     XML_REGEXP_REALCHAR, /* \w */  | 
147  |  |     XML_REGEXP_NOTREALCHAR, /* \W */  | 
148  |  |     XML_REGEXP_LETTER = 100,  /* second numbering block; names suggest Unicode general categories - TODO confirm */  | 
149  |  |     XML_REGEXP_LETTER_UPPERCASE,  | 
150  |  |     XML_REGEXP_LETTER_LOWERCASE,  | 
151  |  |     XML_REGEXP_LETTER_TITLECASE,  | 
152  |  |     XML_REGEXP_LETTER_MODIFIER,  | 
153  |  |     XML_REGEXP_LETTER_OTHERS,  | 
154  |  |     XML_REGEXP_MARK,  | 
155  |  |     XML_REGEXP_MARK_NONSPACING,  | 
156  |  |     XML_REGEXP_MARK_SPACECOMBINING,  | 
157  |  |     XML_REGEXP_MARK_ENCLOSING,  | 
158  |  |     XML_REGEXP_NUMBER,  | 
159  |  |     XML_REGEXP_NUMBER_DECIMAL,  | 
160  |  |     XML_REGEXP_NUMBER_LETTER,  | 
161  |  |     XML_REGEXP_NUMBER_OTHERS,  | 
162  |  |     XML_REGEXP_PUNCT,  | 
163  |  |     XML_REGEXP_PUNCT_CONNECTOR,  | 
164  |  |     XML_REGEXP_PUNCT_DASH,  | 
165  |  |     XML_REGEXP_PUNCT_OPEN,  | 
166  |  |     XML_REGEXP_PUNCT_CLOSE,  | 
167  |  |     XML_REGEXP_PUNCT_INITQUOTE,  | 
168  |  |     XML_REGEXP_PUNCT_FINQUOTE,  | 
169  |  |     XML_REGEXP_PUNCT_OTHERS,  | 
170  |  |     XML_REGEXP_SEPAR,  | 
171  |  |     XML_REGEXP_SEPAR_SPACE,  | 
172  |  |     XML_REGEXP_SEPAR_LINE,  | 
173  |  |     XML_REGEXP_SEPAR_PARA,  | 
174  |  |     XML_REGEXP_SYMBOL,  | 
175  |  |     XML_REGEXP_SYMBOL_MATH,  | 
176  |  |     XML_REGEXP_SYMBOL_CURRENCY,  | 
177  |  |     XML_REGEXP_SYMBOL_MODIFIER,  | 
178  |  |     XML_REGEXP_SYMBOL_OTHERS,  | 
179  |  |     XML_REGEXP_OTHER,  | 
180  |  |     XML_REGEXP_OTHER_CONTROL,  | 
181  |  |     XML_REGEXP_OTHER_FORMAT,  | 
182  |  |     XML_REGEXP_OTHER_PRIVATE,  | 
183  |  |     XML_REGEXP_OTHER_NA,  | 
184  |  |     XML_REGEXP_BLOCK_NAME  /* presumably paired with xmlRegRange.blockName - TODO confirm */  | 
185  |  | } xmlRegAtomType;  | 
186  |  |  | 
187  |  | typedef enum { /* quantifier attached to an atom (xmlRegAtom.quant) */ | 
188  |  |     XML_REGEXP_QUANT_EPSILON = 1,  | 
189  |  |     XML_REGEXP_QUANT_ONCE,  /* default given to every atom by xmlRegNewAtom */  | 
190  |  |     XML_REGEXP_QUANT_OPT,  | 
191  |  |     XML_REGEXP_QUANT_MULT,  | 
192  |  |     XML_REGEXP_QUANT_PLUS,  | 
193  |  |     XML_REGEXP_QUANT_ONCEONLY,  | 
194  |  |     XML_REGEXP_QUANT_ALL,  | 
195  |  |     XML_REGEXP_QUANT_RANGE  /* presumably bounded by xmlRegAtom.min/max - TODO confirm */  | 
196  |  | } xmlRegQuantType;  | 
197  |  |  | 
198  |  | typedef enum { /* role of an automata state (xmlRegState.type); also stored in column 0 of the compact table */ | 
199  |  |     XML_REGEXP_START_STATE = 1,  | 
200  |  |     XML_REGEXP_FINAL_STATE,  | 
201  |  |     XML_REGEXP_TRANS_STATE,  | 
202  |  |     XML_REGEXP_SINK_STATE,  | 
203  |  |     XML_REGEXP_UNREACH_STATE  | 
204  |  | } xmlRegStateType;  | 
205  |  |  | 
206  |  | typedef enum { /* type of the mark, markd and reached fields of a state */ | 
207  |  |     XML_REGEXP_MARK_NORMAL = 0,  /* the value a zero-filled state starts with */  | 
208  |  |     XML_REGEXP_MARK_START,  | 
209  |  |     XML_REGEXP_MARK_VISITED  | 
210  |  | } xmlRegMarkedType;  | 
211  |  |  | 
212  |  | typedef struct _xmlRegRange xmlRegRange;  | 
213  |  | typedef xmlRegRange *xmlRegRangePtr;  | 
214  |  |  | 
215  |  | struct _xmlRegRange { /* one entry of an atom's `ranges` array; built by xmlRegNewRange */ | 
216  |  |     int neg;    /* 0 normal, 1 not, 2 exclude */  | 
217  |  |     xmlRegAtomType type;  /* the type of range */  | 
218  |  |     int start;  /* the start codepoint */  | 
219  |  |     int end;  /* the end codepoint */  | 
220  |  |     xmlChar *blockName;  /* owned string or NULL; released by xmlRegFreeRange */  | 
221  |  | };  | 
222  |  |  | 
223  |  | typedef struct _xmlRegAtom xmlRegAtom;  | 
224  |  | typedef xmlRegAtom *xmlRegAtomPtr;  | 
225  |  |  | 
226  |  | typedef struct _xmlAutomataState xmlRegState;  | 
227  |  | typedef xmlRegState *xmlRegStatePtr;  | 
228  |  |  | 
229  |  | struct _xmlRegAtom { /* one atom of the expression; allocated zero-filled by xmlRegNewAtom */ | 
230  |  |     int no;  /* used as index into the per-atom remap table in xmlRegEpxFromParse */  | 
231  |  |     xmlRegAtomType type;  /* kind of atom */  | 
232  |  |     xmlRegQuantType quant;  /* quantifier, XML_REGEXP_QUANT_ONCE by default */  | 
233  |  |     int min;  /* NOTE(review): presumably the lower repeat bound - confirm */  | 
234  |  |     int max;  /* NOTE(review): presumably the upper repeat bound - confirm */  | 
235  |  |  | 
236  |  |     void *valuep;  /* for XML_REGEXP_STRING atoms: the xmlChar string */  | 
237  |  |     void *valuep2;  | 
238  |  |     int neg;  | 
239  |  |     int codepoint;  | 
240  |  |     xmlRegStatePtr start;  | 
241  |  |     xmlRegStatePtr start0;  | 
242  |  |     xmlRegStatePtr stop;  | 
243  |  |     int maxRanges;  /* NOTE(review): presumably the allocated size of ranges - confirm */  | 
244  |  |     int nbRanges;  /* NOTE(review): presumably the number of entries used in ranges - confirm */  | 
245  |  |     xmlRegRangePtr *ranges;  | 
246  |  |     void *data;  /* opaque pointer; copied into the compact transdata table */  | 
247  |  | };  | 
248  |  |  | 
249  |  | typedef struct _xmlRegCounter xmlRegCounter;  | 
250  |  | typedef xmlRegCounter *xmlRegCounterPtr;  | 
251  |  |  | 
252  |  | struct _xmlRegCounter { /* entry of the `counters` arrays; NOTE(review): bounds semantics not visible here */ | 
253  |  |     int min;  | 
254  |  |     int max;  | 
255  |  | };  | 
256  |  |  | 
257  |  | typedef struct _xmlRegTrans xmlRegTrans;  | 
258  |  | typedef xmlRegTrans *xmlRegTransPtr;  | 
259  |  |  | 
260  |  | struct _xmlRegTrans { /* one outgoing transition of a state */ | 
261  |  |     xmlRegAtomPtr atom;  /* atom consumed by the transition; NULL ones are skipped when compacting */  | 
262  |  |     int to;  /* index of the target state; negative ones are skipped when compacting */  | 
263  |  |     int counter;  | 
264  |  |     int count;  | 
265  |  |     int nd;  | 
266  |  | };  | 
267  |  |  | 
268  |  | struct _xmlAutomataState { /* one state of the automata, also known as xmlRegState */ | 
269  |  |     xmlRegStateType type;  /* copied into column 0 of the compact table row */  | 
270  |  |     xmlRegMarkedType mark;  | 
271  |  |     xmlRegMarkedType markd;  | 
272  |  |     xmlRegMarkedType reached;  | 
273  |  |     int no;  | 
274  |  |     int maxTrans;  /* NOTE(review): presumably the allocated size of trans - confirm */  | 
275  |  |     int nbTrans;  /* number of entries of trans that are iterated */  | 
276  |  |     xmlRegTrans *trans;  /* outgoing transitions */  | 
277  |  |     /*  knowing which states point to us can speed things up */  | 
278  |  |     int maxTransTo;  | 
279  |  |     int nbTransTo;  | 
280  |  |     int *transTo;  | 
281  |  | };  | 
282  |  |  | 
283  |  | typedef struct _xmlAutomata xmlRegParserCtxt;  | 
284  |  | typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;  | 
285  |  |  | 
286  | 0  | #define AM_AUTOMATA_RNG 1  | 
287  |  |  | 
288  |  | struct _xmlAutomata { /* parser/builder context, also known as xmlRegParserCtxt */ | 
289  |  |     xmlChar *string;  /* private copy of the expression (xmlRegNewParserCtxt); may be NULL */  | 
290  |  |     xmlChar *cur;  /* parse cursor inside string, driven by the NEXT/CUR macros */  | 
291  |  |  | 
292  |  |     int error;  /* 0, or the code stored by the error handlers */  | 
293  |  |     int neg;  | 
294  |  |  | 
295  |  |     xmlRegStatePtr start;  | 
296  |  |     xmlRegStatePtr end;  | 
297  |  |     xmlRegStatePtr state;  | 
298  |  |  | 
299  |  |     xmlRegAtomPtr atom;  | 
300  |  |  | 
301  |  |     int maxAtoms;  | 
302  |  |     int nbAtoms;  | 
303  |  |     xmlRegAtomPtr *atoms;  /* handed over to the xmlRegexp by xmlRegEpxFromParse */  | 
304  |  |  | 
305  |  |     int maxStates;  | 
306  |  |     int nbStates;  | 
307  |  |     xmlRegStatePtr *states;  /* handed over to the xmlRegexp by xmlRegEpxFromParse */  | 
308  |  |  | 
309  |  |     int maxCounters;  | 
310  |  |     int nbCounters;  | 
311  |  |     xmlRegCounter *counters;  /* handed over to the xmlRegexp by xmlRegEpxFromParse */  | 
312  |  |  | 
313  |  |     int determinist;  /* starts at -1 (xmlRegNewParserCtxt), i.e. not computed yet */  | 
314  |  |     int negs;  /* must be 0 for the compact form to be built */  | 
315  |  |     int flags;  | 
316  |  |  | 
317  |  |     int depth;  | 
318  |  | };  | 
319  |  |  | 
320  |  | struct _xmlRegexp { /* a compiled expression, filled in by xmlRegEpxFromParse */ | 
321  |  |     xmlChar *string;  /* the expression string taken over from the parser context */  | 
322  |  |     int nbStates;  | 
323  |  |     xmlRegStatePtr *states;  /* freed and set to NULL once the compact form is built */  | 
324  |  |     int nbAtoms;  | 
325  |  |     xmlRegAtomPtr *atoms;  /* freed and set to NULL once the compact form is built */  | 
326  |  |     int nbCounters;  | 
327  |  |     xmlRegCounter *counters;  | 
328  |  |     int determinist;  | 
329  |  |     int flags;  | 
330  |  |     /*  | 
331  |  |      * Compact form used for deterministic automata  | 
332  |  |      */  | 
333  |  |     int nbstates;  /* number of remapped states, i.e. used rows of compact */  | 
334  |  |     int *compact;  /* rows of nbstrings + 1 ints: [0] state type, [k + 1] target state + 1 for string k, 0 if none */  | 
335  |  |     void **transdata;  /* nbstates x nbstrings atom data pointers, or NULL when no atom carried data */  | 
336  |  |     int nbstrings;  /* number of entries in stringMap */  | 
337  |  |     xmlChar **stringMap;  /* the distinct atom strings, each an owned copy */  | 
338  |  | };  | 
339  |  |  | 
340  |  | typedef struct _xmlRegExecRollback xmlRegExecRollback;  | 
341  |  | typedef xmlRegExecRollback *xmlRegExecRollbackPtr;  | 
342  |  |  | 
343  |  | struct _xmlRegExecRollback { /* one entry of the rollback stack of an execution context */ | 
344  |  |     xmlRegStatePtr state;/* the current state */  | 
345  |  |     int index;    /* the index in the input stack */  | 
346  |  |     int nextbranch; /* the next transition to explore in that state */  | 
347  |  |     int *counts;  /* save the automata state if it has some */  | 
348  |  | };  | 
349  |  |  | 
350  |  | typedef struct _xmlRegInputToken xmlRegInputToken;  | 
351  |  | typedef xmlRegInputToken *xmlRegInputTokenPtr;  | 
352  |  |  | 
353  |  | struct _xmlRegInputToken { /* element type of xmlRegExecCtxt.inputStack */ | 
354  |  |     xmlChar *value;  /* NOTE(review): presumably the token string - confirm ownership in the exec code */  | 
355  |  |     void *data;  /* NOTE(review): presumably caller data attached to the token - confirm */  | 
356  |  | };  | 
357  |  |  | 
358  |  | struct _xmlRegExecCtxt { /* state of one execution of a compiled regexp */ | 
359  |  |     int status;   /* execution status, != 0 indicates an error */  | 
360  |  |     int determinist;  /* did we find an indeterministic behaviour */  | 
361  |  |     xmlRegexpPtr comp;  /* the compiled regexp */  | 
362  |  |     xmlRegExecCallbacks callback;  | 
363  |  |     void *data;  | 
364  |  |  | 
365  |  |     xmlRegStatePtr state;/* the current state */  | 
366  |  |     int transno;  /* the current transition on that state */  | 
367  |  |     int transcount; /* the number of chars in char counted transitions */  | 
368  |  |  | 
369  |  |     /*  | 
370  |  |      * A stack of rollback states  | 
371  |  |      */  | 
372  |  |     int maxRollbacks;  | 
373  |  |     int nbRollbacks;  | 
374  |  |     xmlRegExecRollback *rollbacks;  | 
375  |  |  | 
376  |  |     /*  | 
377  |  |      * The state of the automata if any  | 
378  |  |      */  | 
379  |  |     int *counts;  | 
380  |  |  | 
381  |  |     /*  | 
382  |  |      * The input stack  | 
383  |  |      */  | 
384  |  |     int inputStackMax;  | 
385  |  |     int inputStackNr;  | 
386  |  |     int index;  | 
387  |  |     int *charStack;  | 
388  |  |     const xmlChar *inputString; /* when operating on characters */  | 
389  |  |     xmlRegInputTokenPtr inputStack;/* when operating on strings */  | 
390  |  |  | 
391  |  |     /*  | 
392  |  |      * error handling  | 
393  |  |      */  | 
394  |  |     int errStateNo;   /* the error state number */  | 
395  |  |     xmlRegStatePtr errState;    /* the error state */  | 
396  |  |     xmlChar *errString;   /* the string raising the error */  | 
397  |  |     int *errCounts;   /* counters at the error state */  | 
398  |  |     int nbPush;  /* NOTE(review): presumably limited by MAX_PUSH - confirm in the exec code */  | 
399  |  | };  | 
400  |  |  | 
401  | 0  | #define REGEXP_ALL_COUNTER  0x123456  | 
402  | 0  | #define REGEXP_ALL_LAX_COUNTER  0x123457  | 
403  |  |  | 
404  |  | static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);  | 
405  |  | static void xmlRegFreeState(xmlRegStatePtr state);  | 
406  |  | static void xmlRegFreeAtom(xmlRegAtomPtr atom);  | 
407  |  | static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);  | 
408  |  | static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);  | 
409  |  | static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,  | 
410  |  |                   int neg, int start, int end, const xmlChar *blockName);  | 
411  |  |  | 
412  |  | /************************************************************************  | 
413  |  |  *                  *  | 
414  |  |  *    Regexp memory error handler       *  | 
415  |  |  *                  *  | 
416  |  |  ************************************************************************/  | 
417  |  | /**  | 
418  |  |  * Handle an out of memory condition  | 
419  |  |  *  | 
420  |  |  * @param ctxt  regexp parser context  | 
421  |  |  */  | 
422  |  | static void  | 
423  |  | xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt)  | 
424  | 0  | { | 
425  | 0  |     if (ctxt != NULL)  | 
426  | 0  |         ctxt->error = XML_ERR_NO_MEMORY;  | 
427  |  | 
  | 
428  | 0  |     xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_REGEXP, NULL);  | 
429  | 0  | }  | 
430  |  |  | 
431  |  | /**  | 
432  |  |  * Handle a compilation failure  | 
433  |  |  *  | 
434  |  |  * @param ctxt  regexp parser context  | 
435  |  |  * @param extra  extra information  | 
436  |  |  */  | 
437  |  | static void  | 
438  |  | xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)  | 
439  | 0  | { | 
440  | 0  |     const char *regexp = NULL;  | 
441  | 0  |     int idx = 0;  | 
442  | 0  |     int res;  | 
443  |  | 
  | 
444  | 0  |     if (ctxt != NULL) { | 
445  | 0  |         regexp = (const char *) ctxt->string;  | 
446  | 0  |   idx = ctxt->cur - ctxt->string;  | 
447  | 0  |   ctxt->error = XML_REGEXP_COMPILE_ERROR;  | 
448  | 0  |     }  | 
449  |  | 
  | 
450  | 0  |     res = xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,  | 
451  | 0  |                         XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL,  | 
452  | 0  |                         NULL, 0, extra, regexp, NULL, idx, 0,  | 
453  | 0  |                         "failed to compile: %s\n", extra);  | 
454  | 0  |     if (res < 0)  | 
455  | 0  |         xmlRegexpErrMemory(ctxt);  | 
456  | 0  | }  | 
457  |  |  | 
458  |  | /************************************************************************  | 
459  |  |  *                  *  | 
460  |  |  *      Allocation/Deallocation       *  | 
461  |  |  *                  *  | 
462  |  |  ************************************************************************/  | 
463  |  |  | 
464  |  | static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);  | 
465  |  |  | 
466  |  | /**  | 
467  |  |  * Allocate a two-dimensional array and set all elements to zero.  | 
468  |  |  *  | 
469  |  |  * @param dim1  size of first dimension  | 
470  |  |  * @param dim2  size of second dimension  | 
471  |  |  * @param elemSize  size of element  | 
472  |  |  * @returns the new array or NULL in case of error.  | 
473  |  |  */  | 
474  |  | static void*  | 
475  | 0  | xmlRegCalloc2(size_t dim1, size_t dim2, size_t elemSize) { | 
476  | 0  |     size_t numElems, totalSize;  | 
477  | 0  |     void *ret;  | 
478  |  |  | 
479  |  |     /* Check for overflow */  | 
480  | 0  |     if ((dim2 == 0) || (elemSize == 0) ||  | 
481  | 0  |         (dim1 > SIZE_MAX / dim2 / elemSize))  | 
482  | 0  |         return (NULL);  | 
483  | 0  |     numElems = dim1 * dim2;  | 
484  | 0  |     if (numElems > XML_MAX_ITEMS)  | 
485  | 0  |         return NULL;  | 
486  | 0  |     totalSize = numElems * elemSize;  | 
487  | 0  |     ret = xmlMalloc(totalSize);  | 
488  | 0  |     if (ret != NULL)  | 
489  | 0  |         memset(ret, 0, totalSize);  | 
490  | 0  |     return (ret);  | 
491  | 0  | }  | 
492  |  |  | 
493  |  | /**  | 
494  |  |  * Allocate a new regexp and fill it with the result from the parser  | 
495  |  |  *  | 
496  |  |  * @param ctxt  the parser context used to build it  | 
497  |  |  * @returns the new regexp or NULL in case of error  | 
498  |  |  */  | 
499  |  | static xmlRegexpPtr  | 
500  | 0  | xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) { | 
501  | 0  |     xmlRegexpPtr ret;  | 
502  |  | 
  | 
503  | 0  |     ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));  | 
504  | 0  |     if (ret == NULL) { | 
505  | 0  |   xmlRegexpErrMemory(ctxt);  | 
506  | 0  |   return(NULL);  | 
507  | 0  |     }  | 
508  | 0  |     memset(ret, 0, sizeof(xmlRegexp));  | 
509  | 0  |     ret->string = ctxt->string;  | 
510  | 0  |     ret->nbStates = ctxt->nbStates;  | 
511  | 0  |     ret->states = ctxt->states;  | 
512  | 0  |     ret->nbAtoms = ctxt->nbAtoms;  | 
513  | 0  |     ret->atoms = ctxt->atoms;  | 
514  | 0  |     ret->nbCounters = ctxt->nbCounters;  | 
515  | 0  |     ret->counters = ctxt->counters;  | 
516  | 0  |     ret->determinist = ctxt->determinist;  | 
517  | 0  |     ret->flags = ctxt->flags;  | 
518  | 0  |     if (ret->determinist == -1) { | 
519  | 0  |         if (xmlRegexpIsDeterminist(ret) < 0) { | 
520  | 0  |             xmlRegexpErrMemory(ctxt);  | 
521  | 0  |             xmlFree(ret);  | 
522  | 0  |             return(NULL);  | 
523  | 0  |         }  | 
524  | 0  |     }  | 
525  |  |  | 
526  | 0  |     if ((ret->determinist != 0) &&  | 
527  | 0  |   (ret->nbCounters == 0) &&  | 
528  | 0  |   (ctxt->negs == 0) &&  | 
529  | 0  |   (ret->atoms != NULL) &&  | 
530  | 0  |   (ret->atoms[0] != NULL) &&  | 
531  | 0  |   (ret->atoms[0]->type == XML_REGEXP_STRING)) { | 
532  | 0  |   int i, j, nbstates = 0, nbatoms = 0;  | 
533  | 0  |   int *stateRemap;  | 
534  | 0  |   int *stringRemap;  | 
535  | 0  |   int *transitions;  | 
536  | 0  |   void **transdata;  | 
537  | 0  |   xmlChar **stringMap;  | 
538  | 0  |         xmlChar *value;  | 
539  |  |  | 
540  |  |   /*  | 
541  |  |    * Switch to a compact representation  | 
542  |  |    * 1/ counting the effective number of states left  | 
543  |  |    * 2/ counting the unique number of atoms, and check that  | 
544  |  |    *    they are all of the string type  | 
545  |  |    * 3/ build a table state x atom for the transitions  | 
546  |  |    */  | 
547  |  | 
  | 
548  | 0  |   stateRemap = xmlMalloc(ret->nbStates * sizeof(int));  | 
549  | 0  |   if (stateRemap == NULL) { | 
550  | 0  |       xmlRegexpErrMemory(ctxt);  | 
551  | 0  |       xmlFree(ret);  | 
552  | 0  |       return(NULL);  | 
553  | 0  |   }  | 
554  | 0  |   for (i = 0;i < ret->nbStates;i++) { | 
555  | 0  |       if (ret->states[i] != NULL) { | 
556  | 0  |     stateRemap[i] = nbstates;  | 
557  | 0  |     nbstates++;  | 
558  | 0  |       } else { | 
559  | 0  |     stateRemap[i] = -1;  | 
560  | 0  |       }  | 
561  | 0  |   }  | 
562  | 0  |   stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));  | 
563  | 0  |   if (stringMap == NULL) { | 
564  | 0  |       xmlRegexpErrMemory(ctxt);  | 
565  | 0  |       xmlFree(stateRemap);  | 
566  | 0  |       xmlFree(ret);  | 
567  | 0  |       return(NULL);  | 
568  | 0  |   }  | 
569  | 0  |   stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));  | 
570  | 0  |   if (stringRemap == NULL) { | 
571  | 0  |       xmlRegexpErrMemory(ctxt);  | 
572  | 0  |       xmlFree(stringMap);  | 
573  | 0  |       xmlFree(stateRemap);  | 
574  | 0  |       xmlFree(ret);  | 
575  | 0  |       return(NULL);  | 
576  | 0  |   }  | 
577  | 0  |   for (i = 0;i < ret->nbAtoms;i++) { | 
578  | 0  |       if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&  | 
579  | 0  |     (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) { | 
580  | 0  |     value = ret->atoms[i]->valuep;  | 
581  | 0  |                 for (j = 0;j < nbatoms;j++) { | 
582  | 0  |         if (xmlStrEqual(stringMap[j], value)) { | 
583  | 0  |       stringRemap[i] = j;  | 
584  | 0  |       break;  | 
585  | 0  |         }  | 
586  | 0  |     }  | 
587  | 0  |     if (j >= nbatoms) { | 
588  | 0  |         stringRemap[i] = nbatoms;  | 
589  | 0  |         stringMap[nbatoms] = xmlStrdup(value);  | 
590  | 0  |         if (stringMap[nbatoms] == NULL) { | 
591  | 0  |       for (i = 0;i < nbatoms;i++)  | 
592  | 0  |           xmlFree(stringMap[i]);  | 
593  | 0  |       xmlFree(stringRemap);  | 
594  | 0  |       xmlFree(stringMap);  | 
595  | 0  |       xmlFree(stateRemap);  | 
596  | 0  |       xmlFree(ret);  | 
597  | 0  |       return(NULL);  | 
598  | 0  |         }  | 
599  | 0  |         nbatoms++;  | 
600  | 0  |     }  | 
601  | 0  |       } else { | 
602  | 0  |     xmlFree(stateRemap);  | 
603  | 0  |     xmlFree(stringRemap);  | 
604  | 0  |     for (i = 0;i < nbatoms;i++)  | 
605  | 0  |         xmlFree(stringMap[i]);  | 
606  | 0  |     xmlFree(stringMap);  | 
607  | 0  |     xmlFree(ret);  | 
608  | 0  |     return(NULL);  | 
609  | 0  |       }  | 
610  | 0  |   }  | 
611  | 0  |   transitions = (int *) xmlRegCalloc2(nbstates + 1, nbatoms + 1,  | 
612  | 0  |                                             sizeof(int));  | 
613  | 0  |   if (transitions == NULL) { | 
614  | 0  |       xmlFree(stateRemap);  | 
615  | 0  |       xmlFree(stringRemap);  | 
616  | 0  |             for (i = 0;i < nbatoms;i++)  | 
617  | 0  |     xmlFree(stringMap[i]);  | 
618  | 0  |       xmlFree(stringMap);  | 
619  | 0  |       xmlFree(ret);  | 
620  | 0  |       return(NULL);  | 
621  | 0  |   }  | 
622  |  |  | 
623  |  |   /*  | 
624  |  |    * Allocate the transition table. The first entry for each  | 
625  |  |    * state corresponds to the state type.  | 
626  |  |    */  | 
627  | 0  |   transdata = NULL;  | 
628  |  | 
  | 
629  | 0  |   for (i = 0;i < ret->nbStates;i++) { | 
630  | 0  |       int stateno, atomno, targetno, prev;  | 
631  | 0  |       xmlRegStatePtr state;  | 
632  | 0  |       xmlRegTransPtr trans;  | 
633  |  | 
  | 
634  | 0  |       stateno = stateRemap[i];  | 
635  | 0  |       if (stateno == -1)  | 
636  | 0  |     continue;  | 
637  | 0  |       state = ret->states[i];  | 
638  |  | 
  | 
639  | 0  |       transitions[stateno * (nbatoms + 1)] = state->type;  | 
640  |  | 
  | 
641  | 0  |       for (j = 0;j < state->nbTrans;j++) { | 
642  | 0  |     trans = &(state->trans[j]);  | 
643  | 0  |     if ((trans->to < 0) || (trans->atom == NULL))  | 
644  | 0  |         continue;  | 
645  | 0  |                 atomno = stringRemap[trans->atom->no];  | 
646  | 0  |     if ((trans->atom->data != NULL) && (transdata == NULL)) { | 
647  | 0  |         transdata = (void **) xmlRegCalloc2(nbstates, nbatoms,  | 
648  | 0  |                                       sizeof(void *));  | 
649  | 0  |         if (transdata == NULL) { | 
650  | 0  |       xmlRegexpErrMemory(ctxt);  | 
651  | 0  |       break;  | 
652  | 0  |         }  | 
653  | 0  |     }  | 
654  | 0  |     targetno = stateRemap[trans->to];  | 
655  |  |     /*  | 
656  |  |      * if the same atom can generate transitions to 2 different  | 
657  |  |      * states then it means the automata is not deterministic and  | 
658  |  |      * the compact form can't be used !  | 
659  |  |      */  | 
660  | 0  |     prev = transitions[stateno * (nbatoms + 1) + atomno + 1];  | 
661  | 0  |     if (prev != 0) { | 
662  | 0  |         if (prev != targetno + 1) { | 
663  | 0  |       ret->determinist = 0;  | 
664  | 0  |       if (transdata != NULL)  | 
665  | 0  |           xmlFree(transdata);  | 
666  | 0  |       xmlFree(transitions);  | 
667  | 0  |       xmlFree(stateRemap);  | 
668  | 0  |       xmlFree(stringRemap);  | 
669  | 0  |       for (i = 0;i < nbatoms;i++)  | 
670  | 0  |           xmlFree(stringMap[i]);  | 
671  | 0  |       xmlFree(stringMap);  | 
672  | 0  |       goto not_determ;  | 
673  | 0  |         }  | 
674  | 0  |     } else { | 
675  | 0  |         transitions[stateno * (nbatoms + 1) + atomno + 1] =  | 
676  | 0  |       targetno + 1; /* to avoid 0 */  | 
677  | 0  |         if (transdata != NULL)  | 
678  | 0  |       transdata[stateno * nbatoms + atomno] =  | 
679  | 0  |           trans->atom->data;  | 
680  | 0  |     }  | 
681  | 0  |       }  | 
682  | 0  |   }  | 
683  | 0  |   ret->determinist = 1;  | 
684  |  |   /*  | 
685  |  |    * Cleanup of the old data  | 
686  |  |    */  | 
687  | 0  |   if (ret->states != NULL) { | 
688  | 0  |       for (i = 0;i < ret->nbStates;i++)  | 
689  | 0  |     xmlRegFreeState(ret->states[i]);  | 
690  | 0  |       xmlFree(ret->states);  | 
691  | 0  |   }  | 
692  | 0  |   ret->states = NULL;  | 
693  | 0  |   ret->nbStates = 0;  | 
694  | 0  |   if (ret->atoms != NULL) { | 
695  | 0  |       for (i = 0;i < ret->nbAtoms;i++)  | 
696  | 0  |     xmlRegFreeAtom(ret->atoms[i]);  | 
697  | 0  |       xmlFree(ret->atoms);  | 
698  | 0  |   }  | 
699  | 0  |   ret->atoms = NULL;  | 
700  | 0  |   ret->nbAtoms = 0;  | 
701  |  | 
  | 
702  | 0  |   ret->compact = transitions;  | 
703  | 0  |   ret->transdata = transdata;  | 
704  | 0  |   ret->stringMap = stringMap;  | 
705  | 0  |   ret->nbstrings = nbatoms;  | 
706  | 0  |   ret->nbstates = nbstates;  | 
707  | 0  |   xmlFree(stateRemap);  | 
708  | 0  |   xmlFree(stringRemap);  | 
709  | 0  |     }  | 
710  | 0  | not_determ:  | 
711  | 0  |     ctxt->string = NULL;  | 
712  | 0  |     ctxt->nbStates = 0;  | 
713  | 0  |     ctxt->states = NULL;  | 
714  | 0  |     ctxt->nbAtoms = 0;  | 
715  | 0  |     ctxt->atoms = NULL;  | 
716  | 0  |     ctxt->nbCounters = 0;  | 
717  | 0  |     ctxt->counters = NULL;  | 
718  | 0  |     return(ret);  | 
719  | 0  | }  | 
720  |  |  | 
721  |  | /**  | 
722  |  |  * Allocate a new regexp parser context  | 
723  |  |  *  | 
724  |  |  * @param string  the string to parse  | 
725  |  |  * @returns the new context or NULL in case of error  | 
726  |  |  */  | 
727  |  | static xmlRegParserCtxtPtr  | 
728  | 0  | xmlRegNewParserCtxt(const xmlChar *string) { | 
729  | 0  |     xmlRegParserCtxtPtr ret;  | 
730  |  | 
  | 
731  | 0  |     ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));  | 
732  | 0  |     if (ret == NULL)  | 
733  | 0  |   return(NULL);  | 
734  | 0  |     memset(ret, 0, sizeof(xmlRegParserCtxt));  | 
735  | 0  |     if (string != NULL) { | 
736  | 0  |   ret->string = xmlStrdup(string);  | 
737  | 0  |         if (ret->string == NULL) { | 
738  | 0  |             xmlFree(ret);  | 
739  | 0  |             return(NULL);  | 
740  | 0  |         }  | 
741  | 0  |     }  | 
742  | 0  |     ret->cur = ret->string;  | 
743  | 0  |     ret->neg = 0;  | 
744  | 0  |     ret->negs = 0;  | 
745  | 0  |     ret->error = 0;  | 
746  | 0  |     ret->determinist = -1;  | 
747  | 0  |     return(ret);  | 
748  | 0  | }  | 
749  |  |  | 
750  |  | /**  | 
751  |  |  * Allocate a new regexp range  | 
752  |  |  *  | 
753  |  |  * @param ctxt  the regexp parser context  | 
754  |  |  * @param neg  is that negative  | 
755  |  |  * @param type  the type of range  | 
756  |  |  * @param start  the start codepoint  | 
757  |  |  * @param end  the end codepoint  | 
758  |  |  * @returns the new range or NULL in case of error  | 
759  |  |  */  | 
760  |  | static xmlRegRangePtr  | 
761  |  | xmlRegNewRange(xmlRegParserCtxtPtr ctxt,  | 
762  | 0  |          int neg, xmlRegAtomType type, int start, int end) { | 
763  | 0  |     xmlRegRangePtr ret;  | 
764  |  | 
  | 
765  | 0  |     ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));  | 
766  | 0  |     if (ret == NULL) { | 
767  | 0  |   xmlRegexpErrMemory(ctxt);  | 
768  | 0  |   return(NULL);  | 
769  | 0  |     }  | 
770  | 0  |     ret->neg = neg;  | 
771  | 0  |     ret->type = type;  | 
772  | 0  |     ret->start = start;  | 
773  | 0  |     ret->end = end;  | 
774  | 0  |     return(ret);  | 
775  | 0  | }  | 
776  |  |  | 
777  |  | /**  | 
778  |  |  * Free a regexp range  | 
779  |  |  *  | 
780  |  |  * @param range  the regexp range  | 
781  |  |  */  | 
782  |  | static void  | 
783  | 0  | xmlRegFreeRange(xmlRegRangePtr range) { | 
784  | 0  |     if (range == NULL)  | 
785  | 0  |   return;  | 
786  |  |  | 
787  | 0  |     if (range->blockName != NULL)  | 
788  | 0  |   xmlFree(range->blockName);  | 
789  | 0  |     xmlFree(range);  | 
790  | 0  | }  | 
791  |  |  | 
792  |  | /**  | 
793  |  |  * Copy a regexp range  | 
794  |  |  *  | 
795  |  |  * @param ctxt  regexp parser context  | 
796  |  |  * @param range  the regexp range  | 
797  |  |  * @returns the new copy or NULL in case of error.  | 
798  |  |  */  | 
799  |  | static xmlRegRangePtr  | 
800  | 0  | xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) { | 
801  | 0  |     xmlRegRangePtr ret;  | 
802  |  | 
  | 
803  | 0  |     if (range == NULL)  | 
804  | 0  |   return(NULL);  | 
805  |  |  | 
806  | 0  |     ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,  | 
807  | 0  |                          range->end);  | 
808  | 0  |     if (ret == NULL)  | 
809  | 0  |         return(NULL);  | 
810  | 0  |     if (range->blockName != NULL) { | 
811  | 0  |   ret->blockName = xmlStrdup(range->blockName);  | 
812  | 0  |   if (ret->blockName == NULL) { | 
813  | 0  |       xmlRegexpErrMemory(ctxt);  | 
814  | 0  |       xmlRegFreeRange(ret);  | 
815  | 0  |       return(NULL);  | 
816  | 0  |   }  | 
817  | 0  |     }  | 
818  | 0  |     return(ret);  | 
819  | 0  | }  | 
820  |  |  | 
821  |  | /**  | 
822  |  |  * Allocate a new atom  | 
823  |  |  *  | 
824  |  |  * @param ctxt  the regexp parser context  | 
825  |  |  * @param type  the type of atom  | 
826  |  |  * @returns the new atom or NULL in case of error  | 
827  |  |  */  | 
828  |  | static xmlRegAtomPtr  | 
829  | 0  | xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) { | 
830  | 0  |     xmlRegAtomPtr ret;  | 
831  |  | 
  | 
832  | 0  |     ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));  | 
833  | 0  |     if (ret == NULL) { | 
834  | 0  |   xmlRegexpErrMemory(ctxt);  | 
835  | 0  |   return(NULL);  | 
836  | 0  |     }  | 
837  | 0  |     memset(ret, 0, sizeof(xmlRegAtom));  | 
838  | 0  |     ret->type = type;  | 
839  | 0  |     ret->quant = XML_REGEXP_QUANT_ONCE;  | 
840  | 0  |     ret->min = 0;  | 
841  | 0  |     ret->max = 0;  | 
842  | 0  |     return(ret);  | 
843  | 0  | }  | 
844  |  |  | 
845  |  | /**  | 
846  |  |  * Free a regexp atom  | 
847  |  |  *  | 
848  |  |  * @param atom  the regexp atom  | 
849  |  |  */  | 
850  |  | static void  | 
851  | 0  | xmlRegFreeAtom(xmlRegAtomPtr atom) { | 
852  | 0  |     int i;  | 
853  |  | 
  | 
854  | 0  |     if (atom == NULL)  | 
855  | 0  |   return;  | 
856  |  |  | 
857  | 0  |     for (i = 0;i < atom->nbRanges;i++)  | 
858  | 0  |   xmlRegFreeRange(atom->ranges[i]);  | 
859  | 0  |     if (atom->ranges != NULL)  | 
860  | 0  |   xmlFree(atom->ranges);  | 
861  | 0  |     if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))  | 
862  | 0  |   xmlFree(atom->valuep);  | 
863  | 0  |     if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))  | 
864  | 0  |   xmlFree(atom->valuep2);  | 
865  | 0  |     if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))  | 
866  | 0  |   xmlFree(atom->valuep);  | 
867  | 0  |     xmlFree(atom);  | 
868  | 0  | }  | 
869  |  |  | 
870  |  | /**  | 
871  |  |  * Allocate a new regexp range  | 
872  |  |  *  | 
873  |  |  * @param ctxt  the regexp parser context  | 
874  |  |  * @param atom  the original atom  | 
875  |  |  * @returns the new atom or NULL in case of error  | 
876  |  |  */  | 
877  |  | static xmlRegAtomPtr  | 
878  | 0  | xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { | 
879  | 0  |     xmlRegAtomPtr ret;  | 
880  |  | 
  | 
881  | 0  |     ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));  | 
882  | 0  |     if (ret == NULL) { | 
883  | 0  |   xmlRegexpErrMemory(ctxt);  | 
884  | 0  |   return(NULL);  | 
885  | 0  |     }  | 
886  | 0  |     memset(ret, 0, sizeof(xmlRegAtom));  | 
887  | 0  |     ret->type = atom->type;  | 
888  | 0  |     ret->quant = atom->quant;  | 
889  | 0  |     ret->min = atom->min;  | 
890  | 0  |     ret->max = atom->max;  | 
891  | 0  |     if (atom->nbRanges > 0) { | 
892  | 0  |         int i;  | 
893  |  | 
  | 
894  | 0  |         ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *  | 
895  | 0  |                                              atom->nbRanges);  | 
896  | 0  |   if (ret->ranges == NULL) { | 
897  | 0  |       xmlRegexpErrMemory(ctxt);  | 
898  | 0  |       goto error;  | 
899  | 0  |   }  | 
900  | 0  |   for (i = 0;i < atom->nbRanges;i++) { | 
901  | 0  |       ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);  | 
902  | 0  |       if (ret->ranges[i] == NULL)  | 
903  | 0  |           goto error;  | 
904  | 0  |       ret->nbRanges = i + 1;  | 
905  | 0  |   }  | 
906  | 0  |     }  | 
907  | 0  |     return(ret);  | 
908  |  |  | 
909  | 0  | error:  | 
910  | 0  |     xmlRegFreeAtom(ret);  | 
911  | 0  |     return(NULL);  | 
912  | 0  | }  | 
913  |  |  | 
914  |  | static xmlRegStatePtr  | 
915  | 0  | xmlRegNewState(xmlRegParserCtxtPtr ctxt) { | 
916  | 0  |     xmlRegStatePtr ret;  | 
917  |  | 
  | 
918  | 0  |     ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));  | 
919  | 0  |     if (ret == NULL) { | 
920  | 0  |   xmlRegexpErrMemory(ctxt);  | 
921  | 0  |   return(NULL);  | 
922  | 0  |     }  | 
923  | 0  |     memset(ret, 0, sizeof(xmlRegState));  | 
924  | 0  |     ret->type = XML_REGEXP_TRANS_STATE;  | 
925  | 0  |     ret->mark = XML_REGEXP_MARK_NORMAL;  | 
926  | 0  |     return(ret);  | 
927  | 0  | }  | 
928  |  |  | 
929  |  | /**  | 
930  |  |  * Free a regexp state  | 
931  |  |  *  | 
932  |  |  * @param state  the regexp state  | 
933  |  |  */  | 
934  |  | static void  | 
935  | 0  | xmlRegFreeState(xmlRegStatePtr state) { | 
936  | 0  |     if (state == NULL)  | 
937  | 0  |   return;  | 
938  |  |  | 
939  | 0  |     if (state->trans != NULL)  | 
940  | 0  |   xmlFree(state->trans);  | 
941  | 0  |     if (state->transTo != NULL)  | 
942  | 0  |   xmlFree(state->transTo);  | 
943  | 0  |     xmlFree(state);  | 
944  | 0  | }  | 
945  |  |  | 
946  |  | /**  | 
947  |  |  * Free a regexp parser context  | 
948  |  |  *  | 
949  |  |  * @param ctxt  the regexp parser context  | 
950  |  |  */  | 
951  |  | static void  | 
952  | 0  | xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) { | 
953  | 0  |     int i;  | 
954  | 0  |     if (ctxt == NULL)  | 
955  | 0  |   return;  | 
956  |  |  | 
957  | 0  |     if (ctxt->string != NULL)  | 
958  | 0  |   xmlFree(ctxt->string);  | 
959  | 0  |     if (ctxt->states != NULL) { | 
960  | 0  |   for (i = 0;i < ctxt->nbStates;i++)  | 
961  | 0  |       xmlRegFreeState(ctxt->states[i]);  | 
962  | 0  |   xmlFree(ctxt->states);  | 
963  | 0  |     }  | 
964  | 0  |     if (ctxt->atoms != NULL) { | 
965  | 0  |   for (i = 0;i < ctxt->nbAtoms;i++)  | 
966  | 0  |       xmlRegFreeAtom(ctxt->atoms[i]);  | 
967  | 0  |   xmlFree(ctxt->atoms);  | 
968  | 0  |     }  | 
969  | 0  |     if (ctxt->counters != NULL)  | 
970  | 0  |   xmlFree(ctxt->counters);  | 
971  | 0  |     xmlFree(ctxt);  | 
972  | 0  | }  | 
973  |  |  | 
974  |  | /************************************************************************  | 
975  |  |  *                  *  | 
976  |  |  *      Display of Data structures      *  | 
977  |  |  *                  *  | 
978  |  |  ************************************************************************/  | 
979  |  |  | 
980  |  | #ifdef DEBUG_REGEXP  | 
981  |  | static void  | 
982  |  | xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) { | 
983  |  |     switch (type) { | 
984  |  |         case XML_REGEXP_EPSILON:  | 
985  |  |       fprintf(output, "epsilon "); break;  | 
986  |  |         case XML_REGEXP_CHARVAL:  | 
987  |  |       fprintf(output, "charval "); break;  | 
988  |  |         case XML_REGEXP_RANGES:  | 
989  |  |       fprintf(output, "ranges "); break;  | 
990  |  |         case XML_REGEXP_SUBREG:  | 
991  |  |       fprintf(output, "subexpr "); break;  | 
992  |  |         case XML_REGEXP_STRING:  | 
993  |  |       fprintf(output, "string "); break;  | 
994  |  |         case XML_REGEXP_ANYCHAR:  | 
995  |  |       fprintf(output, "anychar "); break;  | 
996  |  |         case XML_REGEXP_ANYSPACE:  | 
997  |  |       fprintf(output, "anyspace "); break;  | 
998  |  |         case XML_REGEXP_NOTSPACE:  | 
999  |  |       fprintf(output, "notspace "); break;  | 
1000  |  |         case XML_REGEXP_INITNAME:  | 
1001  |  |       fprintf(output, "initname "); break;  | 
1002  |  |         case XML_REGEXP_NOTINITNAME:  | 
1003  |  |       fprintf(output, "notinitname "); break;  | 
1004  |  |         case XML_REGEXP_NAMECHAR:  | 
1005  |  |       fprintf(output, "namechar "); break;  | 
1006  |  |         case XML_REGEXP_NOTNAMECHAR:  | 
1007  |  |       fprintf(output, "notnamechar "); break;  | 
1008  |  |         case XML_REGEXP_DECIMAL:  | 
1009  |  |       fprintf(output, "decimal "); break;  | 
1010  |  |         case XML_REGEXP_NOTDECIMAL:  | 
1011  |  |       fprintf(output, "notdecimal "); break;  | 
1012  |  |         case XML_REGEXP_REALCHAR:  | 
1013  |  |       fprintf(output, "realchar "); break;  | 
1014  |  |         case XML_REGEXP_NOTREALCHAR:  | 
1015  |  |       fprintf(output, "notrealchar "); break;  | 
1016  |  |         case XML_REGEXP_LETTER:  | 
1017  |  |             fprintf(output, "LETTER "); break;  | 
1018  |  |         case XML_REGEXP_LETTER_UPPERCASE:  | 
1019  |  |             fprintf(output, "LETTER_UPPERCASE "); break;  | 
1020  |  |         case XML_REGEXP_LETTER_LOWERCASE:  | 
1021  |  |             fprintf(output, "LETTER_LOWERCASE "); break;  | 
1022  |  |         case XML_REGEXP_LETTER_TITLECASE:  | 
1023  |  |             fprintf(output, "LETTER_TITLECASE "); break;  | 
1024  |  |         case XML_REGEXP_LETTER_MODIFIER:  | 
1025  |  |             fprintf(output, "LETTER_MODIFIER "); break;  | 
1026  |  |         case XML_REGEXP_LETTER_OTHERS:  | 
1027  |  |             fprintf(output, "LETTER_OTHERS "); break;  | 
1028  |  |         case XML_REGEXP_MARK:  | 
1029  |  |             fprintf(output, "MARK "); break;  | 
1030  |  |         case XML_REGEXP_MARK_NONSPACING:  | 
1031  |  |             fprintf(output, "MARK_NONSPACING "); break;  | 
1032  |  |         case XML_REGEXP_MARK_SPACECOMBINING:  | 
1033  |  |             fprintf(output, "MARK_SPACECOMBINING "); break;  | 
1034  |  |         case XML_REGEXP_MARK_ENCLOSING:  | 
1035  |  |             fprintf(output, "MARK_ENCLOSING "); break;  | 
1036  |  |         case XML_REGEXP_NUMBER:  | 
1037  |  |             fprintf(output, "NUMBER "); break;  | 
1038  |  |         case XML_REGEXP_NUMBER_DECIMAL:  | 
1039  |  |             fprintf(output, "NUMBER_DECIMAL "); break;  | 
1040  |  |         case XML_REGEXP_NUMBER_LETTER:  | 
1041  |  |             fprintf(output, "NUMBER_LETTER "); break;  | 
1042  |  |         case XML_REGEXP_NUMBER_OTHERS:  | 
1043  |  |             fprintf(output, "NUMBER_OTHERS "); break;  | 
1044  |  |         case XML_REGEXP_PUNCT:  | 
1045  |  |             fprintf(output, "PUNCT "); break;  | 
1046  |  |         case XML_REGEXP_PUNCT_CONNECTOR:  | 
1047  |  |             fprintf(output, "PUNCT_CONNECTOR "); break;  | 
1048  |  |         case XML_REGEXP_PUNCT_DASH:  | 
1049  |  |             fprintf(output, "PUNCT_DASH "); break;  | 
1050  |  |         case XML_REGEXP_PUNCT_OPEN:  | 
1051  |  |             fprintf(output, "PUNCT_OPEN "); break;  | 
1052  |  |         case XML_REGEXP_PUNCT_CLOSE:  | 
1053  |  |             fprintf(output, "PUNCT_CLOSE "); break;  | 
1054  |  |         case XML_REGEXP_PUNCT_INITQUOTE:  | 
1055  |  |             fprintf(output, "PUNCT_INITQUOTE "); break;  | 
1056  |  |         case XML_REGEXP_PUNCT_FINQUOTE:  | 
1057  |  |             fprintf(output, "PUNCT_FINQUOTE "); break;  | 
1058  |  |         case XML_REGEXP_PUNCT_OTHERS:  | 
1059  |  |             fprintf(output, "PUNCT_OTHERS "); break;  | 
1060  |  |         case XML_REGEXP_SEPAR:  | 
1061  |  |             fprintf(output, "SEPAR "); break;  | 
1062  |  |         case XML_REGEXP_SEPAR_SPACE:  | 
1063  |  |             fprintf(output, "SEPAR_SPACE "); break;  | 
1064  |  |         case XML_REGEXP_SEPAR_LINE:  | 
1065  |  |             fprintf(output, "SEPAR_LINE "); break;  | 
1066  |  |         case XML_REGEXP_SEPAR_PARA:  | 
1067  |  |             fprintf(output, "SEPAR_PARA "); break;  | 
1068  |  |         case XML_REGEXP_SYMBOL:  | 
1069  |  |             fprintf(output, "SYMBOL "); break;  | 
1070  |  |         case XML_REGEXP_SYMBOL_MATH:  | 
1071  |  |             fprintf(output, "SYMBOL_MATH "); break;  | 
1072  |  |         case XML_REGEXP_SYMBOL_CURRENCY:  | 
1073  |  |             fprintf(output, "SYMBOL_CURRENCY "); break;  | 
1074  |  |         case XML_REGEXP_SYMBOL_MODIFIER:  | 
1075  |  |             fprintf(output, "SYMBOL_MODIFIER "); break;  | 
1076  |  |         case XML_REGEXP_SYMBOL_OTHERS:  | 
1077  |  |             fprintf(output, "SYMBOL_OTHERS "); break;  | 
1078  |  |         case XML_REGEXP_OTHER:  | 
1079  |  |             fprintf(output, "OTHER "); break;  | 
1080  |  |         case XML_REGEXP_OTHER_CONTROL:  | 
1081  |  |             fprintf(output, "OTHER_CONTROL "); break;  | 
1082  |  |         case XML_REGEXP_OTHER_FORMAT:  | 
1083  |  |             fprintf(output, "OTHER_FORMAT "); break;  | 
1084  |  |         case XML_REGEXP_OTHER_PRIVATE:  | 
1085  |  |             fprintf(output, "OTHER_PRIVATE "); break;  | 
1086  |  |         case XML_REGEXP_OTHER_NA:  | 
1087  |  |             fprintf(output, "OTHER_NA "); break;  | 
1088  |  |         case XML_REGEXP_BLOCK_NAME:  | 
1089  |  |       fprintf(output, "BLOCK "); break;  | 
1090  |  |     }  | 
1091  |  | }  | 
1092  |  |  | 
1093  |  | static void  | 
1094  |  | xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) { | 
1095  |  |     switch (type) { | 
1096  |  |         case XML_REGEXP_QUANT_EPSILON:  | 
1097  |  |       fprintf(output, "epsilon "); break;  | 
1098  |  |         case XML_REGEXP_QUANT_ONCE:  | 
1099  |  |       fprintf(output, "once "); break;  | 
1100  |  |         case XML_REGEXP_QUANT_OPT:  | 
1101  |  |       fprintf(output, "? "); break;  | 
1102  |  |         case XML_REGEXP_QUANT_MULT:  | 
1103  |  |       fprintf(output, "* "); break;  | 
1104  |  |         case XML_REGEXP_QUANT_PLUS:  | 
1105  |  |       fprintf(output, "+ "); break;  | 
1106  |  |   case XML_REGEXP_QUANT_RANGE:  | 
1107  |  |       fprintf(output, "range "); break;  | 
1108  |  |   case XML_REGEXP_QUANT_ONCEONLY:  | 
1109  |  |       fprintf(output, "onceonly "); break;  | 
1110  |  |   case XML_REGEXP_QUANT_ALL:  | 
1111  |  |       fprintf(output, "all "); break;  | 
1112  |  |     }  | 
1113  |  | }  | 
1114  |  | static void  | 
1115  |  | xmlRegPrintRange(FILE *output, xmlRegRangePtr range) { | 
1116  |  |     fprintf(output, "  range: ");  | 
1117  |  |     if (range->neg)  | 
1118  |  |   fprintf(output, "negative ");  | 
1119  |  |     xmlRegPrintAtomType(output, range->type);  | 
1120  |  |     fprintf(output, "%c - %c\n", range->start, range->end);  | 
1121  |  | }  | 
1122  |  |  | 
1123  |  | static void  | 
1124  |  | xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) { | 
1125  |  |     fprintf(output, " atom: ");  | 
1126  |  |     if (atom == NULL) { | 
1127  |  |   fprintf(output, "NULL\n");  | 
1128  |  |   return;  | 
1129  |  |     }  | 
1130  |  |     if (atom->neg)  | 
1131  |  |         fprintf(output, "not ");  | 
1132  |  |     xmlRegPrintAtomType(output, atom->type);  | 
1133  |  |     xmlRegPrintQuantType(output, atom->quant);  | 
1134  |  |     if (atom->quant == XML_REGEXP_QUANT_RANGE)  | 
1135  |  |   fprintf(output, "%d-%d ", atom->min, atom->max);  | 
1136  |  |     if (atom->type == XML_REGEXP_STRING)  | 
1137  |  |   fprintf(output, "'%s' ", (char *) atom->valuep);  | 
1138  |  |     if (atom->type == XML_REGEXP_CHARVAL)  | 
1139  |  |   fprintf(output, "char %c\n", atom->codepoint);  | 
1140  |  |     else if (atom->type == XML_REGEXP_RANGES) { | 
1141  |  |   int i;  | 
1142  |  |   fprintf(output, "%d entries\n", atom->nbRanges);  | 
1143  |  |   for (i = 0; i < atom->nbRanges;i++)  | 
1144  |  |       xmlRegPrintRange(output, atom->ranges[i]);  | 
1145  |  |     } else { | 
1146  |  |   fprintf(output, "\n");  | 
1147  |  |     }  | 
1148  |  | }  | 
1149  |  |  | 
1150  |  | static void  | 
1151  |  | xmlRegPrintAtomCompact(FILE* output, xmlRegexpPtr regexp, int atom)  | 
1152  |  | { | 
1153  |  |     if (output == NULL || regexp == NULL || atom < 0 ||   | 
1154  |  |         atom >= regexp->nbstrings) { | 
1155  |  |         return;  | 
1156  |  |     }  | 
1157  |  |     fprintf(output, " atom: ");  | 
1158  |  |  | 
1159  |  |     xmlRegPrintAtomType(output, XML_REGEXP_STRING);  | 
1160  |  |     xmlRegPrintQuantType(output, XML_REGEXP_QUANT_ONCE);  | 
1161  |  |     fprintf(output, "'%s' ", (char *) regexp->stringMap[atom]);  | 
1162  |  |     fprintf(output, "\n");  | 
1163  |  | }  | 
1164  |  |  | 
1165  |  | static void  | 
1166  |  | xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) { | 
1167  |  |     fprintf(output, "  trans: ");  | 
1168  |  |     if (trans == NULL) { | 
1169  |  |   fprintf(output, "NULL\n");  | 
1170  |  |   return;  | 
1171  |  |     }  | 
1172  |  |     if (trans->to < 0) { | 
1173  |  |   fprintf(output, "removed\n");  | 
1174  |  |   return;  | 
1175  |  |     }  | 
1176  |  |     if (trans->nd != 0) { | 
1177  |  |   if (trans->nd == 2)  | 
1178  |  |       fprintf(output, "last not determinist, ");  | 
1179  |  |   else  | 
1180  |  |       fprintf(output, "not determinist, ");  | 
1181  |  |     }  | 
1182  |  |     if (trans->counter >= 0) { | 
1183  |  |   fprintf(output, "counted %d, ", trans->counter);  | 
1184  |  |     }  | 
1185  |  |     if (trans->count == REGEXP_ALL_COUNTER) { | 
1186  |  |   fprintf(output, "all transition, ");  | 
1187  |  |     } else if (trans->count >= 0) { | 
1188  |  |   fprintf(output, "count based %d, ", trans->count);  | 
1189  |  |     }  | 
1190  |  |     if (trans->atom == NULL) { | 
1191  |  |   fprintf(output, "epsilon to %d\n", trans->to);  | 
1192  |  |   return;  | 
1193  |  |     }  | 
1194  |  |     if (trans->atom->type == XML_REGEXP_CHARVAL)  | 
1195  |  |   fprintf(output, "char %c ", trans->atom->codepoint);  | 
1196  |  |     fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);  | 
1197  |  | }  | 
1198  |  |  | 
1199  |  | static void  | 
1200  |  | xmlRegPrintTransCompact(  | 
1201  |  |     FILE* output,  | 
1202  |  |     xmlRegexpPtr regexp,  | 
1203  |  |     int state,  | 
1204  |  |     int atom  | 
1205  |  | )  | 
1206  |  | { | 
1207  |  |     int target;  | 
1208  |  |     if (output == NULL || regexp == NULL || regexp->compact == NULL ||   | 
1209  |  |         state < 0 || atom < 0) { | 
1210  |  |         return;  | 
1211  |  |     }  | 
1212  |  |     target = regexp->compact[state * (regexp->nbstrings + 1) + atom + 1];  | 
1213  |  |     fprintf(output, "  trans: ");  | 
1214  |  |  | 
1215  |  |     /* TODO maybe skip 'removed' transitions, because they actually never existed */  | 
1216  |  |     if (target < 0) { | 
1217  |  |         fprintf(output, "removed\n");  | 
1218  |  |         return;  | 
1219  |  |     }  | 
1220  |  |  | 
1221  |  |     /* We will ignore most of the attributes used in xmlRegPrintTrans,  | 
1222  |  |      * since the compact form is much simpler and uses only a part of the   | 
1223  |  |      * features provided by the libxml2 regexp libary   | 
1224  |  |      * (no rollbacks, counters etc.) */  | 
1225  |  |  | 
1226  |  |     /* Compared to the standard representation, an automata written using the  | 
1227  |  |      * compact form will ALWAYS be deterministic!   | 
1228  |  |      * From    xmlRegPrintTrans:  | 
1229  |  |          if (trans->nd != 0) { | 
1230  |  |             ...  | 
1231  |  |       * trans->nd will always be 0! */  | 
1232  |  |  | 
1233  |  |     /* In automata represented in compact form, the transitions will not use  | 
1234  |  |      * counters.   | 
1235  |  |      * From    xmlRegPrintTrans:  | 
1236  |  |          if (trans->counter >= 0) { | 
1237  |  |             ...  | 
1238  |  |      * regexp->counters == NULL, so trans->counter < 0 */  | 
1239  |  |  | 
1240  |  |     /* In compact form, we won't use */  | 
1241  |  |  | 
1242  |  |     /* An automata in the compact representation will always use string   | 
1243  |  |      * atoms.   | 
1244  |  |      * From    xmlRegPrintTrans:  | 
1245  |  |          if (trans->atom->type == XML_REGEXP_CHARVAL)  | 
1246  |  |              ...  | 
1247  |  |      * trans->atom != NULL && trans->atom->type == XML_REGEXP_STRING */  | 
1248  |  |  | 
1249  |  |     fprintf(output, "atom %d, to %d\n", atom, target);  | 
1250  |  | }  | 
1251  |  |  | 
1252  |  | static void  | 
1253  |  | xmlRegPrintState(FILE *output, xmlRegStatePtr state) { | 
1254  |  |     int i;  | 
1255  |  |  | 
1256  |  |     fprintf(output, " state: ");  | 
1257  |  |     if (state == NULL) { | 
1258  |  |   fprintf(output, "NULL\n");  | 
1259  |  |   return;  | 
1260  |  |     }  | 
1261  |  |     if (state->type == XML_REGEXP_START_STATE)  | 
1262  |  |   fprintf(output, "START ");  | 
1263  |  |     if (state->type == XML_REGEXP_FINAL_STATE)  | 
1264  |  |   fprintf(output, "FINAL ");  | 
1265  |  |  | 
1266  |  |     fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);  | 
1267  |  |     for (i = 0;i < state->nbTrans; i++) { | 
1268  |  |   xmlRegPrintTrans(output, &(state->trans[i]));  | 
1269  |  |     }  | 
1270  |  | }  | 
1271  |  |  | 
1272  |  | static void  | 
1273  |  | xmlRegPrintStateCompact(FILE* output, xmlRegexpPtr regexp, int state)  | 
1274  |  | { | 
1275  |  |     int nbTrans = 0;  | 
1276  |  |     int i;  | 
1277  |  |     int target;  | 
1278  |  |     xmlRegStateType stateType;  | 
1279  |  |  | 
1280  |  |     if (output == NULL || regexp == NULL || regexp->compact == NULL ||  | 
1281  |  |         state < 0) { | 
1282  |  |         return;  | 
1283  |  |     }  | 
1284  |  |       | 
1285  |  |     fprintf(output, " state: ");  | 
1286  |  |  | 
1287  |  |     stateType = regexp->compact[state * (regexp->nbstrings + 1)];  | 
1288  |  |     if (stateType == XML_REGEXP_START_STATE) { | 
1289  |  |         fprintf(output, " START ");  | 
1290  |  |     }  | 
1291  |  |       | 
1292  |  |     if (stateType == XML_REGEXP_FINAL_STATE) { | 
1293  |  |         fprintf(output, " FINAL ");  | 
1294  |  |     }  | 
1295  |  |  | 
1296  |  |     /* Print all atoms. */  | 
1297  |  |     for (i = 0; i < regexp->nbstrings; i++) { | 
1298  |  |         xmlRegPrintAtomCompact(output, regexp, i);  | 
1299  |  |     }  | 
1300  |  |  | 
1301  |  |     /* Count all the transitions from the compact representation. */  | 
1302  |  |     for (i = 0; i < regexp->nbstrings; i++) { | 
1303  |  |         target = regexp->compact[state * (regexp->nbstrings + 1) + i + 1];  | 
1304  |  |         if (target > 0 && target <= regexp->nbstates &&   | 
1305  |  |             regexp->compact[(target - 1) * (regexp->nbstrings + 1)] ==   | 
1306  |  |             XML_REGEXP_SINK_STATE) { | 
1307  |  |                 nbTrans++;  | 
1308  |  |             }  | 
1309  |  |     }  | 
1310  |  |  | 
1311  |  |     fprintf(output, "%d, %d transitions:\n", state, nbTrans);  | 
1312  |  |       | 
1313  |  |     /* Print all transitions */  | 
1314  |  |     for (i = 0; i < regexp->nbstrings; i++) { | 
1315  |  |         xmlRegPrintTransCompact(output, regexp, state, i);  | 
1316  |  |     }  | 
1317  |  | }  | 
1318  |  |  | 
1319  |  | /*  | 
1320  |  |  * @param output  an output stream  | 
1321  |  |  * @param regexp  the regexp instance  | 
1322  |  |  *   | 
1323  |  |  * Print the compact representation of a regexp, in the same fashion as the  | 
1324  |  |  * public #xmlRegexpPrint function.  | 
1325  |  |  */  | 
1326  |  | static void  | 
1327  |  | xmlRegPrintCompact(FILE* output, xmlRegexpPtr regexp)  | 
1328  |  | { | 
1329  |  |     int i;  | 
1330  |  |     if (output == NULL || regexp == NULL || regexp->compact == NULL) { | 
1331  |  |         return;  | 
1332  |  |     }  | 
1333  |  |       | 
1334  |  |     fprintf(output, "'%s' ", regexp->string);  | 
1335  |  |  | 
1336  |  |     fprintf(output, "%d atoms:\n", regexp->nbstrings);  | 
1337  |  |     fprintf(output, "\n");  | 
1338  |  |     for (i = 0; i < regexp->nbstrings; i++) { | 
1339  |  |         fprintf(output, " %02d ", i);  | 
1340  |  |         xmlRegPrintAtomCompact(output, regexp, i);  | 
1341  |  |     }  | 
1342  |  |  | 
1343  |  |     fprintf(output, "%d states:", regexp->nbstates);  | 
1344  |  |     fprintf(output, "\n");  | 
1345  |  |     for (i = 0; i < regexp->nbstates; i++) { | 
1346  |  |         xmlRegPrintStateCompact(output, regexp, i);  | 
1347  |  |     }  | 
1348  |  |  | 
1349  |  |     fprintf(output, "%d counters:\n", 0);  | 
1350  |  | }  | 
1351  |  |  | 
1352  |  | static void  | 
1353  |  | xmlRegexpPrintInternal(FILE *output, xmlRegexpPtr regexp) { | 
1354  |  |     int i;  | 
1355  |  |  | 
1356  |  |     if (output == NULL)  | 
1357  |  |         return;  | 
1358  |  |     fprintf(output, " regexp: ");  | 
1359  |  |     if (regexp == NULL) { | 
1360  |  |   fprintf(output, "NULL\n");  | 
1361  |  |   return;  | 
1362  |  |     }  | 
1363  |  |   if (regexp->compact) { | 
1364  |  |     xmlRegPrintCompact(output, regexp);  | 
1365  |  |     return;  | 
1366  |  |   }  | 
1367  |  |  | 
1368  |  |     fprintf(output, "'%s' ", regexp->string);  | 
1369  |  |     fprintf(output, "\n");  | 
1370  |  |     fprintf(output, "%d atoms:\n", regexp->nbAtoms);  | 
1371  |  |     for (i = 0;i < regexp->nbAtoms; i++) { | 
1372  |  |   fprintf(output, " %02d ", i);  | 
1373  |  |   xmlRegPrintAtom(output, regexp->atoms[i]);  | 
1374  |  |     }  | 
1375  |  |     fprintf(output, "%d states:", regexp->nbStates);  | 
1376  |  |     fprintf(output, "\n");  | 
1377  |  |     for (i = 0;i < regexp->nbStates; i++) { | 
1378  |  |   xmlRegPrintState(output, regexp->states[i]);  | 
1379  |  |     }  | 
1380  |  |     fprintf(output, "%d counters:\n", regexp->nbCounters);  | 
1381  |  |     for (i = 0;i < regexp->nbCounters; i++) { | 
1382  |  |   fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,  | 
1383  |  |                                     regexp->counters[i].max);  | 
1384  |  |     }  | 
1385  |  | }  | 
1386  |  | #endif /* DEBUG_REGEXP */  | 
1387  |  |  | 
1388  |  | /************************************************************************  | 
1389  |  |  *                  *  | 
1390  |  |  *     Finite Automata structures manipulations   *  | 
1391  |  |  *                  *  | 
1392  |  |  ************************************************************************/  | 
1393  |  |  | 
1394  |  | static xmlRegRangePtr  | 
1395  |  | xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,  | 
1396  |  |              int neg, xmlRegAtomType type, int start, int end,  | 
1397  | 0  |        xmlChar *blockName) { | 
1398  | 0  |     xmlRegRangePtr range;  | 
1399  |  | 
  | 
1400  | 0  |     if (atom == NULL) { | 
1401  | 0  |   ERROR("add range: atom is NULL"); | 
1402  | 0  |   return(NULL);  | 
1403  | 0  |     }  | 
1404  | 0  |     if (atom->type != XML_REGEXP_RANGES) { | 
1405  | 0  |   ERROR("add range: atom is not ranges"); | 
1406  | 0  |   return(NULL);  | 
1407  | 0  |     }  | 
1408  | 0  |     if (atom->nbRanges >= atom->maxRanges) { | 
1409  | 0  |   xmlRegRangePtr *tmp;  | 
1410  | 0  |         int newSize;  | 
1411  |  | 
  | 
1412  | 0  |         newSize = xmlGrowCapacity(atom->maxRanges, sizeof(tmp[0]),  | 
1413  | 0  |                                   4, XML_MAX_ITEMS);  | 
1414  | 0  |         if (newSize < 0) { | 
1415  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1416  | 0  |       return(NULL);  | 
1417  | 0  |         }  | 
1418  | 0  |   tmp = xmlRealloc(atom->ranges, newSize * sizeof(tmp[0]));  | 
1419  | 0  |   if (tmp == NULL) { | 
1420  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1421  | 0  |       return(NULL);  | 
1422  | 0  |   }  | 
1423  | 0  |   atom->ranges = tmp;  | 
1424  | 0  |   atom->maxRanges = newSize;  | 
1425  | 0  |     }  | 
1426  | 0  |     range = xmlRegNewRange(ctxt, neg, type, start, end);  | 
1427  | 0  |     if (range == NULL)  | 
1428  | 0  |   return(NULL);  | 
1429  | 0  |     range->blockName = blockName;  | 
1430  | 0  |     atom->ranges[atom->nbRanges++] = range;  | 
1431  |  | 
  | 
1432  | 0  |     return(range);  | 
1433  | 0  | }  | 
1434  |  |  | 
1435  |  | static int  | 
1436  | 0  | xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { | 
1437  | 0  |     if (ctxt->nbCounters >= ctxt->maxCounters) { | 
1438  | 0  |   xmlRegCounter *tmp;  | 
1439  | 0  |         int newSize;  | 
1440  |  | 
  | 
1441  | 0  |         newSize = xmlGrowCapacity(ctxt->maxCounters, sizeof(tmp[0]),  | 
1442  | 0  |                                   4, XML_MAX_ITEMS);  | 
1443  | 0  |   if (newSize < 0) { | 
1444  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1445  | 0  |       return(-1);  | 
1446  | 0  |   }  | 
1447  | 0  |   tmp = xmlRealloc(ctxt->counters, newSize * sizeof(tmp[0]));  | 
1448  | 0  |   if (tmp == NULL) { | 
1449  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1450  | 0  |       return(-1);  | 
1451  | 0  |   }  | 
1452  | 0  |   ctxt->counters = tmp;  | 
1453  | 0  |   ctxt->maxCounters = newSize;  | 
1454  | 0  |     }  | 
1455  | 0  |     ctxt->counters[ctxt->nbCounters].min = -1;  | 
1456  | 0  |     ctxt->counters[ctxt->nbCounters].max = -1;  | 
1457  | 0  |     return(ctxt->nbCounters++);  | 
1458  | 0  | }  | 
1459  |  |  | 
1460  |  | static int  | 
1461  | 0  | xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { | 
1462  | 0  |     if (atom == NULL) { | 
1463  | 0  |   ERROR("atom push: atom is NULL"); | 
1464  | 0  |   return(-1);  | 
1465  | 0  |     }  | 
1466  | 0  |     if (ctxt->nbAtoms >= ctxt->maxAtoms) { | 
1467  | 0  |   xmlRegAtomPtr *tmp;  | 
1468  | 0  |         int newSize;  | 
1469  |  | 
  | 
1470  | 0  |         newSize = xmlGrowCapacity(ctxt->maxAtoms, sizeof(tmp[0]),  | 
1471  | 0  |                                   4, XML_MAX_ITEMS);  | 
1472  | 0  |   if (newSize < 0) { | 
1473  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1474  | 0  |       return(-1);  | 
1475  | 0  |   }  | 
1476  | 0  |   tmp = xmlRealloc(ctxt->atoms, newSize * sizeof(tmp[0]));  | 
1477  | 0  |   if (tmp == NULL) { | 
1478  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1479  | 0  |       return(-1);  | 
1480  | 0  |   }  | 
1481  | 0  |   ctxt->atoms = tmp;  | 
1482  | 0  |         ctxt->maxAtoms = newSize;  | 
1483  | 0  |     }  | 
1484  | 0  |     atom->no = ctxt->nbAtoms;  | 
1485  | 0  |     ctxt->atoms[ctxt->nbAtoms++] = atom;  | 
1486  | 0  |     return(0);  | 
1487  | 0  | }  | 
1488  |  |  | 
1489  |  | static void  | 
1490  |  | xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,  | 
1491  | 0  |                       int from) { | 
1492  | 0  |     if (target->nbTransTo >= target->maxTransTo) { | 
1493  | 0  |   int *tmp;  | 
1494  | 0  |         int newSize;  | 
1495  |  | 
  | 
1496  | 0  |         newSize = xmlGrowCapacity(target->maxTransTo, sizeof(tmp[0]),  | 
1497  | 0  |                                   8, XML_MAX_ITEMS);  | 
1498  | 0  |   if (newSize < 0) { | 
1499  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1500  | 0  |       return;  | 
1501  | 0  |   }  | 
1502  | 0  |   tmp = xmlRealloc(target->transTo, newSize * sizeof(tmp[0]));  | 
1503  | 0  |   if (tmp == NULL) { | 
1504  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1505  | 0  |       return;  | 
1506  | 0  |   }  | 
1507  | 0  |   target->transTo = tmp;  | 
1508  | 0  |   target->maxTransTo = newSize;  | 
1509  | 0  |     }  | 
1510  | 0  |     target->transTo[target->nbTransTo] = from;  | 
1511  | 0  |     target->nbTransTo++;  | 
1512  | 0  | }  | 
1513  |  |  | 
1514  |  | static void  | 
1515  |  | xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,  | 
1516  |  |               xmlRegAtomPtr atom, xmlRegStatePtr target,  | 
1517  | 0  |         int counter, int count) { | 
1518  |  | 
  | 
1519  | 0  |     int nrtrans;  | 
1520  |  | 
  | 
1521  | 0  |     if (state == NULL) { | 
1522  | 0  |   ERROR("add state: state is NULL"); | 
1523  | 0  |   return;  | 
1524  | 0  |     }  | 
1525  | 0  |     if (target == NULL) { | 
1526  | 0  |   ERROR("add state: target is NULL"); | 
1527  | 0  |   return;  | 
1528  | 0  |     }  | 
1529  |  |     /*  | 
1530  |  |      * Other routines follow the philosophy 'When in doubt, add a transition'  | 
1531  |  |      * so we check here whether such a transition is already present and, if  | 
1532  |  |      * so, silently ignore this request.  | 
1533  |  |      */  | 
1534  |  |  | 
1535  | 0  |     for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) { | 
1536  | 0  |   xmlRegTransPtr trans = &(state->trans[nrtrans]);  | 
1537  | 0  |   if ((trans->atom == atom) &&  | 
1538  | 0  |       (trans->to == target->no) &&  | 
1539  | 0  |       (trans->counter == counter) &&  | 
1540  | 0  |       (trans->count == count)) { | 
1541  | 0  |       return;  | 
1542  | 0  |   }  | 
1543  | 0  |     }  | 
1544  |  |  | 
1545  | 0  |     if (state->nbTrans >= state->maxTrans) { | 
1546  | 0  |   xmlRegTrans *tmp;  | 
1547  | 0  |         int newSize;  | 
1548  |  | 
  | 
1549  | 0  |         newSize = xmlGrowCapacity(state->maxTrans, sizeof(tmp[0]),  | 
1550  | 0  |                                   8, XML_MAX_ITEMS);  | 
1551  | 0  |   if (newSize < 0) { | 
1552  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1553  | 0  |       return;  | 
1554  | 0  |   }  | 
1555  | 0  |   tmp = xmlRealloc(state->trans, newSize * sizeof(tmp[0]));  | 
1556  | 0  |   if (tmp == NULL) { | 
1557  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1558  | 0  |       return;  | 
1559  | 0  |   }  | 
1560  | 0  |   state->trans = tmp;  | 
1561  | 0  |   state->maxTrans = newSize;  | 
1562  | 0  |     }  | 
1563  |  |  | 
1564  | 0  |     state->trans[state->nbTrans].atom = atom;  | 
1565  | 0  |     state->trans[state->nbTrans].to = target->no;  | 
1566  | 0  |     state->trans[state->nbTrans].counter = counter;  | 
1567  | 0  |     state->trans[state->nbTrans].count = count;  | 
1568  | 0  |     state->trans[state->nbTrans].nd = 0;  | 
1569  | 0  |     state->nbTrans++;  | 
1570  | 0  |     xmlRegStateAddTransTo(ctxt, target, state->no);  | 
1571  | 0  | }  | 
1572  |  |  | 
1573  |  | static xmlRegStatePtr  | 
1574  | 0  | xmlRegStatePush(xmlRegParserCtxtPtr ctxt) { | 
1575  | 0  |     xmlRegStatePtr state;  | 
1576  |  | 
  | 
1577  | 0  |     if (ctxt->nbStates >= ctxt->maxStates) { | 
1578  | 0  |   xmlRegStatePtr *tmp;  | 
1579  | 0  |         int newSize;  | 
1580  |  | 
  | 
1581  | 0  |         newSize = xmlGrowCapacity(ctxt->maxStates, sizeof(tmp[0]),  | 
1582  | 0  |                                   4, XML_MAX_ITEMS);  | 
1583  | 0  |   if (newSize < 0) { | 
1584  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1585  | 0  |       return(NULL);  | 
1586  | 0  |   }  | 
1587  | 0  |   tmp = xmlRealloc(ctxt->states, newSize * sizeof(tmp[0]));  | 
1588  | 0  |   if (tmp == NULL) { | 
1589  | 0  |       xmlRegexpErrMemory(ctxt);  | 
1590  | 0  |       return(NULL);  | 
1591  | 0  |   }  | 
1592  | 0  |   ctxt->states = tmp;  | 
1593  | 0  |   ctxt->maxStates = newSize;  | 
1594  | 0  |     }  | 
1595  |  |  | 
1596  | 0  |     state = xmlRegNewState(ctxt);  | 
1597  | 0  |     if (state == NULL)  | 
1598  | 0  |         return(NULL);  | 
1599  |  |  | 
1600  | 0  |     state->no = ctxt->nbStates;  | 
1601  | 0  |     ctxt->states[ctxt->nbStates++] = state;  | 
1602  |  | 
  | 
1603  | 0  |     return(state);  | 
1604  | 0  | }  | 
1605  |  |  | 
1606  |  | /**  | 
1607  |  |  * @param ctxt  a regexp parser context  | 
1608  |  |  * @param from  the from state  | 
1609  |  |  * @param to  the target state or NULL for building a new one  | 
1610  |  |  * @param lax    | 
1611  |  |  */  | 
1612  |  | static int  | 
1613  |  | xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,  | 
1614  |  |          xmlRegStatePtr from, xmlRegStatePtr to,  | 
1615  | 0  |          int lax) { | 
1616  | 0  |     if (to == NULL) { | 
1617  | 0  |   to = xmlRegStatePush(ctxt);  | 
1618  | 0  |         if (to == NULL)  | 
1619  | 0  |             return(-1);  | 
1620  | 0  |   ctxt->state = to;  | 
1621  | 0  |     }  | 
1622  | 0  |     if (lax)  | 
1623  | 0  |   xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);  | 
1624  | 0  |     else  | 
1625  | 0  |   xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);  | 
1626  | 0  |     return(0);  | 
1627  | 0  | }  | 
1628  |  |  | 
1629  |  | /**  | 
1630  |  |  * @param ctxt  a regexp parser context  | 
1631  |  |  * @param from  the from state  | 
1632  |  |  * @param to  the target state or NULL for building a new one  | 
1633  |  |  */  | 
1634  |  | static int  | 
1635  |  | xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,  | 
1636  | 0  |              xmlRegStatePtr from, xmlRegStatePtr to) { | 
1637  | 0  |     if (to == NULL) { | 
1638  | 0  |   to = xmlRegStatePush(ctxt);  | 
1639  | 0  |         if (to == NULL)  | 
1640  | 0  |             return(-1);  | 
1641  | 0  |   ctxt->state = to;  | 
1642  | 0  |     }  | 
1643  | 0  |     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);  | 
1644  | 0  |     return(0);  | 
1645  | 0  | }  | 
1646  |  |  | 
1647  |  | /**  | 
1648  |  |  * @param ctxt  a regexp parser context  | 
1649  |  |  * @param from  the from state  | 
1650  |  |  * @param to  the target state or NULL for building a new one  | 
1651  |  |  * @param counter  the counter for that transition  | 
1652  |  |  */  | 
1653  |  | static int  | 
1654  |  | xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,  | 
1655  | 0  |       xmlRegStatePtr from, xmlRegStatePtr to, int counter) { | 
1656  | 0  |     if (to == NULL) { | 
1657  | 0  |   to = xmlRegStatePush(ctxt);  | 
1658  | 0  |         if (to == NULL)  | 
1659  | 0  |             return(-1);  | 
1660  | 0  |   ctxt->state = to;  | 
1661  | 0  |     }  | 
1662  | 0  |     xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);  | 
1663  | 0  |     return(0);  | 
1664  | 0  | }  | 
1665  |  |  | 
1666  |  | /**  | 
1667  |  |  * @param ctxt  a regexp parser context  | 
1668  |  |  * @param from  the from state  | 
1669  |  |  * @param to  the target state or NULL for building a new one  | 
1670  |  |  * @param counter  the counter for that transition  | 
1671  |  |  */  | 
1672  |  | static int  | 
1673  |  | xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,  | 
1674  | 0  |       xmlRegStatePtr from, xmlRegStatePtr to, int counter) { | 
1675  | 0  |     if (to == NULL) { | 
1676  | 0  |   to = xmlRegStatePush(ctxt);  | 
1677  | 0  |         if (to == NULL)  | 
1678  | 0  |             return(-1);  | 
1679  | 0  |   ctxt->state = to;  | 
1680  | 0  |     }  | 
1681  | 0  |     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);  | 
1682  | 0  |     return(0);  | 
1683  | 0  | }  | 
1684  |  |  | 
1685  |  | /**  | 
1686  |  |  * @param ctxt  a regexp parser context  | 
1687  |  |  * @param from  the from state  | 
1688  |  |  * @param to  the target state or NULL for building a new one  | 
1689  |  |  * @param atom  the atom generating the transition  | 
1690  |  |  * @returns 0 if success and -1 in case of error.  | 
1691  |  |  */  | 
1692  |  | static int  | 
1693  |  | xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,  | 
1694  | 0  |                    xmlRegStatePtr to, xmlRegAtomPtr atom) { | 
1695  | 0  |     xmlRegStatePtr end;  | 
1696  | 0  |     int nullable = 0;  | 
1697  |  | 
  | 
1698  | 0  |     if (atom == NULL) { | 
1699  | 0  |   ERROR("generate transition: atom == NULL"); | 
1700  | 0  |   return(-1);  | 
1701  | 0  |     }  | 
1702  | 0  |     if (atom->type == XML_REGEXP_SUBREG) { | 
1703  |  |   /*  | 
1704  |  |    * this is a subexpression handling one should not need to  | 
1705  |  |    * create a new node except for XML_REGEXP_QUANT_RANGE.  | 
1706  |  |    */  | 
1707  | 0  |   if ((to != NULL) && (atom->stop != to) &&  | 
1708  | 0  |       (atom->quant != XML_REGEXP_QUANT_RANGE)) { | 
1709  |  |       /*  | 
1710  |  |        * Generate an epsilon transition to link to the target  | 
1711  |  |        */  | 
1712  | 0  |       xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);  | 
1713  |  | #ifdef DV  | 
1714  |  |   } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&  | 
1715  |  |        (atom->quant != XML_REGEXP_QUANT_ONCE)) { | 
1716  |  |       to = xmlRegStatePush(ctxt, to);  | 
1717  |  |             if (to == NULL)  | 
1718  |  |                 return(-1);  | 
1719  |  |       ctxt->state = to;  | 
1720  |  |       xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);  | 
1721  |  | #endif  | 
1722  | 0  |   }  | 
1723  | 0  |   switch (atom->quant) { | 
1724  | 0  |       case XML_REGEXP_QUANT_OPT:  | 
1725  | 0  |     atom->quant = XML_REGEXP_QUANT_ONCE;  | 
1726  |  |     /*  | 
1727  |  |      * transition done to the state after end of atom.  | 
1728  |  |      *      1. set transition from atom start to new state  | 
1729  |  |      *      2. set transition from atom end to this state.  | 
1730  |  |      */  | 
1731  | 0  |                 if (to == NULL) { | 
1732  | 0  |                     xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);  | 
1733  | 0  |                     xmlFAGenerateEpsilonTransition(ctxt, atom->stop,  | 
1734  | 0  |                                                    ctxt->state);  | 
1735  | 0  |                 } else { | 
1736  | 0  |                     xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);  | 
1737  | 0  |                 }  | 
1738  | 0  |     break;  | 
1739  | 0  |       case XML_REGEXP_QUANT_MULT:  | 
1740  | 0  |     atom->quant = XML_REGEXP_QUANT_ONCE;  | 
1741  | 0  |     xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);  | 
1742  | 0  |     xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);  | 
1743  | 0  |     break;  | 
1744  | 0  |       case XML_REGEXP_QUANT_PLUS:  | 
1745  | 0  |     atom->quant = XML_REGEXP_QUANT_ONCE;  | 
1746  | 0  |     xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);  | 
1747  | 0  |     break;  | 
1748  | 0  |       case XML_REGEXP_QUANT_RANGE: { | 
1749  | 0  |     int counter;  | 
1750  | 0  |     xmlRegStatePtr inter, newstate;  | 
1751  |  |  | 
1752  |  |     /*  | 
1753  |  |      * create the final state now if needed  | 
1754  |  |      */  | 
1755  | 0  |     if (to != NULL) { | 
1756  | 0  |         newstate = to;  | 
1757  | 0  |     } else { | 
1758  | 0  |         newstate = xmlRegStatePush(ctxt);  | 
1759  | 0  |                     if (newstate == NULL)  | 
1760  | 0  |                         return(-1);  | 
1761  | 0  |     }  | 
1762  |  |  | 
1763  |  |     /*  | 
1764  |  |      * The principle here is to use counted transition  | 
1765  |  |      * to avoid explosion in the number of states in the  | 
1766  |  |      * graph. This is clearly more complex but should not  | 
1767  |  |      * be exploitable at runtime.  | 
1768  |  |      */  | 
1769  | 0  |     if ((atom->min == 0) && (atom->start0 == NULL)) { | 
1770  | 0  |         xmlRegAtomPtr copy;  | 
1771  |  |         /*  | 
1772  |  |          * duplicate a transition based on atom to count next  | 
1773  |  |          * occurrences after 1. We cannot loop to atom->start  | 
1774  |  |          * directly because we need an epsilon transition to  | 
1775  |  |          * newstate.  | 
1776  |  |          */  | 
1777  |  |          /* ???? For some reason it seems we never reach that  | 
1778  |  |             case, I suppose this got optimized out before when  | 
1779  |  |       building the automata */  | 
1780  | 0  |         copy = xmlRegCopyAtom(ctxt, atom);  | 
1781  | 0  |         if (copy == NULL)  | 
1782  | 0  |             return(-1);  | 
1783  | 0  |         copy->quant = XML_REGEXP_QUANT_ONCE;  | 
1784  | 0  |         copy->min = 0;  | 
1785  | 0  |         copy->max = 0;  | 
1786  |  | 
  | 
1787  | 0  |         if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)  | 
1788  | 0  |             < 0) { | 
1789  | 0  |                         xmlRegFreeAtom(copy);  | 
1790  | 0  |       return(-1);  | 
1791  | 0  |                     }  | 
1792  | 0  |         inter = ctxt->state;  | 
1793  | 0  |         counter = xmlRegGetCounter(ctxt);  | 
1794  | 0  |                     if (counter < 0)  | 
1795  | 0  |                         return(-1);  | 
1796  | 0  |         ctxt->counters[counter].min = atom->min - 1;  | 
1797  | 0  |         ctxt->counters[counter].max = atom->max - 1;  | 
1798  |  |         /* count the number of times we see it again */  | 
1799  | 0  |         xmlFAGenerateCountedEpsilonTransition(ctxt, inter,  | 
1800  | 0  |                atom->stop, counter);  | 
1801  |  |         /* allow a way out based on the count */  | 
1802  | 0  |         xmlFAGenerateCountedTransition(ctxt, inter,  | 
1803  | 0  |                                  newstate, counter);  | 
1804  |  |         /* and also allow a direct exit for 0 */  | 
1805  | 0  |         xmlFAGenerateEpsilonTransition(ctxt, atom->start,  | 
1806  | 0  |                                        newstate);  | 
1807  | 0  |     } else { | 
1808  |  |         /*  | 
1809  |  |          * either we need the atom at least once or there  | 
1810  |  |          * is an atom->start0 allowing to easily plug the  | 
1811  |  |          * epsilon transition.  | 
1812  |  |          */  | 
1813  | 0  |         counter = xmlRegGetCounter(ctxt);  | 
1814  | 0  |                     if (counter < 0)  | 
1815  | 0  |                         return(-1);  | 
1816  | 0  |         ctxt->counters[counter].min = atom->min - 1;  | 
1817  | 0  |         ctxt->counters[counter].max = atom->max - 1;  | 
1818  |  |         /* allow a way out based on the count */  | 
1819  | 0  |         xmlFAGenerateCountedTransition(ctxt, atom->stop,  | 
1820  | 0  |                                  newstate, counter);  | 
1821  |  |         /* count the number of times we see it again */  | 
1822  | 0  |         xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,  | 
1823  | 0  |                atom->start, counter);  | 
1824  |  |         /* and if needed allow a direct exit for 0 */  | 
1825  | 0  |         if (atom->min == 0)  | 
1826  | 0  |       xmlFAGenerateEpsilonTransition(ctxt, atom->start0,  | 
1827  | 0  |                    newstate);  | 
1828  |  | 
  | 
1829  | 0  |     }  | 
1830  | 0  |     atom->min = 0;  | 
1831  | 0  |     atom->max = 0;  | 
1832  | 0  |     atom->quant = XML_REGEXP_QUANT_ONCE;  | 
1833  | 0  |     ctxt->state = newstate;  | 
1834  | 0  |       }  | 
1835  | 0  |       default:  | 
1836  | 0  |     break;  | 
1837  | 0  |   }  | 
1838  | 0  |         atom->start = NULL;  | 
1839  | 0  |         atom->start0 = NULL;  | 
1840  | 0  |         atom->stop = NULL;  | 
1841  | 0  |   if (xmlRegAtomPush(ctxt, atom) < 0)  | 
1842  | 0  |       return(-1);  | 
1843  | 0  |   return(0);  | 
1844  | 0  |     }  | 
1845  | 0  |     if ((atom->min == 0) && (atom->max == 0) &&  | 
1846  | 0  |                (atom->quant == XML_REGEXP_QUANT_RANGE)) { | 
1847  |  |         /*  | 
1848  |  |    * we can discard the atom and generate an epsilon transition instead  | 
1849  |  |    */  | 
1850  | 0  |   if (to == NULL) { | 
1851  | 0  |       to = xmlRegStatePush(ctxt);  | 
1852  | 0  |       if (to == NULL)  | 
1853  | 0  |     return(-1);  | 
1854  | 0  |   }  | 
1855  | 0  |   xmlFAGenerateEpsilonTransition(ctxt, from, to);  | 
1856  | 0  |   ctxt->state = to;  | 
1857  | 0  |   xmlRegFreeAtom(atom);  | 
1858  | 0  |   return(0);  | 
1859  | 0  |     }  | 
1860  | 0  |     if (to == NULL) { | 
1861  | 0  |   to = xmlRegStatePush(ctxt);  | 
1862  | 0  |   if (to == NULL)  | 
1863  | 0  |       return(-1);  | 
1864  | 0  |     }  | 
1865  | 0  |     end = to;  | 
1866  | 0  |     if ((atom->quant == XML_REGEXP_QUANT_MULT) ||  | 
1867  | 0  |         (atom->quant == XML_REGEXP_QUANT_PLUS)) { | 
1868  |  |   /*  | 
1869  |  |    * Do not pollute the target state by adding transitions from  | 
1870  |  |    * it as it is likely to be the shared target of multiple branches.  | 
1871  |  |    * So isolate with an epsilon transition.  | 
1872  |  |    */  | 
1873  | 0  |         xmlRegStatePtr tmp;  | 
1874  |  | 
  | 
1875  | 0  |   tmp = xmlRegStatePush(ctxt);  | 
1876  | 0  |         if (tmp == NULL)  | 
1877  | 0  |       return(-1);  | 
1878  | 0  |   xmlFAGenerateEpsilonTransition(ctxt, tmp, to);  | 
1879  | 0  |   to = tmp;  | 
1880  | 0  |     }  | 
1881  | 0  |     if ((atom->quant == XML_REGEXP_QUANT_RANGE) &&  | 
1882  | 0  |         (atom->min == 0) && (atom->max > 0)) { | 
1883  | 0  |   nullable = 1;  | 
1884  | 0  |   atom->min = 1;  | 
1885  | 0  |         if (atom->max == 1)  | 
1886  | 0  |       atom->quant = XML_REGEXP_QUANT_OPT;  | 
1887  | 0  |     }  | 
1888  | 0  |     xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);  | 
1889  | 0  |     ctxt->state = end;  | 
1890  | 0  |     switch (atom->quant) { | 
1891  | 0  |   case XML_REGEXP_QUANT_OPT:  | 
1892  | 0  |       atom->quant = XML_REGEXP_QUANT_ONCE;  | 
1893  | 0  |       xmlFAGenerateEpsilonTransition(ctxt, from, to);  | 
1894  | 0  |       break;  | 
1895  | 0  |   case XML_REGEXP_QUANT_MULT:  | 
1896  | 0  |       atom->quant = XML_REGEXP_QUANT_ONCE;  | 
1897  | 0  |       xmlFAGenerateEpsilonTransition(ctxt, from, to);  | 
1898  | 0  |       xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);  | 
1899  | 0  |       break;  | 
1900  | 0  |   case XML_REGEXP_QUANT_PLUS:  | 
1901  | 0  |       atom->quant = XML_REGEXP_QUANT_ONCE;  | 
1902  | 0  |       xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);  | 
1903  | 0  |       break;  | 
1904  | 0  |   case XML_REGEXP_QUANT_RANGE:  | 
1905  | 0  |       if (nullable)  | 
1906  | 0  |     xmlFAGenerateEpsilonTransition(ctxt, from, to);  | 
1907  | 0  |       break;  | 
1908  | 0  |   default:  | 
1909  | 0  |       break;  | 
1910  | 0  |     }  | 
1911  | 0  |     if (xmlRegAtomPush(ctxt, atom) < 0)  | 
1912  | 0  |   return(-1);  | 
1913  | 0  |     return(0);  | 
1914  | 0  | }  | 
1915  |  |  | 
1916  |  | /**  | 
1917  |  |  * @param ctxt  a regexp parser context  | 
1918  |  |  * @param fromnr  the from state  | 
1919  |  |  * @param tonr  the to state  | 
1920  |  |  * @param counter  should that transition be associated to a counted  | 
1921  |  |  */  | 
1922  |  | static void  | 
1923  |  | xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,  | 
1924  | 0  |                         int tonr, int counter) { | 
1925  | 0  |     int transnr;  | 
1926  | 0  |     xmlRegStatePtr from;  | 
1927  | 0  |     xmlRegStatePtr to;  | 
1928  |  | 
  | 
1929  | 0  |     from = ctxt->states[fromnr];  | 
1930  | 0  |     if (from == NULL)  | 
1931  | 0  |   return;  | 
1932  | 0  |     to = ctxt->states[tonr];  | 
1933  | 0  |     if (to == NULL)  | 
1934  | 0  |   return;  | 
1935  | 0  |     if ((to->mark == XML_REGEXP_MARK_START) ||  | 
1936  | 0  |   (to->mark == XML_REGEXP_MARK_VISITED))  | 
1937  | 0  |   return;  | 
1938  |  |  | 
1939  | 0  |     to->mark = XML_REGEXP_MARK_VISITED;  | 
1940  | 0  |     if (to->type == XML_REGEXP_FINAL_STATE) { | 
1941  | 0  |   from->type = XML_REGEXP_FINAL_STATE;  | 
1942  | 0  |     }  | 
1943  | 0  |     for (transnr = 0;transnr < to->nbTrans;transnr++) { | 
1944  | 0  |         xmlRegTransPtr t1 = &to->trans[transnr];  | 
1945  | 0  |         int tcounter;  | 
1946  |  | 
  | 
1947  | 0  |         if (t1->to < 0)  | 
1948  | 0  |       continue;  | 
1949  | 0  |         if (t1->counter >= 0) { | 
1950  |  |             /* assert(counter < 0); */  | 
1951  | 0  |             tcounter = t1->counter;  | 
1952  | 0  |         } else { | 
1953  | 0  |             tcounter = counter;  | 
1954  | 0  |         }  | 
1955  | 0  |   if (t1->atom == NULL) { | 
1956  |  |       /*  | 
1957  |  |        * Don't remove counted transitions  | 
1958  |  |        * Don't loop either  | 
1959  |  |        */  | 
1960  | 0  |       if (t1->to != fromnr) { | 
1961  | 0  |     if (t1->count >= 0) { | 
1962  | 0  |         xmlRegStateAddTrans(ctxt, from, NULL, ctxt->states[t1->to],  | 
1963  | 0  |           -1, t1->count);  | 
1964  | 0  |     } else { | 
1965  | 0  |                     xmlFAReduceEpsilonTransitions(ctxt, fromnr, t1->to,  | 
1966  | 0  |                                                   tcounter);  | 
1967  | 0  |     }  | 
1968  | 0  |       }  | 
1969  | 0  |   } else { | 
1970  | 0  |             xmlRegStateAddTrans(ctxt, from, t1->atom,  | 
1971  | 0  |                                 ctxt->states[t1->to], tcounter, -1);  | 
1972  | 0  |   }  | 
1973  | 0  |     }  | 
1974  | 0  | }  | 
1975  |  |  | 
1976  |  | /**  | 
1977  |  |  * @param ctxt  a regexp parser context  | 
1978  |  |  * @param tonr  the to state  | 
1979  |  |  */  | 
1980  |  | static void  | 
1981  | 0  | xmlFAFinishReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int tonr) { | 
1982  | 0  |     int transnr;  | 
1983  | 0  |     xmlRegStatePtr to;  | 
1984  |  | 
  | 
1985  | 0  |     to = ctxt->states[tonr];  | 
1986  | 0  |     if (to == NULL)  | 
1987  | 0  |   return;  | 
1988  | 0  |     if ((to->mark == XML_REGEXP_MARK_START) ||  | 
1989  | 0  |   (to->mark == XML_REGEXP_MARK_NORMAL))  | 
1990  | 0  |   return;  | 
1991  |  |  | 
1992  | 0  |     to->mark = XML_REGEXP_MARK_NORMAL;  | 
1993  | 0  |     for (transnr = 0;transnr < to->nbTrans;transnr++) { | 
1994  | 0  |   xmlRegTransPtr t1 = &to->trans[transnr];  | 
1995  | 0  |   if ((t1->to >= 0) && (t1->atom == NULL))  | 
1996  | 0  |             xmlFAFinishReduceEpsilonTransitions(ctxt, t1->to);  | 
1997  | 0  |     }  | 
1998  | 0  | }  | 
1999  |  |  | 
2000  |  | /**  | 
2001  |  |  * Eliminating general epsilon transitions can get costly in the general  | 
2002  |  |  * algorithm due to the large amount of generated new transitions and  | 
2003  |  |  * associated comparisons. However for simple epsilon transition used just  | 
2004  |  |  * to separate building blocks when generating the automata this can be  | 
2005  |  |  * reduced to state elimination:  | 
2006  |  |  *    - if there exists an epsilon from X to Y  | 
2007  |  |  *    - if there is no other transition from X  | 
2008  |  |  * then X and Y are semantically equivalent and X can be eliminated  | 
2009  |  |  * If X is the start state then make Y the start state, else replace the  | 
2010  |  |  * target of all transitions to X by transitions to Y.  | 
2011  |  |  *  | 
2012  |  |  * If X is a final state, skip it.  | 
2013  |  |  * Otherwise it would be necessary to manipulate counters for this case when  | 
2014  |  |  * eliminating state 2:  | 
2015  |  |  * State 1 has a transition with an atom to state 2.  | 
2016  |  |  * State 2 is final and has an epsilon transition to state 1.  | 
2017  |  |  *  | 
2018  |  |  * @param ctxt  a regexp parser context  | 
2019  |  |  */  | 
2020  |  | static void  | 
2021  | 0  | xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { | 
2022  | 0  |     int statenr, i, j, newto;  | 
2023  | 0  |     xmlRegStatePtr state, tmp;  | 
2024  |  | 
  | 
2025  | 0  |     for (statenr = 0;statenr < ctxt->nbStates;statenr++) { | 
2026  | 0  |   state = ctxt->states[statenr];  | 
2027  | 0  |   if (state == NULL)  | 
2028  | 0  |       continue;  | 
2029  | 0  |   if (state->nbTrans != 1)  | 
2030  | 0  |       continue;  | 
2031  | 0  |        if (state->type == XML_REGEXP_UNREACH_STATE ||  | 
2032  | 0  |            state->type == XML_REGEXP_FINAL_STATE)  | 
2033  | 0  |       continue;  | 
2034  |  |   /* is the only transition out a basic transition */  | 
2035  | 0  |   if ((state->trans[0].atom == NULL) &&  | 
2036  | 0  |       (state->trans[0].to >= 0) &&  | 
2037  | 0  |       (state->trans[0].to != statenr) &&  | 
2038  | 0  |       (state->trans[0].counter < 0) &&  | 
2039  | 0  |       (state->trans[0].count < 0)) { | 
2040  | 0  |       newto = state->trans[0].to;  | 
2041  |  | 
  | 
2042  | 0  |             if (state->type == XML_REGEXP_START_STATE) { | 
2043  | 0  |             } else { | 
2044  | 0  |           for (i = 0;i < state->nbTransTo;i++) { | 
2045  | 0  |         tmp = ctxt->states[state->transTo[i]];  | 
2046  | 0  |         for (j = 0;j < tmp->nbTrans;j++) { | 
2047  | 0  |       if (tmp->trans[j].to == statenr) { | 
2048  | 0  |           tmp->trans[j].to = -1;  | 
2049  | 0  |           xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,  | 
2050  | 0  |             ctxt->states[newto],  | 
2051  | 0  |                   tmp->trans[j].counter,  | 
2052  | 0  |             tmp->trans[j].count);  | 
2053  | 0  |       }  | 
2054  | 0  |         }  | 
2055  | 0  |     }  | 
2056  | 0  |     if (state->type == XML_REGEXP_FINAL_STATE)  | 
2057  | 0  |         ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;  | 
2058  |  |     /* eliminate the transition completely */  | 
2059  | 0  |     state->nbTrans = 0;  | 
2060  |  | 
  | 
2061  | 0  |                 state->type = XML_REGEXP_UNREACH_STATE;  | 
2062  |  | 
  | 
2063  | 0  |       }  | 
2064  |  | 
  | 
2065  | 0  |   }  | 
2066  | 0  |     }  | 
2067  | 0  | }  | 
2068  |  | /**  | 
2069  |  |  * @param ctxt  a regexp parser context  | 
2070  |  |  */  | 
2071  |  | static void  | 
2072  | 0  | xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { | 
2073  | 0  |     int statenr, transnr;  | 
2074  | 0  |     xmlRegStatePtr state;  | 
2075  | 0  |     int has_epsilon;  | 
2076  |  | 
  | 
2077  | 0  |     if (ctxt->states == NULL) return;  | 
2078  |  |  | 
2079  |  |     /*  | 
2080  |  |      * Eliminate simple epsilon transition and the associated unreachable  | 
2081  |  |      * states.  | 
2082  |  |      */  | 
2083  | 0  |     xmlFAEliminateSimpleEpsilonTransitions(ctxt);  | 
2084  | 0  |     for (statenr = 0;statenr < ctxt->nbStates;statenr++) { | 
2085  | 0  |   state = ctxt->states[statenr];  | 
2086  | 0  |   if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) { | 
2087  | 0  |       xmlRegFreeState(state);  | 
2088  | 0  |       ctxt->states[statenr] = NULL;  | 
2089  | 0  |   }  | 
2090  | 0  |     }  | 
2091  |  | 
  | 
2092  | 0  |     has_epsilon = 0;  | 
2093  |  |  | 
2094  |  |     /*  | 
2095  |  |      * Build the completed transitions bypassing the epsilons  | 
2096  |  |      * Use a marking algorithm to avoid loops  | 
2097  |  |      * Mark sink states too.  | 
2098  |  |      * Process from the latest states backward to the start when  | 
2099  |  |      * there is long cascading epsilon chains this minimize the  | 
2100  |  |      * recursions and transition compares when adding the new ones  | 
2101  |  |      */  | 
2102  | 0  |     for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) { | 
2103  | 0  |   state = ctxt->states[statenr];  | 
2104  | 0  |   if (state == NULL)  | 
2105  | 0  |       continue;  | 
2106  | 0  |   if ((state->nbTrans == 0) &&  | 
2107  | 0  |       (state->type != XML_REGEXP_FINAL_STATE)) { | 
2108  | 0  |       state->type = XML_REGEXP_SINK_STATE;  | 
2109  | 0  |   }  | 
2110  | 0  |   for (transnr = 0;transnr < state->nbTrans;transnr++) { | 
2111  | 0  |       if ((state->trans[transnr].atom == NULL) &&  | 
2112  | 0  |     (state->trans[transnr].to >= 0)) { | 
2113  | 0  |     if (state->trans[transnr].to == statenr) { | 
2114  | 0  |         state->trans[transnr].to = -1;  | 
2115  | 0  |     } else if (state->trans[transnr].count < 0) { | 
2116  | 0  |         int newto = state->trans[transnr].to;  | 
2117  |  | 
  | 
2118  | 0  |         has_epsilon = 1;  | 
2119  | 0  |         state->trans[transnr].to = -2;  | 
2120  | 0  |         state->mark = XML_REGEXP_MARK_START;  | 
2121  | 0  |         xmlFAReduceEpsilonTransitions(ctxt, statenr,  | 
2122  | 0  |               newto, state->trans[transnr].counter);  | 
2123  | 0  |         xmlFAFinishReduceEpsilonTransitions(ctxt, newto);  | 
2124  | 0  |         state->mark = XML_REGEXP_MARK_NORMAL;  | 
2125  | 0  |           }  | 
2126  | 0  |       }  | 
2127  | 0  |   }  | 
2128  | 0  |     }  | 
2129  |  |     /*  | 
2130  |  |      * Eliminate the epsilon transitions  | 
2131  |  |      */  | 
2132  | 0  |     if (has_epsilon) { | 
2133  | 0  |   for (statenr = 0;statenr < ctxt->nbStates;statenr++) { | 
2134  | 0  |       state = ctxt->states[statenr];  | 
2135  | 0  |       if (state == NULL)  | 
2136  | 0  |     continue;  | 
2137  | 0  |       for (transnr = 0;transnr < state->nbTrans;transnr++) { | 
2138  | 0  |     xmlRegTransPtr trans = &(state->trans[transnr]);  | 
2139  | 0  |     if ((trans->atom == NULL) &&  | 
2140  | 0  |         (trans->count < 0) &&  | 
2141  | 0  |         (trans->to >= 0)) { | 
2142  | 0  |         trans->to = -1;  | 
2143  | 0  |     }  | 
2144  | 0  |       }  | 
2145  | 0  |   }  | 
2146  | 0  |     }  | 
2147  |  |  | 
2148  |  |     /*  | 
2149  |  |      * Use this pass to detect unreachable states too  | 
2150  |  |      */  | 
2151  | 0  |     for (statenr = 0;statenr < ctxt->nbStates;statenr++) { | 
2152  | 0  |   state = ctxt->states[statenr];  | 
2153  | 0  |   if (state != NULL)  | 
2154  | 0  |       state->reached = XML_REGEXP_MARK_NORMAL;  | 
2155  | 0  |     }  | 
2156  | 0  |     state = ctxt->states[0];  | 
2157  | 0  |     if (state != NULL)  | 
2158  | 0  |   state->reached = XML_REGEXP_MARK_START;  | 
2159  | 0  |     while (state != NULL) { | 
2160  | 0  |   xmlRegStatePtr target = NULL;  | 
2161  | 0  |   state->reached = XML_REGEXP_MARK_VISITED;  | 
2162  |  |   /*  | 
2163  |  |    * Mark all states reachable from the current reachable state  | 
2164  |  |    */  | 
2165  | 0  |   for (transnr = 0;transnr < state->nbTrans;transnr++) { | 
2166  | 0  |       if ((state->trans[transnr].to >= 0) &&  | 
2167  | 0  |     ((state->trans[transnr].atom != NULL) ||  | 
2168  | 0  |      (state->trans[transnr].count >= 0))) { | 
2169  | 0  |     int newto = state->trans[transnr].to;  | 
2170  |  | 
  | 
2171  | 0  |     if (ctxt->states[newto] == NULL)  | 
2172  | 0  |         continue;  | 
2173  | 0  |     if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) { | 
2174  | 0  |         ctxt->states[newto]->reached = XML_REGEXP_MARK_START;  | 
2175  | 0  |         target = ctxt->states[newto];  | 
2176  | 0  |     }  | 
2177  | 0  |       }  | 
2178  | 0  |   }  | 
2179  |  |  | 
2180  |  |   /*  | 
2181  |  |    * find the next accessible state not explored  | 
2182  |  |    */  | 
2183  | 0  |   if (target == NULL) { | 
2184  | 0  |       for (statenr = 1;statenr < ctxt->nbStates;statenr++) { | 
2185  | 0  |     state = ctxt->states[statenr];  | 
2186  | 0  |     if ((state != NULL) && (state->reached ==  | 
2187  | 0  |       XML_REGEXP_MARK_START)) { | 
2188  | 0  |         target = state;  | 
2189  | 0  |         break;  | 
2190  | 0  |     }  | 
2191  | 0  |       }  | 
2192  | 0  |   }  | 
2193  | 0  |   state = target;  | 
2194  | 0  |     }  | 
2195  | 0  |     for (statenr = 0;statenr < ctxt->nbStates;statenr++) { | 
2196  | 0  |   state = ctxt->states[statenr];  | 
2197  | 0  |   if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) { | 
2198  | 0  |       xmlRegFreeState(state);  | 
2199  | 0  |       ctxt->states[statenr] = NULL;  | 
2200  | 0  |   }  | 
2201  | 0  |     }  | 
2202  |  | 
  | 
2203  | 0  | }  | 
2204  |  |  | 
2205  |  | static int  | 
2206  | 0  | xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) { | 
2207  | 0  |     int ret = 0;  | 
2208  |  | 
  | 
2209  | 0  |     if ((range1->type == XML_REGEXP_RANGES) ||  | 
2210  | 0  |         (range2->type == XML_REGEXP_RANGES) ||  | 
2211  | 0  |         (range2->type == XML_REGEXP_SUBREG) ||  | 
2212  | 0  |         (range1->type == XML_REGEXP_SUBREG) ||  | 
2213  | 0  |         (range1->type == XML_REGEXP_STRING) ||  | 
2214  | 0  |         (range2->type == XML_REGEXP_STRING))  | 
2215  | 0  |   return(-1);  | 
2216  |  |  | 
2217  |  |     /* put them in order */  | 
2218  | 0  |     if (range1->type > range2->type) { | 
2219  | 0  |         xmlRegRangePtr tmp;  | 
2220  |  | 
  | 
2221  | 0  |   tmp = range1;  | 
2222  | 0  |   range1 = range2;  | 
2223  | 0  |   range2 = tmp;  | 
2224  | 0  |     }  | 
2225  | 0  |     if ((range1->type == XML_REGEXP_ANYCHAR) ||  | 
2226  | 0  |         (range2->type == XML_REGEXP_ANYCHAR)) { | 
2227  | 0  |   ret = 1;  | 
2228  | 0  |     } else if ((range1->type == XML_REGEXP_EPSILON) ||  | 
2229  | 0  |                (range2->type == XML_REGEXP_EPSILON)) { | 
2230  | 0  |   return(0);  | 
2231  | 0  |     } else if (range1->type == range2->type) { | 
2232  | 0  |         if (range1->type != XML_REGEXP_CHARVAL)  | 
2233  | 0  |             ret = 1;  | 
2234  | 0  |         else if ((range1->end < range2->start) ||  | 
2235  | 0  |            (range2->end < range1->start))  | 
2236  | 0  |       ret = 0;  | 
2237  | 0  |   else  | 
2238  | 0  |       ret = 1;  | 
2239  | 0  |     } else if (range1->type == XML_REGEXP_CHARVAL) { | 
2240  | 0  |         int codepoint;  | 
2241  | 0  |   int neg = 0;  | 
2242  |  |  | 
2243  |  |   /*  | 
2244  |  |    * just check all codepoints in the range for acceptance,  | 
2245  |  |    * this is usually way cheaper since done only once at  | 
2246  |  |    * compilation than testing over and over at runtime or  | 
2247  |  |    * pushing too many states when evaluating.  | 
2248  |  |    */  | 
2249  | 0  |   if (((range1->neg == 0) && (range2->neg != 0)) ||  | 
2250  | 0  |       ((range1->neg != 0) && (range2->neg == 0)))  | 
2251  | 0  |       neg = 1;  | 
2252  |  | 
  | 
2253  | 0  |   for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) { | 
2254  | 0  |       ret = xmlRegCheckCharacterRange(range2->type, codepoint,  | 
2255  | 0  |               0, range2->start, range2->end,  | 
2256  | 0  |               range2->blockName);  | 
2257  | 0  |       if (ret < 0)  | 
2258  | 0  |           return(-1);  | 
2259  | 0  |       if (((neg == 1) && (ret == 0)) ||  | 
2260  | 0  |           ((neg == 0) && (ret == 1)))  | 
2261  | 0  |     return(1);  | 
2262  | 0  |   }  | 
2263  | 0  |   return(0);  | 
2264  | 0  |     } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||  | 
2265  | 0  |                (range2->type == XML_REGEXP_BLOCK_NAME)) { | 
2266  | 0  |   if (range1->type == range2->type) { | 
2267  | 0  |       ret = xmlStrEqual(range1->blockName, range2->blockName);  | 
2268  | 0  |   } else { | 
2269  |  |       /*  | 
2270  |  |        * comparing a block range with anything else is way  | 
2271  |  |        * too costly, and maintaining the table is like too much  | 
2272  |  |        * memory too, so let's force the automata to save state  | 
2273  |  |        * here.  | 
2274  |  |        */  | 
2275  | 0  |       return(1);  | 
2276  | 0  |   }  | 
2277  | 0  |     } else if ((range1->type < XML_REGEXP_LETTER) ||  | 
2278  | 0  |                (range2->type < XML_REGEXP_LETTER)) { | 
2279  | 0  |   if ((range1->type == XML_REGEXP_ANYSPACE) &&  | 
2280  | 0  |       (range2->type == XML_REGEXP_NOTSPACE))  | 
2281  | 0  |       ret = 0;  | 
2282  | 0  |   else if ((range1->type == XML_REGEXP_INITNAME) &&  | 
2283  | 0  |            (range2->type == XML_REGEXP_NOTINITNAME))  | 
2284  | 0  |       ret = 0;  | 
2285  | 0  |   else if ((range1->type == XML_REGEXP_NAMECHAR) &&  | 
2286  | 0  |            (range2->type == XML_REGEXP_NOTNAMECHAR))  | 
2287  | 0  |       ret = 0;  | 
2288  | 0  |   else if ((range1->type == XML_REGEXP_DECIMAL) &&  | 
2289  | 0  |            (range2->type == XML_REGEXP_NOTDECIMAL))  | 
2290  | 0  |       ret = 0;  | 
2291  | 0  |   else if ((range1->type == XML_REGEXP_REALCHAR) &&  | 
2292  | 0  |            (range2->type == XML_REGEXP_NOTREALCHAR))  | 
2293  | 0  |       ret = 0;  | 
2294  | 0  |   else { | 
2295  |  |       /* same thing to limit complexity */  | 
2296  | 0  |       return(1);  | 
2297  | 0  |   }  | 
2298  | 0  |     } else { | 
2299  | 0  |         ret = 0;  | 
2300  |  |         /* range1->type < range2->type here */  | 
2301  | 0  |         switch (range1->type) { | 
2302  | 0  |       case XML_REGEXP_LETTER:  | 
2303  |  |            /* all disjoint except in the subgroups */  | 
2304  | 0  |            if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||  | 
2305  | 0  |          (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||  | 
2306  | 0  |          (range2->type == XML_REGEXP_LETTER_TITLECASE) ||  | 
2307  | 0  |          (range2->type == XML_REGEXP_LETTER_MODIFIER) ||  | 
2308  | 0  |          (range2->type == XML_REGEXP_LETTER_OTHERS))  | 
2309  | 0  |          ret = 1;  | 
2310  | 0  |      break;  | 
2311  | 0  |       case XML_REGEXP_MARK:  | 
2312  | 0  |            if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||  | 
2313  | 0  |          (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||  | 
2314  | 0  |          (range2->type == XML_REGEXP_MARK_ENCLOSING))  | 
2315  | 0  |          ret = 1;  | 
2316  | 0  |      break;  | 
2317  | 0  |       case XML_REGEXP_NUMBER:  | 
2318  | 0  |            if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||  | 
2319  | 0  |          (range2->type == XML_REGEXP_NUMBER_LETTER) ||  | 
2320  | 0  |          (range2->type == XML_REGEXP_NUMBER_OTHERS))  | 
2321  | 0  |          ret = 1;  | 
2322  | 0  |      break;  | 
2323  | 0  |       case XML_REGEXP_PUNCT:  | 
2324  | 0  |            if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||  | 
2325  | 0  |          (range2->type == XML_REGEXP_PUNCT_DASH) ||  | 
2326  | 0  |          (range2->type == XML_REGEXP_PUNCT_OPEN) ||  | 
2327  | 0  |          (range2->type == XML_REGEXP_PUNCT_CLOSE) ||  | 
2328  | 0  |          (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||  | 
2329  | 0  |          (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||  | 
2330  | 0  |          (range2->type == XML_REGEXP_PUNCT_OTHERS))  | 
2331  | 0  |          ret = 1;  | 
2332  | 0  |      break;  | 
2333  | 0  |       case XML_REGEXP_SEPAR:  | 
2334  | 0  |            if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||  | 
2335  | 0  |          (range2->type == XML_REGEXP_SEPAR_LINE) ||  | 
2336  | 0  |          (range2->type == XML_REGEXP_SEPAR_PARA))  | 
2337  | 0  |          ret = 1;  | 
2338  | 0  |      break;  | 
2339  | 0  |       case XML_REGEXP_SYMBOL:  | 
2340  | 0  |            if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||  | 
2341  | 0  |          (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||  | 
2342  | 0  |          (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||  | 
2343  | 0  |          (range2->type == XML_REGEXP_SYMBOL_OTHERS))  | 
2344  | 0  |          ret = 1;  | 
2345  | 0  |      break;  | 
2346  | 0  |       case XML_REGEXP_OTHER:  | 
2347  | 0  |            if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||  | 
2348  | 0  |          (range2->type == XML_REGEXP_OTHER_FORMAT) ||  | 
2349  | 0  |          (range2->type == XML_REGEXP_OTHER_PRIVATE))  | 
2350  | 0  |          ret = 1;  | 
2351  | 0  |      break;  | 
2352  | 0  |             default:  | 
2353  | 0  |            if ((range2->type >= XML_REGEXP_LETTER) &&  | 
2354  | 0  |          (range2->type < XML_REGEXP_BLOCK_NAME))  | 
2355  | 0  |          ret = 0;  | 
2356  | 0  |      else { | 
2357  |  |          /* safety net ! */  | 
2358  | 0  |          return(1);  | 
2359  | 0  |      }  | 
2360  | 0  |   }  | 
2361  | 0  |     }  | 
2362  | 0  |     if (((range1->neg == 0) && (range2->neg != 0)) ||  | 
2363  | 0  |         ((range1->neg != 0) && (range2->neg == 0)))  | 
2364  | 0  |   ret = !ret;  | 
2365  | 0  |     return(ret);  | 
2366  | 0  | }  | 
2367  |  |  | 
2368  |  | /**  | 
2369  |  |  * Compares two atoms type to check whether they intersect in some ways,  | 
2370  |  |  * this is used by xmlFACompareAtoms only  | 
2371  |  |  *  | 
2372  |  |  * @param type1  an atom type  | 
2373  |  |  * @param type2  an atom type  | 
2374  |  |  * @returns 1 if they may intersect and 0 otherwise  | 
2375  |  |  */  | 
2376  |  | static int  | 
2377  | 0  | xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) { | 
2378  | 0  |     if ((type1 == XML_REGEXP_EPSILON) ||  | 
2379  | 0  |         (type1 == XML_REGEXP_CHARVAL) ||  | 
2380  | 0  |   (type1 == XML_REGEXP_RANGES) ||  | 
2381  | 0  |   (type1 == XML_REGEXP_SUBREG) ||  | 
2382  | 0  |   (type1 == XML_REGEXP_STRING) ||  | 
2383  | 0  |   (type1 == XML_REGEXP_ANYCHAR))  | 
2384  | 0  |   return(1);  | 
2385  | 0  |     if ((type2 == XML_REGEXP_EPSILON) ||  | 
2386  | 0  |         (type2 == XML_REGEXP_CHARVAL) ||  | 
2387  | 0  |   (type2 == XML_REGEXP_RANGES) ||  | 
2388  | 0  |   (type2 == XML_REGEXP_SUBREG) ||  | 
2389  | 0  |   (type2 == XML_REGEXP_STRING) ||  | 
2390  | 0  |   (type2 == XML_REGEXP_ANYCHAR))  | 
2391  | 0  |   return(1);  | 
2392  |  |  | 
2393  | 0  |     if (type1 == type2) return(1);  | 
2394  |  |  | 
2395  |  |     /* simplify subsequent compares by making sure type1 < type2 */  | 
2396  | 0  |     if (type1 > type2) { | 
2397  | 0  |         xmlRegAtomType tmp = type1;  | 
2398  | 0  |   type1 = type2;  | 
2399  | 0  |   type2 = tmp;  | 
2400  | 0  |     }  | 
2401  | 0  |     switch (type1) { | 
2402  | 0  |         case XML_REGEXP_ANYSPACE: /* \s */  | 
2403  |  |       /* can't be a letter, number, mark, punctuation, symbol */  | 
2404  | 0  |       if ((type2 == XML_REGEXP_NOTSPACE) ||  | 
2405  | 0  |     ((type2 >= XML_REGEXP_LETTER) &&  | 
2406  | 0  |      (type2 <= XML_REGEXP_LETTER_OTHERS)) ||  | 
2407  | 0  |           ((type2 >= XML_REGEXP_NUMBER) &&  | 
2408  | 0  |      (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||  | 
2409  | 0  |           ((type2 >= XML_REGEXP_MARK) &&  | 
2410  | 0  |      (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||  | 
2411  | 0  |           ((type2 >= XML_REGEXP_PUNCT) &&  | 
2412  | 0  |      (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||  | 
2413  | 0  |           ((type2 >= XML_REGEXP_SYMBOL) &&  | 
2414  | 0  |      (type2 <= XML_REGEXP_SYMBOL_OTHERS))  | 
2415  | 0  |           ) return(0);  | 
2416  | 0  |       break;  | 
2417  | 0  |         case XML_REGEXP_NOTSPACE: /* \S */  | 
2418  | 0  |       break;  | 
2419  | 0  |         case XML_REGEXP_INITNAME: /* \l */  | 
2420  |  |       /* can't be a number, mark, separator, punctuation, symbol or other */  | 
2421  | 0  |       if ((type2 == XML_REGEXP_NOTINITNAME) ||  | 
2422  | 0  |           ((type2 >= XML_REGEXP_NUMBER) &&  | 
2423  | 0  |      (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||  | 
2424  | 0  |           ((type2 >= XML_REGEXP_MARK) &&  | 
2425  | 0  |      (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||  | 
2426  | 0  |           ((type2 >= XML_REGEXP_SEPAR) &&  | 
2427  | 0  |      (type2 <= XML_REGEXP_SEPAR_PARA)) ||  | 
2428  | 0  |           ((type2 >= XML_REGEXP_PUNCT) &&  | 
2429  | 0  |      (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||  | 
2430  | 0  |           ((type2 >= XML_REGEXP_SYMBOL) &&  | 
2431  | 0  |      (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||  | 
2432  | 0  |           ((type2 >= XML_REGEXP_OTHER) &&  | 
2433  | 0  |      (type2 <= XML_REGEXP_OTHER_NA))  | 
2434  | 0  |     ) return(0);  | 
2435  | 0  |       break;  | 
2436  | 0  |         case XML_REGEXP_NOTINITNAME: /* \L */  | 
2437  | 0  |       break;  | 
2438  | 0  |         case XML_REGEXP_NAMECHAR: /* \c */  | 
2439  |  |       /* can't be a mark, separator, punctuation, symbol or other */  | 
2440  | 0  |       if ((type2 == XML_REGEXP_NOTNAMECHAR) ||  | 
2441  | 0  |           ((type2 >= XML_REGEXP_MARK) &&  | 
2442  | 0  |      (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||  | 
2443  | 0  |           ((type2 >= XML_REGEXP_PUNCT) &&  | 
2444  | 0  |      (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||  | 
2445  | 0  |           ((type2 >= XML_REGEXP_SEPAR) &&  | 
2446  | 0  |      (type2 <= XML_REGEXP_SEPAR_PARA)) ||  | 
2447  | 0  |           ((type2 >= XML_REGEXP_SYMBOL) &&  | 
2448  | 0  |      (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||  | 
2449  | 0  |           ((type2 >= XML_REGEXP_OTHER) &&  | 
2450  | 0  |      (type2 <= XML_REGEXP_OTHER_NA))  | 
2451  | 0  |     ) return(0);  | 
2452  | 0  |       break;  | 
2453  | 0  |         case XML_REGEXP_NOTNAMECHAR: /* \C */  | 
2454  | 0  |       break;  | 
2455  | 0  |         case XML_REGEXP_DECIMAL: /* \d */  | 
2456  |  |       /* can't be a letter, mark, separator, punctuation, symbol or other */  | 
2457  | 0  |       if ((type2 == XML_REGEXP_NOTDECIMAL) ||  | 
2458  | 0  |           (type2 == XML_REGEXP_REALCHAR) ||  | 
2459  | 0  |     ((type2 >= XML_REGEXP_LETTER) &&  | 
2460  | 0  |      (type2 <= XML_REGEXP_LETTER_OTHERS)) ||  | 
2461  | 0  |           ((type2 >= XML_REGEXP_MARK) &&  | 
2462  | 0  |      (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||  | 
2463  | 0  |           ((type2 >= XML_REGEXP_PUNCT) &&  | 
2464  | 0  |      (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||  | 
2465  | 0  |           ((type2 >= XML_REGEXP_SEPAR) &&  | 
2466  | 0  |      (type2 <= XML_REGEXP_SEPAR_PARA)) ||  | 
2467  | 0  |           ((type2 >= XML_REGEXP_SYMBOL) &&  | 
2468  | 0  |      (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||  | 
2469  | 0  |           ((type2 >= XML_REGEXP_OTHER) &&  | 
2470  | 0  |      (type2 <= XML_REGEXP_OTHER_NA))  | 
2471  | 0  |     )return(0);  | 
2472  | 0  |       break;  | 
2473  | 0  |         case XML_REGEXP_NOTDECIMAL: /* \D */  | 
2474  | 0  |       break;  | 
2475  | 0  |         case XML_REGEXP_REALCHAR: /* \w */  | 
2476  |  |       /* can't be a mark, separator, punctuation, symbol or other */  | 
2477  | 0  |       if ((type2 == XML_REGEXP_NOTDECIMAL) ||  | 
2478  | 0  |           ((type2 >= XML_REGEXP_MARK) &&  | 
2479  | 0  |      (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||  | 
2480  | 0  |           ((type2 >= XML_REGEXP_PUNCT) &&  | 
2481  | 0  |      (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||  | 
2482  | 0  |           ((type2 >= XML_REGEXP_SEPAR) &&  | 
2483  | 0  |      (type2 <= XML_REGEXP_SEPAR_PARA)) ||  | 
2484  | 0  |           ((type2 >= XML_REGEXP_SYMBOL) &&  | 
2485  | 0  |      (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||  | 
2486  | 0  |           ((type2 >= XML_REGEXP_OTHER) &&  | 
2487  | 0  |      (type2 <= XML_REGEXP_OTHER_NA))  | 
2488  | 0  |     )return(0);  | 
2489  | 0  |       break;  | 
2490  | 0  |         case XML_REGEXP_NOTREALCHAR: /* \W */  | 
2491  | 0  |       break;  | 
2492  |  |   /*  | 
2493  |  |    * at that point we know both type 1 and type2 are from  | 
2494  |  |    * character categories are ordered and are different,  | 
2495  |  |    * it becomes simple because this is a partition  | 
2496  |  |    */  | 
2497  | 0  |         case XML_REGEXP_LETTER:  | 
2498  | 0  |       if (type2 <= XML_REGEXP_LETTER_OTHERS)  | 
2499  | 0  |           return(1);  | 
2500  | 0  |       return(0);  | 
2501  | 0  |         case XML_REGEXP_LETTER_UPPERCASE:  | 
2502  | 0  |         case XML_REGEXP_LETTER_LOWERCASE:  | 
2503  | 0  |         case XML_REGEXP_LETTER_TITLECASE:  | 
2504  | 0  |         case XML_REGEXP_LETTER_MODIFIER:  | 
2505  | 0  |         case XML_REGEXP_LETTER_OTHERS:  | 
2506  | 0  |       return(0);  | 
2507  | 0  |         case XML_REGEXP_MARK:  | 
2508  | 0  |       if (type2 <= XML_REGEXP_MARK_ENCLOSING)  | 
2509  | 0  |           return(1);  | 
2510  | 0  |       return(0);  | 
2511  | 0  |         case XML_REGEXP_MARK_NONSPACING:  | 
2512  | 0  |         case XML_REGEXP_MARK_SPACECOMBINING:  | 
2513  | 0  |         case XML_REGEXP_MARK_ENCLOSING:  | 
2514  | 0  |       return(0);  | 
2515  | 0  |         case XML_REGEXP_NUMBER:  | 
2516  | 0  |       if (type2 <= XML_REGEXP_NUMBER_OTHERS)  | 
2517  | 0  |           return(1);  | 
2518  | 0  |       return(0);  | 
2519  | 0  |         case XML_REGEXP_NUMBER_DECIMAL:  | 
2520  | 0  |         case XML_REGEXP_NUMBER_LETTER:  | 
2521  | 0  |         case XML_REGEXP_NUMBER_OTHERS:  | 
2522  | 0  |       return(0);  | 
2523  | 0  |         case XML_REGEXP_PUNCT:  | 
2524  | 0  |       if (type2 <= XML_REGEXP_PUNCT_OTHERS)  | 
2525  | 0  |           return(1);  | 
2526  | 0  |       return(0);  | 
2527  | 0  |         case XML_REGEXP_PUNCT_CONNECTOR:  | 
2528  | 0  |         case XML_REGEXP_PUNCT_DASH:  | 
2529  | 0  |         case XML_REGEXP_PUNCT_OPEN:  | 
2530  | 0  |         case XML_REGEXP_PUNCT_CLOSE:  | 
2531  | 0  |         case XML_REGEXP_PUNCT_INITQUOTE:  | 
2532  | 0  |         case XML_REGEXP_PUNCT_FINQUOTE:  | 
2533  | 0  |         case XML_REGEXP_PUNCT_OTHERS:  | 
2534  | 0  |       return(0);  | 
2535  | 0  |         case XML_REGEXP_SEPAR:  | 
2536  | 0  |       if (type2 <= XML_REGEXP_SEPAR_PARA)  | 
2537  | 0  |           return(1);  | 
2538  | 0  |       return(0);  | 
2539  | 0  |         case XML_REGEXP_SEPAR_SPACE:  | 
2540  | 0  |         case XML_REGEXP_SEPAR_LINE:  | 
2541  | 0  |         case XML_REGEXP_SEPAR_PARA:  | 
2542  | 0  |       return(0);  | 
2543  | 0  |         case XML_REGEXP_SYMBOL:  | 
2544  | 0  |       if (type2 <= XML_REGEXP_SYMBOL_OTHERS)  | 
2545  | 0  |           return(1);  | 
2546  | 0  |       return(0);  | 
2547  | 0  |         case XML_REGEXP_SYMBOL_MATH:  | 
2548  | 0  |         case XML_REGEXP_SYMBOL_CURRENCY:  | 
2549  | 0  |         case XML_REGEXP_SYMBOL_MODIFIER:  | 
2550  | 0  |         case XML_REGEXP_SYMBOL_OTHERS:  | 
2551  | 0  |       return(0);  | 
2552  | 0  |         case XML_REGEXP_OTHER:  | 
2553  | 0  |       if (type2 <= XML_REGEXP_OTHER_NA)  | 
2554  | 0  |           return(1);  | 
2555  | 0  |       return(0);  | 
2556  | 0  |         case XML_REGEXP_OTHER_CONTROL:  | 
2557  | 0  |         case XML_REGEXP_OTHER_FORMAT:  | 
2558  | 0  |         case XML_REGEXP_OTHER_PRIVATE:  | 
2559  | 0  |         case XML_REGEXP_OTHER_NA:  | 
2560  | 0  |       return(0);  | 
2561  | 0  |   default:  | 
2562  | 0  |       break;  | 
2563  | 0  |     }  | 
2564  | 0  |     return(1);  | 
2565  | 0  | }  | 
2566  |  |  | 
2567  |  | /**  | 
2568  |  |  * Compares two atoms to check whether they are the same exactly  | 
2569  |  |  * this is used to remove equivalent transitions  | 
2570  |  |  *  | 
2571  |  |  * @param atom1  an atom  | 
2572  |  |  * @param atom2  an atom  | 
2573  |  |  * @param deep  if not set only compare string pointers  | 
2574  |  |  * @returns 1 if same and 0 otherwise  | 
2575  |  |  */  | 
2576  |  | static int  | 
2577  | 0  | xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) { | 
2578  | 0  |     int ret = 0;  | 
2579  |  | 
  | 
2580  | 0  |     if (atom1 == atom2)  | 
2581  | 0  |   return(1);  | 
2582  | 0  |     if ((atom1 == NULL) || (atom2 == NULL))  | 
2583  | 0  |   return(0);  | 
2584  |  |  | 
2585  | 0  |     if (atom1->type != atom2->type)  | 
2586  | 0  |         return(0);  | 
2587  | 0  |     switch (atom1->type) { | 
2588  | 0  |         case XML_REGEXP_EPSILON:  | 
2589  | 0  |       ret = 0;  | 
2590  | 0  |       break;  | 
2591  | 0  |         case XML_REGEXP_STRING:  | 
2592  | 0  |             if (!deep)  | 
2593  | 0  |                 ret = (atom1->valuep == atom2->valuep);  | 
2594  | 0  |             else  | 
2595  | 0  |                 ret = xmlStrEqual((xmlChar *)atom1->valuep,  | 
2596  | 0  |                                   (xmlChar *)atom2->valuep);  | 
2597  | 0  |       break;  | 
2598  | 0  |         case XML_REGEXP_CHARVAL:  | 
2599  | 0  |       ret = (atom1->codepoint == atom2->codepoint);  | 
2600  | 0  |       break;  | 
2601  | 0  |   case XML_REGEXP_RANGES:  | 
2602  |  |       /* too hard to do in the general case */  | 
2603  | 0  |       ret = 0;  | 
2604  | 0  |   default:  | 
2605  | 0  |       break;  | 
2606  | 0  |     }  | 
2607  | 0  |     return(ret);  | 
2608  | 0  | }  | 
2609  |  |  | 
2610  |  | /**  | 
2611  |  |  * Compares two atoms to check whether they intersect in some ways,  | 
2612  |  |  * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only  | 
2613  |  |  *  | 
2614  |  |  * @param atom1  an atom  | 
2615  |  |  * @param atom2  an atom  | 
2616  |  |  * @param deep  if not set only compare string pointers  | 
2617  |  |  * @returns 1 if yes and 0 otherwise  | 
2618  |  |  */  | 
2619  |  | static int  | 
2620  | 0  | xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) { | 
2621  | 0  |     int ret = 1;  | 
2622  |  | 
  | 
2623  | 0  |     if (atom1 == atom2)  | 
2624  | 0  |   return(1);  | 
2625  | 0  |     if ((atom1 == NULL) || (atom2 == NULL))  | 
2626  | 0  |   return(0);  | 
2627  |  |  | 
2628  | 0  |     if ((atom1->type == XML_REGEXP_ANYCHAR) ||  | 
2629  | 0  |         (atom2->type == XML_REGEXP_ANYCHAR))  | 
2630  | 0  |   return(1);  | 
2631  |  |  | 
2632  | 0  |     if (atom1->type > atom2->type) { | 
2633  | 0  |   xmlRegAtomPtr tmp;  | 
2634  | 0  |   tmp = atom1;  | 
2635  | 0  |   atom1 = atom2;  | 
2636  | 0  |   atom2 = tmp;  | 
2637  | 0  |     }  | 
2638  | 0  |     if (atom1->type != atom2->type) { | 
2639  | 0  |         ret = xmlFACompareAtomTypes(atom1->type, atom2->type);  | 
2640  |  |   /* if they can't intersect at the type level break now */  | 
2641  | 0  |   if (ret == 0)  | 
2642  | 0  |       return(0);  | 
2643  | 0  |     }  | 
2644  | 0  |     switch (atom1->type) { | 
2645  | 0  |         case XML_REGEXP_STRING:  | 
2646  | 0  |             if (!deep)  | 
2647  | 0  |                 ret = (atom1->valuep != atom2->valuep);  | 
2648  | 0  |             else { | 
2649  | 0  |                 xmlChar *val1 = (xmlChar *)atom1->valuep;  | 
2650  | 0  |                 xmlChar *val2 = (xmlChar *)atom2->valuep;  | 
2651  | 0  |                 int compound1 = (xmlStrchr(val1, '|') != NULL);  | 
2652  | 0  |                 int compound2 = (xmlStrchr(val2, '|') != NULL);  | 
2653  |  |  | 
2654  |  |                 /* Ignore negative match flag for ##other namespaces */  | 
2655  | 0  |                 if (compound1 != compound2)  | 
2656  | 0  |                     return(0);  | 
2657  |  |  | 
2658  | 0  |                 ret = xmlRegStrEqualWildcard(val1, val2);  | 
2659  | 0  |             }  | 
2660  | 0  |       break;  | 
2661  | 0  |         case XML_REGEXP_EPSILON:  | 
2662  | 0  |       goto not_determinist;  | 
2663  | 0  |         case XML_REGEXP_CHARVAL:  | 
2664  | 0  |       if (atom2->type == XML_REGEXP_CHARVAL) { | 
2665  | 0  |     ret = (atom1->codepoint == atom2->codepoint);  | 
2666  | 0  |       } else { | 
2667  | 0  |           ret = xmlRegCheckCharacter(atom2, atom1->codepoint);  | 
2668  | 0  |     if (ret < 0)  | 
2669  | 0  |         ret = 1;  | 
2670  | 0  |       }  | 
2671  | 0  |       break;  | 
2672  | 0  |         case XML_REGEXP_RANGES:  | 
2673  | 0  |       if (atom2->type == XML_REGEXP_RANGES) { | 
2674  | 0  |           int i, j, res;  | 
2675  | 0  |     xmlRegRangePtr r1, r2;  | 
2676  |  |  | 
2677  |  |     /*  | 
2678  |  |      * need to check that none of the ranges eventually matches  | 
2679  |  |      */  | 
2680  | 0  |     for (i = 0;i < atom1->nbRanges;i++) { | 
2681  | 0  |         for (j = 0;j < atom2->nbRanges;j++) { | 
2682  | 0  |       r1 = atom1->ranges[i];  | 
2683  | 0  |       r2 = atom2->ranges[j];  | 
2684  | 0  |       res = xmlFACompareRanges(r1, r2);  | 
2685  | 0  |       if (res == 1) { | 
2686  | 0  |           ret = 1;  | 
2687  | 0  |           goto done;  | 
2688  | 0  |       }  | 
2689  | 0  |         }  | 
2690  | 0  |     }  | 
2691  | 0  |     ret = 0;  | 
2692  | 0  |       }  | 
2693  | 0  |       break;  | 
2694  | 0  |   default:  | 
2695  | 0  |       goto not_determinist;  | 
2696  | 0  |     }  | 
2697  | 0  | done:  | 
2698  | 0  |     if (atom1->neg != atom2->neg) { | 
2699  | 0  |         ret = !ret;  | 
2700  | 0  |     }  | 
2701  | 0  |     if (ret == 0)  | 
2702  | 0  |         return(0);  | 
2703  | 0  | not_determinist:  | 
2704  | 0  |     return(1);  | 
2705  | 0  | }  | 
2706  |  |  | 
2707  |  | /**  | 
2708  |  |  * Check whether the associated regexp is determinist,  | 
2709  |  |  * should be called after xmlFAEliminateEpsilonTransitions  | 
2710  |  |  *  | 
2711  |  |  * @param ctxt  a regexp parser context  | 
2712  |  |  * @param state  regexp state  | 
2713  |  |  * @param fromnr  the from state  | 
2714  |  |  * @param tonr  the to state  | 
2715  |  |  * @param atom  the atom  | 
2716  |  |  */  | 
2717  |  | static int  | 
2718  |  | xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,  | 
2719  | 0  |                   int fromnr, int tonr, xmlRegAtomPtr atom) { | 
2720  | 0  |     int ret = 1;  | 
2721  | 0  |     int res;  | 
2722  | 0  |     int transnr, nbTrans;  | 
2723  | 0  |     xmlRegTransPtr t1;  | 
2724  | 0  |     int deep = 1;  | 
2725  |  | 
  | 
2726  | 0  |     if (state == NULL)  | 
2727  | 0  |   return(ret);  | 
2728  | 0  |     if (state->markd == XML_REGEXP_MARK_VISITED)  | 
2729  | 0  |   return(ret);  | 
2730  |  |  | 
2731  | 0  |     if (ctxt->flags & AM_AUTOMATA_RNG)  | 
2732  | 0  |         deep = 0;  | 
2733  |  |  | 
2734  |  |     /*  | 
2735  |  |      * don't recurse on transitions potentially added in the course of  | 
2736  |  |      * the elimination.  | 
2737  |  |      */  | 
2738  | 0  |     nbTrans = state->nbTrans;  | 
2739  | 0  |     for (transnr = 0;transnr < nbTrans;transnr++) { | 
2740  | 0  |   t1 = &(state->trans[transnr]);  | 
2741  |  |   /*  | 
2742  |  |    * check transitions conflicting with the one looked at  | 
2743  |  |    */  | 
2744  | 0  |         if ((t1->to < 0) || (t1->to == fromnr))  | 
2745  | 0  |             continue;  | 
2746  | 0  |   if (t1->atom == NULL) { | 
2747  | 0  |       state->markd = XML_REGEXP_MARK_VISITED;  | 
2748  | 0  |       res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],  | 
2749  | 0  |                               fromnr, tonr, atom);  | 
2750  | 0  |       if (res == 0) { | 
2751  | 0  |           ret = 0;  | 
2752  |  |     /* t1->nd = 1; */  | 
2753  | 0  |       }  | 
2754  | 0  |       continue;  | 
2755  | 0  |   }  | 
2756  | 0  |   if (xmlFACompareAtoms(t1->atom, atom, deep)) { | 
2757  |  |             /* Treat equal transitions as deterministic. */  | 
2758  | 0  |             if ((t1->to != tonr) ||  | 
2759  | 0  |                 (!xmlFAEqualAtoms(t1->atom, atom, deep)))  | 
2760  | 0  |                 ret = 0;  | 
2761  |  |       /* mark the transition as non-deterministic */  | 
2762  | 0  |       t1->nd = 1;  | 
2763  | 0  |   }  | 
2764  | 0  |     }  | 
2765  | 0  |     return(ret);  | 
2766  | 0  | }  | 
2767  |  |  | 
2768  |  | /**  | 
2769  |  |  * Reset flags after checking determinism.  | 
2770  |  |  *  | 
2771  |  |  * @param ctxt  a regexp parser context  | 
2772  |  |  * @param state  regexp state  | 
2773  |  |  */  | 
2774  |  | static void  | 
2775  | 0  | xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) { | 
2776  | 0  |     int transnr, nbTrans;  | 
2777  |  | 
  | 
2778  | 0  |     if (state == NULL)  | 
2779  | 0  |   return;  | 
2780  | 0  |     if (state->markd != XML_REGEXP_MARK_VISITED)  | 
2781  | 0  |   return;  | 
2782  | 0  |     state->markd = 0;  | 
2783  |  | 
  | 
2784  | 0  |     nbTrans = state->nbTrans;  | 
2785  | 0  |     for (transnr = 0; transnr < nbTrans; transnr++) { | 
2786  | 0  |   xmlRegTransPtr t1 = &state->trans[transnr];  | 
2787  | 0  |   if ((t1->atom == NULL) && (t1->to >= 0))  | 
2788  | 0  |       xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);  | 
2789  | 0  |     }  | 
2790  | 0  | }  | 
2791  |  |  | 
2792  |  | /**  | 
2793  |  |  * Check whether the associated regexp is determinist,  | 
2794  |  |  * should be called after xmlFAEliminateEpsilonTransitions  | 
2795  |  |  *  | 
2796  |  |  * @param ctxt  a regexp parser context  | 
2797  |  |  */  | 
2798  |  | static int  | 
2799  | 0  | xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { | 
2800  | 0  |     int statenr, transnr;  | 
2801  | 0  |     xmlRegStatePtr state;  | 
2802  | 0  |     xmlRegTransPtr t1, t2, last;  | 
2803  | 0  |     int i;  | 
2804  | 0  |     int ret = 1;  | 
2805  | 0  |     int deep = 1;  | 
2806  |  | 
  | 
2807  | 0  |     if (ctxt->determinist != -1)  | 
2808  | 0  |   return(ctxt->determinist);  | 
2809  |  |  | 
2810  | 0  |     if (ctxt->flags & AM_AUTOMATA_RNG)  | 
2811  | 0  |         deep = 0;  | 
2812  |  |  | 
2813  |  |     /*  | 
2814  |  |      * First cleanup the automata removing cancelled transitions  | 
2815  |  |      */  | 
2816  | 0  |     for (statenr = 0;statenr < ctxt->nbStates;statenr++) { | 
2817  | 0  |   state = ctxt->states[statenr];  | 
2818  | 0  |   if (state == NULL)  | 
2819  | 0  |       continue;  | 
2820  | 0  |   if (state->nbTrans < 2)  | 
2821  | 0  |       continue;  | 
2822  | 0  |   for (transnr = 0;transnr < state->nbTrans;transnr++) { | 
2823  | 0  |       t1 = &(state->trans[transnr]);  | 
2824  |  |       /*  | 
2825  |  |        * Determinism checks in case of counted or all transitions  | 
2826  |  |        * will have to be handled separately  | 
2827  |  |        */  | 
2828  | 0  |       if (t1->atom == NULL) { | 
2829  |  |     /* t1->nd = 1; */  | 
2830  | 0  |     continue;  | 
2831  | 0  |       }  | 
2832  | 0  |       if (t1->to < 0) /* eliminated */  | 
2833  | 0  |     continue;  | 
2834  | 0  |       for (i = 0;i < transnr;i++) { | 
2835  | 0  |     t2 = &(state->trans[i]);  | 
2836  | 0  |     if (t2->to < 0) /* eliminated */  | 
2837  | 0  |         continue;  | 
2838  | 0  |     if (t2->atom != NULL) { | 
2839  | 0  |         if (t1->to == t2->to) { | 
2840  |  |                         /*  | 
2841  |  |                          * Here we use deep because we want to keep the  | 
2842  |  |                          * transitions which indicate a conflict  | 
2843  |  |                          */  | 
2844  | 0  |       if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&  | 
2845  | 0  |                             (t1->counter == t2->counter) &&  | 
2846  | 0  |                             (t1->count == t2->count))  | 
2847  | 0  |           t2->to = -1; /* eliminated */  | 
2848  | 0  |         }  | 
2849  | 0  |     }  | 
2850  | 0  |       }  | 
2851  | 0  |   }  | 
2852  | 0  |     }  | 
2853  |  |  | 
2854  |  |     /*  | 
2855  |  |      * Check for all states that there aren't 2 transitions  | 
2856  |  |      * with the same atom and a different target.  | 
2857  |  |      */  | 
2858  | 0  |     for (statenr = 0;statenr < ctxt->nbStates;statenr++) { | 
2859  | 0  |   state = ctxt->states[statenr];  | 
2860  | 0  |   if (state == NULL)  | 
2861  | 0  |       continue;  | 
2862  | 0  |   if (state->nbTrans < 2)  | 
2863  | 0  |       continue;  | 
2864  | 0  |   last = NULL;  | 
2865  | 0  |   for (transnr = 0;transnr < state->nbTrans;transnr++) { | 
2866  | 0  |       t1 = &(state->trans[transnr]);  | 
2867  |  |       /*  | 
2868  |  |        * Determinism checks in case of counted or all transitions  | 
2869  |  |        * will have to be handled separately  | 
2870  |  |        */  | 
2871  | 0  |       if (t1->atom == NULL) { | 
2872  | 0  |     continue;  | 
2873  | 0  |       }  | 
2874  | 0  |       if (t1->to < 0) /* eliminated */  | 
2875  | 0  |     continue;  | 
2876  | 0  |       for (i = 0;i < transnr;i++) { | 
2877  | 0  |     t2 = &(state->trans[i]);  | 
2878  | 0  |     if (t2->to < 0) /* eliminated */  | 
2879  | 0  |         continue;  | 
2880  | 0  |     if (t2->atom != NULL) { | 
2881  |  |                     /*  | 
2882  |  |                      * But here we don't use deep because we want to  | 
2883  |  |                      * find transitions which indicate a conflict  | 
2884  |  |                      */  | 
2885  | 0  |         if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) { | 
2886  |  |                         /*  | 
2887  |  |                          * Treat equal counter transitions that couldn't be  | 
2888  |  |                          * eliminated as deterministic.  | 
2889  |  |                          */  | 
2890  | 0  |                         if ((t1->to != t2->to) ||  | 
2891  | 0  |                             (t1->counter == t2->counter) ||  | 
2892  | 0  |                             (!xmlFAEqualAtoms(t1->atom, t2->atom, deep)))  | 
2893  | 0  |                             ret = 0;  | 
2894  |  |       /* mark the transitions as non-deterministic ones */  | 
2895  | 0  |       t1->nd = 1;  | 
2896  | 0  |       t2->nd = 1;  | 
2897  | 0  |       last = t1;  | 
2898  | 0  |         }  | 
2899  | 0  |     } else { | 
2900  | 0  |                     int res;  | 
2901  |  |  | 
2902  |  |         /*  | 
2903  |  |          * do the closure in case of remaining specific  | 
2904  |  |          * epsilon transitions like choices or all  | 
2905  |  |          */  | 
2906  | 0  |         res = xmlFARecurseDeterminism(ctxt, ctxt->states[t2->to],  | 
2907  | 0  |               statenr, t1->to, t1->atom);  | 
2908  | 0  |                     xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t2->to]);  | 
2909  |  |         /* don't shortcut the computation so all non deterministic  | 
2910  |  |            transition get marked down  | 
2911  |  |         if (ret == 0)  | 
2912  |  |       return(0);  | 
2913  |  |          */  | 
2914  | 0  |         if (res == 0) { | 
2915  | 0  |       t1->nd = 1;  | 
2916  |  |       /* t2->nd = 1; */  | 
2917  | 0  |       last = t1;  | 
2918  | 0  |                         ret = 0;  | 
2919  | 0  |         }  | 
2920  | 0  |     }  | 
2921  | 0  |       }  | 
2922  |  |       /* don't shortcut the computation so all non deterministic  | 
2923  |  |          transition get marked down  | 
2924  |  |       if (ret == 0)  | 
2925  |  |     break; */  | 
2926  | 0  |   }  | 
2927  |  |  | 
2928  |  |   /*  | 
2929  |  |    * mark specifically the last non-deterministic transition  | 
2930  |  |    * from a state since there is no need to set-up rollback  | 
2931  |  |    * from it  | 
2932  |  |    */  | 
2933  | 0  |   if (last != NULL) { | 
2934  | 0  |       last->nd = 2;  | 
2935  | 0  |   }  | 
2936  |  |  | 
2937  |  |   /* don't shortcut the computation so all non deterministic  | 
2938  |  |      transition get marked down  | 
2939  |  |   if (ret == 0)  | 
2940  |  |       break; */  | 
2941  | 0  |     }  | 
2942  |  | 
  | 
2943  | 0  |     ctxt->determinist = ret;  | 
2944  | 0  |     return(ret);  | 
2945  | 0  | }  | 
2946  |  |  | 
2947  |  | /************************************************************************  | 
2948  |  |  *                  *  | 
2949  |  |  *  Routines to check input against transition atoms    *  | 
2950  |  |  *                  *  | 
2951  |  |  ************************************************************************/  | 
2952  |  |  | 
2953  |  | static int  | 
2954  |  | xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,  | 
2955  | 0  |                     int start, int end, const xmlChar *blockName) { | 
2956  | 0  |     int ret = 0;  | 
2957  |  | 
  | 
2958  | 0  |     switch (type) { | 
2959  | 0  |         case XML_REGEXP_STRING:  | 
2960  | 0  |         case XML_REGEXP_SUBREG:  | 
2961  | 0  |         case XML_REGEXP_RANGES:  | 
2962  | 0  |         case XML_REGEXP_EPSILON:  | 
2963  | 0  |       return(-1);  | 
2964  | 0  |         case XML_REGEXP_ANYCHAR:  | 
2965  | 0  |       ret = ((codepoint != '\n') && (codepoint != '\r'));  | 
2966  | 0  |       break;  | 
2967  | 0  |         case XML_REGEXP_CHARVAL:  | 
2968  | 0  |       ret = ((codepoint >= start) && (codepoint <= end));  | 
2969  | 0  |       break;  | 
2970  | 0  |         case XML_REGEXP_NOTSPACE:  | 
2971  | 0  |       neg = !neg;  | 
2972  |  |             /* Falls through. */  | 
2973  | 0  |         case XML_REGEXP_ANYSPACE:  | 
2974  | 0  |       ret = ((codepoint == '\n') || (codepoint == '\r') ||  | 
2975  | 0  |        (codepoint == '\t') || (codepoint == ' '));  | 
2976  | 0  |       break;  | 
2977  | 0  |         case XML_REGEXP_NOTINITNAME:  | 
2978  | 0  |       neg = !neg;  | 
2979  |  |             /* Falls through. */  | 
2980  | 0  |         case XML_REGEXP_INITNAME:  | 
2981  | 0  |       ret = (IS_LETTER(codepoint) ||  | 
2982  | 0  |        (codepoint == '_') || (codepoint == ':'));  | 
2983  | 0  |       break;  | 
2984  | 0  |         case XML_REGEXP_NOTNAMECHAR:  | 
2985  | 0  |       neg = !neg;  | 
2986  |  |             /* Falls through. */  | 
2987  | 0  |         case XML_REGEXP_NAMECHAR:  | 
2988  | 0  |       ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||  | 
2989  | 0  |        (codepoint == '.') || (codepoint == '-') ||  | 
2990  | 0  |        (codepoint == '_') || (codepoint == ':') ||  | 
2991  | 0  |        IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));  | 
2992  | 0  |       break;  | 
2993  | 0  |         case XML_REGEXP_NOTDECIMAL:  | 
2994  | 0  |       neg = !neg;  | 
2995  |  |             /* Falls through. */  | 
2996  | 0  |         case XML_REGEXP_DECIMAL:  | 
2997  | 0  |       ret = xmlUCSIsCatNd(codepoint);  | 
2998  | 0  |       break;  | 
2999  | 0  |         case XML_REGEXP_REALCHAR:  | 
3000  | 0  |       neg = !neg;  | 
3001  |  |             /* Falls through. */  | 
3002  | 0  |         case XML_REGEXP_NOTREALCHAR:  | 
3003  | 0  |       ret = xmlUCSIsCatP(codepoint);  | 
3004  | 0  |       if (ret == 0)  | 
3005  | 0  |     ret = xmlUCSIsCatZ(codepoint);  | 
3006  | 0  |       if (ret == 0)  | 
3007  | 0  |     ret = xmlUCSIsCatC(codepoint);  | 
3008  | 0  |       break;  | 
3009  | 0  |         case XML_REGEXP_LETTER:  | 
3010  | 0  |       ret = xmlUCSIsCatL(codepoint);  | 
3011  | 0  |       break;  | 
3012  | 0  |         case XML_REGEXP_LETTER_UPPERCASE:  | 
3013  | 0  |       ret = xmlUCSIsCatLu(codepoint);  | 
3014  | 0  |       break;  | 
3015  | 0  |         case XML_REGEXP_LETTER_LOWERCASE:  | 
3016  | 0  |       ret = xmlUCSIsCatLl(codepoint);  | 
3017  | 0  |       break;  | 
3018  | 0  |         case XML_REGEXP_LETTER_TITLECASE:  | 
3019  | 0  |       ret = xmlUCSIsCatLt(codepoint);  | 
3020  | 0  |       break;  | 
3021  | 0  |         case XML_REGEXP_LETTER_MODIFIER:  | 
3022  | 0  |       ret = xmlUCSIsCatLm(codepoint);  | 
3023  | 0  |       break;  | 
3024  | 0  |         case XML_REGEXP_LETTER_OTHERS:  | 
3025  | 0  |       ret = xmlUCSIsCatLo(codepoint);  | 
3026  | 0  |       break;  | 
3027  | 0  |         case XML_REGEXP_MARK:  | 
3028  | 0  |       ret = xmlUCSIsCatM(codepoint);  | 
3029  | 0  |       break;  | 
3030  | 0  |         case XML_REGEXP_MARK_NONSPACING:  | 
3031  | 0  |       ret = xmlUCSIsCatMn(codepoint);  | 
3032  | 0  |       break;  | 
3033  | 0  |         case XML_REGEXP_MARK_SPACECOMBINING:  | 
3034  | 0  |       ret = xmlUCSIsCatMc(codepoint);  | 
3035  | 0  |       break;  | 
3036  | 0  |         case XML_REGEXP_MARK_ENCLOSING:  | 
3037  | 0  |       ret = xmlUCSIsCatMe(codepoint);  | 
3038  | 0  |       break;  | 
3039  | 0  |         case XML_REGEXP_NUMBER:  | 
3040  | 0  |       ret = xmlUCSIsCatN(codepoint);  | 
3041  | 0  |       break;  | 
3042  | 0  |         case XML_REGEXP_NUMBER_DECIMAL:  | 
3043  | 0  |       ret = xmlUCSIsCatNd(codepoint);  | 
3044  | 0  |       break;  | 
3045  | 0  |         case XML_REGEXP_NUMBER_LETTER:  | 
3046  | 0  |       ret = xmlUCSIsCatNl(codepoint);  | 
3047  | 0  |       break;  | 
3048  | 0  |         case XML_REGEXP_NUMBER_OTHERS:  | 
3049  | 0  |       ret = xmlUCSIsCatNo(codepoint);  | 
3050  | 0  |       break;  | 
3051  | 0  |         case XML_REGEXP_PUNCT:  | 
3052  | 0  |       ret = xmlUCSIsCatP(codepoint);  | 
3053  | 0  |       break;  | 
3054  | 0  |         case XML_REGEXP_PUNCT_CONNECTOR:  | 
3055  | 0  |       ret = xmlUCSIsCatPc(codepoint);  | 
3056  | 0  |       break;  | 
3057  | 0  |         case XML_REGEXP_PUNCT_DASH:  | 
3058  | 0  |       ret = xmlUCSIsCatPd(codepoint);  | 
3059  | 0  |       break;  | 
3060  | 0  |         case XML_REGEXP_PUNCT_OPEN:  | 
3061  | 0  |       ret = xmlUCSIsCatPs(codepoint);  | 
3062  | 0  |       break;  | 
3063  | 0  |         case XML_REGEXP_PUNCT_CLOSE:  | 
3064  | 0  |       ret = xmlUCSIsCatPe(codepoint);  | 
3065  | 0  |       break;  | 
3066  | 0  |         case XML_REGEXP_PUNCT_INITQUOTE:  | 
3067  | 0  |       ret = xmlUCSIsCatPi(codepoint);  | 
3068  | 0  |       break;  | 
3069  | 0  |         case XML_REGEXP_PUNCT_FINQUOTE:  | 
3070  | 0  |       ret = xmlUCSIsCatPf(codepoint);  | 
3071  | 0  |       break;  | 
3072  | 0  |         case XML_REGEXP_PUNCT_OTHERS:  | 
3073  | 0  |       ret = xmlUCSIsCatPo(codepoint);  | 
3074  | 0  |       break;  | 
3075  | 0  |         case XML_REGEXP_SEPAR:  | 
3076  | 0  |       ret = xmlUCSIsCatZ(codepoint);  | 
3077  | 0  |       break;  | 
3078  | 0  |         case XML_REGEXP_SEPAR_SPACE:  | 
3079  | 0  |       ret = xmlUCSIsCatZs(codepoint);  | 
3080  | 0  |       break;  | 
3081  | 0  |         case XML_REGEXP_SEPAR_LINE:  | 
3082  | 0  |       ret = xmlUCSIsCatZl(codepoint);  | 
3083  | 0  |       break;  | 
3084  | 0  |         case XML_REGEXP_SEPAR_PARA:  | 
3085  | 0  |       ret = xmlUCSIsCatZp(codepoint);  | 
3086  | 0  |       break;  | 
3087  | 0  |         case XML_REGEXP_SYMBOL:  | 
3088  | 0  |       ret = xmlUCSIsCatS(codepoint);  | 
3089  | 0  |       break;  | 
3090  | 0  |         case XML_REGEXP_SYMBOL_MATH:  | 
3091  | 0  |       ret = xmlUCSIsCatSm(codepoint);  | 
3092  | 0  |       break;  | 
3093  | 0  |         case XML_REGEXP_SYMBOL_CURRENCY:  | 
3094  | 0  |       ret = xmlUCSIsCatSc(codepoint);  | 
3095  | 0  |       break;  | 
3096  | 0  |         case XML_REGEXP_SYMBOL_MODIFIER:  | 
3097  | 0  |       ret = xmlUCSIsCatSk(codepoint);  | 
3098  | 0  |       break;  | 
3099  | 0  |         case XML_REGEXP_SYMBOL_OTHERS:  | 
3100  | 0  |       ret = xmlUCSIsCatSo(codepoint);  | 
3101  | 0  |       break;  | 
3102  | 0  |         case XML_REGEXP_OTHER:  | 
3103  | 0  |       ret = xmlUCSIsCatC(codepoint);  | 
3104  | 0  |       break;  | 
3105  | 0  |         case XML_REGEXP_OTHER_CONTROL:  | 
3106  | 0  |       ret = xmlUCSIsCatCc(codepoint);  | 
3107  | 0  |       break;  | 
3108  | 0  |         case XML_REGEXP_OTHER_FORMAT:  | 
3109  | 0  |       ret = xmlUCSIsCatCf(codepoint);  | 
3110  | 0  |       break;  | 
3111  | 0  |         case XML_REGEXP_OTHER_PRIVATE:  | 
3112  | 0  |       ret = xmlUCSIsCatCo(codepoint);  | 
3113  | 0  |       break;  | 
3114  | 0  |         case XML_REGEXP_OTHER_NA:  | 
3115  |  |       /* ret = xmlUCSIsCatCn(codepoint); */  | 
3116  |  |       /* Seems it doesn't exist anymore in recent Unicode releases */  | 
3117  | 0  |       ret = 0;  | 
3118  | 0  |       break;  | 
3119  | 0  |         case XML_REGEXP_BLOCK_NAME:  | 
3120  | 0  |       ret = xmlUCSIsBlock(codepoint, (const char *) blockName);  | 
3121  | 0  |       break;  | 
3122  | 0  |     }  | 
3123  | 0  |     if (neg)  | 
3124  | 0  |   return(!ret);  | 
3125  | 0  |     return(ret);  | 
3126  | 0  | }  | 
3127  |  |  | 
3128  |  | static int  | 
3129  | 0  | xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) { | 
3130  | 0  |     int i, ret = 0;  | 
3131  | 0  |     xmlRegRangePtr range;  | 
3132  |  | 
  | 
3133  | 0  |     if ((atom == NULL) || (!IS_CHAR(codepoint)))  | 
3134  | 0  |   return(-1);  | 
3135  |  |  | 
3136  | 0  |     switch (atom->type) { | 
3137  | 0  |         case XML_REGEXP_SUBREG:  | 
3138  | 0  |         case XML_REGEXP_EPSILON:  | 
3139  | 0  |       return(-1);  | 
3140  | 0  |         case XML_REGEXP_CHARVAL:  | 
3141  | 0  |             return(codepoint == atom->codepoint);  | 
3142  | 0  |         case XML_REGEXP_RANGES: { | 
3143  | 0  |       int accept = 0;  | 
3144  |  | 
  | 
3145  | 0  |       for (i = 0;i < atom->nbRanges;i++) { | 
3146  | 0  |     range = atom->ranges[i];  | 
3147  | 0  |     if (range->neg == 2) { | 
3148  | 0  |         ret = xmlRegCheckCharacterRange(range->type, codepoint,  | 
3149  | 0  |             0, range->start, range->end,  | 
3150  | 0  |             range->blockName);  | 
3151  | 0  |         if (ret != 0)  | 
3152  | 0  |       return(0); /* excluded char */  | 
3153  | 0  |     } else if (range->neg) { | 
3154  | 0  |         ret = xmlRegCheckCharacterRange(range->type, codepoint,  | 
3155  | 0  |             0, range->start, range->end,  | 
3156  | 0  |             range->blockName);  | 
3157  | 0  |         if (ret == 0)  | 
3158  | 0  |             accept = 1;  | 
3159  | 0  |         else  | 
3160  | 0  |             return(0);  | 
3161  | 0  |     } else { | 
3162  | 0  |         ret = xmlRegCheckCharacterRange(range->type, codepoint,  | 
3163  | 0  |             0, range->start, range->end,  | 
3164  | 0  |             range->blockName);  | 
3165  | 0  |         if (ret != 0)  | 
3166  | 0  |       accept = 1; /* might still be excluded */  | 
3167  | 0  |     }  | 
3168  | 0  |       }  | 
3169  | 0  |       return(accept);  | 
3170  | 0  |   }  | 
3171  | 0  |         case XML_REGEXP_STRING:  | 
3172  | 0  |       return(-1);  | 
3173  | 0  |         case XML_REGEXP_ANYCHAR:  | 
3174  | 0  |         case XML_REGEXP_ANYSPACE:  | 
3175  | 0  |         case XML_REGEXP_NOTSPACE:  | 
3176  | 0  |         case XML_REGEXP_INITNAME:  | 
3177  | 0  |         case XML_REGEXP_NOTINITNAME:  | 
3178  | 0  |         case XML_REGEXP_NAMECHAR:  | 
3179  | 0  |         case XML_REGEXP_NOTNAMECHAR:  | 
3180  | 0  |         case XML_REGEXP_DECIMAL:  | 
3181  | 0  |         case XML_REGEXP_NOTDECIMAL:  | 
3182  | 0  |         case XML_REGEXP_REALCHAR:  | 
3183  | 0  |         case XML_REGEXP_NOTREALCHAR:  | 
3184  | 0  |         case XML_REGEXP_LETTER:  | 
3185  | 0  |         case XML_REGEXP_LETTER_UPPERCASE:  | 
3186  | 0  |         case XML_REGEXP_LETTER_LOWERCASE:  | 
3187  | 0  |         case XML_REGEXP_LETTER_TITLECASE:  | 
3188  | 0  |         case XML_REGEXP_LETTER_MODIFIER:  | 
3189  | 0  |         case XML_REGEXP_LETTER_OTHERS:  | 
3190  | 0  |         case XML_REGEXP_MARK:  | 
3191  | 0  |         case XML_REGEXP_MARK_NONSPACING:  | 
3192  | 0  |         case XML_REGEXP_MARK_SPACECOMBINING:  | 
3193  | 0  |         case XML_REGEXP_MARK_ENCLOSING:  | 
3194  | 0  |         case XML_REGEXP_NUMBER:  | 
3195  | 0  |         case XML_REGEXP_NUMBER_DECIMAL:  | 
3196  | 0  |         case XML_REGEXP_NUMBER_LETTER:  | 
3197  | 0  |         case XML_REGEXP_NUMBER_OTHERS:  | 
3198  | 0  |         case XML_REGEXP_PUNCT:  | 
3199  | 0  |         case XML_REGEXP_PUNCT_CONNECTOR:  | 
3200  | 0  |         case XML_REGEXP_PUNCT_DASH:  | 
3201  | 0  |         case XML_REGEXP_PUNCT_OPEN:  | 
3202  | 0  |         case XML_REGEXP_PUNCT_CLOSE:  | 
3203  | 0  |         case XML_REGEXP_PUNCT_INITQUOTE:  | 
3204  | 0  |         case XML_REGEXP_PUNCT_FINQUOTE:  | 
3205  | 0  |         case XML_REGEXP_PUNCT_OTHERS:  | 
3206  | 0  |         case XML_REGEXP_SEPAR:  | 
3207  | 0  |         case XML_REGEXP_SEPAR_SPACE:  | 
3208  | 0  |         case XML_REGEXP_SEPAR_LINE:  | 
3209  | 0  |         case XML_REGEXP_SEPAR_PARA:  | 
3210  | 0  |         case XML_REGEXP_SYMBOL:  | 
3211  | 0  |         case XML_REGEXP_SYMBOL_MATH:  | 
3212  | 0  |         case XML_REGEXP_SYMBOL_CURRENCY:  | 
3213  | 0  |         case XML_REGEXP_SYMBOL_MODIFIER:  | 
3214  | 0  |         case XML_REGEXP_SYMBOL_OTHERS:  | 
3215  | 0  |         case XML_REGEXP_OTHER:  | 
3216  | 0  |         case XML_REGEXP_OTHER_CONTROL:  | 
3217  | 0  |         case XML_REGEXP_OTHER_FORMAT:  | 
3218  | 0  |         case XML_REGEXP_OTHER_PRIVATE:  | 
3219  | 0  |         case XML_REGEXP_OTHER_NA:  | 
3220  | 0  |   case XML_REGEXP_BLOCK_NAME:  | 
3221  | 0  |       ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,  | 
3222  | 0  |                                 (const xmlChar *)atom->valuep);  | 
3223  | 0  |       if (atom->neg)  | 
3224  | 0  |     ret = !ret;  | 
3225  | 0  |       break;  | 
3226  | 0  |     }  | 
3227  | 0  |     return(ret);  | 
3228  | 0  | }  | 
3229  |  |  | 
3230  |  | /************************************************************************  | 
3231  |  |  *                  *  | 
3232  |  |  *  Saving and restoring state of an execution context    *  | 
3233  |  |  *                  *  | 
3234  |  |  ************************************************************************/  | 
3235  |  |  | 
3236  |  | static void  | 
3237  | 0  | xmlFARegExecSave(xmlRegExecCtxtPtr exec) { | 
3238  | 0  | #ifdef MAX_PUSH  | 
3239  | 0  |     if (exec->nbPush > MAX_PUSH) { | 
3240  | 0  |         exec->status = XML_REGEXP_INTERNAL_LIMIT;  | 
3241  | 0  |         return;  | 
3242  | 0  |     }  | 
3243  | 0  |     exec->nbPush++;  | 
3244  | 0  | #endif  | 
3245  |  | 
  | 
3246  | 0  |     if (exec->nbRollbacks >= exec->maxRollbacks) { | 
3247  | 0  |   xmlRegExecRollback *tmp;  | 
3248  | 0  |         int newSize;  | 
3249  | 0  |   int len = exec->nbRollbacks;  | 
3250  |  | 
  | 
3251  | 0  |         newSize = xmlGrowCapacity(exec->maxRollbacks, sizeof(tmp[0]),  | 
3252  | 0  |                                   4, XML_MAX_ITEMS);  | 
3253  | 0  |   if (newSize < 0) { | 
3254  | 0  |             exec->status = XML_REGEXP_OUT_OF_MEMORY;  | 
3255  | 0  |       return;  | 
3256  | 0  |   }  | 
3257  | 0  |   tmp = xmlRealloc(exec->rollbacks, newSize * sizeof(tmp[0]));  | 
3258  | 0  |   if (tmp == NULL) { | 
3259  | 0  |             exec->status = XML_REGEXP_OUT_OF_MEMORY;  | 
3260  | 0  |       return;  | 
3261  | 0  |   }  | 
3262  | 0  |   exec->rollbacks = tmp;  | 
3263  | 0  |   exec->maxRollbacks = newSize;  | 
3264  | 0  |   tmp = &exec->rollbacks[len];  | 
3265  | 0  |   memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));  | 
3266  | 0  |     }  | 
3267  | 0  |     exec->rollbacks[exec->nbRollbacks].state = exec->state;  | 
3268  | 0  |     exec->rollbacks[exec->nbRollbacks].index = exec->index;  | 
3269  | 0  |     exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;  | 
3270  | 0  |     if (exec->comp->nbCounters > 0) { | 
3271  | 0  |   if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { | 
3272  | 0  |       exec->rollbacks[exec->nbRollbacks].counts = (int *)  | 
3273  | 0  |     xmlMalloc(exec->comp->nbCounters * sizeof(int));  | 
3274  | 0  |       if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { | 
3275  | 0  |     exec->status = XML_REGEXP_OUT_OF_MEMORY;  | 
3276  | 0  |     return;  | 
3277  | 0  |       }  | 
3278  | 0  |   }  | 
3279  | 0  |   memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,  | 
3280  | 0  |          exec->comp->nbCounters * sizeof(int));  | 
3281  | 0  |     }  | 
3282  | 0  |     exec->nbRollbacks++;  | 
3283  | 0  | }  | 
3284  |  |  | 
3285  |  | static void  | 
3286  | 0  | xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) { | 
3287  | 0  |     if (exec->status != XML_REGEXP_OK)  | 
3288  | 0  |         return;  | 
3289  | 0  |     if (exec->nbRollbacks <= 0) { | 
3290  | 0  |   exec->status = XML_REGEXP_NOT_FOUND;  | 
3291  | 0  |   return;  | 
3292  | 0  |     }  | 
3293  | 0  |     exec->nbRollbacks--;  | 
3294  | 0  |     exec->state = exec->rollbacks[exec->nbRollbacks].state;  | 
3295  | 0  |     exec->index = exec->rollbacks[exec->nbRollbacks].index;  | 
3296  | 0  |     exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;  | 
3297  | 0  |     if (exec->comp->nbCounters > 0) { | 
3298  | 0  |   if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { | 
3299  | 0  |       exec->status = XML_REGEXP_INTERNAL_ERROR;  | 
3300  | 0  |       return;  | 
3301  | 0  |   }  | 
3302  | 0  |   if (exec->counts) { | 
3303  | 0  |       memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,  | 
3304  | 0  |          exec->comp->nbCounters * sizeof(int));  | 
3305  | 0  |   }  | 
3306  | 0  |     }  | 
3307  | 0  | }  | 
3308  |  |  | 
3309  |  | /************************************************************************  | 
3310  |  |  *                  *  | 
3311  |  |  *  Verifier, running an input against a compiled regexp    *  | 
3312  |  |  *                  *  | 
3313  |  |  ************************************************************************/  | 
3314  |  |  | 
3315  |  | static int  | 
3316  | 0  | xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { | 
3317  | 0  |     xmlRegExecCtxt execval;  | 
3318  | 0  |     xmlRegExecCtxtPtr exec = &execval;  | 
3319  | 0  |     int ret, codepoint = 0, len, deter;  | 
3320  |  | 
  | 
3321  | 0  |     exec->inputString = content;  | 
3322  | 0  |     exec->index = 0;  | 
3323  | 0  |     exec->nbPush = 0;  | 
3324  | 0  |     exec->determinist = 1;  | 
3325  | 0  |     exec->maxRollbacks = 0;  | 
3326  | 0  |     exec->nbRollbacks = 0;  | 
3327  | 0  |     exec->rollbacks = NULL;  | 
3328  | 0  |     exec->status = XML_REGEXP_OK;  | 
3329  | 0  |     exec->comp = comp;  | 
3330  | 0  |     exec->state = comp->states[0];  | 
3331  | 0  |     exec->transno = 0;  | 
3332  | 0  |     exec->transcount = 0;  | 
3333  | 0  |     exec->inputStack = NULL;  | 
3334  | 0  |     exec->inputStackMax = 0;  | 
3335  | 0  |     if (comp->nbCounters > 0) { | 
3336  | 0  |   exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));  | 
3337  | 0  |   if (exec->counts == NULL) { | 
3338  | 0  |       return(XML_REGEXP_OUT_OF_MEMORY);  | 
3339  | 0  |   }  | 
3340  | 0  |         memset(exec->counts, 0, comp->nbCounters * sizeof(int));  | 
3341  | 0  |     } else  | 
3342  | 0  |   exec->counts = NULL;  | 
3343  | 0  |     while ((exec->status == XML_REGEXP_OK) && (exec->state != NULL) &&  | 
3344  | 0  |      ((exec->inputString[exec->index] != 0) ||  | 
3345  | 0  |       ((exec->state != NULL) &&  | 
3346  | 0  |        (exec->state->type != XML_REGEXP_FINAL_STATE)))) { | 
3347  | 0  |   xmlRegTransPtr trans;  | 
3348  | 0  |   xmlRegAtomPtr atom;  | 
3349  |  |  | 
3350  |  |   /*  | 
3351  |  |    * If end of input on non-terminal state, rollback, however we may  | 
3352  |  |    * still have epsilon like transition for counted transitions  | 
3353  |  |    * on counters, in that case don't break too early.  Additionally,  | 
3354  |  |    * if we are working on a range like "AB{0,2}", where B is not present, | 
3355  |  |    * we don't want to break.  | 
3356  |  |    */  | 
3357  | 0  |   len = 1;  | 
3358  | 0  |   if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) { | 
3359  |  |       /*  | 
3360  |  |        * if there is a transition, we must check if  | 
3361  |  |        *  atom allows minOccurs of 0  | 
3362  |  |        */  | 
3363  | 0  |       if (exec->transno < exec->state->nbTrans) { | 
3364  | 0  |           trans = &exec->state->trans[exec->transno];  | 
3365  | 0  |     if (trans->to >=0) { | 
3366  | 0  |         atom = trans->atom;  | 
3367  | 0  |         if (!((atom->min == 0) && (atom->max > 0)))  | 
3368  | 0  |             goto rollback;  | 
3369  | 0  |     }  | 
3370  | 0  |       } else  | 
3371  | 0  |           goto rollback;  | 
3372  | 0  |   }  | 
3373  |  |  | 
3374  | 0  |   exec->transcount = 0;  | 
3375  | 0  |   for (;exec->transno < exec->state->nbTrans;exec->transno++) { | 
3376  | 0  |       trans = &exec->state->trans[exec->transno];  | 
3377  | 0  |       if (trans->to < 0)  | 
3378  | 0  |     continue;  | 
3379  | 0  |       atom = trans->atom;  | 
3380  | 0  |       ret = 0;  | 
3381  | 0  |       deter = 1;  | 
3382  | 0  |       if (trans->count >= 0) { | 
3383  | 0  |     int count;  | 
3384  | 0  |     xmlRegCounterPtr counter;  | 
3385  |  | 
  | 
3386  | 0  |     if (exec->counts == NULL) { | 
3387  | 0  |         exec->status = XML_REGEXP_INTERNAL_ERROR;  | 
3388  | 0  |         goto error;  | 
3389  | 0  |     }  | 
3390  |  |     /*  | 
3391  |  |      * A counted transition.  | 
3392  |  |      */  | 
3393  |  |  | 
3394  | 0  |     count = exec->counts[trans->count];  | 
3395  | 0  |     counter = &exec->comp->counters[trans->count];  | 
3396  | 0  |     ret = ((count >= counter->min) && (count <= counter->max));  | 
3397  | 0  |     if ((ret) && (counter->min != counter->max))  | 
3398  | 0  |         deter = 0;  | 
3399  | 0  |       } else if (atom == NULL) { | 
3400  | 0  |     exec->status = XML_REGEXP_INTERNAL_ERROR;  | 
3401  | 0  |     break;  | 
3402  | 0  |       } else if (exec->inputString[exec->index] != 0) { | 
3403  | 0  |                 len = 4;  | 
3404  | 0  |                 codepoint = xmlGetUTF8Char(&exec->inputString[exec->index],  | 
3405  | 0  |                                            &len);  | 
3406  | 0  |                 if (codepoint < 0) { | 
3407  | 0  |                     exec->status = XML_REGEXP_INVALID_UTF8;  | 
3408  | 0  |                     goto error;  | 
3409  | 0  |                 }  | 
3410  | 0  |     ret = xmlRegCheckCharacter(atom, codepoint);  | 
3411  | 0  |     if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) { | 
3412  | 0  |         xmlRegStatePtr to = comp->states[trans->to];  | 
3413  |  |  | 
3414  |  |         /*  | 
3415  |  |          * this is a multiple input sequence  | 
3416  |  |          * If there is a counter associated increment it now.  | 
3417  |  |          * do not increment if the counter is already over the  | 
3418  |  |          * maximum limit in which case get to next transition  | 
3419  |  |          */  | 
3420  | 0  |         if (trans->counter >= 0) { | 
3421  | 0  |       xmlRegCounterPtr counter;  | 
3422  |  | 
  | 
3423  | 0  |       if ((exec->counts == NULL) ||  | 
3424  | 0  |           (exec->comp == NULL) ||  | 
3425  | 0  |           (exec->comp->counters == NULL)) { | 
3426  | 0  |           exec->status = XML_REGEXP_INTERNAL_ERROR;  | 
3427  | 0  |           goto error;  | 
3428  | 0  |       }  | 
3429  | 0  |       counter = &exec->comp->counters[trans->counter];  | 
3430  | 0  |       if (exec->counts[trans->counter] >= counter->max)  | 
3431  | 0  |           continue; /* for loop on transitions */  | 
3432  | 0  |                     }  | 
3433  |  |                     /* Save before incrementing */  | 
3434  | 0  |         if (exec->state->nbTrans > exec->transno + 1) { | 
3435  | 0  |       xmlFARegExecSave(exec);  | 
3436  | 0  |                         if (exec->status != XML_REGEXP_OK)  | 
3437  | 0  |                             goto error;  | 
3438  | 0  |         }  | 
3439  | 0  |         if (trans->counter >= 0) { | 
3440  | 0  |       exec->counts[trans->counter]++;  | 
3441  | 0  |         }  | 
3442  | 0  |         exec->transcount = 1;  | 
3443  | 0  |         do { | 
3444  |  |       /*  | 
3445  |  |        * Try to progress as much as possible on the input  | 
3446  |  |        */  | 
3447  | 0  |       if (exec->transcount == atom->max) { | 
3448  | 0  |           break;  | 
3449  | 0  |       }  | 
3450  | 0  |       exec->index += len;  | 
3451  |  |       /*  | 
3452  |  |        * End of input: stop here  | 
3453  |  |        */  | 
3454  | 0  |       if (exec->inputString[exec->index] == 0) { | 
3455  | 0  |           exec->index -= len;  | 
3456  | 0  |           break;  | 
3457  | 0  |       }  | 
3458  | 0  |       if (exec->transcount >= atom->min) { | 
3459  | 0  |           int transno = exec->transno;  | 
3460  | 0  |           xmlRegStatePtr state = exec->state;  | 
3461  |  |  | 
3462  |  |           /*  | 
3463  |  |            * The transition is acceptable save it  | 
3464  |  |            */  | 
3465  | 0  |           exec->transno = -1; /* trick */  | 
3466  | 0  |           exec->state = to;  | 
3467  | 0  |           xmlFARegExecSave(exec);  | 
3468  | 0  |                             if (exec->status != XML_REGEXP_OK)  | 
3469  | 0  |                                 goto error;  | 
3470  | 0  |           exec->transno = transno;  | 
3471  | 0  |           exec->state = state;  | 
3472  | 0  |       }  | 
3473  | 0  |                         len = 4;  | 
3474  | 0  |                         codepoint = xmlGetUTF8Char(  | 
3475  | 0  |                                 &exec->inputString[exec->index], &len);  | 
3476  | 0  |                         if (codepoint < 0) { | 
3477  | 0  |                             exec->status = XML_REGEXP_INVALID_UTF8;  | 
3478  | 0  |                             goto error;  | 
3479  | 0  |                         }  | 
3480  | 0  |       ret = xmlRegCheckCharacter(atom, codepoint);  | 
3481  | 0  |       exec->transcount++;  | 
3482  | 0  |         } while (ret == 1);  | 
3483  | 0  |         if (exec->transcount < atom->min)  | 
3484  | 0  |       ret = 0;  | 
3485  |  |  | 
3486  |  |         /*  | 
3487  |  |          * If the last check failed but one transition was found  | 
3488  |  |          * possible, rollback  | 
3489  |  |          */  | 
3490  | 0  |         if (ret < 0)  | 
3491  | 0  |       ret = 0;  | 
3492  | 0  |         if (ret == 0) { | 
3493  | 0  |       goto rollback;  | 
3494  | 0  |         }  | 
3495  | 0  |         if (trans->counter >= 0) { | 
3496  | 0  |       if (exec->counts == NULL) { | 
3497  | 0  |           exec->status = XML_REGEXP_INTERNAL_ERROR;  | 
3498  | 0  |           goto error;  | 
3499  | 0  |       }  | 
3500  | 0  |       exec->counts[trans->counter]--;  | 
3501  | 0  |         }  | 
3502  | 0  |     } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) { | 
3503  |  |         /*  | 
3504  |  |          * we don't match on the codepoint, but minOccurs of 0  | 
3505  |  |          * says that's ok.  Setting len to 0 inhibits stepping  | 
3506  |  |          * over the codepoint.  | 
3507  |  |          */  | 
3508  | 0  |         exec->transcount = 1;  | 
3509  | 0  |         len = 0;  | 
3510  | 0  |         ret = 1;  | 
3511  | 0  |     }  | 
3512  | 0  |       } else if ((atom->min == 0) && (atom->max > 0)) { | 
3513  |  |           /* another spot to match when minOccurs is 0 */  | 
3514  | 0  |     exec->transcount = 1;  | 
3515  | 0  |     len = 0;  | 
3516  | 0  |     ret = 1;  | 
3517  | 0  |       }  | 
3518  | 0  |       if (ret == 1) { | 
3519  | 0  |     if ((trans->nd == 1) ||  | 
3520  | 0  |         ((trans->count >= 0) && (deter == 0) &&  | 
3521  | 0  |          (exec->state->nbTrans > exec->transno + 1))) { | 
3522  | 0  |         xmlFARegExecSave(exec);  | 
3523  | 0  |                     if (exec->status != XML_REGEXP_OK)  | 
3524  | 0  |                         goto error;  | 
3525  | 0  |     }  | 
3526  | 0  |     if (trans->counter >= 0) { | 
3527  | 0  |         xmlRegCounterPtr counter;  | 
3528  |  |  | 
3529  |  |                     /* make sure we don't go over the counter maximum value */  | 
3530  | 0  |         if ((exec->counts == NULL) ||  | 
3531  | 0  |       (exec->comp == NULL) ||  | 
3532  | 0  |       (exec->comp->counters == NULL)) { | 
3533  | 0  |       exec->status = XML_REGEXP_INTERNAL_ERROR;  | 
3534  | 0  |       goto error;  | 
3535  | 0  |         }  | 
3536  | 0  |         counter = &exec->comp->counters[trans->counter];  | 
3537  | 0  |         if (exec->counts[trans->counter] >= counter->max)  | 
3538  | 0  |       continue; /* for loop on transitions */  | 
3539  | 0  |         exec->counts[trans->counter]++;  | 
3540  | 0  |     }  | 
3541  | 0  |     if ((trans->count >= 0) &&  | 
3542  | 0  |         (trans->count < REGEXP_ALL_COUNTER)) { | 
3543  | 0  |         if (exec->counts == NULL) { | 
3544  | 0  |             exec->status = XML_REGEXP_INTERNAL_ERROR;  | 
3545  | 0  |       goto error;  | 
3546  | 0  |         }  | 
3547  | 0  |         exec->counts[trans->count] = 0;  | 
3548  | 0  |     }  | 
3549  | 0  |     exec->state = comp->states[trans->to];  | 
3550  | 0  |     exec->transno = 0;  | 
3551  | 0  |     if (trans->atom != NULL) { | 
3552  | 0  |         exec->index += len;  | 
3553  | 0  |     }  | 
3554  | 0  |     goto progress;  | 
3555  | 0  |       } else if (ret < 0) { | 
3556  | 0  |     exec->status = XML_REGEXP_INTERNAL_ERROR;  | 
3557  | 0  |     break;  | 
3558  | 0  |       }  | 
3559  | 0  |   }  | 
3560  | 0  |   if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { | 
3561  | 0  | rollback:  | 
3562  |  |       /*  | 
3563  |  |        * Failed to find a way out  | 
3564  |  |        */  | 
3565  | 0  |       exec->determinist = 0;  | 
3566  | 0  |       xmlFARegExecRollBack(exec);  | 
3567  | 0  |   }  | 
3568  | 0  | progress:  | 
3569  | 0  |   continue;  | 
3570  | 0  |     }  | 
3571  | 0  | error:  | 
3572  | 0  |     if (exec->rollbacks != NULL) { | 
3573  | 0  |   if (exec->counts != NULL) { | 
3574  | 0  |       int i;  | 
3575  |  | 
  | 
3576  | 0  |       for (i = 0;i < exec->maxRollbacks;i++)  | 
3577  | 0  |     if (exec->rollbacks[i].counts != NULL)  | 
3578  | 0  |         xmlFree(exec->rollbacks[i].counts);  | 
3579  | 0  |   }  | 
3580  | 0  |   xmlFree(exec->rollbacks);  | 
3581  | 0  |     }  | 
3582  | 0  |     if (exec->state == NULL)  | 
3583  | 0  |         return(XML_REGEXP_INTERNAL_ERROR);  | 
3584  | 0  |     if (exec->counts != NULL)  | 
3585  | 0  |   xmlFree(exec->counts);  | 
3586  | 0  |     if (exec->status == XML_REGEXP_OK)  | 
3587  | 0  |   return(1);  | 
3588  | 0  |     if (exec->status == XML_REGEXP_NOT_FOUND)  | 
3589  | 0  |   return(0);  | 
3590  | 0  |     return(exec->status);  | 
3591  | 0  | }  | 
3592  |  |  | 
3593  |  | /************************************************************************  | 
3594  |  |  *                  *  | 
3595  |  |  *  Progressive interface to the verifier one atom at a time  *  | 
3596  |  |  *                  *  | 
3597  |  |  ************************************************************************/  | 
3598  |  |  | 
3599  |  | /**  | 
3600  |  |  * Build a context used for progressive evaluation of a regexp.  | 
3601  |  |  *  | 
3602  |  |  * @deprecated Internal function, don't use.  | 
3603  |  |  *  | 
3604  |  |  * @param comp  a precompiled regular expression  | 
3605  |  |  * @param callback  a callback function used for handling progresses in the  | 
3606  |  |  *            automata matching phase  | 
3607  |  |  * @param data  the context data associated to the callback in this context  | 
3608  |  |  * @returns the new context  | 
3609  |  |  */  | 
3610  |  | xmlRegExecCtxt *  | 
3611  | 0  | xmlRegNewExecCtxt(xmlRegexp *comp, xmlRegExecCallbacks callback, void *data) { | 
3612  | 0  |     xmlRegExecCtxtPtr exec;  | 
3613  |  | 
  | 
3614  | 0  |     if (comp == NULL)  | 
3615  | 0  |   return(NULL);  | 
3616  | 0  |     if ((comp->compact == NULL) && (comp->states == NULL))  | 
3617  | 0  |         return(NULL);  | 
3618  | 0  |     exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));  | 
3619  | 0  |     if (exec == NULL)  | 
3620  | 0  |   return(NULL);  | 
3621  | 0  |     memset(exec, 0, sizeof(xmlRegExecCtxt));  | 
3622  | 0  |     exec->inputString = NULL;  | 
3623  | 0  |     exec->index = 0;  | 
3624  | 0  |     exec->determinist = 1;  | 
3625  | 0  |     exec->maxRollbacks = 0;  | 
3626  | 0  |     exec->nbRollbacks = 0;  | 
3627  | 0  |     exec->rollbacks = NULL;  | 
3628  | 0  |     exec->status = XML_REGEXP_OK;  | 
3629  | 0  |     exec->comp = comp;  | 
3630  | 0  |     if (comp->compact == NULL)  | 
3631  | 0  |   exec->state = comp->states[0];  | 
3632  | 0  |     exec->transno = 0;  | 
3633  | 0  |     exec->transcount = 0;  | 
3634  | 0  |     exec->callback = callback;  | 
3635  | 0  |     exec->data = data;  | 
3636  | 0  |     if (comp->nbCounters > 0) { | 
3637  |  |         /*  | 
3638  |  |    * For error handling, exec->counts is allocated twice the size  | 
3639  |  |    * the second half is used to store the data in case of rollback  | 
3640  |  |    */  | 
3641  | 0  |   exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)  | 
3642  | 0  |                                    * 2);  | 
3643  | 0  |   if (exec->counts == NULL) { | 
3644  | 0  |       xmlFree(exec);  | 
3645  | 0  |       return(NULL);  | 
3646  | 0  |   }  | 
3647  | 0  |         memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);  | 
3648  | 0  |   exec->errCounts = &exec->counts[comp->nbCounters];  | 
3649  | 0  |     } else { | 
3650  | 0  |   exec->counts = NULL;  | 
3651  | 0  |   exec->errCounts = NULL;  | 
3652  | 0  |     }  | 
3653  | 0  |     exec->inputStackMax = 0;  | 
3654  | 0  |     exec->inputStackNr = 0;  | 
3655  | 0  |     exec->inputStack = NULL;  | 
3656  | 0  |     exec->errStateNo = -1;  | 
3657  | 0  |     exec->errString = NULL;  | 
3658  | 0  |     exec->nbPush = 0;  | 
3659  | 0  |     return(exec);  | 
3660  | 0  | }  | 
3661  |  |  | 
3662  |  | /**  | 
3663  |  |  * Free the structures associated to a regular expression evaluation context.  | 
3664  |  |  *  | 
3665  |  |  * @deprecated Internal function, don't use.  | 
3666  |  |  *  | 
3667  |  |  * @param exec  a regular expression evaluation context  | 
3668  |  |  */  | 
3669  |  | void  | 
3670  | 0  | xmlRegFreeExecCtxt(xmlRegExecCtxt *exec) { | 
3671  | 0  |     if (exec == NULL)  | 
3672  | 0  |   return;  | 
3673  |  |  | 
3674  | 0  |     if (exec->rollbacks != NULL) { | 
3675  | 0  |   if (exec->counts != NULL) { | 
3676  | 0  |       int i;  | 
3677  |  | 
  | 
3678  | 0  |       for (i = 0;i < exec->maxRollbacks;i++)  | 
3679  | 0  |     if (exec->rollbacks[i].counts != NULL)  | 
3680  | 0  |         xmlFree(exec->rollbacks[i].counts);  | 
3681  | 0  |   }  | 
3682  | 0  |   xmlFree(exec->rollbacks);  | 
3683  | 0  |     }  | 
3684  | 0  |     if (exec->counts != NULL)  | 
3685  | 0  |   xmlFree(exec->counts);  | 
3686  | 0  |     if (exec->inputStack != NULL) { | 
3687  | 0  |   int i;  | 
3688  |  | 
  | 
3689  | 0  |   for (i = 0;i < exec->inputStackNr;i++) { | 
3690  | 0  |       if (exec->inputStack[i].value != NULL)  | 
3691  | 0  |     xmlFree(exec->inputStack[i].value);  | 
3692  | 0  |   }  | 
3693  | 0  |   xmlFree(exec->inputStack);  | 
3694  | 0  |     }  | 
3695  | 0  |     if (exec->errString != NULL)  | 
3696  | 0  |         xmlFree(exec->errString);  | 
3697  | 0  |     xmlFree(exec);  | 
3698  | 0  | }  | 
3699  |  |  | 
3700  |  | static int  | 
3701  | 0  | xmlRegExecSetErrString(xmlRegExecCtxtPtr exec, const xmlChar *value) { | 
3702  | 0  |     if (exec->errString != NULL)  | 
3703  | 0  |         xmlFree(exec->errString);  | 
3704  | 0  |     if (value == NULL) { | 
3705  | 0  |         exec->errString = NULL;  | 
3706  | 0  |     } else { | 
3707  | 0  |         exec->errString = xmlStrdup(value);  | 
3708  | 0  |         if (exec->errString == NULL) { | 
3709  | 0  |             exec->status = XML_REGEXP_OUT_OF_MEMORY;  | 
3710  | 0  |             return(-1);  | 
3711  | 0  |         }  | 
3712  | 0  |     }  | 
3713  | 0  |     return(0);  | 
3714  | 0  | }  | 
3715  |  |  | 
3716  |  | static void  | 
3717  |  | xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,  | 
3718  | 0  |                       void *data) { | 
3719  | 0  |     if (exec->inputStackNr + 1 >= exec->inputStackMax) { | 
3720  | 0  |   xmlRegInputTokenPtr tmp;  | 
3721  | 0  |         int newSize;  | 
3722  |  | 
  | 
3723  | 0  |         newSize = xmlGrowCapacity(exec->inputStackMax, sizeof(tmp[0]),  | 
3724  | 0  |                                   4, XML_MAX_ITEMS);  | 
3725  | 0  |   if (newSize < 0) { | 
3726  | 0  |             exec->status = XML_REGEXP_OUT_OF_MEMORY;  | 
3727  | 0  |       return;  | 
3728  | 0  |   }  | 
3729  | 0  | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION  | 
3730  | 0  |         if (newSize < 2)  | 
3731  | 0  |             newSize = 2;  | 
3732  | 0  | #endif  | 
3733  | 0  |   tmp = xmlRealloc(exec->inputStack, newSize * sizeof(tmp[0]));  | 
3734  | 0  |   if (tmp == NULL) { | 
3735  | 0  |             exec->status = XML_REGEXP_OUT_OF_MEMORY;  | 
3736  | 0  |       return;  | 
3737  | 0  |   }  | 
3738  | 0  |   exec->inputStack = tmp;  | 
3739  | 0  |   exec->inputStackMax = newSize;  | 
3740  | 0  |     }  | 
3741  | 0  |     if (value == NULL) { | 
3742  | 0  |         exec->inputStack[exec->inputStackNr].value = NULL;  | 
3743  | 0  |     } else { | 
3744  | 0  |         exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);  | 
3745  | 0  |         if (exec->inputStack[exec->inputStackNr].value == NULL) { | 
3746  | 0  |             exec->status = XML_REGEXP_OUT_OF_MEMORY;  | 
3747  | 0  |             return;  | 
3748  | 0  |         }  | 
3749  | 0  |     }  | 
3750  | 0  |     exec->inputStack[exec->inputStackNr].data = data;  | 
3751  | 0  |     exec->inputStackNr++;  | 
3752  | 0  |     exec->inputStack[exec->inputStackNr].value = NULL;  | 
3753  | 0  |     exec->inputStack[exec->inputStackNr].data = NULL;  | 
3754  | 0  | }  | 
3755  |  |  | 
3756  |  | /**  | 
3757  |  |  * Checks if both strings are equal or have the same content. "*"  | 
3758  |  |  * can be used as a wildcard in `valStr`; "|" is used as a separator of  | 
3759  |  |  * substrings in both `expStr` and `valStr`.  | 
3760  |  |  *  | 
3761  |  |  * @param expStr  the string to be evaluated  | 
3762  |  |  * @param valStr  the validation string  | 
3763  |  |  * @returns 1 if the comparison is satisfied and the number of substrings  | 
3764  |  |  * is equal, 0 otherwise.  | 
3765  |  |  */  | 
3766  |  |  | 
3767  |  | static int  | 
3768  | 0  | xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) { | 
3769  | 0  |     if (expStr == valStr) return(1);  | 
3770  | 0  |     if (expStr == NULL) return(0);  | 
3771  | 0  |     if (valStr == NULL) return(0);  | 
3772  | 0  |     do { | 
3773  |  |   /*  | 
3774  |  |   * Eval if we have a wildcard for the current item.  | 
3775  |  |   */  | 
3776  | 0  |         if (*expStr != *valStr) { | 
3777  |  |       /* if one of them starts with a wildcard make valStr be it */  | 
3778  | 0  |       if (*valStr == '*') { | 
3779  | 0  |           const xmlChar *tmp;  | 
3780  |  | 
  | 
3781  | 0  |     tmp = valStr;  | 
3782  | 0  |     valStr = expStr;  | 
3783  | 0  |     expStr = tmp;  | 
3784  | 0  |       }  | 
3785  | 0  |       if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) { | 
3786  | 0  |     do { | 
3787  | 0  |         if (*valStr == XML_REG_STRING_SEPARATOR)  | 
3788  | 0  |       break;  | 
3789  | 0  |         valStr++;  | 
3790  | 0  |     } while (*valStr != 0);  | 
3791  | 0  |     continue;  | 
3792  | 0  |       } else  | 
3793  | 0  |     return(0);  | 
3794  | 0  |   }  | 
3795  | 0  |   expStr++;  | 
3796  | 0  |   valStr++;  | 
3797  | 0  |     } while (*valStr != 0);  | 
3798  | 0  |     if (*expStr != 0)  | 
3799  | 0  |   return (0);  | 
3800  | 0  |     else  | 
3801  | 0  |   return (1);  | 
3802  | 0  | }  | 
3803  |  |  | 
3804  |  | /**  | 
3805  |  |  * Push one input token in the execution context  | 
3806  |  |  *  | 
3807  |  |  * @param exec  a regexp execution context  | 
3808  |  |  * @param comp  the precompiled exec with a compact table  | 
3809  |  |  * @param value  a string token input  | 
3810  |  |  * @param data  data associated to the token to reuse in callbacks  | 
3811  |  |  * @returns 1 if the regexp reached a final state, 0 if non-final, and  | 
3812  |  |  *     a negative value in case of error.  | 
3813  |  |  */  | 
3814  |  | static int  | 
3815  |  | xmlRegCompactPushString(xmlRegExecCtxtPtr exec,  | 
3816  |  |                   xmlRegexpPtr comp,  | 
3817  |  |                   const xmlChar *value,  | 
3818  | 0  |                   void *data) { | 
3819  | 0  |     int state = exec->index;  | 
3820  | 0  |     int i, target;  | 
3821  |  | 
  | 
3822  | 0  |     if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))  | 
3823  | 0  |   return(-1);  | 
3824  |  |  | 
3825  | 0  |     if (value == NULL) { | 
3826  |  |   /*  | 
3827  |  |    * are we at a final state ?  | 
3828  |  |    */  | 
3829  | 0  |   if (comp->compact[state * (comp->nbstrings + 1)] ==  | 
3830  | 0  |             XML_REGEXP_FINAL_STATE)  | 
3831  | 0  |       return(1);  | 
3832  | 0  |   return(0);  | 
3833  | 0  |     }  | 
3834  |  |  | 
3835  |  |     /*  | 
3836  |  |      * Examine all outside transitions from current state  | 
3837  |  |      */  | 
3838  | 0  |     for (i = 0;i < comp->nbstrings;i++) { | 
3839  | 0  |   target = comp->compact[state * (comp->nbstrings + 1) + i + 1];  | 
3840  | 0  |   if ((target > 0) && (target <= comp->nbstates)) { | 
3841  | 0  |       target--; /* to avoid 0 */  | 
3842  | 0  |       if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) { | 
3843  | 0  |     exec->index = target;  | 
3844  | 0  |     if ((exec->callback != NULL) && (comp->transdata != NULL)) { | 
3845  | 0  |         exec->callback(exec->data, value,  | 
3846  | 0  |         comp->transdata[state * comp->nbstrings + i], data);  | 
3847  | 0  |     }  | 
3848  | 0  |     if (comp->compact[target * (comp->nbstrings + 1)] ==  | 
3849  | 0  |         XML_REGEXP_SINK_STATE)  | 
3850  | 0  |         goto error;  | 
3851  |  |  | 
3852  | 0  |     if (comp->compact[target * (comp->nbstrings + 1)] ==  | 
3853  | 0  |         XML_REGEXP_FINAL_STATE)  | 
3854  | 0  |         return(1);  | 
3855  | 0  |     return(0);  | 
3856  | 0  |       }  | 
3857  | 0  |   }  | 
3858  | 0  |     }  | 
3859  |  |     /*  | 
3860  |  |      * Failed to find an exit transition out from current state for the  | 
3861  |  |      * current token  | 
3862  |  |      */  | 
3863  | 0  | error:  | 
3864  | 0  |     exec->errStateNo = state;  | 
3865  | 0  |     exec->status = XML_REGEXP_NOT_FOUND;  | 
3866  | 0  |     xmlRegExecSetErrString(exec, value);  | 
3867  | 0  |     return(exec->status);  | 
3868  | 0  | }  | 
3869  |  |  | 
/**
 * Push one input token in the execution context.
 *
 * Compiled regexps with a compact table are delegated to
 * xmlRegCompactPushString(). Otherwise the transitions of the current
 * state are tried in order, starting at exec->transno; alternatives are
 * saved with xmlFARegExecSave() and restored with xmlFARegExecRollBack()
 * when no transition accepts the token.
 *
 * @param exec  a regexp execution context (NULL yields -1)
 * @param value  a string token input, or NULL to indicate the end of input
 * @param data  data associated to the token to reuse in callbacks
 * @param compound  value was assembled from 2 strings
 * @returns 1 if the regexp reached a final state, 0 if non-final, and
 *     a negative value in case of error.
 */
static int
xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
                             void *data, int compound) {
    xmlRegTransPtr trans;
    xmlRegAtomPtr atom;
    int ret;
    /* set to 1 when the caller signalled end of input (value == NULL) */
    int final = 0;
    /* 1 while no rollback happened yet for the current input */
    int progress = 1;

    if (exec == NULL)
        return(-1);
    if (exec->comp == NULL)
        return(-1);
    /* a previously recorded error is reported again on every push */
    if (exec->status != XML_REGEXP_OK)
        return(exec->status);

    if (exec->comp->compact != NULL)
        return(xmlRegCompactPushString(exec, exec->comp, value, data));

    if (value == NULL) {
        if (exec->state->type == XML_REGEXP_FINAL_STATE)
            return(1);
        final = 1;
    }

    /*
     * If we have an active rollback stack push the new value there
     * and get back to where we were left
     */
    if ((value != NULL) && (exec->inputStackNr > 0)) {
        xmlFARegExecSaveInputString(exec, value, data);
        value = exec->inputStack[exec->index].value;
        data = exec->inputStack[exec->index].data;
    }

    /*
     * Keep going while there is a token to consume, or while the end of
     * input was signalled and the current state is not final.
     */
    while ((exec->status == XML_REGEXP_OK) &&
           ((value != NULL) ||
            ((final == 1) &&
             (exec->state->type != XML_REGEXP_FINAL_STATE)))) {

        /*
         * End of input on non-terminal state, rollback, however we may
         * still have epsilon like transition for counted transitions
         * on counters, in that case don't break too early.
         */
        if ((value == NULL) && (exec->counts == NULL))
            goto rollback;

        exec->transcount = 0;
        for (;exec->transno < exec->state->nbTrans;exec->transno++) {
            trans = &exec->state->trans[exec->transno];
            /* transitions with a negative target are skipped */
            if (trans->to < 0)
                continue;
            atom = trans->atom;
            ret = 0;
            if (trans->count == REGEXP_ALL_LAX_COUNTER) {
                int i;
                int count;
                xmlRegTransPtr t;
                xmlRegCounterPtr counter;

                ret = 0;

                /*
                 * Check all counted transitions from the current state
                 */
                if ((value == NULL) && (final)) {
                    ret = 1;
                } else if (value != NULL) {
                    for (i = 0;i < exec->state->nbTrans;i++) {
                        t = &exec->state->trans[i];
                        if ((t->counter < 0) || (t == trans))
                            continue;
                        counter = &exec->comp->counters[t->counter];
                        count = exec->counts[t->counter];
                        if ((count < counter->max) &&
                            (t->atom != NULL) &&
                            (xmlStrEqual(value, t->atom->valuep))) {
                            ret = 0;
                            break;
                        }
                        /*
                         * NOTE(review): every condition below is also
                         * required by the test above, which breaks out
                         * first; this branch looks unreachable - confirm.
                         */
                        if ((count >= counter->min) &&
                            (count < counter->max) &&
                            (t->atom != NULL) &&
                            (xmlStrEqual(value, t->atom->valuep))) {
                            ret = 1;
                            break;
                        }
                    }
                }
            } else if (trans->count == REGEXP_ALL_COUNTER) {
                int i;
                int count;
                xmlRegTransPtr t;
                xmlRegCounterPtr counter;

                ret = 1;

                /*
                 * Check all counted transitions from the current state:
                 * the transition is refused as soon as one counter lies
                 * outside its [min, max] range.
                 */
                for (i = 0;i < exec->state->nbTrans;i++) {
                    t = &exec->state->trans[i];
                    if ((t->counter < 0) || (t == trans))
                        continue;
                    counter = &exec->comp->counters[t->counter];
                    count = exec->counts[t->counter];
                    if ((count < counter->min) || (count > counter->max)) {
                        ret = 0;
                        break;
                    }
                }
            } else if (trans->count >= 0) {
                int count;
                xmlRegCounterPtr counter;

                /*
                 * A counted transition: usable only when its counter is
                 * within [min, max].
                 */

                count = exec->counts[trans->count];
                counter = &exec->comp->counters[trans->count];
                ret = ((count >= counter->min) && (count <= counter->max));
            } else if (atom == NULL) {
                /* a non-counted transition without an atom is an error */
                exec->status = XML_REGEXP_INTERNAL_ERROR;
                break;
            } else if (value != NULL) {
                ret = xmlRegStrEqualWildcard(atom->valuep, value);
                /*
                 * A negated atom inverts the result, but can only match
                 * a compound value.
                 */
                if (atom->neg) {
                    ret = !ret;
                    if (!compound)
                        ret = 0;
                }
                /* refuse the match when the attached counter is at max */
                if ((ret == 1) && (trans->counter >= 0)) {
                    xmlRegCounterPtr counter;
                    int count;

                    count = exec->counts[trans->counter];
                    counter = &exec->comp->counters[trans->counter];
                    if (count >= counter->max)
                        ret = 0;
                }

                if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
                    xmlRegStatePtr to = exec->comp->states[trans->to];

                    /*
                     * this is a multiple input sequence
                     */
                    if (exec->state->nbTrans > exec->transno + 1) {
                        if (exec->inputStackNr <= 0) {
                            xmlFARegExecSaveInputString(exec, value, data);
                        }
                        xmlFARegExecSave(exec);
                    }
                    /*
                     * NOTE(review): exec->status is not re-checked after
                     * the two save calls above, and the loop below indexes
                     * exec->inputStack without testing it for NULL -
                     * confirm this cannot be reached with an empty stack.
                     */
                    exec->transcount = 1;
                    do {
                        /*
                         * Try to progress as much as possible on the input
                         */
                        if (exec->transcount == atom->max) {
                            break;
                        }
                        exec->index++;
                        value = exec->inputStack[exec->index].value;
                        data = exec->inputStack[exec->index].data;

                        /*
                         * End of input: stop here
                         */
                        if (value == NULL) {
                            exec->index --;
                            break;
                        }
                        if (exec->transcount >= atom->min) {
                            int transno = exec->transno;
                            xmlRegStatePtr state = exec->state;

                            /*
                             * The transition is acceptable save it
                             */
                            /*
                             * presumably -1 so that a resumed scan of the
                             * target state starts at transition 0 after
                             * the loop increment - TODO confirm
                             */
                            exec->transno = -1; /* trick */
                            exec->state = to;
                            if (exec->inputStackNr <= 0) {
                                xmlFARegExecSaveInputString(exec, value, data);
                            }
                            xmlFARegExecSave(exec);
                            exec->transno = transno;
                            exec->state = state;
                        }
                        /*
                         * repeated tokens are compared with plain string
                         * equality, not the wildcard comparison used for
                         * the first one
                         */
                        ret = xmlStrEqual(value, atom->valuep);
                        exec->transcount++;
                    } while (ret == 1);
                    if (exec->transcount < atom->min)
                        ret = 0;

                    /*
                     * If the last check failed but one transition was found
                     * possible, rollback
                     */
                    if (ret < 0)
                        ret = 0;
                    if (ret == 0) {
                        goto rollback;
                    }
                }
            }
            if (ret == 1) {
                /* the callback only fires when token data was supplied */
                if ((exec->callback != NULL) && (atom != NULL) &&
                    (data != NULL)) {
                    exec->callback(exec->data, atom->valuep,
                                   atom->data, data);
                }
                /* other transitions remain: save a rollback point */
                if (exec->state->nbTrans > exec->transno + 1) {
                    if (exec->inputStackNr <= 0) {
                        xmlFARegExecSaveInputString(exec, value, data);
                    }
                    xmlFARegExecSave(exec);
                }
                if (trans->counter >= 0) {
                    exec->counts[trans->counter]++;
                }
                /* taking a counted transition resets its counter */
                if ((trans->count >= 0) &&
                    (trans->count < REGEXP_ALL_COUNTER)) {
                    exec->counts[trans->count] = 0;
                }
                if ((exec->comp->states[trans->to] != NULL) &&
                    (exec->comp->states[trans->to]->type ==
                     XML_REGEXP_SINK_STATE)) {
                    /*
                     * entering a sink state, save the current state as error
                     * state.
                     */
                    if (xmlRegExecSetErrString(exec, value) < 0)
                        break;
                    exec->errState = exec->state;
                    /*
                     * NOTE(review): unlike the copy in the rollback path,
                     * this memcpy is not guarded by nbCounters != 0 -
                     * confirm errCounts/counts cannot be NULL here.
                     */
                    memcpy(exec->errCounts, exec->counts,
                           exec->comp->nbCounters * sizeof(int));
                }
                exec->state = exec->comp->states[trans->to];
                exec->transno = 0;
                /* an atom transition consumed the token: fetch the next */
                if (trans->atom != NULL) {
                    if (exec->inputStack != NULL) {
                        exec->index++;
                        if (exec->index < exec->inputStackNr) {
                            value = exec->inputStack[exec->index].value;
                            data = exec->inputStack[exec->index].data;
                        } else {
                            value = NULL;
                            data = NULL;
                        }
                    } else {
                        value = NULL;
                        data = NULL;
                    }
                }
                goto progress;
            } else if (ret < 0) {
                exec->status = XML_REGEXP_INTERNAL_ERROR;
                break;
            }
        }
        if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
rollback:
            /*
             * if we didn't yet rollback on the current input
             * store the current state as the error state.
             */
            if ((progress) && (exec->state != NULL) &&
                (exec->state->type != XML_REGEXP_SINK_STATE)) {
                progress = 0;
                /* this break leaves the outer while loop */
                if (xmlRegExecSetErrString(exec, value) < 0)
                    break;
                exec->errState = exec->state;
                if (exec->comp->nbCounters)
                    memcpy(exec->errCounts, exec->counts,
                           exec->comp->nbCounters * sizeof(int));
            }

            /*
             * Failed to find a way out
             */
            exec->determinist = 0;
            xmlFARegExecRollBack(exec);
            /* resume from the token at the restored input position */
            if ((exec->inputStack != NULL ) &&
                (exec->status == XML_REGEXP_OK)) {
                value = exec->inputStack[exec->index].value;
                data = exec->inputStack[exec->index].data;
            }
        }
        /* the failure path must not fall into the progress label */
        continue;
progress:
        progress = 1;
    }
    if (exec->status == XML_REGEXP_OK) {
        return(exec->state->type == XML_REGEXP_FINAL_STATE);
    }
    return(exec->status);
}
4179  |  |  | 
4180  |  | /**  | 
4181  |  |  * Push one input token in the execution context  | 
4182  |  |  *  | 
4183  |  |  * @deprecated Internal function, don't use.  | 
4184  |  |  *  | 
4185  |  |  * @param exec  a regexp execution context or NULL to indicate the end  | 
4186  |  |  * @param value  a string token input  | 
4187  |  |  * @param data  data associated to the token to reuse in callbacks  | 
4188  |  |  * @returns 1 if the regexp reached a final state, 0 if non-final, and  | 
4189  |  |  *     a negative value in case of error.  | 
4190  |  |  */  | 
4191  |  | int  | 
4192  |  | xmlRegExecPushString(xmlRegExecCtxt *exec, const xmlChar *value,  | 
4193  | 0  |                void *data) { | 
4194  | 0  |     return(xmlRegExecPushStringInternal(exec, value, data, 0));  | 
4195  | 0  | }  | 
4196  |  |  | 
4197  |  | /**  | 
4198  |  |  * Push one input token in the execution context  | 
4199  |  |  *  | 
4200  |  |  * @deprecated Internal function, don't use.  | 
4201  |  |  *  | 
4202  |  |  * @param exec  a regexp execution context or NULL to indicate the end  | 
4203  |  |  * @param value  the first string token input  | 
4204  |  |  * @param value2  the second string token input  | 
4205  |  |  * @param data  data associated to the token to reuse in callbacks  | 
4206  |  |  * @returns 1 if the regexp reached a final state, 0 if non-final, and  | 
4207  |  |  *     a negative value in case of error.  | 
4208  |  |  */  | 
4209  |  | int  | 
4210  |  | xmlRegExecPushString2(xmlRegExecCtxt *exec, const xmlChar *value,  | 
4211  | 0  |                       const xmlChar *value2, void *data) { | 
4212  | 0  |     xmlChar buf[150];  | 
4213  | 0  |     int lenn, lenp, ret;  | 
4214  | 0  |     xmlChar *str;  | 
4215  |  | 
  | 
4216  | 0  |     if (exec == NULL)  | 
4217  | 0  |   return(-1);  | 
4218  | 0  |     if (exec->comp == NULL)  | 
4219  | 0  |   return(-1);  | 
4220  | 0  |     if (exec->status != XML_REGEXP_OK)  | 
4221  | 0  |   return(exec->status);  | 
4222  |  |  | 
4223  | 0  |     if (value2 == NULL)  | 
4224  | 0  |         return(xmlRegExecPushString(exec, value, data));  | 
4225  |  |  | 
4226  | 0  |     lenn = strlen((char *) value2);  | 
4227  | 0  |     lenp = strlen((char *) value);  | 
4228  |  | 
  | 
4229  | 0  |     if (150 < lenn + lenp + 2) { | 
4230  | 0  |   str = xmlMalloc(lenn + lenp + 2);  | 
4231  | 0  |   if (str == NULL) { | 
4232  | 0  |       exec->status = XML_REGEXP_OUT_OF_MEMORY;  | 
4233  | 0  |       return(-1);  | 
4234  | 0  |   }  | 
4235  | 0  |     } else { | 
4236  | 0  |   str = buf;  | 
4237  | 0  |     }  | 
4238  | 0  |     memcpy(&str[0], value, lenp);  | 
4239  | 0  |     str[lenp] = XML_REG_STRING_SEPARATOR;  | 
4240  | 0  |     memcpy(&str[lenp + 1], value2, lenn);  | 
4241  | 0  |     str[lenn + lenp + 1] = 0;  | 
4242  |  | 
  | 
4243  | 0  |     if (exec->comp->compact != NULL)  | 
4244  | 0  |   ret = xmlRegCompactPushString(exec, exec->comp, str, data);  | 
4245  | 0  |     else  | 
4246  | 0  |         ret = xmlRegExecPushStringInternal(exec, str, data, 1);  | 
4247  |  | 
  | 
4248  | 0  |     if (str != buf)  | 
4249  | 0  |         xmlFree(str);  | 
4250  | 0  |     return(ret);  | 
4251  | 0  | }  | 
4252  |  |  | 
4253  |  | /**  | 
4254  |  |  * Extract information from the regexp execution. Internal routine to  | 
4255  |  |  * implement #xmlRegExecNextValues and #xmlRegExecErrInfo  | 
4256  |  |  *  | 
4257  |  |  * @param exec  a regexp execution context  | 
4258  |  |  * @param err  error extraction or normal one  | 
4259  |  |  * @param nbval  pointer to the number of accepted values IN/OUT  | 
4260  |  |  * @param nbneg  return number of negative transitions  | 
4261  |  |  * @param values  pointer to the array of acceptable values  | 
4262  |  |  * @param terminal  return value if this was a terminal state  | 
4263  |  |  * @returns 0 in case of success or -1 in case of error.  | 
4264  |  |  */  | 
4265  |  | static int  | 
4266  |  | xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,  | 
4267  |  |                     int *nbval, int *nbneg,  | 
4268  | 0  |         xmlChar **values, int *terminal) { | 
4269  | 0  |     int maxval;  | 
4270  | 0  |     int nb = 0;  | 
4271  |  | 
  | 
4272  | 0  |     if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||  | 
4273  | 0  |         (values == NULL) || (*nbval <= 0))  | 
4274  | 0  |         return(-1);  | 
4275  |  |  | 
4276  | 0  |     maxval = *nbval;  | 
4277  | 0  |     *nbval = 0;  | 
4278  | 0  |     *nbneg = 0;  | 
4279  | 0  |     if ((exec->comp != NULL) && (exec->comp->compact != NULL)) { | 
4280  | 0  |         xmlRegexpPtr comp;  | 
4281  | 0  |   int target, i, state;  | 
4282  |  | 
  | 
4283  | 0  |         comp = exec->comp;  | 
4284  |  | 
  | 
4285  | 0  |   if (err) { | 
4286  | 0  |       if (exec->errStateNo == -1) return(-1);  | 
4287  | 0  |       state = exec->errStateNo;  | 
4288  | 0  |   } else { | 
4289  | 0  |       state = exec->index;  | 
4290  | 0  |   }  | 
4291  | 0  |   if (terminal != NULL) { | 
4292  | 0  |       if (comp->compact[state * (comp->nbstrings + 1)] ==  | 
4293  | 0  |           XML_REGEXP_FINAL_STATE)  | 
4294  | 0  |     *terminal = 1;  | 
4295  | 0  |       else  | 
4296  | 0  |     *terminal = 0;  | 
4297  | 0  |   }  | 
4298  | 0  |   for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) { | 
4299  | 0  |       target = comp->compact[state * (comp->nbstrings + 1) + i + 1];  | 
4300  | 0  |       if ((target > 0) && (target <= comp->nbstates) &&  | 
4301  | 0  |           (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=  | 
4302  | 0  |      XML_REGEXP_SINK_STATE)) { | 
4303  | 0  |           values[nb++] = comp->stringMap[i];  | 
4304  | 0  |     (*nbval)++;  | 
4305  | 0  |       }  | 
4306  | 0  |   }  | 
4307  | 0  |   for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) { | 
4308  | 0  |       target = comp->compact[state * (comp->nbstrings + 1) + i + 1];  | 
4309  | 0  |       if ((target > 0) && (target <= comp->nbstates) &&  | 
4310  | 0  |           (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==  | 
4311  | 0  |      XML_REGEXP_SINK_STATE)) { | 
4312  | 0  |           values[nb++] = comp->stringMap[i];  | 
4313  | 0  |     (*nbneg)++;  | 
4314  | 0  |       }  | 
4315  | 0  |   }  | 
4316  | 0  |     } else { | 
4317  | 0  |         int transno;  | 
4318  | 0  |   xmlRegTransPtr trans;  | 
4319  | 0  |   xmlRegAtomPtr atom;  | 
4320  | 0  |   xmlRegStatePtr state;  | 
4321  |  | 
  | 
4322  | 0  |   if (terminal != NULL) { | 
4323  | 0  |       if (exec->state->type == XML_REGEXP_FINAL_STATE)  | 
4324  | 0  |     *terminal = 1;  | 
4325  | 0  |       else  | 
4326  | 0  |     *terminal = 0;  | 
4327  | 0  |   }  | 
4328  |  | 
  | 
4329  | 0  |   if (err) { | 
4330  | 0  |       if (exec->errState == NULL) return(-1);  | 
4331  | 0  |       state = exec->errState;  | 
4332  | 0  |   } else { | 
4333  | 0  |       if (exec->state == NULL) return(-1);  | 
4334  | 0  |       state = exec->state;  | 
4335  | 0  |   }  | 
4336  | 0  |   for (transno = 0;  | 
4337  | 0  |        (transno < state->nbTrans) && (nb < maxval);  | 
4338  | 0  |        transno++) { | 
4339  | 0  |       trans = &state->trans[transno];  | 
4340  | 0  |       if (trans->to < 0)  | 
4341  | 0  |     continue;  | 
4342  | 0  |       atom = trans->atom;  | 
4343  | 0  |       if ((atom == NULL) || (atom->valuep == NULL))  | 
4344  | 0  |     continue;  | 
4345  | 0  |       if (trans->count == REGEXP_ALL_LAX_COUNTER) { | 
4346  |  |           /* this should not be reached but ... */  | 
4347  | 0  |       } else if (trans->count == REGEXP_ALL_COUNTER) { | 
4348  |  |           /* this should not be reached but ... */  | 
4349  | 0  |       } else if (trans->counter >= 0) { | 
4350  | 0  |     xmlRegCounterPtr counter = NULL;  | 
4351  | 0  |     int count;  | 
4352  |  | 
  | 
4353  | 0  |     if (err)  | 
4354  | 0  |         count = exec->errCounts[trans->counter];  | 
4355  | 0  |     else  | 
4356  | 0  |         count = exec->counts[trans->counter];  | 
4357  | 0  |     if (exec->comp != NULL)  | 
4358  | 0  |         counter = &exec->comp->counters[trans->counter];  | 
4359  | 0  |     if ((counter == NULL) || (count < counter->max)) { | 
4360  | 0  |         if (atom->neg)  | 
4361  | 0  |       values[nb++] = (xmlChar *) atom->valuep2;  | 
4362  | 0  |         else  | 
4363  | 0  |       values[nb++] = (xmlChar *) atom->valuep;  | 
4364  | 0  |         (*nbval)++;  | 
4365  | 0  |     }  | 
4366  | 0  |       } else { | 
4367  | 0  |                 if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NULL) &&  | 
4368  | 0  |         (exec->comp->states[trans->to]->type !=  | 
4369  | 0  |          XML_REGEXP_SINK_STATE)) { | 
4370  | 0  |         if (atom->neg)  | 
4371  | 0  |       values[nb++] = (xmlChar *) atom->valuep2;  | 
4372  | 0  |         else  | 
4373  | 0  |       values[nb++] = (xmlChar *) atom->valuep;  | 
4374  | 0  |         (*nbval)++;  | 
4375  | 0  |     }  | 
4376  | 0  |       }  | 
4377  | 0  |   }  | 
4378  | 0  |   for (transno = 0;  | 
4379  | 0  |        (transno < state->nbTrans) && (nb < maxval);  | 
4380  | 0  |        transno++) { | 
4381  | 0  |       trans = &state->trans[transno];  | 
4382  | 0  |       if (trans->to < 0)  | 
4383  | 0  |     continue;  | 
4384  | 0  |       atom = trans->atom;  | 
4385  | 0  |       if ((atom == NULL) || (atom->valuep == NULL))  | 
4386  | 0  |     continue;  | 
4387  | 0  |       if (trans->count == REGEXP_ALL_LAX_COUNTER) { | 
4388  | 0  |           continue;  | 
4389  | 0  |       } else if (trans->count == REGEXP_ALL_COUNTER) { | 
4390  | 0  |           continue;  | 
4391  | 0  |       } else if (trans->counter >= 0) { | 
4392  | 0  |           continue;  | 
4393  | 0  |       } else { | 
4394  | 0  |                 if ((exec->comp->states[trans->to] != NULL) &&  | 
4395  | 0  |         (exec->comp->states[trans->to]->type ==  | 
4396  | 0  |          XML_REGEXP_SINK_STATE)) { | 
4397  | 0  |         if (atom->neg)  | 
4398  | 0  |       values[nb++] = (xmlChar *) atom->valuep2;  | 
4399  | 0  |         else  | 
4400  | 0  |       values[nb++] = (xmlChar *) atom->valuep;  | 
4401  | 0  |         (*nbneg)++;  | 
4402  | 0  |     }  | 
4403  | 0  |       }  | 
4404  | 0  |   }  | 
4405  | 0  |     }  | 
4406  | 0  |     return(0);  | 
4407  | 0  | }  | 
4408  |  |  | 
4409  |  | /**  | 
4410  |  |  * Extract information from the regexp execution.  | 
4411  |  |  * The parameter `values` must point to an array of `nbval` string pointers  | 
4412  |  |  * on return nbval will contain the number of possible strings in that  | 
4413  |  |  * state and the `values` array will be updated with them. The string values  | 
4414  |  |  * returned will be freed with the `exec` context and don't need to be  | 
4415  |  |  * deallocated.  | 
4416  |  |  *  | 
4417  |  |  * @deprecated Internal function, don't use.  | 
4418  |  |  *  | 
4419  |  |  * @param exec  a regexp execution context  | 
4420  |  |  * @param nbval  pointer to the number of accepted values IN/OUT  | 
4421  |  |  * @param nbneg  return number of negative transitions  | 
4422  |  |  * @param values  pointer to the array of acceptable values  | 
4423  |  |  * @param terminal  return value if this was a terminal state  | 
4424  |  |  * @returns 0 in case of success or -1 in case of error.  | 
4425  |  |  */  | 
4426  |  | int  | 
4427  |  | xmlRegExecNextValues(xmlRegExecCtxt *exec, int *nbval, int *nbneg,  | 
4428  | 0  |                      xmlChar **values, int *terminal) { | 
4429  | 0  |     return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));  | 
4430  | 0  | }  | 
4431  |  |  | 
4432  |  | /**  | 
4433  |  |  * Extract error information from the regexp execution. The parameter  | 
4434  |  |  * `string` will be updated with the value pushed and not accepted,  | 
4435  |  |  * the parameter `values` must point to an array of `nbval` string pointers  | 
4436  |  |  * on return nbval will contain the number of possible strings in that  | 
4437  |  |  * state and the `values` array will be updated with them. The string values  | 
4438  |  |  * returned will be freed with the `exec` context and don't need to be  | 
4439  |  |  * deallocated.  | 
4440  |  |  *  | 
4441  |  |  * @deprecated Internal function, don't use.  | 
4442  |  |  *  | 
4443  |  |  * @param exec  a regexp execution context generating an error  | 
4444  |  |  * @param string  return value for the error string  | 
4445  |  |  * @param nbval  pointer to the number of accepted values IN/OUT  | 
4446  |  |  * @param nbneg  return number of negative transitions  | 
4447  |  |  * @param values  pointer to the array of acceptable values  | 
4448  |  |  * @param terminal  return value if this was a terminal state  | 
4449  |  |  * @returns 0 in case of success or -1 in case of error.  | 
4450  |  |  */  | 
4451  |  | int  | 
4452  |  | xmlRegExecErrInfo(xmlRegExecCtxt *exec, const xmlChar **string,  | 
4453  | 0  |                   int *nbval, int *nbneg, xmlChar **values, int *terminal) { | 
4454  | 0  |     if (exec == NULL)  | 
4455  | 0  |         return(-1);  | 
4456  | 0  |     if (string != NULL) { | 
4457  | 0  |         if (exec->status != XML_REGEXP_OK)  | 
4458  | 0  |       *string = exec->errString;  | 
4459  | 0  |   else  | 
4460  | 0  |       *string = NULL;  | 
4461  | 0  |     }  | 
4462  | 0  |     return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));  | 
4463  | 0  | }  | 
4464  |  |  | 
4465  |  | /**  | 
4466  |  |  * Clear errors in the context, allowing to recover  | 
4467  |  |  * from errors on specific scenarios  | 
4468  |  |  *  | 
4469  |  |  * @param exec  a regexp execution context  | 
4470  |  |  * @remarks it doesn's reset the last internal libxml2 error  | 
4471  |  |  */  | 
4472  |  | void  | 
4473  | 0  | xmlRegExecClearErrors(xmlRegExecCtxt* exec) { | 
4474  | 0  |     exec->status = 0;  | 
4475  | 0  |     exec->errState = NULL;  | 
4476  | 0  |     exec->errStateNo = -1;  | 
4477  | 0  |     xmlFree(exec->errString);  | 
4478  | 0  |     exec->errString = NULL;  | 
4479  | 0  | }  | 
4480  |  |  | 
4481  |  | /************************************************************************  | 
4482  |  |  *                  *  | 
4483  |  |  *  Parser for the Schemas Datatype Regular Expressions   *  | 
4484  |  |  *  http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs  *  | 
4485  |  |  *                  *  | 
4486  |  |  ************************************************************************/  | 
4487  |  |  | 
4488  |  | /**  | 
4489  |  |  * [10]   Char   ::=   [^.\?*+()|\#x5B\#x5D]  | 
4490  |  |  *  | 
4491  |  |  * @param ctxt  a regexp parser context  | 
4492  |  |  */  | 
4493  |  | static int  | 
4494  | 0  | xmlFAIsChar(xmlRegParserCtxtPtr ctxt) { | 
4495  | 0  |     int cur;  | 
4496  | 0  |     int len;  | 
4497  |  | 
  | 
4498  | 0  |     len = 4;  | 
4499  | 0  |     cur = xmlGetUTF8Char(ctxt->cur, &len);  | 
4500  | 0  |     if (cur < 0) { | 
4501  | 0  |         ERROR("Invalid UTF-8"); | 
4502  | 0  |         return(0);  | 
4503  | 0  |     }  | 
4504  | 0  |     if ((cur == '.') || (cur == '\\') || (cur == '?') ||  | 
4505  | 0  |   (cur == '*') || (cur == '+') || (cur == '(') || | 
4506  | 0  |   (cur == ')') || (cur == '|') || (cur == 0x5B) ||  | 
4507  | 0  |   (cur == 0x5D) || (cur == 0))  | 
4508  | 0  |   return(-1);  | 
4509  | 0  |     return(cur);  | 
4510  | 0  | }  | 
4511  |  |  | 
4512  |  | /**  | 
4513  |  |  * [27]   charProp   ::=   IsCategory | IsBlock  | 
4514  |  |  * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |  | 
4515  |  |  *                       Separators | Symbols | Others  | 
4516  |  |  * [29]   Letters   ::=   'L' [ultmo]?  | 
4517  |  |  * [30]   Marks   ::=   'M' [nce]?  | 
4518  |  |  * [31]   Numbers   ::=   'N' [dlo]?  | 
4519  |  |  * [32]   Punctuation   ::=   'P' [cdseifo]?  | 
4520  |  |  * [33]   Separators   ::=   'Z' [slp]?  | 
4521  |  |  * [34]   Symbols   ::=   'S' [mcko]?  | 
4522  |  |  * [35]   Others   ::=   'C' [cfon]?  | 
4523  |  |  * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9\#x2D]+  | 
4524  |  |  *  | 
4525  |  |  * @param ctxt  a regexp parser context  | 
4526  |  |  */  | 
4527  |  | static void  | 
4528  | 0  | xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { | 
4529  | 0  |     int cur;  | 
4530  | 0  |     xmlRegAtomType type = (xmlRegAtomType) 0;  | 
4531  | 0  |     xmlChar *blockName = NULL;  | 
4532  |  | 
  | 
4533  | 0  |     cur = CUR;  | 
4534  | 0  |     if (cur == 'L') { | 
4535  | 0  |   NEXT;  | 
4536  | 0  |   cur = CUR;  | 
4537  | 0  |   if (cur == 'u') { | 
4538  | 0  |       NEXT;  | 
4539  | 0  |       type = XML_REGEXP_LETTER_UPPERCASE;  | 
4540  | 0  |   } else if (cur == 'l') { | 
4541  | 0  |       NEXT;  | 
4542  | 0  |       type = XML_REGEXP_LETTER_LOWERCASE;  | 
4543  | 0  |   } else if (cur == 't') { | 
4544  | 0  |       NEXT;  | 
4545  | 0  |       type = XML_REGEXP_LETTER_TITLECASE;  | 
4546  | 0  |   } else if (cur == 'm') { | 
4547  | 0  |       NEXT;  | 
4548  | 0  |       type = XML_REGEXP_LETTER_MODIFIER;  | 
4549  | 0  |   } else if (cur == 'o') { | 
4550  | 0  |       NEXT;  | 
4551  | 0  |       type = XML_REGEXP_LETTER_OTHERS;  | 
4552  | 0  |   } else { | 
4553  | 0  |       type = XML_REGEXP_LETTER;  | 
4554  | 0  |   }  | 
4555  | 0  |     } else if (cur == 'M') { | 
4556  | 0  |   NEXT;  | 
4557  | 0  |   cur = CUR;  | 
4558  | 0  |   if (cur == 'n') { | 
4559  | 0  |       NEXT;  | 
4560  |  |       /* nonspacing */  | 
4561  | 0  |       type = XML_REGEXP_MARK_NONSPACING;  | 
4562  | 0  |   } else if (cur == 'c') { | 
4563  | 0  |       NEXT;  | 
4564  |  |       /* spacing combining */  | 
4565  | 0  |       type = XML_REGEXP_MARK_SPACECOMBINING;  | 
4566  | 0  |   } else if (cur == 'e') { | 
4567  | 0  |       NEXT;  | 
4568  |  |       /* enclosing */  | 
4569  | 0  |       type = XML_REGEXP_MARK_ENCLOSING;  | 
4570  | 0  |   } else { | 
4571  |  |       /* all marks */  | 
4572  | 0  |       type = XML_REGEXP_MARK;  | 
4573  | 0  |   }  | 
4574  | 0  |     } else if (cur == 'N') { | 
4575  | 0  |   NEXT;  | 
4576  | 0  |   cur = CUR;  | 
4577  | 0  |   if (cur == 'd') { | 
4578  | 0  |       NEXT;  | 
4579  |  |       /* digital */  | 
4580  | 0  |       type = XML_REGEXP_NUMBER_DECIMAL;  | 
4581  | 0  |   } else if (cur == 'l') { | 
4582  | 0  |       NEXT;  | 
4583  |  |       /* letter */  | 
4584  | 0  |       type = XML_REGEXP_NUMBER_LETTER;  | 
4585  | 0  |   } else if (cur == 'o') { | 
4586  | 0  |       NEXT;  | 
4587  |  |       /* other */  | 
4588  | 0  |       type = XML_REGEXP_NUMBER_OTHERS;  | 
4589  | 0  |   } else { | 
4590  |  |       /* all numbers */  | 
4591  | 0  |       type = XML_REGEXP_NUMBER;  | 
4592  | 0  |   }  | 
4593  | 0  |     } else if (cur == 'P') { | 
4594  | 0  |   NEXT;  | 
4595  | 0  |   cur = CUR;  | 
4596  | 0  |   if (cur == 'c') { | 
4597  | 0  |       NEXT;  | 
4598  |  |       /* connector */  | 
4599  | 0  |       type = XML_REGEXP_PUNCT_CONNECTOR;  | 
4600  | 0  |   } else if (cur == 'd') { | 
4601  | 0  |       NEXT;  | 
4602  |  |       /* dash */  | 
4603  | 0  |       type = XML_REGEXP_PUNCT_DASH;  | 
4604  | 0  |   } else if (cur == 's') { | 
4605  | 0  |       NEXT;  | 
4606  |  |       /* open */  | 
4607  | 0  |       type = XML_REGEXP_PUNCT_OPEN;  | 
4608  | 0  |   } else if (cur == 'e') { | 
4609  | 0  |       NEXT;  | 
4610  |  |       /* close */  | 
4611  | 0  |       type = XML_REGEXP_PUNCT_CLOSE;  | 
4612  | 0  |   } else if (cur == 'i') { | 
4613  | 0  |       NEXT;  | 
4614  |  |       /* initial quote */  | 
4615  | 0  |       type = XML_REGEXP_PUNCT_INITQUOTE;  | 
4616  | 0  |   } else if (cur == 'f') { | 
4617  | 0  |       NEXT;  | 
4618  |  |       /* final quote */  | 
4619  | 0  |       type = XML_REGEXP_PUNCT_FINQUOTE;  | 
4620  | 0  |   } else if (cur == 'o') { | 
4621  | 0  |       NEXT;  | 
4622  |  |       /* other */  | 
4623  | 0  |       type = XML_REGEXP_PUNCT_OTHERS;  | 
4624  | 0  |   } else { | 
4625  |  |       /* all punctuation */  | 
4626  | 0  |       type = XML_REGEXP_PUNCT;  | 
4627  | 0  |   }  | 
4628  | 0  |     } else if (cur == 'Z') { | 
4629  | 0  |   NEXT;  | 
4630  | 0  |   cur = CUR;  | 
4631  | 0  |   if (cur == 's') { | 
4632  | 0  |       NEXT;  | 
4633  |  |       /* space */  | 
4634  | 0  |       type = XML_REGEXP_SEPAR_SPACE;  | 
4635  | 0  |   } else if (cur == 'l') { | 
4636  | 0  |       NEXT;  | 
4637  |  |       /* line */  | 
4638  | 0  |       type = XML_REGEXP_SEPAR_LINE;  | 
4639  | 0  |   } else if (cur == 'p') { | 
4640  | 0  |       NEXT;  | 
4641  |  |       /* paragraph */  | 
4642  | 0  |       type = XML_REGEXP_SEPAR_PARA;  | 
4643  | 0  |   } else { | 
4644  |  |       /* all separators */  | 
4645  | 0  |       type = XML_REGEXP_SEPAR;  | 
4646  | 0  |   }  | 
4647  | 0  |     } else if (cur == 'S') { | 
4648  | 0  |   NEXT;  | 
4649  | 0  |   cur = CUR;  | 
4650  | 0  |   if (cur == 'm') { | 
4651  | 0  |       NEXT;  | 
4652  | 0  |       type = XML_REGEXP_SYMBOL_MATH;  | 
4653  |  |       /* math */  | 
4654  | 0  |   } else if (cur == 'c') { | 
4655  | 0  |       NEXT;  | 
4656  | 0  |       type = XML_REGEXP_SYMBOL_CURRENCY;  | 
4657  |  |       /* currency */  | 
4658  | 0  |   } else if (cur == 'k') { | 
4659  | 0  |       NEXT;  | 
4660  | 0  |       type = XML_REGEXP_SYMBOL_MODIFIER;  | 
4661  |  |       /* modifiers */  | 
4662  | 0  |   } else if (cur == 'o') { | 
4663  | 0  |       NEXT;  | 
4664  | 0  |       type = XML_REGEXP_SYMBOL_OTHERS;  | 
4665  |  |       /* other */  | 
4666  | 0  |   } else { | 
4667  |  |       /* all symbols */  | 
4668  | 0  |       type = XML_REGEXP_SYMBOL;  | 
4669  | 0  |   }  | 
4670  | 0  |     } else if (cur == 'C') { | 
4671  | 0  |   NEXT;  | 
4672  | 0  |   cur = CUR;  | 
4673  | 0  |   if (cur == 'c') { | 
4674  | 0  |       NEXT;  | 
4675  |  |       /* control */  | 
4676  | 0  |       type = XML_REGEXP_OTHER_CONTROL;  | 
4677  | 0  |   } else if (cur == 'f') { | 
4678  | 0  |       NEXT;  | 
4679  |  |       /* format */  | 
4680  | 0  |       type = XML_REGEXP_OTHER_FORMAT;  | 
4681  | 0  |   } else if (cur == 'o') { | 
4682  | 0  |       NEXT;  | 
4683  |  |       /* private use */  | 
4684  | 0  |       type = XML_REGEXP_OTHER_PRIVATE;  | 
4685  | 0  |   } else if (cur == 'n') { | 
4686  | 0  |       NEXT;  | 
4687  |  |       /* not assigned */  | 
4688  | 0  |       type = XML_REGEXP_OTHER_NA;  | 
4689  | 0  |   } else { | 
4690  |  |       /* all others */  | 
4691  | 0  |       type = XML_REGEXP_OTHER;  | 
4692  | 0  |   }  | 
4693  | 0  |     } else if (cur == 'I') { | 
4694  | 0  |   const xmlChar *start;  | 
4695  | 0  |   NEXT;  | 
4696  | 0  |   cur = CUR;  | 
4697  | 0  |   if (cur != 's') { | 
4698  | 0  |       ERROR("IsXXXX expected"); | 
4699  | 0  |       return;  | 
4700  | 0  |   }  | 
4701  | 0  |   NEXT;  | 
4702  | 0  |   start = ctxt->cur;  | 
4703  | 0  |   cur = CUR;  | 
4704  | 0  |   if (((cur >= 'a') && (cur <= 'z')) ||  | 
4705  | 0  |       ((cur >= 'A') && (cur <= 'Z')) ||  | 
4706  | 0  |       ((cur >= '0') && (cur <= '9')) ||  | 
4707  | 0  |       (cur == 0x2D)) { | 
4708  | 0  |       NEXT;  | 
4709  | 0  |       cur = CUR;  | 
4710  | 0  |       while (((cur >= 'a') && (cur <= 'z')) ||  | 
4711  | 0  |     ((cur >= 'A') && (cur <= 'Z')) ||  | 
4712  | 0  |     ((cur >= '0') && (cur <= '9')) ||  | 
4713  | 0  |     (cur == 0x2D)) { | 
4714  | 0  |     NEXT;  | 
4715  | 0  |     cur = CUR;  | 
4716  | 0  |       }  | 
4717  | 0  |   }  | 
4718  | 0  |   type = XML_REGEXP_BLOCK_NAME;  | 
4719  | 0  |   blockName = xmlStrndup(start, ctxt->cur - start);  | 
4720  | 0  |         if (blockName == NULL)  | 
4721  | 0  |       xmlRegexpErrMemory(ctxt);  | 
4722  | 0  |     } else { | 
4723  | 0  |   ERROR("Unknown char property"); | 
4724  | 0  |   return;  | 
4725  | 0  |     }  | 
4726  | 0  |     if (ctxt->atom == NULL) { | 
4727  | 0  |   ctxt->atom = xmlRegNewAtom(ctxt, type);  | 
4728  | 0  |         if (ctxt->atom == NULL) { | 
4729  | 0  |             xmlFree(blockName);  | 
4730  | 0  |             return;  | 
4731  | 0  |         }  | 
4732  | 0  |   ctxt->atom->valuep = blockName;  | 
4733  | 0  |     } else if (ctxt->atom->type == XML_REGEXP_RANGES) { | 
4734  | 0  |         if (xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,  | 
4735  | 0  |                                type, 0, 0, blockName) == NULL) { | 
4736  | 0  |             xmlFree(blockName);  | 
4737  | 0  |         }  | 
4738  | 0  |     }  | 
4739  | 0  | }  | 
4740  |  |  | 
4741  |  | static int parse_escaped_codeunit(xmlRegParserCtxtPtr ctxt)  | 
4742  | 0  | { | 
4743  | 0  |     int val = 0, i, cur;  | 
4744  | 0  |     for (i = 0; i < 4; i++) { | 
4745  | 0  |   NEXT;  | 
4746  | 0  |   val *= 16;  | 
4747  | 0  |   cur = CUR;  | 
4748  | 0  |   if (cur >= '0' && cur <= '9') { | 
4749  | 0  |       val += cur - '0';  | 
4750  | 0  |   } else if (cur >= 'A' && cur <= 'F') { | 
4751  | 0  |       val += cur - 'A' + 10;  | 
4752  | 0  |   } else if (cur >= 'a' && cur <= 'f') { | 
4753  | 0  |       val += cur - 'a' + 10;  | 
4754  | 0  |   } else { | 
4755  | 0  |       ERROR("Expecting hex digit"); | 
4756  | 0  |       return -1;  | 
4757  | 0  |   }  | 
4758  | 0  |     }  | 
4759  | 0  |     return val;  | 
4760  | 0  | }  | 
4761  |  |  | 
4762  |  | static int parse_escaped_codepoint(xmlRegParserCtxtPtr ctxt)  | 
4763  | 0  | { | 
4764  | 0  |     int val = parse_escaped_codeunit(ctxt);  | 
4765  | 0  |     if (0xD800 <= val && val <= 0xDBFF) { | 
4766  | 0  |   NEXT;  | 
4767  | 0  |   if (CUR == '\\') { | 
4768  | 0  |       NEXT;  | 
4769  | 0  |       if (CUR == 'u') { | 
4770  | 0  |     int low = parse_escaped_codeunit(ctxt);  | 
4771  | 0  |     if (0xDC00 <= low && low <= 0xDFFF) { | 
4772  | 0  |         return (val - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;  | 
4773  | 0  |     }  | 
4774  | 0  |       }  | 
4775  | 0  |   }  | 
4776  | 0  |   ERROR("Invalid low surrogate pair code unit"); | 
4777  | 0  |   val = -1;  | 
4778  | 0  |     }  | 
4779  | 0  |     return val;  | 
4780  | 0  | }  | 
4781  |  |  | 
4782  |  | /**  | 
4783  |  |  * ```  | 
4784  |  |  * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )  | 
4785  |  |  * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}\#x2D\#x5B\#x5D\#x5E] | 
4786  |  |  * [25] catEsc   ::=   '\p{' charProp '}' | 
4787  |  |  * [26] complEsc ::=   '\P{' charProp '}' | 
4788  |  |  * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW]) | 
4789  |  |  * ```  | 
4790  |  |  *  | 
4791  |  |  * @param ctxt  a regexp parser context  | 
4792  |  |  */  | 
4793  |  | static void  | 
4794  | 0  | xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { | 
4795  | 0  |     int cur;  | 
4796  |  | 
  | 
4797  | 0  |     if (CUR == '.') { | 
4798  | 0  |   if (ctxt->atom == NULL) { | 
4799  | 0  |       ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);  | 
4800  | 0  |   } else if (ctxt->atom->type == XML_REGEXP_RANGES) { | 
4801  | 0  |       xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,  | 
4802  | 0  |              XML_REGEXP_ANYCHAR, 0, 0, NULL);  | 
4803  | 0  |   }  | 
4804  | 0  |   NEXT;  | 
4805  | 0  |   return;  | 
4806  | 0  |     }  | 
4807  | 0  |     if (CUR != '\\') { | 
4808  | 0  |   ERROR("Escaped sequence: expecting \\"); | 
4809  | 0  |   return;  | 
4810  | 0  |     }  | 
4811  | 0  |     NEXT;  | 
4812  | 0  |     cur = CUR;  | 
4813  | 0  |     if (cur == 'p') { | 
4814  | 0  |   NEXT;  | 
4815  | 0  |   if (CUR != '{') { | 
4816  | 0  |       ERROR("Expecting '{'"); | 
4817  | 0  |       return;  | 
4818  | 0  |   }  | 
4819  | 0  |   NEXT;  | 
4820  | 0  |   xmlFAParseCharProp(ctxt);  | 
4821  | 0  |   if (CUR != '}') { | 
4822  | 0  |       ERROR("Expecting '}'"); | 
4823  | 0  |       return;  | 
4824  | 0  |   }  | 
4825  | 0  |   NEXT;  | 
4826  | 0  |     } else if (cur == 'P') { | 
4827  | 0  |   NEXT;  | 
4828  | 0  |   if (CUR != '{') { | 
4829  | 0  |       ERROR("Expecting '{'"); | 
4830  | 0  |       return;  | 
4831  | 0  |   }  | 
4832  | 0  |   NEXT;  | 
4833  | 0  |   xmlFAParseCharProp(ctxt);  | 
4834  | 0  |         if (ctxt->atom != NULL)  | 
4835  | 0  |       ctxt->atom->neg = 1;  | 
4836  | 0  |   if (CUR != '}') { | 
4837  | 0  |       ERROR("Expecting '}'"); | 
4838  | 0  |       return;  | 
4839  | 0  |   }  | 
4840  | 0  |   NEXT;  | 
4841  | 0  |     } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||  | 
4842  | 0  |   (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||  | 
4843  | 0  |   (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') || | 
4844  | 0  |   (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||  | 
4845  | 0  |   (cur == 0x5E) ||  | 
4846  |  |  | 
4847  |  |   /* Non-standard escape sequences:  | 
4848  |  |    *                  Java 1.8|.NET Core 3.1|MSXML 6 */  | 
4849  | 0  |   (cur == '!') ||     /*   +  |     +       |    +   */  | 
4850  | 0  |   (cur == '"') ||     /*   +  |     +       |    +   */  | 
4851  | 0  |   (cur == '#') ||     /*   +  |     +       |    +   */  | 
4852  | 0  |   (cur == '$') ||     /*   +  |     +       |    +   */  | 
4853  | 0  |   (cur == '%') ||     /*   +  |     +       |    +   */  | 
4854  | 0  |   (cur == ',') ||     /*   +  |     +       |    +   */  | 
4855  | 0  |   (cur == '/') ||     /*   +  |     +       |    +   */  | 
4856  | 0  |   (cur == ':') ||     /*   +  |     +       |    +   */  | 
4857  | 0  |   (cur == ';') ||     /*   +  |     +       |    +   */  | 
4858  | 0  |   (cur == '=') ||     /*   +  |     +       |    +   */  | 
4859  | 0  |   (cur == '>') ||     /*      |     +       |    +   */  | 
4860  | 0  |   (cur == '@') ||     /*   +  |     +       |    +   */  | 
4861  | 0  |   (cur == '`') ||     /*   +  |     +       |    +   */  | 
4862  | 0  |   (cur == '~') ||     /*   +  |     +       |    +   */  | 
4863  | 0  |   (cur == 'u')) {     /*      |     +       |    +   */ | 
4864  | 0  |   if (ctxt->atom == NULL) { | 
4865  | 0  |       ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);  | 
4866  | 0  |       if (ctxt->atom != NULL) { | 
4867  | 0  |           switch (cur) { | 
4868  | 0  |         case 'n':  | 
4869  | 0  |             ctxt->atom->codepoint = '\n';  | 
4870  | 0  |       break;  | 
4871  | 0  |         case 'r':  | 
4872  | 0  |             ctxt->atom->codepoint = '\r';  | 
4873  | 0  |       break;  | 
4874  | 0  |         case 't':  | 
4875  | 0  |             ctxt->atom->codepoint = '\t';  | 
4876  | 0  |       break;  | 
4877  | 0  |         case 'u':  | 
4878  | 0  |       cur = parse_escaped_codepoint(ctxt);  | 
4879  | 0  |       if (cur < 0) { | 
4880  | 0  |           return;  | 
4881  | 0  |       }  | 
4882  | 0  |       ctxt->atom->codepoint = cur;  | 
4883  | 0  |       break;  | 
4884  | 0  |         default:  | 
4885  | 0  |       ctxt->atom->codepoint = cur;  | 
4886  | 0  |     }  | 
4887  | 0  |       }  | 
4888  | 0  |   } else if (ctxt->atom->type == XML_REGEXP_RANGES) { | 
4889  | 0  |             switch (cur) { | 
4890  | 0  |                 case 'n':  | 
4891  | 0  |                     cur = '\n';  | 
4892  | 0  |                     break;  | 
4893  | 0  |                 case 'r':  | 
4894  | 0  |                     cur = '\r';  | 
4895  | 0  |                     break;  | 
4896  | 0  |                 case 't':  | 
4897  | 0  |                     cur = '\t';  | 
4898  | 0  |                     break;  | 
4899  | 0  |             }  | 
4900  | 0  |       xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,  | 
4901  | 0  |              XML_REGEXP_CHARVAL, cur, cur, NULL);  | 
4902  | 0  |   }  | 
4903  | 0  |   NEXT;  | 
4904  | 0  |     } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||  | 
4905  | 0  |   (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||  | 
4906  | 0  |   (cur == 'w') || (cur == 'W')) { | 
4907  | 0  |   xmlRegAtomType type = XML_REGEXP_ANYSPACE;  | 
4908  |  | 
  | 
4909  | 0  |   switch (cur) { | 
4910  | 0  |       case 's':  | 
4911  | 0  |     type = XML_REGEXP_ANYSPACE;  | 
4912  | 0  |     break;  | 
4913  | 0  |       case 'S':  | 
4914  | 0  |     type = XML_REGEXP_NOTSPACE;  | 
4915  | 0  |     break;  | 
4916  | 0  |       case 'i':  | 
4917  | 0  |     type = XML_REGEXP_INITNAME;  | 
4918  | 0  |     break;  | 
4919  | 0  |       case 'I':  | 
4920  | 0  |     type = XML_REGEXP_NOTINITNAME;  | 
4921  | 0  |     break;  | 
4922  | 0  |       case 'c':  | 
4923  | 0  |     type = XML_REGEXP_NAMECHAR;  | 
4924  | 0  |     break;  | 
4925  | 0  |       case 'C':  | 
4926  | 0  |     type = XML_REGEXP_NOTNAMECHAR;  | 
4927  | 0  |     break;  | 
4928  | 0  |       case 'd':  | 
4929  | 0  |     type = XML_REGEXP_DECIMAL;  | 
4930  | 0  |     break;  | 
4931  | 0  |       case 'D':  | 
4932  | 0  |     type = XML_REGEXP_NOTDECIMAL;  | 
4933  | 0  |     break;  | 
4934  | 0  |       case 'w':  | 
4935  | 0  |     type = XML_REGEXP_REALCHAR;  | 
4936  | 0  |     break;  | 
4937  | 0  |       case 'W':  | 
4938  | 0  |     type = XML_REGEXP_NOTREALCHAR;  | 
4939  | 0  |     break;  | 
4940  | 0  |   }  | 
4941  | 0  |   NEXT;  | 
4942  | 0  |   if (ctxt->atom == NULL) { | 
4943  | 0  |       ctxt->atom = xmlRegNewAtom(ctxt, type);  | 
4944  | 0  |   } else if (ctxt->atom->type == XML_REGEXP_RANGES) { | 
4945  | 0  |       xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,  | 
4946  | 0  |              type, 0, 0, NULL);  | 
4947  | 0  |   }  | 
4948  | 0  |     } else { | 
4949  | 0  |   ERROR("Wrong escape sequence, misuse of character '\\'"); | 
4950  | 0  |     }  | 
4951  | 0  | }  | 
4952  |  |  | 
4953  |  | /**  | 
4954  |  |  * ```  | 
4955  |  |  * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash  | 
4956  |  |  * [18]   seRange   ::=   charOrEsc '-' charOrEsc  | 
4957  |  |  * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc  | 
4958  |  |  * [21]   XmlChar   ::=   [^\\#x2D\#x5B\#x5D]  | 
4959  |  |  * [22]   XmlCharIncDash   ::=   [^\\#x5B\#x5D]  | 
4960  |  |  * ```  | 
4961  |  |  *  | 
4962  |  |  * @param ctxt  a regexp parser context  | 
4963  |  |  */  | 
4964  |  | static void  | 
4965  | 0  | xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { | 
4966  | 0  |     int cur, len;  | 
4967  | 0  |     int start = -1;  | 
4968  | 0  |     int end = -1;  | 
4969  |  | 
  | 
4970  | 0  |     if (CUR == '\0') { | 
4971  | 0  |         ERROR("Expecting ']'"); | 
4972  | 0  |   return;  | 
4973  | 0  |     }  | 
4974  |  |  | 
4975  | 0  |     cur = CUR;  | 
4976  | 0  |     if (cur == '\\') { | 
4977  | 0  |   NEXT;  | 
4978  | 0  |   cur = CUR;  | 
4979  | 0  |   switch (cur) { | 
4980  | 0  |       case 'n': start = 0xA; break;  | 
4981  | 0  |       case 'r': start = 0xD; break;  | 
4982  | 0  |       case 't': start = 0x9; break;  | 
4983  | 0  |       case '\\': case '|': case '.': case '-': case '^': case '?':  | 
4984  | 0  |       case '*': case '+': case '{': case '}': case '(': case ')': | 
4985  | 0  |       case '[': case ']':  | 
4986  | 0  |     start = cur; break;  | 
4987  | 0  |       default:  | 
4988  | 0  |     ERROR("Invalid escape value"); | 
4989  | 0  |     return;  | 
4990  | 0  |   }  | 
4991  | 0  |   end = start;  | 
4992  | 0  |         len = 1;  | 
4993  | 0  |     } else if ((cur != 0x5B) && (cur != 0x5D)) { | 
4994  | 0  |         len = 4;  | 
4995  | 0  |         end = start = xmlGetUTF8Char(ctxt->cur, &len);  | 
4996  | 0  |         if (start < 0) { | 
4997  | 0  |             ERROR("Invalid UTF-8"); | 
4998  | 0  |             return;  | 
4999  | 0  |         }  | 
5000  | 0  |     } else { | 
5001  | 0  |   ERROR("Expecting a char range"); | 
5002  | 0  |   return;  | 
5003  | 0  |     }  | 
5004  |  |     /*  | 
5005  |  |      * Since we are "inside" a range, we can assume ctxt->cur is past  | 
5006  |  |      * the start of ctxt->string, and PREV should be safe  | 
5007  |  |      */  | 
5008  | 0  |     if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) { | 
5009  | 0  |   NEXTL(len);  | 
5010  | 0  |   return;  | 
5011  | 0  |     }  | 
5012  | 0  |     NEXTL(len);  | 
5013  | 0  |     cur = CUR;  | 
5014  | 0  |     if ((cur != '-') || (NXT(1) == '[') || (NXT(1) == ']')) { | 
5015  | 0  |         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,  | 
5016  | 0  |                   XML_REGEXP_CHARVAL, start, end, NULL);  | 
5017  | 0  |   return;  | 
5018  | 0  |     }  | 
5019  | 0  |     NEXT;  | 
5020  | 0  |     cur = CUR;  | 
5021  | 0  |     if (cur == '\\') { | 
5022  | 0  |   NEXT;  | 
5023  | 0  |   cur = CUR;  | 
5024  | 0  |   switch (cur) { | 
5025  | 0  |       case 'n': end = 0xA; break;  | 
5026  | 0  |       case 'r': end = 0xD; break;  | 
5027  | 0  |       case 't': end = 0x9; break;  | 
5028  | 0  |       case '\\': case '|': case '.': case '-': case '^': case '?':  | 
5029  | 0  |       case '*': case '+': case '{': case '}': case '(': case ')': | 
5030  | 0  |       case '[': case ']':  | 
5031  | 0  |     end = cur; break;  | 
5032  | 0  |       default:  | 
5033  | 0  |     ERROR("Invalid escape value"); | 
5034  | 0  |     return;  | 
5035  | 0  |   }  | 
5036  | 0  |         len = 1;  | 
5037  | 0  |     } else if ((cur != '\0') && (cur != 0x5B) && (cur != 0x5D)) { | 
5038  | 0  |         len = 4;  | 
5039  | 0  |         end = xmlGetUTF8Char(ctxt->cur, &len);  | 
5040  | 0  |         if (end < 0) { | 
5041  | 0  |             ERROR("Invalid UTF-8"); | 
5042  | 0  |             return;  | 
5043  | 0  |         }  | 
5044  | 0  |     } else { | 
5045  | 0  |   ERROR("Expecting the end of a char range"); | 
5046  | 0  |   return;  | 
5047  | 0  |     }  | 
5048  |  |  | 
5049  |  |     /* TODO check that the values are acceptable character ranges for XML */  | 
5050  | 0  |     if (end < start) { | 
5051  | 0  |   ERROR("End of range is before start of range"); | 
5052  | 0  |     } else { | 
5053  | 0  |         NEXTL(len);  | 
5054  | 0  |         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,  | 
5055  | 0  |                XML_REGEXP_CHARVAL, start, end, NULL);  | 
5056  | 0  |     }  | 
5057  | 0  | }  | 
5058  |  |  | 
5059  |  | /**  | 
5060  |  |  * [14]   posCharGroup ::= ( charRange | charClassEsc  )+  | 
5061  |  |  *  | 
5062  |  |  * @param ctxt  a regexp parser context  | 
5063  |  |  */  | 
5064  |  | static void  | 
5065  | 0  | xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) { | 
5066  | 0  |     do { | 
5067  | 0  |   if (CUR == '\\') { | 
5068  | 0  |       xmlFAParseCharClassEsc(ctxt);  | 
5069  | 0  |   } else { | 
5070  | 0  |       xmlFAParseCharRange(ctxt);  | 
5071  | 0  |   }  | 
5072  | 0  |     } while ((CUR != ']') && (CUR != '-') &&  | 
5073  | 0  |              (CUR != 0) && (ctxt->error == 0));  | 
5074  | 0  | }  | 
5075  |  |  | 
5076  |  | /**  | 
5077  |  |  * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub  | 
5078  |  |  * [15]   negCharGroup ::= '^' posCharGroup  | 
5079  |  |  * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr  | 
5080  |  |  * [12]   charClassExpr ::= '[' charGroup ']'  | 
5081  |  |  *  | 
5082  |  |  * @param ctxt  a regexp parser context  | 
5083  |  |  */  | 
5084  |  | static void  | 
5085  | 0  | xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { | 
5086  | 0  |     int neg = ctxt->neg;  | 
5087  |  | 
  | 
5088  | 0  |     if (CUR == '^') { | 
5089  | 0  |   NEXT;  | 
5090  | 0  |   ctxt->neg = !ctxt->neg;  | 
5091  | 0  |   xmlFAParsePosCharGroup(ctxt);  | 
5092  | 0  |   ctxt->neg = neg;  | 
5093  | 0  |     }  | 
5094  | 0  |     while ((CUR != ']') && (ctxt->error == 0)) { | 
5095  | 0  |   if ((CUR == '-') && (NXT(1) == '[')) { | 
5096  | 0  |       NEXT; /* eat the '-' */  | 
5097  | 0  |       NEXT; /* eat the '[' */  | 
5098  | 0  |       ctxt->neg = 2;  | 
5099  | 0  |       xmlFAParseCharGroup(ctxt);  | 
5100  | 0  |       ctxt->neg = neg;  | 
5101  | 0  |       if (CUR == ']') { | 
5102  | 0  |     NEXT;  | 
5103  | 0  |       } else { | 
5104  | 0  |     ERROR("charClassExpr: ']' expected"); | 
5105  | 0  |       }  | 
5106  | 0  |       break;  | 
5107  | 0  |   } else { | 
5108  | 0  |       xmlFAParsePosCharGroup(ctxt);  | 
5109  | 0  |   }  | 
5110  | 0  |     }  | 
5111  | 0  | }  | 
5112  |  |  | 
5113  |  | /**  | 
5114  |  |  * [11]   charClass   ::=     charClassEsc | charClassExpr  | 
5115  |  |  * [12]   charClassExpr   ::=   '[' charGroup ']'  | 
5116  |  |  *  | 
5117  |  |  * @param ctxt  a regexp parser context  | 
5118  |  |  */  | 
5119  |  | static void  | 
5120  | 0  | xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) { | 
5121  | 0  |     if (CUR == '[') { | 
5122  | 0  |   NEXT;  | 
5123  | 0  |   ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);  | 
5124  | 0  |   if (ctxt->atom == NULL)  | 
5125  | 0  |       return;  | 
5126  | 0  |   xmlFAParseCharGroup(ctxt);  | 
5127  | 0  |   if (CUR == ']') { | 
5128  | 0  |       NEXT;  | 
5129  | 0  |   } else { | 
5130  | 0  |       ERROR("xmlFAParseCharClass: ']' expected"); | 
5131  | 0  |   }  | 
5132  | 0  |     } else { | 
5133  | 0  |   xmlFAParseCharClassEsc(ctxt);  | 
5134  | 0  |     }  | 
5135  | 0  | }  | 
5136  |  |  | 
5137  |  | /**  | 
5138  |  |  * [8]   QuantExact   ::=   [0-9]+  | 
5139  |  |  *  | 
5140  |  |  * @param ctxt  a regexp parser context  | 
5141  |  |  * @returns 0 if success or -1 in case of error  | 
5142  |  |  */  | 
5143  |  | static int  | 
5144  | 0  | xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) { | 
5145  | 0  |     int ret = 0;  | 
5146  | 0  |     int ok = 0;  | 
5147  | 0  |     int overflow = 0;  | 
5148  |  | 
  | 
5149  | 0  |     while ((CUR >= '0') && (CUR <= '9')) { | 
5150  | 0  |         if (ret > INT_MAX / 10) { | 
5151  | 0  |             overflow = 1;  | 
5152  | 0  |         } else { | 
5153  | 0  |             int digit = CUR - '0';  | 
5154  |  | 
  | 
5155  | 0  |             ret *= 10;  | 
5156  | 0  |             if (ret > INT_MAX - digit)  | 
5157  | 0  |                 overflow = 1;  | 
5158  | 0  |             else  | 
5159  | 0  |                 ret += digit;  | 
5160  | 0  |         }  | 
5161  | 0  |   ok = 1;  | 
5162  | 0  |   NEXT;  | 
5163  | 0  |     }  | 
5164  | 0  |     if ((ok != 1) || (overflow == 1)) { | 
5165  | 0  |   return(-1);  | 
5166  | 0  |     }  | 
5167  | 0  |     return(ret);  | 
5168  | 0  | }  | 
5169  |  |  | 
5170  |  | /**  | 
5171  |  |  * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' ) | 
5172  |  |  * [5]   quantity   ::=   quantRange | quantMin | QuantExact  | 
5173  |  |  * [6]   quantRange   ::=   QuantExact ',' QuantExact  | 
5174  |  |  * [7]   quantMin   ::=   QuantExact ','  | 
5175  |  |  * [8]   QuantExact   ::=   [0-9]+  | 
5176  |  |  *  | 
5177  |  |  * @param ctxt  a regexp parser context  | 
5178  |  |  */  | 
5179  |  | static int  | 
5180  | 0  | xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) { | 
5181  | 0  |     int cur;  | 
5182  |  | 
  | 
5183  | 0  |     cur = CUR;  | 
5184  | 0  |     if ((cur == '?') || (cur == '*') || (cur == '+')) { | 
5185  | 0  |   if (ctxt->atom != NULL) { | 
5186  | 0  |       if (cur == '?')  | 
5187  | 0  |     ctxt->atom->quant = XML_REGEXP_QUANT_OPT;  | 
5188  | 0  |       else if (cur == '*')  | 
5189  | 0  |     ctxt->atom->quant = XML_REGEXP_QUANT_MULT;  | 
5190  | 0  |       else if (cur == '+')  | 
5191  | 0  |     ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;  | 
5192  | 0  |   }  | 
5193  | 0  |   NEXT;  | 
5194  | 0  |   return(1);  | 
5195  | 0  |     }  | 
5196  | 0  |     if (cur == '{') { | 
5197  | 0  |   int min = 0, max = 0;  | 
5198  |  | 
  | 
5199  | 0  |   NEXT;  | 
5200  | 0  |   cur = xmlFAParseQuantExact(ctxt);  | 
5201  | 0  |   if (cur >= 0)  | 
5202  | 0  |       min = cur;  | 
5203  | 0  |         else { | 
5204  | 0  |             ERROR("Improper quantifier"); | 
5205  | 0  |         }  | 
5206  | 0  |   if (CUR == ',') { | 
5207  | 0  |       NEXT;  | 
5208  | 0  |       if (CUR == '}')  | 
5209  | 0  |           max = INT_MAX;  | 
5210  | 0  |       else { | 
5211  | 0  |           cur = xmlFAParseQuantExact(ctxt);  | 
5212  | 0  |           if (cur >= 0)  | 
5213  | 0  |         max = cur;  | 
5214  | 0  |     else { | 
5215  | 0  |         ERROR("Improper quantifier"); | 
5216  | 0  |     }  | 
5217  | 0  |       }  | 
5218  | 0  |   }  | 
5219  | 0  |   if (CUR == '}') { | 
5220  | 0  |       NEXT;  | 
5221  | 0  |   } else { | 
5222  | 0  |       ERROR("Unterminated quantifier"); | 
5223  | 0  |   }  | 
5224  | 0  |   if (max == 0)  | 
5225  | 0  |       max = min;  | 
5226  | 0  |   if (ctxt->atom != NULL) { | 
5227  | 0  |       ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;  | 
5228  | 0  |       ctxt->atom->min = min;  | 
5229  | 0  |       ctxt->atom->max = max;  | 
5230  | 0  |   }  | 
5231  | 0  |   return(1);  | 
5232  | 0  |     }  | 
5233  | 0  |     return(0);  | 
5234  | 0  | }  | 
5235  |  |  | 
5236  |  | /**  | 
5237  |  |  * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' ) | 
5238  |  |  *  | 
5239  |  |  * @param ctxt  a regexp parser context  | 
5240  |  |  */  | 
5241  |  | static int  | 
5242  | 0  | xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) { | 
5243  | 0  |     int codepoint, len;  | 
5244  |  | 
  | 
5245  | 0  |     codepoint = xmlFAIsChar(ctxt);  | 
5246  | 0  |     if (codepoint > 0) { | 
5247  | 0  |   ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);  | 
5248  | 0  |   if (ctxt->atom == NULL)  | 
5249  | 0  |       return(-1);  | 
5250  | 0  |         len = 4;  | 
5251  | 0  |         codepoint = xmlGetUTF8Char(ctxt->cur, &len);  | 
5252  | 0  |         if (codepoint < 0) { | 
5253  | 0  |             ERROR("Invalid UTF-8"); | 
5254  | 0  |             return(-1);  | 
5255  | 0  |         }  | 
5256  | 0  |   ctxt->atom->codepoint = codepoint;  | 
5257  | 0  |   NEXTL(len);  | 
5258  | 0  |   return(1);  | 
5259  | 0  |     } else if (CUR == '|') { | 
5260  | 0  |   return(0);  | 
5261  | 0  |     } else if (CUR == 0) { | 
5262  | 0  |   return(0);  | 
5263  | 0  |     } else if (CUR == ')') { | 
5264  | 0  |   return(0);  | 
5265  | 0  |     } else if (CUR == '(') { | 
5266  | 0  |   xmlRegStatePtr start, oldend, start0;  | 
5267  |  | 
  | 
5268  | 0  |   NEXT;  | 
5269  | 0  |         if (ctxt->depth >= 50) { | 
5270  | 0  |       ERROR("xmlFAParseAtom: maximum nesting depth exceeded"); | 
5271  | 0  |             return(-1);  | 
5272  | 0  |         }  | 
5273  |  |   /*  | 
5274  |  |    * this extra Epsilon transition is needed if we count with 0 allowed  | 
5275  |  |    * unfortunately this can't be known at that point  | 
5276  |  |    */  | 
5277  | 0  |   xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);  | 
5278  | 0  |   start0 = ctxt->state;  | 
5279  | 0  |   xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);  | 
5280  | 0  |   start = ctxt->state;  | 
5281  | 0  |   oldend = ctxt->end;  | 
5282  | 0  |   ctxt->end = NULL;  | 
5283  | 0  |   ctxt->atom = NULL;  | 
5284  | 0  |         ctxt->depth++;  | 
5285  | 0  |   xmlFAParseRegExp(ctxt, 0);  | 
5286  | 0  |         ctxt->depth--;  | 
5287  | 0  |   if (CUR == ')') { | 
5288  | 0  |       NEXT;  | 
5289  | 0  |   } else { | 
5290  | 0  |       ERROR("xmlFAParseAtom: expecting ')'"); | 
5291  | 0  |   }  | 
5292  | 0  |   ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);  | 
5293  | 0  |   if (ctxt->atom == NULL)  | 
5294  | 0  |       return(-1);  | 
5295  | 0  |   ctxt->atom->start = start;  | 
5296  | 0  |   ctxt->atom->start0 = start0;  | 
5297  | 0  |   ctxt->atom->stop = ctxt->state;  | 
5298  | 0  |   ctxt->end = oldend;  | 
5299  | 0  |   return(1);  | 
5300  | 0  |     } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) { | 
5301  | 0  |   xmlFAParseCharClass(ctxt);  | 
5302  | 0  |   return(1);  | 
5303  | 0  |     }  | 
5304  | 0  |     return(0);  | 
5305  | 0  | }  | 
5306  |  |  | 
5307  |  | /**  | 
5308  |  |  * [3]   piece   ::=   atom quantifier?  | 
5309  |  |  *  | 
5310  |  |  * @param ctxt  a regexp parser context  | 
5311  |  |  */  | 
5312  |  | static int  | 
5313  | 0  | xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) { | 
5314  | 0  |     int ret;  | 
5315  |  | 
  | 
5316  | 0  |     ctxt->atom = NULL;  | 
5317  | 0  |     ret = xmlFAParseAtom(ctxt);  | 
5318  | 0  |     if (ret == 0)  | 
5319  | 0  |   return(0);  | 
5320  | 0  |     if (ctxt->atom == NULL) { | 
5321  | 0  |   ERROR("internal: no atom generated"); | 
5322  | 0  |     }  | 
5323  | 0  |     xmlFAParseQuantifier(ctxt);  | 
5324  | 0  |     return(1);  | 
5325  | 0  | }  | 
5326  |  |  | 
5327  |  | /**  | 
5328  |  |  * `to` is used to optimize by removing duplicate path in automata  | 
5329  |  |  * in expressions like (a|b)(c|d)  | 
5330  |  |  *  | 
5331  |  |  * [2]   branch   ::=   piece*  | 
5332  |  |  *  | 
5333  |  |  * @param ctxt  a regexp parser context  | 
5334  |  |  * @param to  optional target to the end of the branch  | 
5335  |  |  */  | 
5336  |  | static int  | 
5337  | 0  | xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) { | 
5338  | 0  |     xmlRegStatePtr previous;  | 
5339  | 0  |     int ret;  | 
5340  |  | 
  | 
5341  | 0  |     previous = ctxt->state;  | 
5342  | 0  |     ret = xmlFAParsePiece(ctxt);  | 
5343  | 0  |     if (ret == 0) { | 
5344  |  |         /* Empty branch */  | 
5345  | 0  |   xmlFAGenerateEpsilonTransition(ctxt, previous, to);  | 
5346  | 0  |     } else { | 
5347  | 0  |   if (xmlFAGenerateTransitions(ctxt, previous,  | 
5348  | 0  |           (CUR=='|' || CUR==')' || CUR==0) ? to : NULL,  | 
5349  | 0  |                 ctxt->atom) < 0) { | 
5350  | 0  |             xmlRegFreeAtom(ctxt->atom);  | 
5351  | 0  |             ctxt->atom = NULL;  | 
5352  | 0  |       return(-1);  | 
5353  | 0  |         }  | 
5354  | 0  |   previous = ctxt->state;  | 
5355  | 0  |   ctxt->atom = NULL;  | 
5356  | 0  |     }  | 
5357  | 0  |     while ((ret != 0) && (ctxt->error == 0)) { | 
5358  | 0  |   ret = xmlFAParsePiece(ctxt);  | 
5359  | 0  |   if (ret != 0) { | 
5360  | 0  |       if (xmlFAGenerateTransitions(ctxt, previous,  | 
5361  | 0  |               (CUR=='|' || CUR==')' || CUR==0) ? to : NULL,  | 
5362  | 0  |                     ctxt->atom) < 0) { | 
5363  | 0  |                 xmlRegFreeAtom(ctxt->atom);  | 
5364  | 0  |                 ctxt->atom = NULL;  | 
5365  | 0  |                 return(-1);  | 
5366  | 0  |             }  | 
5367  | 0  |       previous = ctxt->state;  | 
5368  | 0  |       ctxt->atom = NULL;  | 
5369  | 0  |   }  | 
5370  | 0  |     }  | 
5371  | 0  |     return(0);  | 
5372  | 0  | }  | 
5373  |  |  | 
5374  |  | /**  | 
5375  |  |  * [1]   regExp   ::=     branch  ( '|' branch )*  | 
5376  |  |  *  | 
5377  |  |  * @param ctxt  a regexp parser context  | 
5378  |  |  * @param top  is this the top-level expression ?  | 
5379  |  |  */  | 
5380  |  | static void  | 
5381  | 0  | xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { | 
5382  | 0  |     xmlRegStatePtr start, end;  | 
5383  |  |  | 
5384  |  |     /* if not top start should have been generated by an epsilon trans */  | 
5385  | 0  |     start = ctxt->state;  | 
5386  | 0  |     ctxt->end = NULL;  | 
5387  | 0  |     xmlFAParseBranch(ctxt, NULL);  | 
5388  | 0  |     if (top) { | 
5389  | 0  |   ctxt->state->type = XML_REGEXP_FINAL_STATE;  | 
5390  | 0  |     }  | 
5391  | 0  |     if (CUR != '|') { | 
5392  | 0  |   ctxt->end = ctxt->state;  | 
5393  | 0  |   return;  | 
5394  | 0  |     }  | 
5395  | 0  |     end = ctxt->state;  | 
5396  | 0  |     while ((CUR == '|') && (ctxt->error == 0)) { | 
5397  | 0  |   NEXT;  | 
5398  | 0  |   ctxt->state = start;  | 
5399  | 0  |   ctxt->end = NULL;  | 
5400  | 0  |   xmlFAParseBranch(ctxt, end);  | 
5401  | 0  |     }  | 
5402  | 0  |     if (!top) { | 
5403  | 0  |   ctxt->state = end;  | 
5404  | 0  |   ctxt->end = end;  | 
5405  | 0  |     }  | 
5406  | 0  | }  | 
5407  |  |  | 
5408  |  | /************************************************************************  | 
5409  |  |  *                  *  | 
5410  |  |  *      The basic API         *  | 
5411  |  |  *                  *  | 
5412  |  |  ************************************************************************/  | 
5413  |  |  | 
5414  |  | /**  | 
5415  |  |  * No-op since 2.14.0.  | 
5416  |  |  *  | 
5417  |  |  * @deprecated Don't use.  | 
5418  |  |  *  | 
5419  |  |  * @param output  the file for the output debug  | 
5420  |  |  * @param regexp  the compiled regexp  | 
5421  |  |  */  | 
5422  |  | void  | 
5423  |  | xmlRegexpPrint(FILE *output ATTRIBUTE_UNUSED,  | 
5424  | 0  |                xmlRegexp *regexp ATTRIBUTE_UNUSED) { | 
5425  | 0  | }  | 
5426  |  |  | 
5427  |  | /**  | 
5428  |  |  * Parses an XML Schemas regular expression.  | 
5429  |  |  *  | 
5430  |  |  * Parses a regular expression conforming to XML Schemas Part 2 Datatype  | 
5431  |  |  * Appendix F and builds an automata suitable for testing strings against  | 
5432  |  |  * that regular expression.  | 
5433  |  |  *  | 
5434  |  |  * @param regexp  a regular expression string  | 
5435  |  |  * @returns the compiled expression or NULL in case of error  | 
5436  |  |  */  | 
5437  |  | xmlRegexp *  | 
5438  | 0  | xmlRegexpCompile(const xmlChar *regexp) { | 
5439  | 0  |     xmlRegexpPtr ret = NULL;  | 
5440  | 0  |     xmlRegParserCtxtPtr ctxt;  | 
5441  |  | 
  | 
5442  | 0  |     if (regexp == NULL)  | 
5443  | 0  |         return(NULL);  | 
5444  |  |  | 
5445  | 0  |     ctxt = xmlRegNewParserCtxt(regexp);  | 
5446  | 0  |     if (ctxt == NULL)  | 
5447  | 0  |   return(NULL);  | 
5448  |  |  | 
5449  |  |     /* initialize the parser */  | 
5450  | 0  |     ctxt->state = xmlRegStatePush(ctxt);  | 
5451  | 0  |     if (ctxt->state == NULL)  | 
5452  | 0  |         goto error;  | 
5453  | 0  |     ctxt->start = ctxt->state;  | 
5454  | 0  |     ctxt->end = NULL;  | 
5455  |  |  | 
5456  |  |     /* parse the expression building an automata */  | 
5457  | 0  |     xmlFAParseRegExp(ctxt, 1);  | 
5458  | 0  |     if (CUR != 0) { | 
5459  | 0  |   ERROR("xmlFAParseRegExp: extra characters"); | 
5460  | 0  |     }  | 
5461  | 0  |     if (ctxt->error != 0)  | 
5462  | 0  |         goto error;  | 
5463  | 0  |     ctxt->end = ctxt->state;  | 
5464  | 0  |     ctxt->start->type = XML_REGEXP_START_STATE;  | 
5465  | 0  |     ctxt->end->type = XML_REGEXP_FINAL_STATE;  | 
5466  |  |  | 
5467  |  |     /* remove the Epsilon except for counted transitions */  | 
5468  | 0  |     xmlFAEliminateEpsilonTransitions(ctxt);  | 
5469  |  |  | 
5470  |  | 
  | 
5471  | 0  |     if (ctxt->error != 0)  | 
5472  | 0  |         goto error;  | 
5473  | 0  |     ret = xmlRegEpxFromParse(ctxt);  | 
5474  |  | 
  | 
5475  | 0  | error:  | 
5476  | 0  |     xmlRegFreeParserCtxt(ctxt);  | 
5477  | 0  |     return(ret);  | 
5478  | 0  | }  | 
5479  |  |  | 
5480  |  | /**  | 
5481  |  |  * Check if the regular expression matches a string.  | 
5482  |  |  *  | 
5483  |  |  * @param comp  the compiled regular expression  | 
5484  |  |  * @param content  the value to check against the regular expression  | 
5485  |  |  * @returns 1 if it matches, 0 if not and a negative value in case of error  | 
5486  |  |  */  | 
5487  |  | int  | 
5488  | 0  | xmlRegexpExec(xmlRegexp *comp, const xmlChar *content) { | 
5489  | 0  |     if ((comp == NULL) || (content == NULL))  | 
5490  | 0  |   return(-1);  | 
5491  | 0  |     return(xmlFARegExec(comp, content));  | 
5492  | 0  | }  | 
5493  |  |  | 
5494  |  | /**  | 
5495  |  |  * Check if the regular expression is deterministic.  | 
5496  |  |  *  | 
5497  |  |  * DTD and XML Schemas require a deterministic content model,  | 
5498  |  |  * so the automaton compiled from the regex must be a DFA.  | 
5499  |  |  *  | 
5500  |  |  * The runtime of this function is quadratic in the number of  | 
5501  |  |  * outgoing edges, causing serious worst-case performance issues.  | 
5502  |  |  *  | 
5503  |  |  * @deprecated: Internal function, don't use.  | 
5504  |  |  *  | 
5505  |  |  * @param comp  the compiled regular expression  | 
5506  |  |  * @returns 1 if it yes, 0 if not and a negative value in case  | 
5507  |  |  * of error  | 
5508  |  |  */  | 
5509  |  | int  | 
5510  | 0  | xmlRegexpIsDeterminist(xmlRegexp *comp) { | 
5511  | 0  |     xmlAutomataPtr am;  | 
5512  | 0  |     int ret;  | 
5513  |  | 
  | 
5514  | 0  |     if (comp == NULL)  | 
5515  | 0  |   return(-1);  | 
5516  | 0  |     if (comp->determinist != -1)  | 
5517  | 0  |   return(comp->determinist);  | 
5518  |  |  | 
5519  | 0  |     am = xmlNewAutomata();  | 
5520  | 0  |     if (am == NULL)  | 
5521  | 0  |         return(-1);  | 
5522  | 0  |     if (am->states != NULL) { | 
5523  | 0  |   int i;  | 
5524  |  | 
  | 
5525  | 0  |   for (i = 0;i < am->nbStates;i++)  | 
5526  | 0  |       xmlRegFreeState(am->states[i]);  | 
5527  | 0  |   xmlFree(am->states);  | 
5528  | 0  |     }  | 
5529  | 0  |     am->nbAtoms = comp->nbAtoms;  | 
5530  | 0  |     am->atoms = comp->atoms;  | 
5531  | 0  |     am->nbStates = comp->nbStates;  | 
5532  | 0  |     am->states = comp->states;  | 
5533  | 0  |     am->determinist = -1;  | 
5534  | 0  |     am->flags = comp->flags;  | 
5535  | 0  |     ret = xmlFAComputesDeterminism(am);  | 
5536  | 0  |     am->atoms = NULL;  | 
5537  | 0  |     am->states = NULL;  | 
5538  | 0  |     xmlFreeAutomata(am);  | 
5539  | 0  |     comp->determinist = ret;  | 
5540  | 0  |     return(ret);  | 
5541  | 0  | }  | 
5542  |  |  | 
5543  |  | /**  | 
5544  |  |  * Free a regexp.  | 
5545  |  |  *  | 
5546  |  |  * @param regexp  the regexp  | 
5547  |  |  */  | 
5548  |  | void  | 
5549  | 0  | xmlRegFreeRegexp(xmlRegexp *regexp) { | 
5550  | 0  |     int i;  | 
5551  | 0  |     if (regexp == NULL)  | 
5552  | 0  |   return;  | 
5553  |  |  | 
5554  | 0  |     if (regexp->string != NULL)  | 
5555  | 0  |   xmlFree(regexp->string);  | 
5556  | 0  |     if (regexp->states != NULL) { | 
5557  | 0  |   for (i = 0;i < regexp->nbStates;i++)  | 
5558  | 0  |       xmlRegFreeState(regexp->states[i]);  | 
5559  | 0  |   xmlFree(regexp->states);  | 
5560  | 0  |     }  | 
5561  | 0  |     if (regexp->atoms != NULL) { | 
5562  | 0  |   for (i = 0;i < regexp->nbAtoms;i++)  | 
5563  | 0  |       xmlRegFreeAtom(regexp->atoms[i]);  | 
5564  | 0  |   xmlFree(regexp->atoms);  | 
5565  | 0  |     }  | 
5566  | 0  |     if (regexp->counters != NULL)  | 
5567  | 0  |   xmlFree(regexp->counters);  | 
5568  | 0  |     if (regexp->compact != NULL)  | 
5569  | 0  |   xmlFree(regexp->compact);  | 
5570  | 0  |     if (regexp->transdata != NULL)  | 
5571  | 0  |   xmlFree(regexp->transdata);  | 
5572  | 0  |     if (regexp->stringMap != NULL) { | 
5573  | 0  |   for (i = 0; i < regexp->nbstrings;i++)  | 
5574  | 0  |       xmlFree(regexp->stringMap[i]);  | 
5575  | 0  |   xmlFree(regexp->stringMap);  | 
5576  | 0  |     }  | 
5577  |  | 
  | 
5578  | 0  |     xmlFree(regexp);  | 
5579  | 0  | }  | 
5580  |  |  | 
5581  |  | /************************************************************************  | 
5582  |  |  *                  *  | 
5583  |  |  *      The Automata interface        *  | 
5584  |  |  *                  *  | 
5585  |  |  ************************************************************************/  | 
5586  |  |  | 
5587  |  | /**  | 
5588  |  |  * Create a new automata  | 
5589  |  |  *  | 
5590  |  |  * @deprecated Internal function, don't use.  | 
5591  |  |  *  | 
5592  |  |  * @returns the new object or NULL in case of failure  | 
5593  |  |  */  | 
5594  |  | xmlAutomata *  | 
5595  | 0  | xmlNewAutomata(void) { | 
5596  | 0  |     xmlAutomataPtr ctxt;  | 
5597  |  | 
  | 
5598  | 0  |     ctxt = xmlRegNewParserCtxt(NULL);  | 
5599  | 0  |     if (ctxt == NULL)  | 
5600  | 0  |   return(NULL);  | 
5601  |  |  | 
5602  |  |     /* initialize the parser */  | 
5603  | 0  |     ctxt->state = xmlRegStatePush(ctxt);  | 
5604  | 0  |     if (ctxt->state == NULL) { | 
5605  | 0  |   xmlFreeAutomata(ctxt);  | 
5606  | 0  |   return(NULL);  | 
5607  | 0  |     }  | 
5608  | 0  |     ctxt->start = ctxt->state;  | 
5609  | 0  |     ctxt->end = NULL;  | 
5610  |  | 
  | 
5611  | 0  |     ctxt->start->type = XML_REGEXP_START_STATE;  | 
5612  | 0  |     ctxt->flags = 0;  | 
5613  |  | 
  | 
5614  | 0  |     return(ctxt);  | 
5615  | 0  | }  | 
5616  |  |  | 
5617  |  | /**  | 
5618  |  |  * Free an automata  | 
5619  |  |  *  | 
5620  |  |  * @deprecated Internal function, don't use.  | 
5621  |  |  *  | 
5622  |  |  * @param am  an automata  | 
5623  |  |  */  | 
5624  |  | void  | 
5625  | 0  | xmlFreeAutomata(xmlAutomata *am) { | 
5626  | 0  |     if (am == NULL)  | 
5627  | 0  |   return;  | 
5628  | 0  |     xmlRegFreeParserCtxt(am);  | 
5629  | 0  | }  | 
5630  |  |  | 
5631  |  | /**  | 
5632  |  |  * Set some flags on the automata  | 
5633  |  |  *  | 
5634  |  |  * @deprecated Internal function, don't use.  | 
5635  |  |  *  | 
5636  |  |  * @param am  an automata  | 
5637  |  |  * @param flags  a set of internal flags  | 
5638  |  |  */  | 
5639  |  | void  | 
5640  | 0  | xmlAutomataSetFlags(xmlAutomata *am, int flags) { | 
5641  | 0  |     if (am == NULL)  | 
5642  | 0  |   return;  | 
5643  | 0  |     am->flags |= flags;  | 
5644  | 0  | }  | 
5645  |  |  | 
5646  |  | /**  | 
5647  |  |  * Initial state lookup  | 
5648  |  |  *  | 
5649  |  |  * @deprecated Internal function, don't use.  | 
5650  |  |  *  | 
5651  |  |  * @param am  an automata  | 
5652  |  |  * @returns the initial state of the automata  | 
5653  |  |  */  | 
5654  |  | xmlAutomataState *  | 
5655  | 0  | xmlAutomataGetInitState(xmlAutomata *am) { | 
5656  | 0  |     if (am == NULL)  | 
5657  | 0  |   return(NULL);  | 
5658  | 0  |     return(am->start);  | 
5659  | 0  | }  | 
5660  |  |  | 
5661  |  | /**  | 
5662  |  |  * Makes that state a final state  | 
5663  |  |  *  | 
5664  |  |  * @deprecated Internal function, don't use.  | 
5665  |  |  *  | 
5666  |  |  * @param am  an automata  | 
5667  |  |  * @param state  a state in this automata  | 
5668  |  |  * @returns 0 or -1 in case of error  | 
5669  |  |  */  | 
5670  |  | int  | 
5671  | 0  | xmlAutomataSetFinalState(xmlAutomata *am, xmlAutomataState *state) { | 
5672  | 0  |     if ((am == NULL) || (state == NULL))  | 
5673  | 0  |   return(-1);  | 
5674  | 0  |     state->type = XML_REGEXP_FINAL_STATE;  | 
5675  | 0  |     return(0);  | 
5676  | 0  | }  | 
5677  |  |  | 
5678  |  | /**  | 
5679  |  |  * Add a transition.  | 
5680  |  |  *  | 
5681  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
5682  |  |  * and then adds a transition from the `from` state to the target state  | 
5683  |  |  * activated by the value of `token`  | 
5684  |  |  *  | 
5685  |  |  * @deprecated Internal function, don't use.  | 
5686  |  |  *  | 
5687  |  |  * @param am  an automata  | 
5688  |  |  * @param from  the starting point of the transition  | 
5689  |  |  * @param to  the target point of the transition or NULL  | 
5690  |  |  * @param token  the input string associated to that transition  | 
5691  |  |  * @param data  data passed to the callback function if the transition is activated  | 
5692  |  |  * @returns the target state or NULL in case of error  | 
5693  |  |  */  | 
5694  |  | xmlAutomataState *  | 
5695  |  | xmlAutomataNewTransition(xmlAutomata *am, xmlAutomataState *from,  | 
5696  |  |        xmlAutomataState *to, const xmlChar *token,  | 
5697  | 0  |        void *data) { | 
5698  | 0  |     xmlRegAtomPtr atom;  | 
5699  |  | 
  | 
5700  | 0  |     if ((am == NULL) || (from == NULL) || (token == NULL))  | 
5701  | 0  |   return(NULL);  | 
5702  | 0  |     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);  | 
5703  | 0  |     if (atom == NULL)  | 
5704  | 0  |         return(NULL);  | 
5705  | 0  |     atom->data = data;  | 
5706  | 0  |     atom->valuep = xmlStrdup(token);  | 
5707  | 0  |     if (atom->valuep == NULL) { | 
5708  | 0  |         xmlRegFreeAtom(atom);  | 
5709  | 0  |         xmlRegexpErrMemory(am);  | 
5710  | 0  |         return(NULL);  | 
5711  | 0  |     }  | 
5712  |  |  | 
5713  | 0  |     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { | 
5714  | 0  |         xmlRegFreeAtom(atom);  | 
5715  | 0  |   return(NULL);  | 
5716  | 0  |     }  | 
5717  | 0  |     if (to == NULL)  | 
5718  | 0  |   return(am->state);  | 
5719  | 0  |     return(to);  | 
5720  | 0  | }  | 
5721  |  |  | 
5722  |  | /**  | 
5723  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
5724  |  |  * and then adds a transition from the `from` state to the target state  | 
5725  |  |  * activated by the value of `token`  | 
5726  |  |  *  | 
5727  |  |  * @deprecated Internal function, don't use.  | 
5728  |  |  *  | 
5729  |  |  * @param am  an automata  | 
5730  |  |  * @param from  the starting point of the transition  | 
5731  |  |  * @param to  the target point of the transition or NULL  | 
5732  |  |  * @param token  the first input string associated to that transition  | 
5733  |  |  * @param token2  the second input string associated to that transition  | 
5734  |  |  * @param data  data passed to the callback function if the transition is activated  | 
5735  |  |  * @returns the target state or NULL in case of error  | 
5736  |  |  */  | 
5737  |  | xmlAutomataState *  | 
5738  |  | xmlAutomataNewTransition2(xmlAutomata *am, xmlAutomataState *from,  | 
5739  |  |         xmlAutomataState *to, const xmlChar *token,  | 
5740  | 0  |         const xmlChar *token2, void *data) { | 
5741  | 0  |     xmlRegAtomPtr atom;  | 
5742  |  | 
  | 
5743  | 0  |     if ((am == NULL) || (from == NULL) || (token == NULL))  | 
5744  | 0  |   return(NULL);  | 
5745  | 0  |     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);  | 
5746  | 0  |     if (atom == NULL)  | 
5747  | 0  |   return(NULL);  | 
5748  | 0  |     atom->data = data;  | 
5749  | 0  |     if ((token2 == NULL) || (*token2 == 0)) { | 
5750  | 0  |   atom->valuep = xmlStrdup(token);  | 
5751  | 0  |     } else { | 
5752  | 0  |   int lenn, lenp;  | 
5753  | 0  |   xmlChar *str;  | 
5754  |  | 
  | 
5755  | 0  |   lenn = strlen((char *) token2);  | 
5756  | 0  |   lenp = strlen((char *) token);  | 
5757  |  | 
  | 
5758  | 0  |   str = xmlMalloc(lenn + lenp + 2);  | 
5759  | 0  |   if (str == NULL) { | 
5760  | 0  |       xmlRegFreeAtom(atom);  | 
5761  | 0  |       return(NULL);  | 
5762  | 0  |   }  | 
5763  | 0  |   memcpy(&str[0], token, lenp);  | 
5764  | 0  |   str[lenp] = '|';  | 
5765  | 0  |   memcpy(&str[lenp + 1], token2, lenn);  | 
5766  | 0  |   str[lenn + lenp + 1] = 0;  | 
5767  |  | 
  | 
5768  | 0  |   atom->valuep = str;  | 
5769  | 0  |     }  | 
5770  |  |  | 
5771  | 0  |     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { | 
5772  | 0  |         xmlRegFreeAtom(atom);  | 
5773  | 0  |   return(NULL);  | 
5774  | 0  |     }  | 
5775  | 0  |     if (to == NULL)  | 
5776  | 0  |   return(am->state);  | 
5777  | 0  |     return(to);  | 
5778  | 0  | }  | 
5779  |  |  | 
5780  |  | /**  | 
5781  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
5782  |  |  * and then adds a transition from the `from` state to the target state  | 
5783  |  |  * activated by any value except (`token`,`token2`)  | 
5784  |  |  * Note that if `token2` is not NULL, then (X, NULL) won't match to follow  | 
5785  |  |  * the semantic of XSD \#\#other  | 
5786  |  |  *  | 
5787  |  |  * @deprecated Internal function, don't use.  | 
5788  |  |  *  | 
5789  |  |  * @param am  an automata  | 
5790  |  |  * @param from  the starting point of the transition  | 
5791  |  |  * @param to  the target point of the transition or NULL  | 
5792  |  |  * @param token  the first input string associated to that transition  | 
5793  |  |  * @param token2  the second input string associated to that transition  | 
5794  |  |  * @param data  data passed to the callback function if the transition is activated  | 
5795  |  |  * @returns the target state or NULL in case of error  | 
5796  |  |  */  | 
5797  |  | xmlAutomataState *  | 
5798  |  | xmlAutomataNewNegTrans(xmlAutomata *am, xmlAutomataState *from,  | 
5799  |  |            xmlAutomataState *to, const xmlChar *token,  | 
5800  | 0  |            const xmlChar *token2, void *data) { | 
5801  | 0  |     xmlRegAtomPtr atom;  | 
5802  | 0  |     xmlChar err_msg[200];  | 
5803  |  | 
  | 
5804  | 0  |     if ((am == NULL) || (from == NULL) || (token == NULL))  | 
5805  | 0  |   return(NULL);  | 
5806  | 0  |     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);  | 
5807  | 0  |     if (atom == NULL)  | 
5808  | 0  |   return(NULL);  | 
5809  | 0  |     atom->data = data;  | 
5810  | 0  |     atom->neg = 1;  | 
5811  | 0  |     if ((token2 == NULL) || (*token2 == 0)) { | 
5812  | 0  |   atom->valuep = xmlStrdup(token);  | 
5813  | 0  |     } else { | 
5814  | 0  |   int lenn, lenp;  | 
5815  | 0  |   xmlChar *str;  | 
5816  |  | 
  | 
5817  | 0  |   lenn = strlen((char *) token2);  | 
5818  | 0  |   lenp = strlen((char *) token);  | 
5819  |  | 
  | 
5820  | 0  |   str = xmlMalloc(lenn + lenp + 2);  | 
5821  | 0  |   if (str == NULL) { | 
5822  | 0  |       xmlRegFreeAtom(atom);  | 
5823  | 0  |       return(NULL);  | 
5824  | 0  |   }  | 
5825  | 0  |   memcpy(&str[0], token, lenp);  | 
5826  | 0  |   str[lenp] = '|';  | 
5827  | 0  |   memcpy(&str[lenp + 1], token2, lenn);  | 
5828  | 0  |   str[lenn + lenp + 1] = 0;  | 
5829  |  | 
  | 
5830  | 0  |   atom->valuep = str;  | 
5831  | 0  |     }  | 
5832  | 0  |     snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);  | 
5833  | 0  |     err_msg[199] = 0;  | 
5834  | 0  |     atom->valuep2 = xmlStrdup(err_msg);  | 
5835  |  | 
  | 
5836  | 0  |     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { | 
5837  | 0  |         xmlRegFreeAtom(atom);  | 
5838  | 0  |   return(NULL);  | 
5839  | 0  |     }  | 
5840  | 0  |     am->negs++;  | 
5841  | 0  |     if (to == NULL)  | 
5842  | 0  |   return(am->state);  | 
5843  | 0  |     return(to);  | 
5844  | 0  | }  | 
5845  |  |  | 
5846  |  | /**  | 
5847  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
5848  |  |  * and then adds a transition from the `from` state to the target state  | 
5849  |  |  * activated by a succession of input of value `token` and `token2` and  | 
5850  |  |  * whose number is between `min` and `max`  | 
5851  |  |  *  | 
5852  |  |  * @deprecated Internal function, don't use.  | 
5853  |  |  *  | 
5854  |  |  * @param am  an automata  | 
5855  |  |  * @param from  the starting point of the transition  | 
5856  |  |  * @param to  the target point of the transition or NULL  | 
5857  |  |  * @param token  the input string associated to that transition  | 
5858  |  |  * @param token2  the second input string associated to that transition  | 
5859  |  |  * @param min  the minimum successive occurrences of token  | 
5860  |  |  * @param max  the maximum successive occurrences of token  | 
5861  |  |  * @param data  data associated to the transition  | 
5862  |  |  * @returns the target state or NULL in case of error  | 
5863  |  |  */  | 
5864  |  | xmlAutomataState *  | 
5865  |  | xmlAutomataNewCountTrans2(xmlAutomata *am, xmlAutomataState *from,  | 
5866  |  |        xmlAutomataState *to, const xmlChar *token,  | 
5867  |  |        const xmlChar *token2,  | 
5868  | 0  |        int min, int max, void *data) { | 
5869  | 0  |     xmlRegAtomPtr atom;  | 
5870  | 0  |     int counter;  | 
5871  |  | 
  | 
5872  | 0  |     if ((am == NULL) || (from == NULL) || (token == NULL))  | 
5873  | 0  |   return(NULL);  | 
5874  | 0  |     if (min < 0)  | 
5875  | 0  |   return(NULL);  | 
5876  | 0  |     if ((max < min) || (max < 1))  | 
5877  | 0  |   return(NULL);  | 
5878  | 0  |     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);  | 
5879  | 0  |     if (atom == NULL)  | 
5880  | 0  |   return(NULL);  | 
5881  | 0  |     if ((token2 == NULL) || (*token2 == 0)) { | 
5882  | 0  |   atom->valuep = xmlStrdup(token);  | 
5883  | 0  |         if (atom->valuep == NULL)  | 
5884  | 0  |             goto error;  | 
5885  | 0  |     } else { | 
5886  | 0  |   int lenn, lenp;  | 
5887  | 0  |   xmlChar *str;  | 
5888  |  | 
  | 
5889  | 0  |   lenn = strlen((char *) token2);  | 
5890  | 0  |   lenp = strlen((char *) token);  | 
5891  |  | 
  | 
5892  | 0  |   str = xmlMalloc(lenn + lenp + 2);  | 
5893  | 0  |   if (str == NULL)  | 
5894  | 0  |       goto error;  | 
5895  | 0  |   memcpy(&str[0], token, lenp);  | 
5896  | 0  |   str[lenp] = '|';  | 
5897  | 0  |   memcpy(&str[lenp + 1], token2, lenn);  | 
5898  | 0  |   str[lenn + lenp + 1] = 0;  | 
5899  |  | 
  | 
5900  | 0  |   atom->valuep = str;  | 
5901  | 0  |     }  | 
5902  | 0  |     atom->data = data;  | 
5903  | 0  |     if (min == 0)  | 
5904  | 0  |   atom->min = 1;  | 
5905  | 0  |     else  | 
5906  | 0  |   atom->min = min;  | 
5907  | 0  |     atom->max = max;  | 
5908  |  |  | 
5909  |  |     /*  | 
5910  |  |      * associate a counter to the transition.  | 
5911  |  |      */  | 
5912  | 0  |     counter = xmlRegGetCounter(am);  | 
5913  | 0  |     if (counter < 0)  | 
5914  | 0  |         goto error;  | 
5915  | 0  |     am->counters[counter].min = min;  | 
5916  | 0  |     am->counters[counter].max = max;  | 
5917  |  |  | 
5918  |  |     /* xmlFAGenerateTransitions(am, from, to, atom); */  | 
5919  | 0  |     if (to == NULL) { | 
5920  | 0  |   to = xmlRegStatePush(am);  | 
5921  | 0  |         if (to == NULL)  | 
5922  | 0  |             goto error;  | 
5923  | 0  |     }  | 
5924  | 0  |     xmlRegStateAddTrans(am, from, atom, to, counter, -1);  | 
5925  | 0  |     if (xmlRegAtomPush(am, atom) < 0)  | 
5926  | 0  |         goto error;  | 
5927  | 0  |     am->state = to;  | 
5928  |  | 
  | 
5929  | 0  |     if (to == NULL)  | 
5930  | 0  |   to = am->state;  | 
5931  | 0  |     if (to == NULL)  | 
5932  | 0  |   return(NULL);  | 
5933  | 0  |     if (min == 0)  | 
5934  | 0  |   xmlFAGenerateEpsilonTransition(am, from, to);  | 
5935  | 0  |     return(to);  | 
5936  |  |  | 
5937  | 0  | error:  | 
5938  | 0  |     xmlRegFreeAtom(atom);  | 
5939  | 0  |     return(NULL);  | 
5940  | 0  | }  | 
5941  |  |  | 
5942  |  | /**  | 
5943  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
5944  |  |  * and then adds a transition from the `from` state to the target state  | 
5945  |  |  * activated by a succession of input of value `token` and whose number  | 
5946  |  |  * is between `min` and `max`  | 
5947  |  |  *  | 
5948  |  |  * @deprecated Internal function, don't use.  | 
5949  |  |  *  | 
5950  |  |  * @param am  an automata  | 
5951  |  |  * @param from  the starting point of the transition  | 
5952  |  |  * @param to  the target point of the transition or NULL  | 
5953  |  |  * @param token  the input string associated to that transition  | 
5954  |  |  * @param min  the minimum successive occurrences of token  | 
5955  |  |  * @param max  the maximum successive occurrences of token  | 
5956  |  |  * @param data  data associated to the transition  | 
5957  |  |  * @returns the target state or NULL in case of error  | 
5958  |  |  */  | 
5959  |  | xmlAutomataState *  | 
5960  |  | xmlAutomataNewCountTrans(xmlAutomata *am, xmlAutomataState *from,  | 
5961  |  |        xmlAutomataState *to, const xmlChar *token,  | 
5962  | 0  |        int min, int max, void *data) { | 
5963  | 0  |     xmlRegAtomPtr atom;  | 
5964  | 0  |     int counter;  | 
5965  |  | 
  | 
5966  | 0  |     if ((am == NULL) || (from == NULL) || (token == NULL))  | 
5967  | 0  |   return(NULL);  | 
5968  | 0  |     if (min < 0)  | 
5969  | 0  |   return(NULL);  | 
5970  | 0  |     if ((max < min) || (max < 1))  | 
5971  | 0  |   return(NULL);  | 
5972  | 0  |     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);  | 
5973  | 0  |     if (atom == NULL)  | 
5974  | 0  |   return(NULL);  | 
5975  | 0  |     atom->valuep = xmlStrdup(token);  | 
5976  | 0  |     if (atom->valuep == NULL)  | 
5977  | 0  |         goto error;  | 
5978  | 0  |     atom->data = data;  | 
5979  | 0  |     if (min == 0)  | 
5980  | 0  |   atom->min = 1;  | 
5981  | 0  |     else  | 
5982  | 0  |   atom->min = min;  | 
5983  | 0  |     atom->max = max;  | 
5984  |  |  | 
5985  |  |     /*  | 
5986  |  |      * associate a counter to the transition.  | 
5987  |  |      */  | 
5988  | 0  |     counter = xmlRegGetCounter(am);  | 
5989  | 0  |     if (counter < 0)  | 
5990  | 0  |         goto error;  | 
5991  | 0  |     am->counters[counter].min = min;  | 
5992  | 0  |     am->counters[counter].max = max;  | 
5993  |  |  | 
5994  |  |     /* xmlFAGenerateTransitions(am, from, to, atom); */  | 
5995  | 0  |     if (to == NULL) { | 
5996  | 0  |   to = xmlRegStatePush(am);  | 
5997  | 0  |         if (to == NULL)  | 
5998  | 0  |             goto error;  | 
5999  | 0  |     }  | 
6000  | 0  |     xmlRegStateAddTrans(am, from, atom, to, counter, -1);  | 
6001  | 0  |     if (xmlRegAtomPush(am, atom) < 0)  | 
6002  | 0  |         goto error;  | 
6003  | 0  |     am->state = to;  | 
6004  |  | 
  | 
6005  | 0  |     if (to == NULL)  | 
6006  | 0  |   to = am->state;  | 
6007  | 0  |     if (to == NULL)  | 
6008  | 0  |   return(NULL);  | 
6009  | 0  |     if (min == 0)  | 
6010  | 0  |   xmlFAGenerateEpsilonTransition(am, from, to);  | 
6011  | 0  |     return(to);  | 
6012  |  |  | 
6013  | 0  | error:  | 
6014  | 0  |     xmlRegFreeAtom(atom);  | 
6015  | 0  |     return(NULL);  | 
6016  | 0  | }  | 
6017  |  |  | 
6018  |  | /**  | 
6019  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
6020  |  |  * and then adds a transition from the `from` state to the target state  | 
6021  |  |  * activated by a succession of input of value `token` and `token2` and whose  | 
6022  |  |  * number is between `min` and `max`, moreover that transition can only be  | 
6023  |  |  * crossed once.  | 
6024  |  |  *  | 
6025  |  |  * @deprecated Internal function, don't use.  | 
6026  |  |  *  | 
6027  |  |  * @param am  an automata  | 
6028  |  |  * @param from  the starting point of the transition  | 
6029  |  |  * @param to  the target point of the transition or NULL  | 
6030  |  |  * @param token  the input string associated to that transition  | 
6031  |  |  * @param token2  the second input string associated to that transition  | 
6032  |  |  * @param min  the minimum successive occurrences of token  | 
6033  |  |  * @param max  the maximum successive occurrences of token  | 
6034  |  |  * @param data  data associated to the transition  | 
6035  |  |  * @returns the target state or NULL in case of error  | 
6036  |  |  */  | 
6037  |  | xmlAutomataState *  | 
6038  |  | xmlAutomataNewOnceTrans2(xmlAutomata *am, xmlAutomataState *from,  | 
6039  |  |        xmlAutomataState *to, const xmlChar *token,  | 
6040  |  |        const xmlChar *token2,  | 
6041  | 0  |        int min, int max, void *data) { | 
6042  | 0  |     xmlRegAtomPtr atom;  | 
6043  | 0  |     int counter;  | 
6044  |  | 
  | 
6045  | 0  |     if ((am == NULL) || (from == NULL) || (token == NULL))  | 
6046  | 0  |   return(NULL);  | 
6047  | 0  |     if (min < 1)  | 
6048  | 0  |   return(NULL);  | 
6049  | 0  |     if (max < min)  | 
6050  | 0  |   return(NULL);  | 
6051  | 0  |     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);  | 
6052  | 0  |     if (atom == NULL)  | 
6053  | 0  |   return(NULL);  | 
6054  | 0  |     if ((token2 == NULL) || (*token2 == 0)) { | 
6055  | 0  |   atom->valuep = xmlStrdup(token);  | 
6056  | 0  |         if (atom->valuep == NULL)  | 
6057  | 0  |             goto error;  | 
6058  | 0  |     } else { | 
6059  | 0  |   int lenn, lenp;  | 
6060  | 0  |   xmlChar *str;  | 
6061  |  | 
  | 
6062  | 0  |   lenn = strlen((char *) token2);  | 
6063  | 0  |   lenp = strlen((char *) token);  | 
6064  |  | 
  | 
6065  | 0  |   str = xmlMalloc(lenn + lenp + 2);  | 
6066  | 0  |   if (str == NULL)  | 
6067  | 0  |       goto error;  | 
6068  | 0  |   memcpy(&str[0], token, lenp);  | 
6069  | 0  |   str[lenp] = '|';  | 
6070  | 0  |   memcpy(&str[lenp + 1], token2, lenn);  | 
6071  | 0  |   str[lenn + lenp + 1] = 0;  | 
6072  |  | 
  | 
6073  | 0  |   atom->valuep = str;  | 
6074  | 0  |     }  | 
6075  | 0  |     atom->data = data;  | 
6076  | 0  |     atom->quant = XML_REGEXP_QUANT_ONCEONLY;  | 
6077  | 0  |     atom->min = min;  | 
6078  | 0  |     atom->max = max;  | 
6079  |  |     /*  | 
6080  |  |      * associate a counter to the transition.  | 
6081  |  |      */  | 
6082  | 0  |     counter = xmlRegGetCounter(am);  | 
6083  | 0  |     if (counter < 0)  | 
6084  | 0  |         goto error;  | 
6085  | 0  |     am->counters[counter].min = 1;  | 
6086  | 0  |     am->counters[counter].max = 1;  | 
6087  |  |  | 
6088  |  |     /* xmlFAGenerateTransitions(am, from, to, atom); */  | 
6089  | 0  |     if (to == NULL) { | 
6090  | 0  |   to = xmlRegStatePush(am);  | 
6091  | 0  |         if (to == NULL)  | 
6092  | 0  |             goto error;  | 
6093  | 0  |     }  | 
6094  | 0  |     xmlRegStateAddTrans(am, from, atom, to, counter, -1);  | 
6095  | 0  |     if (xmlRegAtomPush(am, atom) < 0)  | 
6096  | 0  |         goto error;  | 
6097  | 0  |     am->state = to;  | 
6098  | 0  |     return(to);  | 
6099  |  |  | 
6100  | 0  | error:  | 
6101  | 0  |     xmlRegFreeAtom(atom);  | 
6102  | 0  |     return(NULL);  | 
6103  | 0  | }  | 
6104  |  |  | 
6105  |  |  | 
6106  |  |  | 
6107  |  | /**  | 
6108  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
6109  |  |  * and then adds a transition from the `from` state to the target state  | 
6110  |  |  * activated by a succession of input of value `token` and whose number  | 
6111  |  |  * is between `min` and `max`, moreover that transition can only be crossed  | 
6112  |  |  * once.  | 
6113  |  |  *  | 
6114  |  |  * @deprecated Internal function, don't use.  | 
6115  |  |  *  | 
6116  |  |  * @param am  an automata  | 
6117  |  |  * @param from  the starting point of the transition  | 
6118  |  |  * @param to  the target point of the transition or NULL  | 
6119  |  |  * @param token  the input string associated to that transition  | 
6120  |  |  * @param min  the minimum successive occurrences of token  | 
6121  |  |  * @param max  the maximum successive occurrences of token  | 
6122  |  |  * @param data  data associated to the transition  | 
6123  |  |  * @returns the target state or NULL in case of error  | 
6124  |  |  */  | 
6125  |  | xmlAutomataState *  | 
6126  |  | xmlAutomataNewOnceTrans(xmlAutomata *am, xmlAutomataState *from,  | 
6127  |  |        xmlAutomataState *to, const xmlChar *token,  | 
6128  | 0  |        int min, int max, void *data) { | 
6129  | 0  |     xmlRegAtomPtr atom;  | 
6130  | 0  |     int counter;  | 
6131  |  | 
  | 
6132  | 0  |     if ((am == NULL) || (from == NULL) || (token == NULL))  | 
6133  | 0  |   return(NULL);  | 
6134  | 0  |     if (min < 1)  | 
6135  | 0  |   return(NULL);  | 
6136  | 0  |     if (max < min)  | 
6137  | 0  |   return(NULL);  | 
6138  | 0  |     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);  | 
6139  | 0  |     if (atom == NULL)  | 
6140  | 0  |   return(NULL);  | 
6141  | 0  |     atom->valuep = xmlStrdup(token);  | 
6142  | 0  |     atom->data = data;  | 
6143  | 0  |     atom->quant = XML_REGEXP_QUANT_ONCEONLY;  | 
6144  | 0  |     atom->min = min;  | 
6145  | 0  |     atom->max = max;  | 
6146  |  |     /*  | 
6147  |  |      * associate a counter to the transition.  | 
6148  |  |      */  | 
6149  | 0  |     counter = xmlRegGetCounter(am);  | 
6150  | 0  |     if (counter < 0)  | 
6151  | 0  |         goto error;  | 
6152  | 0  |     am->counters[counter].min = 1;  | 
6153  | 0  |     am->counters[counter].max = 1;  | 
6154  |  |  | 
6155  |  |     /* xmlFAGenerateTransitions(am, from, to, atom); */  | 
6156  | 0  |     if (to == NULL) { | 
6157  | 0  |   to = xmlRegStatePush(am);  | 
6158  | 0  |         if (to == NULL)  | 
6159  | 0  |             goto error;  | 
6160  | 0  |     }  | 
6161  | 0  |     xmlRegStateAddTrans(am, from, atom, to, counter, -1);  | 
6162  | 0  |     if (xmlRegAtomPush(am, atom) < 0)  | 
6163  | 0  |         goto error;  | 
6164  | 0  |     am->state = to;  | 
6165  | 0  |     return(to);  | 
6166  |  |  | 
6167  | 0  | error:  | 
6168  | 0  |     xmlRegFreeAtom(atom);  | 
6169  | 0  |     return(NULL);  | 
6170  | 0  | }  | 
6171  |  |  | 
6172  |  | /**  | 
6173  |  |  * Create a new disconnected state in the automata  | 
6174  |  |  *  | 
6175  |  |  * @deprecated Internal function, don't use.  | 
6176  |  |  *  | 
6177  |  |  * @param am  an automata  | 
6178  |  |  * @returns the new state or NULL in case of error  | 
6179  |  |  */  | 
6180  |  | xmlAutomataState *  | 
6181  | 0  | xmlAutomataNewState(xmlAutomata *am) { | 
6182  | 0  |     if (am == NULL)  | 
6183  | 0  |   return(NULL);  | 
6184  | 0  |     return(xmlRegStatePush(am));  | 
6185  | 0  | }  | 
6186  |  |  | 
6187  |  | /**  | 
6188  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
6189  |  |  * and then adds an epsilon transition from the `from` state to the  | 
6190  |  |  * target state  | 
6191  |  |  *  | 
6192  |  |  * @deprecated Internal function, don't use.  | 
6193  |  |  *  | 
6194  |  |  * @param am  an automata  | 
6195  |  |  * @param from  the starting point of the transition  | 
6196  |  |  * @param to  the target point of the transition or NULL  | 
6197  |  |  * @returns the target state or NULL in case of error  | 
6198  |  |  */  | 
6199  |  | xmlAutomataState *  | 
6200  |  | xmlAutomataNewEpsilon(xmlAutomata *am, xmlAutomataState *from,  | 
6201  | 0  |           xmlAutomataState *to) { | 
6202  | 0  |     if ((am == NULL) || (from == NULL))  | 
6203  | 0  |   return(NULL);  | 
6204  | 0  |     xmlFAGenerateEpsilonTransition(am, from, to);  | 
6205  | 0  |     if (to == NULL)  | 
6206  | 0  |   return(am->state);  | 
6207  | 0  |     return(to);  | 
6208  | 0  | }  | 
6209  |  |  | 
6210  |  | /**  | 
6211  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
6212  |  |  * and then adds a an ALL transition from the `from` state to the  | 
6213  |  |  * target state. That transition is an epsilon transition allowed only when  | 
6214  |  |  * all transitions from the `from` node have been activated.  | 
6215  |  |  *  | 
6216  |  |  * @deprecated Internal function, don't use.  | 
6217  |  |  *  | 
6218  |  |  * @param am  an automata  | 
6219  |  |  * @param from  the starting point of the transition  | 
6220  |  |  * @param to  the target point of the transition or NULL  | 
6221  |  |  * @param lax  allow to transition if not all all transitions have been activated  | 
6222  |  |  * @returns the target state or NULL in case of error  | 
6223  |  |  */  | 
6224  |  | xmlAutomataState *  | 
6225  |  | xmlAutomataNewAllTrans(xmlAutomata *am, xmlAutomataState *from,  | 
6226  | 0  |            xmlAutomataState *to, int lax) { | 
6227  | 0  |     if ((am == NULL) || (from == NULL))  | 
6228  | 0  |   return(NULL);  | 
6229  | 0  |     xmlFAGenerateAllTransition(am, from, to, lax);  | 
6230  | 0  |     if (to == NULL)  | 
6231  | 0  |   return(am->state);  | 
6232  | 0  |     return(to);  | 
6233  | 0  | }  | 
6234  |  |  | 
6235  |  | /**  | 
6236  |  |  * Create a new counter  | 
6237  |  |  *  | 
6238  |  |  * @deprecated Internal function, don't use.  | 
6239  |  |  *  | 
6240  |  |  * @param am  an automata  | 
6241  |  |  * @param min  the minimal value on the counter  | 
6242  |  |  * @param max  the maximal value on the counter  | 
6243  |  |  * @returns the counter number or -1 in case of error  | 
6244  |  |  */  | 
6245  |  | int  | 
6246  | 0  | xmlAutomataNewCounter(xmlAutomata *am, int min, int max) { | 
6247  | 0  |     int ret;  | 
6248  |  | 
  | 
6249  | 0  |     if (am == NULL)  | 
6250  | 0  |   return(-1);  | 
6251  |  |  | 
6252  | 0  |     ret = xmlRegGetCounter(am);  | 
6253  | 0  |     if (ret < 0)  | 
6254  | 0  |   return(-1);  | 
6255  | 0  |     am->counters[ret].min = min;  | 
6256  | 0  |     am->counters[ret].max = max;  | 
6257  | 0  |     return(ret);  | 
6258  | 0  | }  | 
6259  |  |  | 
6260  |  | /**  | 
6261  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
6262  |  |  * and then adds an epsilon transition from the `from` state to the target state  | 
6263  |  |  * which will increment the counter provided  | 
6264  |  |  *  | 
6265  |  |  * @deprecated Internal function, don't use.  | 
6266  |  |  *  | 
6267  |  |  * @param am  an automata  | 
6268  |  |  * @param from  the starting point of the transition  | 
6269  |  |  * @param to  the target point of the transition or NULL  | 
6270  |  |  * @param counter  the counter associated to that transition  | 
6271  |  |  * @returns the target state or NULL in case of error  | 
6272  |  |  */  | 
6273  |  | xmlAutomataState *  | 
6274  |  | xmlAutomataNewCountedTrans(xmlAutomata *am, xmlAutomataState *from,  | 
6275  | 0  |     xmlAutomataState *to, int counter) { | 
6276  | 0  |     if ((am == NULL) || (from == NULL) || (counter < 0))  | 
6277  | 0  |   return(NULL);  | 
6278  | 0  |     xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);  | 
6279  | 0  |     if (to == NULL)  | 
6280  | 0  |   return(am->state);  | 
6281  | 0  |     return(to);  | 
6282  | 0  | }  | 
6283  |  |  | 
6284  |  | /**  | 
6285  |  |  * If `to` is NULL, this creates first a new target state in the automata  | 
6286  |  |  * and then adds an epsilon transition from the `from` state to the target state  | 
6287  |  |  * which will be allowed only if the counter is within the right range.  | 
6288  |  |  *  | 
6289  |  |  * @deprecated Internal function, don't use.  | 
6290  |  |  *  | 
6291  |  |  * @param am  an automata  | 
6292  |  |  * @param from  the starting point of the transition  | 
6293  |  |  * @param to  the target point of the transition or NULL  | 
6294  |  |  * @param counter  the counter associated to that transition  | 
6295  |  |  * @returns the target state or NULL in case of error  | 
6296  |  |  */  | 
6297  |  | xmlAutomataState *  | 
6298  |  | xmlAutomataNewCounterTrans(xmlAutomata *am, xmlAutomataState *from,  | 
6299  | 0  |     xmlAutomataState *to, int counter) { | 
6300  | 0  |     if ((am == NULL) || (from == NULL) || (counter < 0))  | 
6301  | 0  |   return(NULL);  | 
6302  | 0  |     xmlFAGenerateCountedTransition(am, from, to, counter);  | 
6303  | 0  |     if (to == NULL)  | 
6304  | 0  |   return(am->state);  | 
6305  | 0  |     return(to);  | 
6306  | 0  | }  | 
6307  |  |  | 
6308  |  | /**  | 
6309  |  |  * Compile the automata into a Reg Exp ready for being executed.  | 
6310  |  |  * The automata should be free after this point.  | 
6311  |  |  *  | 
6312  |  |  * @deprecated Internal function, don't use.  | 
6313  |  |  *  | 
6314  |  |  * @param am  an automata  | 
6315  |  |  * @returns the compiled regexp or NULL in case of error  | 
6316  |  |  */  | 
6317  |  | xmlRegexp *  | 
6318  | 0  | xmlAutomataCompile(xmlAutomata *am) { | 
6319  | 0  |     xmlRegexpPtr ret;  | 
6320  |  | 
  | 
6321  | 0  |     if ((am == NULL) || (am->error != 0)) return(NULL);  | 
6322  | 0  |     xmlFAEliminateEpsilonTransitions(am);  | 
6323  | 0  |     if (am->error != 0)  | 
6324  | 0  |         return(NULL);  | 
6325  |  |     /* xmlFAComputesDeterminism(am); */  | 
6326  | 0  |     ret = xmlRegEpxFromParse(am);  | 
6327  |  | 
  | 
6328  | 0  |     return(ret);  | 
6329  | 0  | }  | 
6330  |  |  | 
6331  |  | /**  | 
6332  |  |  * Checks if an automata is determinist.  | 
6333  |  |  *  | 
6334  |  |  * @deprecated Internal function, don't use.  | 
6335  |  |  *  | 
6336  |  |  * @param am  an automata  | 
6337  |  |  * @returns 1 if true, 0 if not, and -1 in case of error  | 
6338  |  |  */  | 
6339  |  | int  | 
6340  | 0  | xmlAutomataIsDeterminist(xmlAutomata *am) { | 
6341  | 0  |     int ret;  | 
6342  |  | 
  | 
6343  | 0  |     if (am == NULL)  | 
6344  | 0  |   return(-1);  | 
6345  |  |  | 
6346  | 0  |     ret = xmlFAComputesDeterminism(am);  | 
6347  | 0  |     return(ret);  | 
6348  | 0  | }  | 
6349  |  |  | 
6350  |  | #endif /* LIBXML_REGEXP_ENABLED */  |