Coverage Report

Created: 2024-06-09 09:18

/src/libxml2/xmlregexp.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * regexp.c: generic and extensible Regular Expression engine
3
 *
4
 * Basically designed with the purpose of compiling regexps for
5
 * the variety of validation/schemas mechanisms now available in
6
 * XML related specifications these include:
7
 *    - XML-1.0 DTD validation
8
 *    - XML Schemas structure part 1
9
 *    - XML Schemas Datatypes part 2 especially Appendix F
10
 *    - RELAX-NG/TREX i.e. the counter proposal
11
 *
12
 * See Copyright for the status of this software.
13
 *
14
 * Daniel Veillard <veillard@redhat.com>
15
 */
16
17
#define IN_LIBXML
18
#include "libxml.h"
19
20
#ifdef LIBXML_REGEXP_ENABLED
21
22
/* #define DEBUG_ERR */
23
24
#include <stdio.h>
25
#include <string.h>
26
#include <limits.h>
27
28
#include <libxml/tree.h>
29
#include <libxml/parserInternals.h>
30
#include <libxml/xmlregexp.h>
31
#include <libxml/xmlautomata.h>
32
#include <libxml/xmlunicode.h>
33
34
#include "private/error.h"
35
#include "private/regexp.h"
36
37
#ifndef SIZE_MAX
38
68.4k
#define SIZE_MAX ((size_t) -1)
39
#endif
40
41
/* #define DEBUG_REGEXP_GRAPH */
42
/* #define DEBUG_REGEXP_EXEC */
43
/* #define DEBUG_PUSH */
44
/* #define DEBUG_COMPACTION */
45
46
334
#define MAX_PUSH 10000000
47
48
/*
 * Parser helper macros. Every one of them reads or writes a variable named
 * `ctxt` that must be in scope at the point of use.
 *
 * ERROR(str) stores XML_REGEXP_COMPILE_ERROR in ctxt->error and then calls
 * xmlRegexpErrCompile(ctxt, str).
 * NOTE(review): ERROR expands to two separate statements and NEXTL carries
 * its own trailing ';', so neither behaves as a single statement when used
 * as the unbraced body of an if/else. Call sites are outside this view;
 * confirm before changing either macro.
 *
 * CUR_SCHAR(s, l) takes the address of `l`, so `l` must be an lvalue.
 */
#ifdef ERROR
49
#undef ERROR
50
#endif
51
#define ERROR(str)              \
52
0
    ctxt->error = XML_REGEXP_COMPILE_ERROR;       \
53
0
    xmlRegexpErrCompile(ctxt, str);
54
0
#define NEXT ctxt->cur++
55
0
#define CUR (*(ctxt->cur))
56
0
#define NXT(index) (ctxt->cur[index])
57
58
0
#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
59
0
#define NEXTL(l) ctxt->cur += l;
60
0
#define XML_REG_STRING_SEPARATOR '|'
61
/*
62
 * Need PREV to check on a '-' within a Character Group. May only be used
63
 * when it's guaranteed that cur is not at the beginning of ctxt->string!
64
 */
65
0
#define PREV (ctxt->cur[-1])
66
67
/**
68
 * TODO:
69
 *
70
 * macro to flag unimplemented blocks
71
 */
72
#define TODO                \
73
0
    xmlGenericError(xmlGenericErrorContext,       \
74
0
      "Unimplemented block at %s:%d\n",       \
75
0
            __FILE__, __LINE__);
76
77
/************************************************************************
78
 *                  *
79
 *      Datatypes and structures      *
80
 *                  *
81
 ************************************************************************/
82
83
/*
84
 * Note: the order of the enums below is significant, do not shuffle
85
 */
86
/*
 * Kind of an atom or of a character range: this is the type of the `type`
 * field of both struct _xmlRegAtom and struct _xmlRegRange.
 * The first group of values is numbered from 1 (XML_REGEXP_EPSILON); the
 * second group starts at 100 (XML_REGEXP_LETTER) and ends with
 * XML_REGEXP_BLOCK_NAME.
 * NOTE(review): the second group presumably mirrors the Unicode general
 * categories; confirm against the matching code, which is outside this view.
 */
typedef enum {
87
    XML_REGEXP_EPSILON = 1,
88
    XML_REGEXP_CHARVAL,
89
    XML_REGEXP_RANGES,
90
    XML_REGEXP_SUBREG,  /* used for () sub regexps */
91
    XML_REGEXP_STRING,
92
    XML_REGEXP_ANYCHAR, /* . */
93
    XML_REGEXP_ANYSPACE, /* \s */
94
    XML_REGEXP_NOTSPACE, /* \S */
95
    XML_REGEXP_INITNAME, /* \l */
96
    XML_REGEXP_NOTINITNAME, /* \L */
97
    XML_REGEXP_NAMECHAR, /* \c */
98
    XML_REGEXP_NOTNAMECHAR, /* \C */
99
    XML_REGEXP_DECIMAL, /* \d */
100
    XML_REGEXP_NOTDECIMAL, /* \D */
101
    XML_REGEXP_REALCHAR, /* \w */
102
    XML_REGEXP_NOTREALCHAR, /* \W */
103
    XML_REGEXP_LETTER = 100,
104
    XML_REGEXP_LETTER_UPPERCASE,
105
    XML_REGEXP_LETTER_LOWERCASE,
106
    XML_REGEXP_LETTER_TITLECASE,
107
    XML_REGEXP_LETTER_MODIFIER,
108
    XML_REGEXP_LETTER_OTHERS,
109
    XML_REGEXP_MARK,
110
    XML_REGEXP_MARK_NONSPACING,
111
    XML_REGEXP_MARK_SPACECOMBINING,
112
    XML_REGEXP_MARK_ENCLOSING,
113
    XML_REGEXP_NUMBER,
114
    XML_REGEXP_NUMBER_DECIMAL,
115
    XML_REGEXP_NUMBER_LETTER,
116
    XML_REGEXP_NUMBER_OTHERS,
117
    XML_REGEXP_PUNCT,
118
    XML_REGEXP_PUNCT_CONNECTOR,
119
    XML_REGEXP_PUNCT_DASH,
120
    XML_REGEXP_PUNCT_OPEN,
121
    XML_REGEXP_PUNCT_CLOSE,
122
    XML_REGEXP_PUNCT_INITQUOTE,
123
    XML_REGEXP_PUNCT_FINQUOTE,
124
    XML_REGEXP_PUNCT_OTHERS,
125
    XML_REGEXP_SEPAR,
126
    XML_REGEXP_SEPAR_SPACE,
127
    XML_REGEXP_SEPAR_LINE,
128
    XML_REGEXP_SEPAR_PARA,
129
    XML_REGEXP_SYMBOL,
130
    XML_REGEXP_SYMBOL_MATH,
131
    XML_REGEXP_SYMBOL_CURRENCY,
132
    XML_REGEXP_SYMBOL_MODIFIER,
133
    XML_REGEXP_SYMBOL_OTHERS,
134
    XML_REGEXP_OTHER,
135
    XML_REGEXP_OTHER_CONTROL,
136
    XML_REGEXP_OTHER_FORMAT,
137
    XML_REGEXP_OTHER_PRIVATE,
138
    XML_REGEXP_OTHER_NA,
139
    XML_REGEXP_BLOCK_NAME
140
} xmlRegAtomType;
141
142
/*
 * Quantifier attached to an atom (the `quant` field of struct _xmlRegAtom).
 * xmlRegNewAtom() initializes new atoms with XML_REGEXP_QUANT_ONCE.
 */
typedef enum {
143
    XML_REGEXP_QUANT_EPSILON = 1,
144
    XML_REGEXP_QUANT_ONCE,
145
    XML_REGEXP_QUANT_OPT,
146
    XML_REGEXP_QUANT_MULT,
147
    XML_REGEXP_QUANT_PLUS,
148
    XML_REGEXP_QUANT_ONCEONLY,
149
    XML_REGEXP_QUANT_ALL,
150
    XML_REGEXP_QUANT_RANGE
151
} xmlRegQuantType;
152
153
/*
 * Role of an automaton state (the `type` field of struct _xmlAutomataState).
 * xmlRegNewState() creates states as XML_REGEXP_TRANS_STATE, and
 * xmlRegEpxFromParse() stores this value in the first cell of each state's
 * row of the compact transition table.
 */
typedef enum {
154
    XML_REGEXP_START_STATE = 1,
155
    XML_REGEXP_FINAL_STATE,
156
    XML_REGEXP_TRANS_STATE,
157
    XML_REGEXP_SINK_STATE,
158
    XML_REGEXP_UNREACH_STATE
159
} xmlRegStateType;
160
161
/*
 * Marker value used by the `mark`, `markd` and `reached` fields of
 * struct _xmlAutomataState. New states start as XML_REGEXP_MARK_NORMAL.
 * NOTE(review): presumably used by graph traversals elsewhere in the file.
 */
typedef enum {
162
    XML_REGEXP_MARK_NORMAL = 0,
163
    XML_REGEXP_MARK_START,
164
    XML_REGEXP_MARK_VISITED
165
} xmlRegMarkedType;
166
167
typedef struct _xmlRegRange xmlRegRange;
168
typedef xmlRegRange *xmlRegRangePtr;
169
170
/*
 * One character range of an atom.
 * `blockName` is owned by the range: xmlRegFreeRange() releases it and
 * xmlRegCopyRange() duplicates it with xmlStrdup().
 */
struct _xmlRegRange {
171
    int neg;    /* 0 normal, 1 not, 2 exclude */
172
    xmlRegAtomType type;
173
    int start;
174
    int end;
175
    xmlChar *blockName;
176
};
177
178
typedef struct _xmlRegAtom xmlRegAtom;
179
typedef xmlRegAtom *xmlRegAtomPtr;
180
181
typedef struct _xmlAutomataState xmlRegState;
182
typedef xmlRegState *xmlRegStatePtr;
183
184
/*
 * An atom of the regular expression.
 *
 * Ownership, as implemented by xmlRegFreeAtom():
 *   - `ranges` and each of its first `nbRanges` entries are freed;
 *   - `valuep` is freed when the type is XML_REGEXP_STRING or
 *     XML_REGEXP_BLOCK_NAME, `valuep2` when it is XML_REGEXP_STRING.
 *
 * `no` is used by xmlRegEpxFromParse() to index its per-atom remap table
 * (NOTE(review): presumably the atom's index in the owning atoms array).
 * `data` is copied into the compact `transdata` table when present.
 */
struct _xmlRegAtom {
185
    int no;
186
    xmlRegAtomType type;
187
    xmlRegQuantType quant;
188
    int min;
189
    int max;
190
191
    void *valuep;
192
    void *valuep2;
193
    int neg;
194
    int codepoint;
195
    xmlRegStatePtr start;
196
    xmlRegStatePtr start0;
197
    xmlRegStatePtr stop;
198
    int maxRanges;
199
    int nbRanges;
200
    xmlRegRangePtr *ranges;
201
    void *data;
202
};
203
204
typedef struct _xmlRegCounter xmlRegCounter;
205
typedef xmlRegCounter *xmlRegCounterPtr;
206
207
/*
 * A counter, stored in the `counters` array of the parser context and of
 * the compiled regexp.
 * NOTE(review): min/max are presumably the allowed occurrence bounds.
 */
struct _xmlRegCounter {
208
    int min;
209
    int max;
210
};
211
212
typedef struct _xmlRegTrans xmlRegTrans;
213
typedef xmlRegTrans *xmlRegTransPtr;
214
215
/*
 * A transition leaving a state.
 * `to` is an index into the states array. xmlRegEpxFromParse() skips
 * transitions whose `to` is -1 or whose `atom` is NULL when it builds the
 * compact table.
 */
struct _xmlRegTrans {
216
    xmlRegAtomPtr atom;
217
    int to;
218
    int counter;
219
    int count;
220
    int nd;
221
};
222
223
/*
 * A state of the automaton (also visible as xmlRegState).
 * The `trans` and `transTo` arrays are owned by the state and released by
 * xmlRegFreeState().
 */
struct _xmlAutomataState {
224
    xmlRegStateType type;
225
    xmlRegMarkedType mark;
226
    xmlRegMarkedType markd;
227
    xmlRegMarkedType reached;
228
    int no;
229
    int maxTrans;
230
    int nbTrans;
231
    xmlRegTrans *trans;
232
    /*  knowing states pointing to us can speed things up */
233
    int maxTransTo;
234
    int nbTransTo;
235
    int *transTo;
236
};
237
238
typedef struct _xmlAutomata xmlRegParserCtxt;
239
typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
240
241
2.32M
#define AM_AUTOMATA_RNG 1
242
243
/*
 * Parser / automaton construction context (also xmlRegParserCtxt).
 *
 * `string` is a private copy of the pattern made by xmlRegNewParserCtxt();
 * `cur` initially points at its first byte. `determinist` starts at -1.
 *
 * xmlRegEpxFromParse() moves `string`, `states`, `atoms` and `counters`
 * into the resulting xmlRegexp and resets them here, so that
 * xmlRegFreeParserCtxt() does not free them a second time.
 */
struct _xmlAutomata {
244
    xmlChar *string;
245
    xmlChar *cur;
246
247
    int error;
248
    int neg;
249
250
    xmlRegStatePtr start;
251
    xmlRegStatePtr end;
252
    xmlRegStatePtr state;
253
254
    xmlRegAtomPtr atom;
255
256
    int maxAtoms;
257
    int nbAtoms;
258
    xmlRegAtomPtr *atoms;
259
260
    int maxStates;
261
    int nbStates;
262
    xmlRegStatePtr *states;
263
264
    int maxCounters;
265
    int nbCounters;
266
    xmlRegCounter *counters;
267
268
    int determinist;
269
    int negs;
270
    int flags;
271
272
    int depth;
273
};
274
275
/*
 * A compiled regular expression, filled in by xmlRegEpxFromParse().
 *
 * Either the general form (states/atoms/counters) or the compact form is
 * populated. When the compact form is built, states and atoms are freed and
 * reset to NULL/0.
 *
 * Compact form layout, as written by xmlRegEpxFromParse():
 *   - `compact` is allocated as (nbstates + 1) x (nbstrings + 1) ints. In
 *     the row of state s, cell 0 holds the state type and cell k + 1 holds
 *     (target state + 1) for string k, with 0 meaning "no transition".
 *   - `transdata` is nbstates x nbstrings pointers holding the atom `data`
 *     of each transition; it stays NULL when no atom carried data.
 *   - `stringMap` holds the nbstrings distinct strings.
 */
struct _xmlRegexp {
276
    xmlChar *string;
277
    int nbStates;
278
    xmlRegStatePtr *states;
279
    int nbAtoms;
280
    xmlRegAtomPtr *atoms;
281
    int nbCounters;
282
    xmlRegCounter *counters;
283
    int determinist;
284
    int flags;
285
    /*
286
     * That's the compact form for determinists automatas
287
     */
288
    int nbstates;
289
    int *compact;
290
    void **transdata;
291
    int nbstrings;
292
    xmlChar **stringMap;
293
};
294
295
typedef struct _xmlRegExecRollback xmlRegExecRollback;
296
typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
297
298
/*
 * One entry of the rollback stack kept in struct _xmlRegExecCtxt
 * (`rollbacks` field).
 */
struct _xmlRegExecRollback {
299
    xmlRegStatePtr state;/* the current state */
300
    int index;    /* the index in the input stack */
301
    int nextbranch; /* the next transition to explore in that state */
302
    int *counts;  /* save the automata state if it has some */
303
};
304
305
typedef struct _xmlRegInputToken xmlRegInputToken;
306
typedef xmlRegInputToken *xmlRegInputTokenPtr;
307
308
/*
 * One entry of the input stack kept in struct _xmlRegExecCtxt
 * (`inputStack` field, used when operating on strings).
 */
struct _xmlRegInputToken {
309
    xmlChar *value;
310
    void *data;
311
};
312
313
/*
 * Execution context: the compiled regexp being run together with the
 * current state, the rollback stack, the counters, the input stack and the
 * error-reporting fields described below.
 */
struct _xmlRegExecCtxt {
314
    int status;   /* execution status != 0 indicate an error */
315
    int determinist;  /* did we find an indeterministic behaviour */
316
    xmlRegexpPtr comp;  /* the compiled regexp */
317
    xmlRegExecCallbacks callback;
318
    void *data;
319
320
    xmlRegStatePtr state;/* the current state */
321
    int transno;  /* the current transition on that state */
322
    int transcount; /* the number of chars in char counted transitions */
323
324
    /*
325
     * A stack of rollback states
326
     */
327
    int maxRollbacks;
328
    int nbRollbacks;
329
    xmlRegExecRollback *rollbacks;
330
331
    /*
332
     * The state of the automata if any
333
     */
334
    int *counts;
335
336
    /*
337
     * The input stack
338
     */
339
    int inputStackMax;
340
    int inputStackNr;
341
    int index;
342
    int *charStack;
343
    const xmlChar *inputString; /* when operating on characters */
344
    xmlRegInputTokenPtr inputStack;/* when operating on strings */
345
346
    /*
347
     * error handling
348
     */
349
    int errStateNo;   /* the error state number */
350
    xmlRegStatePtr errState;    /* the error state */
351
    xmlChar *errString;   /* the string raising the error */
352
    int *errCounts;   /* counters at the error state */
353
    int nbPush;
354
};
355
356
147k
#define REGEXP_ALL_COUNTER  0x123456
357
147k
#define REGEXP_ALL_LAX_COUNTER  0x123457
358
359
static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
360
static void xmlRegFreeState(xmlRegStatePtr state);
361
static void xmlRegFreeAtom(xmlRegAtomPtr atom);
362
static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
363
static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
364
static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
365
                  int neg, int start, int end, const xmlChar *blockName);
366
367
/************************************************************************
368
 *                  *
369
 *    Regexp memory error handler       *
370
 *                  *
371
 ************************************************************************/
372
/**
373
 * xmlRegexpErrMemory:
374
 * @extra:  extra information
375
 *
376
 * Handle an out of memory condition
377
 */
378
static void
379
xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
380
0
{
381
0
    const char *regexp = NULL;
382
0
    if (ctxt != NULL) {
383
0
        regexp = (const char *) ctxt->string;
384
0
  ctxt->error = XML_ERR_NO_MEMORY;
385
0
    }
386
0
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
387
0
        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
388
0
        regexp, NULL, 0, 0,
389
0
        "Memory allocation failed : %s\n", extra);
390
0
}
391
392
/**
393
 * xmlRegexpErrCompile:
394
 * @extra:  extra information
395
 *
396
 * Handle a compilation failure
397
 */
398
static void
399
xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
400
0
{
401
0
    const char *regexp = NULL;
402
0
    int idx = 0;
403
404
0
    if (ctxt != NULL) {
405
0
        regexp = (const char *) ctxt->string;
406
0
  idx = ctxt->cur - ctxt->string;
407
0
  ctxt->error = XML_REGEXP_COMPILE_ERROR;
408
0
    }
409
0
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
410
0
        XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
411
0
        regexp, NULL, idx, 0,
412
0
        "failed to compile: %s\n", extra);
413
0
}
414
415
/************************************************************************
416
 *                  *
417
 *      Allocation/Deallocation       *
418
 *                  *
419
 ************************************************************************/
420
421
static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
422
423
/**
 * xmlRegCalloc2:
 * @dim1:  size of first dimension
 * @dim2:  size of second dimension
 * @elemSize:  size of element
 *
 * Allocate a two-dimensional array and set all elements to zero.
 *
 * Returns the new array or NULL in case of error. A zero @dim2 or
 * @elemSize, and a total size that does not fit in a size_t, are
 * treated as errors.
 */
static void*
xmlRegCalloc2(size_t dim1, size_t dim2, size_t elemSize) {
    size_t totalSize;
    void *ret;

    /*
     * Reject zero divisors first: the overflow check below divides by
     * both dim2 and elemSize.
     */
    if ((dim2 == 0) || (elemSize == 0))
        return (NULL);

    /* Check for overflow */
    if (dim1 > SIZE_MAX / dim2 / elemSize)
        return (NULL);

    totalSize = dim1 * dim2 * elemSize;
    ret = xmlMalloc(totalSize);
    if (ret != NULL)
        memset(ret, 0, totalSize);
    return (ret);
}
447
448
/**
449
 * xmlRegEpxFromParse:
450
 * @ctxt:  the parser context used to build it
451
 *
452
 * Allocate a new regexp and fill it with the result from the parser
453
 *
454
 * Returns the new regexp or NULL in case of error
455
 */
456
static xmlRegexpPtr
457
71.2k
xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
458
71.2k
    xmlRegexpPtr ret;
459
460
71.2k
    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
461
71.2k
    if (ret == NULL) {
462
0
  xmlRegexpErrMemory(ctxt, "compiling regexp");
463
0
  return(NULL);
464
0
    }
465
71.2k
    memset(ret, 0, sizeof(xmlRegexp));
466
71.2k
    ret->string = ctxt->string;
467
71.2k
    ret->nbStates = ctxt->nbStates;
468
71.2k
    ret->states = ctxt->states;
469
71.2k
    ret->nbAtoms = ctxt->nbAtoms;
470
71.2k
    ret->atoms = ctxt->atoms;
471
71.2k
    ret->nbCounters = ctxt->nbCounters;
472
71.2k
    ret->counters = ctxt->counters;
473
71.2k
    ret->determinist = ctxt->determinist;
474
71.2k
    ret->flags = ctxt->flags;
475
71.2k
    if (ret->determinist == -1) {
476
71.2k
        xmlRegexpIsDeterminist(ret);
477
71.2k
    }
478
479
71.2k
    if ((ret->determinist != 0) &&
480
71.2k
  (ret->nbCounters == 0) &&
481
71.2k
  (ctxt->negs == 0) &&
482
71.2k
  (ret->atoms != NULL) &&
483
71.2k
  (ret->atoms[0] != NULL) &&
484
71.2k
  (ret->atoms[0]->type == XML_REGEXP_STRING)) {
485
68.4k
  int i, j, nbstates = 0, nbatoms = 0;
486
68.4k
  int *stateRemap;
487
68.4k
  int *stringRemap;
488
68.4k
  int *transitions;
489
68.4k
  void **transdata;
490
68.4k
  xmlChar **stringMap;
491
68.4k
        xmlChar *value;
492
493
  /*
494
   * Switch to a compact representation
495
   * 1/ counting the effective number of states left
496
   * 2/ counting the unique number of atoms, and check that
497
   *    they are all of the string type
498
   * 3/ build a table state x atom for the transitions
499
   */
500
501
68.4k
  stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
502
68.4k
  if (stateRemap == NULL) {
503
0
      xmlRegexpErrMemory(ctxt, "compiling regexp");
504
0
      xmlFree(ret);
505
0
      return(NULL);
506
0
  }
507
1.06M
  for (i = 0;i < ret->nbStates;i++) {
508
994k
      if (ret->states[i] != NULL) {
509
562k
    stateRemap[i] = nbstates;
510
562k
    nbstates++;
511
562k
      } else {
512
431k
    stateRemap[i] = -1;
513
431k
      }
514
994k
  }
515
#ifdef DEBUG_COMPACTION
516
  printf("Final: %d states\n", nbstates);
517
#endif
518
68.4k
  stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
519
68.4k
  if (stringMap == NULL) {
520
0
      xmlRegexpErrMemory(ctxt, "compiling regexp");
521
0
      xmlFree(stateRemap);
522
0
      xmlFree(ret);
523
0
      return(NULL);
524
0
  }
525
68.4k
  stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
526
68.4k
  if (stringRemap == NULL) {
527
0
      xmlRegexpErrMemory(ctxt, "compiling regexp");
528
0
      xmlFree(stringMap);
529
0
      xmlFree(stateRemap);
530
0
      xmlFree(ret);
531
0
      return(NULL);
532
0
  }
533
873k
  for (i = 0;i < ret->nbAtoms;i++) {
534
805k
      if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
535
805k
    (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
536
805k
    value = ret->atoms[i]->valuep;
537
6.25M
                for (j = 0;j < nbatoms;j++) {
538
5.89M
        if (xmlStrEqual(stringMap[j], value)) {
539
449k
      stringRemap[i] = j;
540
449k
      break;
541
449k
        }
542
5.89M
    }
543
805k
    if (j >= nbatoms) {
544
355k
        stringRemap[i] = nbatoms;
545
355k
        stringMap[nbatoms] = xmlStrdup(value);
546
355k
        if (stringMap[nbatoms] == NULL) {
547
0
      for (i = 0;i < nbatoms;i++)
548
0
          xmlFree(stringMap[i]);
549
0
      xmlFree(stringRemap);
550
0
      xmlFree(stringMap);
551
0
      xmlFree(stateRemap);
552
0
      xmlFree(ret);
553
0
      return(NULL);
554
0
        }
555
355k
        nbatoms++;
556
355k
    }
557
805k
      } else {
558
0
    xmlFree(stateRemap);
559
0
    xmlFree(stringRemap);
560
0
    for (i = 0;i < nbatoms;i++)
561
0
        xmlFree(stringMap[i]);
562
0
    xmlFree(stringMap);
563
0
    xmlFree(ret);
564
0
    return(NULL);
565
0
      }
566
805k
  }
567
#ifdef DEBUG_COMPACTION
568
  printf("Final: %d atoms\n", nbatoms);
569
#endif
570
68.4k
  transitions = (int *) xmlRegCalloc2(nbstates + 1, nbatoms + 1,
571
68.4k
                                            sizeof(int));
572
68.4k
  if (transitions == NULL) {
573
0
      xmlFree(stateRemap);
574
0
      xmlFree(stringRemap);
575
0
            for (i = 0;i < nbatoms;i++)
576
0
    xmlFree(stringMap[i]);
577
0
      xmlFree(stringMap);
578
0
      xmlFree(ret);
579
0
      return(NULL);
580
0
  }
581
582
  /*
583
   * Allocate the transition table. The first entry for each
584
   * state corresponds to the state type.
585
   */
586
68.4k
  transdata = NULL;
587
588
678k
  for (i = 0;i < ret->nbStates;i++) {
589
612k
      int stateno, atomno, targetno, prev;
590
612k
      xmlRegStatePtr state;
591
612k
      xmlRegTransPtr trans;
592
593
612k
      stateno = stateRemap[i];
594
612k
      if (stateno == -1)
595
419k
    continue;
596
193k
      state = ret->states[i];
597
598
193k
      transitions[stateno * (nbatoms + 1)] = state->type;
599
600
1.31M
      for (j = 0;j < state->nbTrans;j++) {
601
1.11M
    trans = &(state->trans[j]);
602
1.11M
    if ((trans->to == -1) || (trans->atom == NULL))
603
442k
        continue;
604
677k
                atomno = stringRemap[trans->atom->no];
605
677k
    if ((trans->atom->data != NULL) && (transdata == NULL)) {
606
0
        transdata = (void **) xmlRegCalloc2(nbstates, nbatoms,
607
0
                                      sizeof(void *));
608
0
        if (transdata == NULL) {
609
0
      xmlRegexpErrMemory(ctxt, "compiling regexp");
610
0
      break;
611
0
        }
612
0
    }
613
677k
    targetno = stateRemap[trans->to];
614
    /*
615
     * if the same atom can generate transitions to 2 different
616
     * states then it means the automata is not deterministic and
617
     * the compact form can't be used !
618
     */
619
677k
    prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
620
677k
    if (prev != 0) {
621
2.59k
        if (prev != targetno + 1) {
622
2.59k
      ret->determinist = 0;
623
#ifdef DEBUG_COMPACTION
624
      printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
625
             i, j, trans->atom->no, trans->to, atomno, targetno);
626
      printf("       previous to is %d\n", prev);
627
#endif
628
2.59k
      if (transdata != NULL)
629
0
          xmlFree(transdata);
630
2.59k
      xmlFree(transitions);
631
2.59k
      xmlFree(stateRemap);
632
2.59k
      xmlFree(stringRemap);
633
53.7k
      for (i = 0;i < nbatoms;i++)
634
51.1k
          xmlFree(stringMap[i]);
635
2.59k
      xmlFree(stringMap);
636
2.59k
      goto not_determ;
637
2.59k
        }
638
674k
    } else {
639
#if 0
640
        printf("State %d trans %d: atom %d to %d : %d to %d\n",
641
         i, j, trans->atom->no, trans->to, atomno, targetno);
642
#endif
643
674k
        transitions[stateno * (nbatoms + 1) + atomno + 1] =
644
674k
      targetno + 1; /* to avoid 0 */
645
674k
        if (transdata != NULL)
646
0
      transdata[stateno * nbatoms + atomno] =
647
0
          trans->atom->data;
648
674k
    }
649
677k
      }
650
193k
  }
651
65.8k
  ret->determinist = 1;
652
#ifdef DEBUG_COMPACTION
653
  /*
654
   * Debug
655
   */
656
  for (i = 0;i < nbstates;i++) {
657
      for (j = 0;j < nbatoms + 1;j++) {
658
                printf("%02d ", transitions[i * (nbatoms + 1) + j]);
659
      }
660
      printf("\n");
661
  }
662
  printf("\n");
663
#endif
664
  /*
665
   * Cleanup of the old data
666
   */
667
65.8k
  if (ret->states != NULL) {
668
669k
      for (i = 0;i < ret->nbStates;i++)
669
603k
    xmlRegFreeState(ret->states[i]);
670
65.8k
      xmlFree(ret->states);
671
65.8k
  }
672
65.8k
  ret->states = NULL;
673
65.8k
  ret->nbStates = 0;
674
65.8k
  if (ret->atoms != NULL) {
675
497k
      for (i = 0;i < ret->nbAtoms;i++)
676
431k
    xmlRegFreeAtom(ret->atoms[i]);
677
65.8k
      xmlFree(ret->atoms);
678
65.8k
  }
679
65.8k
  ret->atoms = NULL;
680
65.8k
  ret->nbAtoms = 0;
681
682
65.8k
  ret->compact = transitions;
683
65.8k
  ret->transdata = transdata;
684
65.8k
  ret->stringMap = stringMap;
685
65.8k
  ret->nbstrings = nbatoms;
686
65.8k
  ret->nbstates = nbstates;
687
65.8k
  xmlFree(stateRemap);
688
65.8k
  xmlFree(stringRemap);
689
65.8k
    }
690
71.2k
not_determ:
691
71.2k
    ctxt->string = NULL;
692
71.2k
    ctxt->nbStates = 0;
693
71.2k
    ctxt->states = NULL;
694
71.2k
    ctxt->nbAtoms = 0;
695
71.2k
    ctxt->atoms = NULL;
696
71.2k
    ctxt->nbCounters = 0;
697
71.2k
    ctxt->counters = NULL;
698
71.2k
    return(ret);
699
71.2k
}
700
701
/**
702
 * xmlRegNewParserCtxt:
703
 * @string:  the string to parse
704
 *
705
 * Allocate a new regexp parser context
706
 *
707
 * Returns the new context or NULL in case of error
708
 */
709
static xmlRegParserCtxtPtr
710
142k
xmlRegNewParserCtxt(const xmlChar *string) {
711
142k
    xmlRegParserCtxtPtr ret;
712
713
142k
    ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
714
142k
    if (ret == NULL)
715
0
  return(NULL);
716
142k
    memset(ret, 0, sizeof(xmlRegParserCtxt));
717
142k
    if (string != NULL)
718
0
  ret->string = xmlStrdup(string);
719
142k
    ret->cur = ret->string;
720
142k
    ret->neg = 0;
721
142k
    ret->negs = 0;
722
142k
    ret->error = 0;
723
142k
    ret->determinist = -1;
724
142k
    return(ret);
725
142k
}
726
727
/**
728
 * xmlRegNewRange:
729
 * @ctxt:  the regexp parser context
730
 * @neg:  is that negative
731
 * @type:  the type of range
732
 * @start:  the start codepoint
733
 * @end:  the end codepoint
734
 *
735
 * Allocate a new regexp range
736
 *
737
 * Returns the new range or NULL in case of error
738
 */
739
static xmlRegRangePtr
740
xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
741
0
         int neg, xmlRegAtomType type, int start, int end) {
742
0
    xmlRegRangePtr ret;
743
744
0
    ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
745
0
    if (ret == NULL) {
746
0
  xmlRegexpErrMemory(ctxt, "allocating range");
747
0
  return(NULL);
748
0
    }
749
0
    ret->neg = neg;
750
0
    ret->type = type;
751
0
    ret->start = start;
752
0
    ret->end = end;
753
0
    return(ret);
754
0
}
755
756
/**
757
 * xmlRegFreeRange:
758
 * @range:  the regexp range
759
 *
760
 * Free a regexp range
761
 */
762
static void
763
0
xmlRegFreeRange(xmlRegRangePtr range) {
764
0
    if (range == NULL)
765
0
  return;
766
767
0
    if (range->blockName != NULL)
768
0
  xmlFree(range->blockName);
769
0
    xmlFree(range);
770
0
}
771
772
/**
773
 * xmlRegCopyRange:
774
 * @range:  the regexp range
775
 *
776
 * Copy a regexp range
777
 *
778
 * Returns the new copy or NULL in case of error.
779
 */
780
static xmlRegRangePtr
781
0
xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
782
0
    xmlRegRangePtr ret;
783
784
0
    if (range == NULL)
785
0
  return(NULL);
786
787
0
    ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
788
0
                         range->end);
789
0
    if (ret == NULL)
790
0
        return(NULL);
791
0
    if (range->blockName != NULL) {
792
0
  ret->blockName = xmlStrdup(range->blockName);
793
0
  if (ret->blockName == NULL) {
794
0
      xmlRegexpErrMemory(ctxt, "allocating range");
795
0
      xmlRegFreeRange(ret);
796
0
      return(NULL);
797
0
  }
798
0
    }
799
0
    return(ret);
800
0
}
801
802
/**
803
 * xmlRegNewAtom:
804
 * @ctxt:  the regexp parser context
805
 * @type:  the type of atom
806
 *
807
 * Allocate a new atom
808
 *
809
 * Returns the new atom or NULL in case of error
810
 */
811
static xmlRegAtomPtr
812
890k
xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
813
890k
    xmlRegAtomPtr ret;
814
815
890k
    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
816
890k
    if (ret == NULL) {
817
0
  xmlRegexpErrMemory(ctxt, "allocating atom");
818
0
  return(NULL);
819
0
    }
820
890k
    memset(ret, 0, sizeof(xmlRegAtom));
821
890k
    ret->type = type;
822
890k
    ret->quant = XML_REGEXP_QUANT_ONCE;
823
890k
    ret->min = 0;
824
890k
    ret->max = 0;
825
890k
    return(ret);
826
890k
}
827
828
/**
829
 * xmlRegFreeAtom:
830
 * @atom:  the regexp atom
831
 *
832
 * Free a regexp atom
833
 */
834
static void
835
881k
xmlRegFreeAtom(xmlRegAtomPtr atom) {
836
881k
    int i;
837
838
881k
    if (atom == NULL)
839
0
  return;
840
841
881k
    for (i = 0;i < atom->nbRanges;i++)
842
0
  xmlRegFreeRange(atom->ranges[i]);
843
881k
    if (atom->ranges != NULL)
844
0
  xmlFree(atom->ranges);
845
881k
    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
846
881k
  xmlFree(atom->valuep);
847
881k
    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
848
0
  xmlFree(atom->valuep2);
849
881k
    if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
850
0
  xmlFree(atom->valuep);
851
881k
    xmlFree(atom);
852
881k
}
853
854
/**
855
 * xmlRegCopyAtom:
856
 * @ctxt:  the regexp parser context
857
 * @atom:  the original atom
858
 *
859
 * Allocate a new regexp range
860
 *
861
 * Returns the new atom or NULL in case of error
862
 */
863
static xmlRegAtomPtr
864
0
xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
865
0
    xmlRegAtomPtr ret;
866
867
0
    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
868
0
    if (ret == NULL) {
869
0
  xmlRegexpErrMemory(ctxt, "copying atom");
870
0
  return(NULL);
871
0
    }
872
0
    memset(ret, 0, sizeof(xmlRegAtom));
873
0
    ret->type = atom->type;
874
0
    ret->quant = atom->quant;
875
0
    ret->min = atom->min;
876
0
    ret->max = atom->max;
877
0
    if (atom->nbRanges > 0) {
878
0
        int i;
879
880
0
        ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
881
0
                                             atom->nbRanges);
882
0
  if (ret->ranges == NULL) {
883
0
      xmlRegexpErrMemory(ctxt, "copying atom");
884
0
      goto error;
885
0
  }
886
0
  for (i = 0;i < atom->nbRanges;i++) {
887
0
      ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
888
0
      if (ret->ranges[i] == NULL)
889
0
          goto error;
890
0
      ret->nbRanges = i + 1;
891
0
  }
892
0
    }
893
0
    return(ret);
894
895
0
error:
896
0
    xmlRegFreeAtom(ret);
897
0
    return(NULL);
898
0
}
899
900
static xmlRegStatePtr
901
1.15M
xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
902
1.15M
    xmlRegStatePtr ret;
903
904
1.15M
    ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
905
1.15M
    if (ret == NULL) {
906
0
  xmlRegexpErrMemory(ctxt, "allocating state");
907
0
  return(NULL);
908
0
    }
909
1.15M
    memset(ret, 0, sizeof(xmlRegState));
910
1.15M
    ret->type = XML_REGEXP_TRANS_STATE;
911
1.15M
    ret->mark = XML_REGEXP_MARK_NORMAL;
912
1.15M
    return(ret);
913
1.15M
}
914
915
/**
916
 * xmlRegFreeState:
917
 * @state:  the regexp state
918
 *
919
 * Free a regexp state
920
 */
921
static void
922
1.58M
xmlRegFreeState(xmlRegStatePtr state) {
923
1.58M
    if (state == NULL)
924
437k
  return;
925
926
1.15M
    if (state->trans != NULL)
927
1.02M
  xmlFree(state->trans);
928
1.15M
    if (state->transTo != NULL)
929
1.00M
  xmlFree(state->transTo);
930
1.15M
    xmlFree(state);
931
1.15M
}
932
933
/**
934
 * xmlRegFreeParserCtxt:
935
 * @ctxt:  the regexp parser context
936
 *
937
 * Free a regexp parser context
938
 */
939
static void
940
142k
xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
941
142k
    int i;
942
142k
    if (ctxt == NULL)
943
0
  return;
944
945
142k
    if (ctxt->string != NULL)
946
0
  xmlFree(ctxt->string);
947
142k
    if (ctxt->states != NULL) {
948
0
  for (i = 0;i < ctxt->nbStates;i++)
949
0
      xmlRegFreeState(ctxt->states[i]);
950
0
  xmlFree(ctxt->states);
951
0
    }
952
142k
    if (ctxt->atoms != NULL) {
953
0
  for (i = 0;i < ctxt->nbAtoms;i++)
954
0
      xmlRegFreeAtom(ctxt->atoms[i]);
955
0
  xmlFree(ctxt->atoms);
956
0
    }
957
142k
    if (ctxt->counters != NULL)
958
0
  xmlFree(ctxt->counters);
959
142k
    xmlFree(ctxt);
960
142k
}
961
962
/************************************************************************
963
 *                  *
964
 *      Display of Data structures      *
965
 *                  *
966
 ************************************************************************/
967
968
static void
969
0
xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
970
0
    switch (type) {
971
0
        case XML_REGEXP_EPSILON:
972
0
      fprintf(output, "epsilon "); break;
973
0
        case XML_REGEXP_CHARVAL:
974
0
      fprintf(output, "charval "); break;
975
0
        case XML_REGEXP_RANGES:
976
0
      fprintf(output, "ranges "); break;
977
0
        case XML_REGEXP_SUBREG:
978
0
      fprintf(output, "subexpr "); break;
979
0
        case XML_REGEXP_STRING:
980
0
      fprintf(output, "string "); break;
981
0
        case XML_REGEXP_ANYCHAR:
982
0
      fprintf(output, "anychar "); break;
983
0
        case XML_REGEXP_ANYSPACE:
984
0
      fprintf(output, "anyspace "); break;
985
0
        case XML_REGEXP_NOTSPACE:
986
0
      fprintf(output, "notspace "); break;
987
0
        case XML_REGEXP_INITNAME:
988
0
      fprintf(output, "initname "); break;
989
0
        case XML_REGEXP_NOTINITNAME:
990
0
      fprintf(output, "notinitname "); break;
991
0
        case XML_REGEXP_NAMECHAR:
992
0
      fprintf(output, "namechar "); break;
993
0
        case XML_REGEXP_NOTNAMECHAR:
994
0
      fprintf(output, "notnamechar "); break;
995
0
        case XML_REGEXP_DECIMAL:
996
0
      fprintf(output, "decimal "); break;
997
0
        case XML_REGEXP_NOTDECIMAL:
998
0
      fprintf(output, "notdecimal "); break;
999
0
        case XML_REGEXP_REALCHAR:
1000
0
      fprintf(output, "realchar "); break;
1001
0
        case XML_REGEXP_NOTREALCHAR:
1002
0
      fprintf(output, "notrealchar "); break;
1003
0
        case XML_REGEXP_LETTER:
1004
0
            fprintf(output, "LETTER "); break;
1005
0
        case XML_REGEXP_LETTER_UPPERCASE:
1006
0
            fprintf(output, "LETTER_UPPERCASE "); break;
1007
0
        case XML_REGEXP_LETTER_LOWERCASE:
1008
0
            fprintf(output, "LETTER_LOWERCASE "); break;
1009
0
        case XML_REGEXP_LETTER_TITLECASE:
1010
0
            fprintf(output, "LETTER_TITLECASE "); break;
1011
0
        case XML_REGEXP_LETTER_MODIFIER:
1012
0
            fprintf(output, "LETTER_MODIFIER "); break;
1013
0
        case XML_REGEXP_LETTER_OTHERS:
1014
0
            fprintf(output, "LETTER_OTHERS "); break;
1015
0
        case XML_REGEXP_MARK:
1016
0
            fprintf(output, "MARK "); break;
1017
0
        case XML_REGEXP_MARK_NONSPACING:
1018
0
            fprintf(output, "MARK_NONSPACING "); break;
1019
0
        case XML_REGEXP_MARK_SPACECOMBINING:
1020
0
            fprintf(output, "MARK_SPACECOMBINING "); break;
1021
0
        case XML_REGEXP_MARK_ENCLOSING:
1022
0
            fprintf(output, "MARK_ENCLOSING "); break;
1023
0
        case XML_REGEXP_NUMBER:
1024
0
            fprintf(output, "NUMBER "); break;
1025
0
        case XML_REGEXP_NUMBER_DECIMAL:
1026
0
            fprintf(output, "NUMBER_DECIMAL "); break;
1027
0
        case XML_REGEXP_NUMBER_LETTER:
1028
0
            fprintf(output, "NUMBER_LETTER "); break;
1029
0
        case XML_REGEXP_NUMBER_OTHERS:
1030
0
            fprintf(output, "NUMBER_OTHERS "); break;
1031
0
        case XML_REGEXP_PUNCT:
1032
0
            fprintf(output, "PUNCT "); break;
1033
0
        case XML_REGEXP_PUNCT_CONNECTOR:
1034
0
            fprintf(output, "PUNCT_CONNECTOR "); break;
1035
0
        case XML_REGEXP_PUNCT_DASH:
1036
0
            fprintf(output, "PUNCT_DASH "); break;
1037
0
        case XML_REGEXP_PUNCT_OPEN:
1038
0
            fprintf(output, "PUNCT_OPEN "); break;
1039
0
        case XML_REGEXP_PUNCT_CLOSE:
1040
0
            fprintf(output, "PUNCT_CLOSE "); break;
1041
0
        case XML_REGEXP_PUNCT_INITQUOTE:
1042
0
            fprintf(output, "PUNCT_INITQUOTE "); break;
1043
0
        case XML_REGEXP_PUNCT_FINQUOTE:
1044
0
            fprintf(output, "PUNCT_FINQUOTE "); break;
1045
0
        case XML_REGEXP_PUNCT_OTHERS:
1046
0
            fprintf(output, "PUNCT_OTHERS "); break;
1047
0
        case XML_REGEXP_SEPAR:
1048
0
            fprintf(output, "SEPAR "); break;
1049
0
        case XML_REGEXP_SEPAR_SPACE:
1050
0
            fprintf(output, "SEPAR_SPACE "); break;
1051
0
        case XML_REGEXP_SEPAR_LINE:
1052
0
            fprintf(output, "SEPAR_LINE "); break;
1053
0
        case XML_REGEXP_SEPAR_PARA:
1054
0
            fprintf(output, "SEPAR_PARA "); break;
1055
0
        case XML_REGEXP_SYMBOL:
1056
0
            fprintf(output, "SYMBOL "); break;
1057
0
        case XML_REGEXP_SYMBOL_MATH:
1058
0
            fprintf(output, "SYMBOL_MATH "); break;
1059
0
        case XML_REGEXP_SYMBOL_CURRENCY:
1060
0
            fprintf(output, "SYMBOL_CURRENCY "); break;
1061
0
        case XML_REGEXP_SYMBOL_MODIFIER:
1062
0
            fprintf(output, "SYMBOL_MODIFIER "); break;
1063
0
        case XML_REGEXP_SYMBOL_OTHERS:
1064
0
            fprintf(output, "SYMBOL_OTHERS "); break;
1065
0
        case XML_REGEXP_OTHER:
1066
0
            fprintf(output, "OTHER "); break;
1067
0
        case XML_REGEXP_OTHER_CONTROL:
1068
0
            fprintf(output, "OTHER_CONTROL "); break;
1069
0
        case XML_REGEXP_OTHER_FORMAT:
1070
0
            fprintf(output, "OTHER_FORMAT "); break;
1071
0
        case XML_REGEXP_OTHER_PRIVATE:
1072
0
            fprintf(output, "OTHER_PRIVATE "); break;
1073
0
        case XML_REGEXP_OTHER_NA:
1074
0
            fprintf(output, "OTHER_NA "); break;
1075
0
        case XML_REGEXP_BLOCK_NAME:
1076
0
      fprintf(output, "BLOCK "); break;
1077
0
    }
1078
0
}
1079
1080
static void
1081
0
xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
1082
0
    switch (type) {
1083
0
        case XML_REGEXP_QUANT_EPSILON:
1084
0
      fprintf(output, "epsilon "); break;
1085
0
        case XML_REGEXP_QUANT_ONCE:
1086
0
      fprintf(output, "once "); break;
1087
0
        case XML_REGEXP_QUANT_OPT:
1088
0
      fprintf(output, "? "); break;
1089
0
        case XML_REGEXP_QUANT_MULT:
1090
0
      fprintf(output, "* "); break;
1091
0
        case XML_REGEXP_QUANT_PLUS:
1092
0
      fprintf(output, "+ "); break;
1093
0
  case XML_REGEXP_QUANT_RANGE:
1094
0
      fprintf(output, "range "); break;
1095
0
  case XML_REGEXP_QUANT_ONCEONLY:
1096
0
      fprintf(output, "onceonly "); break;
1097
0
  case XML_REGEXP_QUANT_ALL:
1098
0
      fprintf(output, "all "); break;
1099
0
    }
1100
0
}
1101
static void
1102
0
xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
1103
0
    fprintf(output, "  range: ");
1104
0
    if (range->neg)
1105
0
  fprintf(output, "negative ");
1106
0
    xmlRegPrintAtomType(output, range->type);
1107
0
    fprintf(output, "%c - %c\n", range->start, range->end);
1108
0
}
1109
1110
static void
1111
0
xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
1112
0
    fprintf(output, " atom: ");
1113
0
    if (atom == NULL) {
1114
0
  fprintf(output, "NULL\n");
1115
0
  return;
1116
0
    }
1117
0
    if (atom->neg)
1118
0
        fprintf(output, "not ");
1119
0
    xmlRegPrintAtomType(output, atom->type);
1120
0
    xmlRegPrintQuantType(output, atom->quant);
1121
0
    if (atom->quant == XML_REGEXP_QUANT_RANGE)
1122
0
  fprintf(output, "%d-%d ", atom->min, atom->max);
1123
0
    if (atom->type == XML_REGEXP_STRING)
1124
0
  fprintf(output, "'%s' ", (char *) atom->valuep);
1125
0
    if (atom->type == XML_REGEXP_CHARVAL)
1126
0
  fprintf(output, "char %c\n", atom->codepoint);
1127
0
    else if (atom->type == XML_REGEXP_RANGES) {
1128
0
  int i;
1129
0
  fprintf(output, "%d entries\n", atom->nbRanges);
1130
0
  for (i = 0; i < atom->nbRanges;i++)
1131
0
      xmlRegPrintRange(output, atom->ranges[i]);
1132
0
    } else if (atom->type == XML_REGEXP_SUBREG) {
1133
0
  fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
1134
0
    } else {
1135
0
  fprintf(output, "\n");
1136
0
    }
1137
0
}
1138
1139
static void
1140
0
xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
1141
0
    fprintf(output, "  trans: ");
1142
0
    if (trans == NULL) {
1143
0
  fprintf(output, "NULL\n");
1144
0
  return;
1145
0
    }
1146
0
    if (trans->to < 0) {
1147
0
  fprintf(output, "removed\n");
1148
0
  return;
1149
0
    }
1150
0
    if (trans->nd != 0) {
1151
0
  if (trans->nd == 2)
1152
0
      fprintf(output, "last not determinist, ");
1153
0
  else
1154
0
      fprintf(output, "not determinist, ");
1155
0
    }
1156
0
    if (trans->counter >= 0) {
1157
0
  fprintf(output, "counted %d, ", trans->counter);
1158
0
    }
1159
0
    if (trans->count == REGEXP_ALL_COUNTER) {
1160
0
  fprintf(output, "all transition, ");
1161
0
    } else if (trans->count >= 0) {
1162
0
  fprintf(output, "count based %d, ", trans->count);
1163
0
    }
1164
0
    if (trans->atom == NULL) {
1165
0
  fprintf(output, "epsilon to %d\n", trans->to);
1166
0
  return;
1167
0
    }
1168
0
    if (trans->atom->type == XML_REGEXP_CHARVAL)
1169
0
  fprintf(output, "char %c ", trans->atom->codepoint);
1170
0
    fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
1171
0
}
1172
1173
static void
1174
0
xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
1175
0
    int i;
1176
1177
0
    fprintf(output, " state: ");
1178
0
    if (state == NULL) {
1179
0
  fprintf(output, "NULL\n");
1180
0
  return;
1181
0
    }
1182
0
    if (state->type == XML_REGEXP_START_STATE)
1183
0
  fprintf(output, "START ");
1184
0
    if (state->type == XML_REGEXP_FINAL_STATE)
1185
0
  fprintf(output, "FINAL ");
1186
1187
0
    fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
1188
0
    for (i = 0;i < state->nbTrans; i++) {
1189
0
  xmlRegPrintTrans(output, &(state->trans[i]));
1190
0
    }
1191
0
}
1192
1193
#ifdef DEBUG_REGEXP_GRAPH
/*
 * xmlRegPrintCtxt:
 * @output:  the stream to write to
 * @ctxt:  the parser context to dump, may be NULL
 *
 * Debug dump of a whole parser context: the regexp string and flags,
 * every registered atom, the atom being built, every state and every
 * counter. Only compiled when DEBUG_REGEXP_GRAPH is defined.
 */
static void
xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
    int idx;

    fputs(" ctxt: ", output);
    if (ctxt == NULL) {
        fputs("NULL\n", output);
        return;
    }

    fprintf(output, "'%s' ", ctxt->string);
    if (ctxt->error)
        fputs("error ", output);
    if (ctxt->neg)
        fputs("neg ", output);
    fputs("\n", output);

    /* atoms registered so far, then the one under construction */
    fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
    for (idx = 0; idx < ctxt->nbAtoms; idx++) {
        fprintf(output, " %02d ", idx);
        xmlRegPrintAtom(output, ctxt->atoms[idx]);
    }
    if (ctxt->atom != NULL) {
        fputs("current atom:\n", output);
        xmlRegPrintAtom(output, ctxt->atom);
    }

    fprintf(output, "%d states:", ctxt->nbStates);
    if (ctxt->start != NULL)
        fprintf(output, " start: %d", ctxt->start->no);
    if (ctxt->end != NULL)
        fprintf(output, " end: %d", ctxt->end->no);
    fputs("\n", output);
    for (idx = 0; idx < ctxt->nbStates; idx++)
        xmlRegPrintState(output, ctxt->states[idx]);

    fprintf(output, "%d counters:\n", ctxt->nbCounters);
    for (idx = 0; idx < ctxt->nbCounters; idx++)
        fprintf(output, " %d: min %d max %d\n", idx,
                ctxt->counters[idx].min, ctxt->counters[idx].max);
}
#endif
1234
1235
/************************************************************************
1236
 *                  *
1237
 *     Finite Automata structures manipulations   *
1238
 *                  *
1239
 ************************************************************************/
1240
1241
static void
1242
xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1243
             int neg, xmlRegAtomType type, int start, int end,
1244
0
       xmlChar *blockName) {
1245
0
    xmlRegRangePtr range;
1246
1247
0
    if (atom == NULL) {
1248
0
  ERROR("add range: atom is NULL");
1249
0
  return;
1250
0
    }
1251
0
    if (atom->type != XML_REGEXP_RANGES) {
1252
0
  ERROR("add range: atom is not ranges");
1253
0
  return;
1254
0
    }
1255
0
    if (atom->maxRanges == 0) {
1256
0
  atom->maxRanges = 4;
1257
0
  atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
1258
0
                                 sizeof(xmlRegRangePtr));
1259
0
  if (atom->ranges == NULL) {
1260
0
      xmlRegexpErrMemory(ctxt, "adding ranges");
1261
0
      atom->maxRanges = 0;
1262
0
      return;
1263
0
  }
1264
0
    } else if (atom->nbRanges >= atom->maxRanges) {
1265
0
  xmlRegRangePtr *tmp;
1266
0
  atom->maxRanges *= 2;
1267
0
  tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
1268
0
                                 sizeof(xmlRegRangePtr));
1269
0
  if (tmp == NULL) {
1270
0
      xmlRegexpErrMemory(ctxt, "adding ranges");
1271
0
      atom->maxRanges /= 2;
1272
0
      return;
1273
0
  }
1274
0
  atom->ranges = tmp;
1275
0
    }
1276
0
    range = xmlRegNewRange(ctxt, neg, type, start, end);
1277
0
    if (range == NULL)
1278
0
  return;
1279
0
    range->blockName = blockName;
1280
0
    atom->ranges[atom->nbRanges++] = range;
1281
1282
0
}
1283
1284
static int
1285
0
xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1286
0
    if (ctxt->maxCounters == 0) {
1287
0
  ctxt->maxCounters = 4;
1288
0
  ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1289
0
                                 sizeof(xmlRegCounter));
1290
0
  if (ctxt->counters == NULL) {
1291
0
      xmlRegexpErrMemory(ctxt, "allocating counter");
1292
0
      ctxt->maxCounters = 0;
1293
0
      return(-1);
1294
0
  }
1295
0
    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1296
0
  xmlRegCounter *tmp;
1297
0
  ctxt->maxCounters *= 2;
1298
0
  tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1299
0
                               sizeof(xmlRegCounter));
1300
0
  if (tmp == NULL) {
1301
0
      xmlRegexpErrMemory(ctxt, "allocating counter");
1302
0
      ctxt->maxCounters /= 2;
1303
0
      return(-1);
1304
0
  }
1305
0
  ctxt->counters = tmp;
1306
0
    }
1307
0
    ctxt->counters[ctxt->nbCounters].min = -1;
1308
0
    ctxt->counters[ctxt->nbCounters].max = -1;
1309
0
    return(ctxt->nbCounters++);
1310
0
}
1311
1312
static int
1313
890k
xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1314
890k
    if (atom == NULL) {
1315
0
  ERROR("atom push: atom is NULL");
1316
0
  return(-1);
1317
0
    }
1318
890k
    if (ctxt->maxAtoms == 0) {
1319
71.2k
  ctxt->maxAtoms = 4;
1320
71.2k
  ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1321
71.2k
                                 sizeof(xmlRegAtomPtr));
1322
71.2k
  if (ctxt->atoms == NULL) {
1323
0
      xmlRegexpErrMemory(ctxt, "pushing atom");
1324
0
      ctxt->maxAtoms = 0;
1325
0
      return(-1);
1326
0
  }
1327
819k
    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1328
62.1k
  xmlRegAtomPtr *tmp;
1329
62.1k
  ctxt->maxAtoms *= 2;
1330
62.1k
  tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1331
62.1k
                                 sizeof(xmlRegAtomPtr));
1332
62.1k
  if (tmp == NULL) {
1333
0
      xmlRegexpErrMemory(ctxt, "allocating counter");
1334
0
      ctxt->maxAtoms /= 2;
1335
0
      return(-1);
1336
0
  }
1337
62.1k
  ctxt->atoms = tmp;
1338
62.1k
    }
1339
890k
    atom->no = ctxt->nbAtoms;
1340
890k
    ctxt->atoms[ctxt->nbAtoms++] = atom;
1341
890k
    return(0);
1342
890k
}
1343
1344
static void
1345
xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
1346
5.03M
                      int from) {
1347
5.03M
    if (target->maxTransTo == 0) {
1348
1.01M
  target->maxTransTo = 8;
1349
1.01M
  target->transTo = (int *) xmlMalloc(target->maxTransTo *
1350
1.01M
                                 sizeof(int));
1351
1.01M
  if (target->transTo == NULL) {
1352
0
      xmlRegexpErrMemory(ctxt, "adding transition");
1353
0
      target->maxTransTo = 0;
1354
0
      return;
1355
0
  }
1356
4.01M
    } else if (target->nbTransTo >= target->maxTransTo) {
1357
100k
  int *tmp;
1358
100k
  target->maxTransTo *= 2;
1359
100k
  tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
1360
100k
                                 sizeof(int));
1361
100k
  if (tmp == NULL) {
1362
0
      xmlRegexpErrMemory(ctxt, "adding transition");
1363
0
      target->maxTransTo /= 2;
1364
0
      return;
1365
0
  }
1366
100k
  target->transTo = tmp;
1367
100k
    }
1368
5.03M
    target->transTo[target->nbTransTo] = from;
1369
5.03M
    target->nbTransTo++;
1370
5.03M
}
1371
1372
static void
1373
xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1374
              xmlRegAtomPtr atom, xmlRegStatePtr target,
1375
5.03M
        int counter, int count) {
1376
1377
5.03M
    int nrtrans;
1378
1379
5.03M
    if (state == NULL) {
1380
0
  ERROR("add state: state is NULL");
1381
0
  return;
1382
0
    }
1383
5.03M
    if (target == NULL) {
1384
0
  ERROR("add state: target is NULL");
1385
0
  return;
1386
0
    }
1387
    /*
1388
     * Other routines follow the philosophy 'When in doubt, add a transition'
1389
     * so we check here whether such a transition is already present and, if
1390
     * so, silently ignore this request.
1391
     */
1392
1393
690M
    for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1394
685M
  xmlRegTransPtr trans = &(state->trans[nrtrans]);
1395
685M
  if ((trans->atom == atom) &&
1396
685M
      (trans->to == target->no) &&
1397
685M
      (trans->counter == counter) &&
1398
685M
      (trans->count == count)) {
1399
#ifdef DEBUG_REGEXP_GRAPH
1400
      printf("Ignoring duplicate transition from %d to %d\n",
1401
        state->no, target->no);
1402
#endif
1403
5.70k
      return;
1404
5.70k
  }
1405
685M
    }
1406
1407
5.03M
    if (state->maxTrans == 0) {
1408
1.03M
  state->maxTrans = 8;
1409
1.03M
  state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1410
1.03M
                                 sizeof(xmlRegTrans));
1411
1.03M
  if (state->trans == NULL) {
1412
0
      xmlRegexpErrMemory(ctxt, "adding transition");
1413
0
      state->maxTrans = 0;
1414
0
      return;
1415
0
  }
1416
3.99M
    } else if (state->nbTrans >= state->maxTrans) {
1417
113k
  xmlRegTrans *tmp;
1418
113k
  state->maxTrans *= 2;
1419
113k
  tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1420
113k
                                 sizeof(xmlRegTrans));
1421
113k
  if (tmp == NULL) {
1422
0
      xmlRegexpErrMemory(ctxt, "adding transition");
1423
0
      state->maxTrans /= 2;
1424
0
      return;
1425
0
  }
1426
113k
  state->trans = tmp;
1427
113k
    }
1428
#ifdef DEBUG_REGEXP_GRAPH
1429
    printf("Add trans from %d to %d ", state->no, target->no);
1430
    if (count == REGEXP_ALL_COUNTER)
1431
  printf("all transition\n");
1432
    else if (count >= 0)
1433
  printf("count based %d\n", count);
1434
    else if (counter >= 0)
1435
  printf("counted %d\n", counter);
1436
    else if (atom == NULL)
1437
  printf("epsilon transition\n");
1438
    else if (atom != NULL)
1439
        xmlRegPrintAtom(stdout, atom);
1440
#endif
1441
1442
5.03M
    state->trans[state->nbTrans].atom = atom;
1443
5.03M
    state->trans[state->nbTrans].to = target->no;
1444
5.03M
    state->trans[state->nbTrans].counter = counter;
1445
5.03M
    state->trans[state->nbTrans].count = count;
1446
5.03M
    state->trans[state->nbTrans].nd = 0;
1447
5.03M
    state->nbTrans++;
1448
5.03M
    xmlRegStateAddTransTo(ctxt, target, state->no);
1449
5.03M
}
1450
1451
static int
1452
1.15M
xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
1453
1.15M
    if (state == NULL) return(-1);
1454
1.15M
    if (ctxt->maxStates == 0) {
1455
142k
  ctxt->maxStates = 4;
1456
142k
  ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
1457
142k
                                 sizeof(xmlRegStatePtr));
1458
142k
  if (ctxt->states == NULL) {
1459
0
      xmlRegexpErrMemory(ctxt, "adding state");
1460
0
      ctxt->maxStates = 0;
1461
0
      return(-1);
1462
0
  }
1463
1.01M
    } else if (ctxt->nbStates >= ctxt->maxStates) {
1464
88.9k
  xmlRegStatePtr *tmp;
1465
88.9k
  ctxt->maxStates *= 2;
1466
88.9k
  tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
1467
88.9k
                                 sizeof(xmlRegStatePtr));
1468
88.9k
  if (tmp == NULL) {
1469
0
      xmlRegexpErrMemory(ctxt, "adding state");
1470
0
      ctxt->maxStates /= 2;
1471
0
      return(-1);
1472
0
  }
1473
88.9k
  ctxt->states = tmp;
1474
88.9k
    }
1475
1.15M
    state->no = ctxt->nbStates;
1476
1.15M
    ctxt->states[ctxt->nbStates++] = state;
1477
1.15M
    return(0);
1478
1.15M
}
1479
1480
/**
1481
 * xmlFAGenerateAllTransition:
1482
 * @ctxt:  a regexp parser context
1483
 * @from:  the from state
1484
 * @to:  the target state or NULL for building a new one
1485
 * @lax:
1486
 *
1487
 */
1488
static void
1489
xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1490
         xmlRegStatePtr from, xmlRegStatePtr to,
1491
0
         int lax) {
1492
0
    if (to == NULL) {
1493
0
  to = xmlRegNewState(ctxt);
1494
0
  xmlRegStatePush(ctxt, to);
1495
0
  ctxt->state = to;
1496
0
    }
1497
0
    if (lax)
1498
0
  xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1499
0
    else
1500
0
  xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
1501
0
}
1502
1503
/**
1504
 * xmlFAGenerateEpsilonTransition:
1505
 * @ctxt:  a regexp parser context
1506
 * @from:  the from state
1507
 * @to:  the target state or NULL for building a new one
1508
 *
1509
 */
1510
static void
1511
xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1512
1.00M
             xmlRegStatePtr from, xmlRegStatePtr to) {
1513
1.00M
    if (to == NULL) {
1514
127k
  to = xmlRegNewState(ctxt);
1515
127k
  xmlRegStatePush(ctxt, to);
1516
127k
  ctxt->state = to;
1517
127k
    }
1518
1.00M
    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
1519
1.00M
}
1520
1521
/**
1522
 * xmlFAGenerateCountedEpsilonTransition:
1523
 * @ctxt:  a regexp parser context
1524
 * @from:  the from state
1525
 * @to:  the target state or NULL for building a new one
1526
 * counter:  the counter for that transition
1527
 *
1528
 */
1529
static void
1530
xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1531
0
      xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1532
0
    if (to == NULL) {
1533
0
  to = xmlRegNewState(ctxt);
1534
0
  xmlRegStatePush(ctxt, to);
1535
0
  ctxt->state = to;
1536
0
    }
1537
0
    xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
1538
0
}
1539
1540
/**
1541
 * xmlFAGenerateCountedTransition:
1542
 * @ctxt:  a regexp parser context
1543
 * @from:  the from state
1544
 * @to:  the target state or NULL for building a new one
1545
 * counter:  the counter for that transition
1546
 *
1547
 */
1548
static void
1549
xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1550
0
      xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1551
0
    if (to == NULL) {
1552
0
  to = xmlRegNewState(ctxt);
1553
0
  xmlRegStatePush(ctxt, to);
1554
0
  ctxt->state = to;
1555
0
    }
1556
0
    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
1557
0
}
1558
1559
/**
1560
 * xmlFAGenerateTransitions:
1561
 * @ctxt:  a regexp parser context
1562
 * @from:  the from state
1563
 * @to:  the target state or NULL for building a new one
1564
 * @atom:  the atom generating the transition
1565
 *
1566
 * Returns 0 if success and -1 in case of error.
1567
 */
1568
static int
1569
xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1570
890k
                   xmlRegStatePtr to, xmlRegAtomPtr atom) {
1571
890k
    xmlRegStatePtr end;
1572
890k
    int nullable = 0;
1573
1574
890k
    if (atom == NULL) {
1575
0
  ERROR("generate transition: atom == NULL");
1576
0
  return(-1);
1577
0
    }
1578
890k
    if (atom->type == XML_REGEXP_SUBREG) {
1579
  /*
1580
   * this is a subexpression handling one should not need to
1581
   * create a new node except for XML_REGEXP_QUANT_RANGE.
1582
   */
1583
0
  if (xmlRegAtomPush(ctxt, atom) < 0) {
1584
0
      return(-1);
1585
0
  }
1586
0
  if ((to != NULL) && (atom->stop != to) &&
1587
0
      (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1588
      /*
1589
       * Generate an epsilon transition to link to the target
1590
       */
1591
0
      xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1592
#ifdef DV
1593
  } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
1594
       (atom->quant != XML_REGEXP_QUANT_ONCE)) {
1595
      to = xmlRegNewState(ctxt);
1596
      xmlRegStatePush(ctxt, to);
1597
      ctxt->state = to;
1598
      xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1599
#endif
1600
0
  }
1601
0
  switch (atom->quant) {
1602
0
      case XML_REGEXP_QUANT_OPT:
1603
0
    atom->quant = XML_REGEXP_QUANT_ONCE;
1604
    /*
1605
     * transition done to the state after end of atom.
1606
     *      1. set transition from atom start to new state
1607
     *      2. set transition from atom end to this state.
1608
     */
1609
0
                if (to == NULL) {
1610
0
                    xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
1611
0
                    xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
1612
0
                                                   ctxt->state);
1613
0
                } else {
1614
0
                    xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
1615
0
                }
1616
0
    break;
1617
0
      case XML_REGEXP_QUANT_MULT:
1618
0
    atom->quant = XML_REGEXP_QUANT_ONCE;
1619
0
    xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1620
0
    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1621
0
    break;
1622
0
      case XML_REGEXP_QUANT_PLUS:
1623
0
    atom->quant = XML_REGEXP_QUANT_ONCE;
1624
0
    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1625
0
    break;
1626
0
      case XML_REGEXP_QUANT_RANGE: {
1627
0
    int counter;
1628
0
    xmlRegStatePtr inter, newstate;
1629
1630
    /*
1631
     * create the final state now if needed
1632
     */
1633
0
    if (to != NULL) {
1634
0
        newstate = to;
1635
0
    } else {
1636
0
        newstate = xmlRegNewState(ctxt);
1637
0
        xmlRegStatePush(ctxt, newstate);
1638
0
    }
1639
1640
    /*
1641
     * The principle here is to use counted transition
1642
     * to avoid explosion in the number of states in the
1643
     * graph. This is clearly more complex but should not
1644
     * be exploitable at runtime.
1645
     */
1646
0
    if ((atom->min == 0) && (atom->start0 == NULL)) {
1647
0
        xmlRegAtomPtr copy;
1648
        /*
1649
         * duplicate a transition based on atom to count next
1650
         * occurrences after 1. We cannot loop to atom->start
1651
         * directly because we need an epsilon transition to
1652
         * newstate.
1653
         */
1654
         /* ???? For some reason it seems we never reach that
1655
            case, I suppose this got optimized out before when
1656
      building the automata */
1657
0
        copy = xmlRegCopyAtom(ctxt, atom);
1658
0
        if (copy == NULL)
1659
0
            return(-1);
1660
0
        copy->quant = XML_REGEXP_QUANT_ONCE;
1661
0
        copy->min = 0;
1662
0
        copy->max = 0;
1663
1664
0
        if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
1665
0
            < 0)
1666
0
      return(-1);
1667
0
        inter = ctxt->state;
1668
0
        counter = xmlRegGetCounter(ctxt);
1669
0
        ctxt->counters[counter].min = atom->min - 1;
1670
0
        ctxt->counters[counter].max = atom->max - 1;
1671
        /* count the number of times we see it again */
1672
0
        xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
1673
0
               atom->stop, counter);
1674
        /* allow a way out based on the count */
1675
0
        xmlFAGenerateCountedTransition(ctxt, inter,
1676
0
                                 newstate, counter);
1677
        /* and also allow a direct exit for 0 */
1678
0
        xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1679
0
                                       newstate);
1680
0
    } else {
1681
        /*
1682
         * either we need the atom at least once or there
1683
         * is an atom->start0 allowing to easily plug the
1684
         * epsilon transition.
1685
         */
1686
0
        counter = xmlRegGetCounter(ctxt);
1687
0
        ctxt->counters[counter].min = atom->min - 1;
1688
0
        ctxt->counters[counter].max = atom->max - 1;
1689
        /* allow a way out based on the count */
1690
0
        xmlFAGenerateCountedTransition(ctxt, atom->stop,
1691
0
                                 newstate, counter);
1692
        /* count the number of times we see it again */
1693
0
        xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1694
0
               atom->start, counter);
1695
        /* and if needed allow a direct exit for 0 */
1696
0
        if (atom->min == 0)
1697
0
      xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
1698
0
                   newstate);
1699
1700
0
    }
1701
0
    atom->min = 0;
1702
0
    atom->max = 0;
1703
0
    atom->quant = XML_REGEXP_QUANT_ONCE;
1704
0
    ctxt->state = newstate;
1705
0
      }
1706
0
      default:
1707
0
    break;
1708
0
  }
1709
0
  return(0);
1710
0
    }
1711
890k
    if ((atom->min == 0) && (atom->max == 0) &&
1712
890k
               (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1713
        /*
1714
   * we can discard the atom and generate an epsilon transition instead
1715
   */
1716
0
  if (to == NULL) {
1717
0
      to = xmlRegNewState(ctxt);
1718
0
      if (to != NULL)
1719
0
    xmlRegStatePush(ctxt, to);
1720
0
      else {
1721
0
    return(-1);
1722
0
      }
1723
0
  }
1724
0
  xmlFAGenerateEpsilonTransition(ctxt, from, to);
1725
0
  ctxt->state = to;
1726
0
  xmlRegFreeAtom(atom);
1727
0
  return(0);
1728
0
    }
1729
890k
    if (to == NULL) {
1730
847k
  to = xmlRegNewState(ctxt);
1731
847k
  if (to != NULL)
1732
847k
      xmlRegStatePush(ctxt, to);
1733
0
  else {
1734
0
      return(-1);
1735
0
  }
1736
847k
    }
1737
890k
    end = to;
1738
890k
    if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
1739
890k
        (atom->quant == XML_REGEXP_QUANT_PLUS)) {
1740
  /*
1741
   * Do not pollute the target state by adding transitions from
1742
   * it as it is likely to be the shared target of multiple branches.
1743
   * So isolate with an epsilon transition.
1744
   */
1745
0
        xmlRegStatePtr tmp;
1746
1747
0
  tmp = xmlRegNewState(ctxt);
1748
0
  if (tmp != NULL)
1749
0
      xmlRegStatePush(ctxt, tmp);
1750
0
  else {
1751
0
      return(-1);
1752
0
  }
1753
0
  xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
1754
0
  to = tmp;
1755
0
    }
1756
890k
    if (xmlRegAtomPush(ctxt, atom) < 0) {
1757
0
  return(-1);
1758
0
    }
1759
890k
    if ((atom->quant == XML_REGEXP_QUANT_RANGE) &&
1760
890k
        (atom->min == 0) && (atom->max > 0)) {
1761
0
  nullable = 1;
1762
0
  atom->min = 1;
1763
0
        if (atom->max == 1)
1764
0
      atom->quant = XML_REGEXP_QUANT_OPT;
1765
0
    }
1766
890k
    xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1767
890k
    ctxt->state = end;
1768
890k
    switch (atom->quant) {
1769
0
  case XML_REGEXP_QUANT_OPT:
1770
0
      atom->quant = XML_REGEXP_QUANT_ONCE;
1771
0
      xmlFAGenerateEpsilonTransition(ctxt, from, to);
1772
0
      break;
1773
0
  case XML_REGEXP_QUANT_MULT:
1774
0
      atom->quant = XML_REGEXP_QUANT_ONCE;
1775
0
      xmlFAGenerateEpsilonTransition(ctxt, from, to);
1776
0
      xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1777
0
      break;
1778
0
  case XML_REGEXP_QUANT_PLUS:
1779
0
      atom->quant = XML_REGEXP_QUANT_ONCE;
1780
0
      xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1781
0
      break;
1782
0
  case XML_REGEXP_QUANT_RANGE:
1783
0
      if (nullable)
1784
0
    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1785
0
      break;
1786
890k
  default:
1787
890k
      break;
1788
890k
    }
1789
890k
    return(0);
1790
890k
}
1791
1792
/**
1793
 * xmlFAReduceEpsilonTransitions:
1794
 * @ctxt:  a regexp parser context
1795
 * @fromnr:  the from state
1796
 * @tonr:  the to state
1797
 * @counter:  should that transition be associated to a counted
1798
 *
1799
 */
1800
static void
1801
xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1802
679k
                        int tonr, int counter) {
1803
679k
    int transnr;
1804
679k
    xmlRegStatePtr from;
1805
679k
    xmlRegStatePtr to;
1806
1807
#ifdef DEBUG_REGEXP_GRAPH
1808
    printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1809
#endif
1810
679k
    from = ctxt->states[fromnr];
1811
679k
    if (from == NULL)
1812
0
  return;
1813
679k
    to = ctxt->states[tonr];
1814
679k
    if (to == NULL)
1815
0
  return;
1816
679k
    if ((to->mark == XML_REGEXP_MARK_START) ||
1817
679k
  (to->mark == XML_REGEXP_MARK_VISITED))
1818
0
  return;
1819
1820
679k
    to->mark = XML_REGEXP_MARK_VISITED;
1821
679k
    if (to->type == XML_REGEXP_FINAL_STATE) {
1822
#ifdef DEBUG_REGEXP_GRAPH
1823
  printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1824
#endif
1825
625k
  from->type = XML_REGEXP_FINAL_STATE;
1826
625k
    }
1827
4.52M
    for (transnr = 0;transnr < to->nbTrans;transnr++) {
1828
3.84M
        if (to->trans[transnr].to < 0)
1829
1.49M
      continue;
1830
2.34M
  if (to->trans[transnr].atom == NULL) {
1831
      /*
1832
       * Don't remove counted transitions
1833
       * Don't loop either
1834
       */
1835
33.6k
      if (to->trans[transnr].to != fromnr) {
1836
33.6k
    if (to->trans[transnr].count >= 0) {
1837
0
        int newto = to->trans[transnr].to;
1838
1839
0
        xmlRegStateAddTrans(ctxt, from, NULL,
1840
0
          ctxt->states[newto],
1841
0
          -1, to->trans[transnr].count);
1842
33.6k
    } else {
1843
#ifdef DEBUG_REGEXP_GRAPH
1844
        printf("Found epsilon trans %d from %d to %d\n",
1845
         transnr, tonr, to->trans[transnr].to);
1846
#endif
1847
33.6k
        if (to->trans[transnr].counter >= 0) {
1848
0
      xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1849
0
                to->trans[transnr].to,
1850
0
                to->trans[transnr].counter);
1851
33.6k
        } else {
1852
33.6k
      xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1853
33.6k
                to->trans[transnr].to,
1854
33.6k
                counter);
1855
33.6k
        }
1856
33.6k
    }
1857
33.6k
      }
1858
2.31M
  } else {
1859
2.31M
      int newto = to->trans[transnr].to;
1860
1861
2.31M
      if (to->trans[transnr].counter >= 0) {
1862
0
    xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1863
0
            ctxt->states[newto],
1864
0
            to->trans[transnr].counter, -1);
1865
2.31M
      } else {
1866
2.31M
    xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1867
2.31M
            ctxt->states[newto], counter, -1);
1868
2.31M
      }
1869
2.31M
  }
1870
2.34M
    }
1871
679k
    to->mark = XML_REGEXP_MARK_NORMAL;
1872
679k
}
1873
1874
/**
1875
 * xmlFAEliminateSimpleEpsilonTransitions:
1876
 * @ctxt:  a regexp parser context
1877
 *
1878
 * Eliminating general epsilon transitions can get costly in the general
1879
 * algorithm due to the large amount of generated new transitions and
1880
 * associated comparisons. However for simple epsilon transition used just
1881
 * to separate building blocks when generating the automata this can be
1882
 * reduced to state elimination:
1883
 *    - if there exists an epsilon from X to Y
1884
 *    - if there is no other transition from X
1885
 * then X and Y are semantically equivalent and X can be eliminated
1886
 * If X is the start state then make Y the start state, else replace the
1887
 * target of all transitions to X by transitions to Y.
1888
 *
1889
 * If X is a final state, skip it.
1890
 * Otherwise it would be necessary to manipulate counters for this case when
1891
 * eliminating state 2:
1892
 * State 1 has a transition with an atom to state 2.
1893
 * State 2 is final and has an epsilon transition to state 1.
1894
 */
1895
static void
1896
71.2k
xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1897
71.2k
    int statenr, i, j, newto;
1898
71.2k
    xmlRegStatePtr state, tmp;
1899
1900
1.15M
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1901
1.08M
  state = ctxt->states[statenr];
1902
1.08M
  if (state == NULL)
1903
0
      continue;
1904
1.08M
  if (state->nbTrans != 1)
1905
631k
      continue;
1906
457k
       if (state->type == XML_REGEXP_UNREACH_STATE ||
1907
457k
           state->type == XML_REGEXP_FINAL_STATE)
1908
14.6k
      continue;
1909
  /* is the only transition out a basic transition */
1910
442k
  if ((state->trans[0].atom == NULL) &&
1911
442k
      (state->trans[0].to >= 0) &&
1912
442k
      (state->trans[0].to != statenr) &&
1913
442k
      (state->trans[0].counter < 0) &&
1914
442k
      (state->trans[0].count < 0)) {
1915
403k
      newto = state->trans[0].to;
1916
1917
403k
            if (state->type == XML_REGEXP_START_STATE) {
1918
#ifdef DEBUG_REGEXP_GRAPH
1919
    printf("Found simple epsilon trans from start %d to %d\n",
1920
           statenr, newto);
1921
#endif
1922
356k
            } else {
1923
#ifdef DEBUG_REGEXP_GRAPH
1924
    printf("Found simple epsilon trans from %d to %d\n",
1925
           statenr, newto);
1926
#endif
1927
1.18M
          for (i = 0;i < state->nbTransTo;i++) {
1928
830k
        tmp = ctxt->states[state->transTo[i]];
1929
38.4M
        for (j = 0;j < tmp->nbTrans;j++) {
1930
37.6M
      if (tmp->trans[j].to == statenr) {
1931
#ifdef DEBUG_REGEXP_GRAPH
1932
          printf("Changed transition %d on %d to go to %d\n",
1933
           j, tmp->no, newto);
1934
#endif
1935
828k
          tmp->trans[j].to = -1;
1936
828k
          xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
1937
828k
            ctxt->states[newto],
1938
828k
                  tmp->trans[j].counter,
1939
828k
            tmp->trans[j].count);
1940
828k
      }
1941
37.6M
        }
1942
830k
    }
1943
356k
    if (state->type == XML_REGEXP_FINAL_STATE)
1944
0
        ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1945
    /* eliminate the transition completely */
1946
356k
    state->nbTrans = 0;
1947
1948
356k
                state->type = XML_REGEXP_UNREACH_STATE;
1949
1950
356k
      }
1951
1952
403k
  }
1953
442k
    }
1954
71.2k
}
1955
/**
1956
 * xmlFAEliminateEpsilonTransitions:
1957
 * @ctxt:  a regexp parser context
1958
 *
1959
 */
1960
static void
1961
71.2k
xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1962
71.2k
    int statenr, transnr;
1963
71.2k
    xmlRegStatePtr state;
1964
71.2k
    int has_epsilon;
1965
1966
71.2k
    if (ctxt->states == NULL) return;
1967
1968
    /*
1969
     * Eliminate simple epsilon transition and the associated unreachable
1970
     * states.
1971
     */
1972
71.2k
    xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1973
1.15M
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1974
1.08M
  state = ctxt->states[statenr];
1975
1.08M
  if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
1976
#ifdef DEBUG_REGEXP_GRAPH
1977
      printf("Removed unreachable state %d\n", statenr);
1978
#endif
1979
356k
      xmlRegFreeState(state);
1980
356k
      ctxt->states[statenr] = NULL;
1981
356k
  }
1982
1.08M
    }
1983
1984
71.2k
    has_epsilon = 0;
1985
1986
    /*
1987
     * Build the completed transitions bypassing the epsilons
1988
     * Use a marking algorithm to avoid loops
1989
     * Mark sink states too.
1990
     * Process from the latest states backward to the start when
1991
     * there is long cascading epsilon chains this minimize the
1992
     * recursions and transition compares when adding the new ones
1993
     */
1994
1.15M
    for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
1995
1.08M
  state = ctxt->states[statenr];
1996
1.08M
  if (state == NULL)
1997
356k
      continue;
1998
731k
  if ((state->nbTrans == 0) &&
1999
731k
      (state->type != XML_REGEXP_FINAL_STATE)) {
2000
0
      state->type = XML_REGEXP_SINK_STATE;
2001
0
  }
2002
5.40M
  for (transnr = 0;transnr < state->nbTrans;transnr++) {
2003
4.67M
      if ((state->trans[transnr].atom == NULL) &&
2004
4.67M
    (state->trans[transnr].to >= 0)) {
2005
645k
    if (state->trans[transnr].to == statenr) {
2006
0
        state->trans[transnr].to = -1;
2007
#ifdef DEBUG_REGEXP_GRAPH
2008
        printf("Removed loopback epsilon trans %d on %d\n",
2009
         transnr, statenr);
2010
#endif
2011
645k
    } else if (state->trans[transnr].count < 0) {
2012
645k
        int newto = state->trans[transnr].to;
2013
2014
#ifdef DEBUG_REGEXP_GRAPH
2015
        printf("Found epsilon trans %d from %d to %d\n",
2016
         transnr, statenr, newto);
2017
#endif
2018
645k
        has_epsilon = 1;
2019
645k
        state->trans[transnr].to = -2;
2020
645k
        state->mark = XML_REGEXP_MARK_START;
2021
645k
        xmlFAReduceEpsilonTransitions(ctxt, statenr,
2022
645k
              newto, state->trans[transnr].counter);
2023
645k
        state->mark = XML_REGEXP_MARK_NORMAL;
2024
#ifdef DEBUG_REGEXP_GRAPH
2025
    } else {
2026
        printf("Found counted transition %d on %d\n",
2027
         transnr, statenr);
2028
#endif
2029
645k
          }
2030
645k
      }
2031
4.67M
  }
2032
731k
    }
2033
    /*
2034
     * Eliminate the epsilon transitions
2035
     */
2036
71.2k
    if (has_epsilon) {
2037
1.10M
  for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2038
1.04M
      state = ctxt->states[statenr];
2039
1.04M
      if (state == NULL)
2040
352k
    continue;
2041
5.33M
      for (transnr = 0;transnr < state->nbTrans;transnr++) {
2042
4.64M
    xmlRegTransPtr trans = &(state->trans[transnr]);
2043
4.64M
    if ((trans->atom == NULL) &&
2044
4.64M
        (trans->count < 0) &&
2045
4.64M
        (trans->to >= 0)) {
2046
0
        trans->to = -1;
2047
0
    }
2048
4.64M
      }
2049
693k
  }
2050
57.9k
    }
2051
2052
    /*
2053
     * Use this pass to detect unreachable states too
2054
     */
2055
1.15M
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2056
1.08M
  state = ctxt->states[statenr];
2057
1.08M
  if (state != NULL)
2058
731k
      state->reached = XML_REGEXP_MARK_NORMAL;
2059
1.08M
    }
2060
71.2k
    state = ctxt->states[0];
2061
71.2k
    if (state != NULL)
2062
71.2k
  state->reached = XML_REGEXP_MARK_START;
2063
722k
    while (state != NULL) {
2064
650k
  xmlRegStatePtr target = NULL;
2065
650k
  state->reached = XML_REGEXP_MARK_VISITED;
2066
  /*
2067
   * Mark all states reachable from the current reachable state
2068
   */
2069
4.25M
  for (transnr = 0;transnr < state->nbTrans;transnr++) {
2070
3.60M
      if ((state->trans[transnr].to >= 0) &&
2071
3.60M
    ((state->trans[transnr].atom != NULL) ||
2072
2.49M
     (state->trans[transnr].count >= 0))) {
2073
2.49M
    int newto = state->trans[transnr].to;
2074
2075
2.49M
    if (ctxt->states[newto] == NULL)
2076
0
        continue;
2077
2.49M
    if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
2078
579k
        ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
2079
579k
        target = ctxt->states[newto];
2080
579k
    }
2081
2.49M
      }
2082
3.60M
  }
2083
2084
  /*
2085
   * find the next accessible state not explored
2086
   */
2087
650k
  if (target == NULL) {
2088
205M
      for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
2089
205M
    state = ctxt->states[statenr];
2090
205M
    if ((state != NULL) && (state->reached ==
2091
204M
      XML_REGEXP_MARK_START)) {
2092
481k
        target = state;
2093
481k
        break;
2094
481k
    }
2095
205M
      }
2096
552k
  }
2097
650k
  state = target;
2098
650k
    }
2099
1.15M
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2100
1.08M
  state = ctxt->states[statenr];
2101
1.08M
  if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
2102
#ifdef DEBUG_REGEXP_GRAPH
2103
      printf("Removed unreachable state %d\n", statenr);
2104
#endif
2105
80.8k
      xmlRegFreeState(state);
2106
80.8k
      ctxt->states[statenr] = NULL;
2107
80.8k
  }
2108
1.08M
    }
2109
2110
71.2k
}
2111
2112
static int
2113
0
xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
2114
0
    int ret = 0;
2115
2116
0
    if ((range1->type == XML_REGEXP_RANGES) ||
2117
0
        (range2->type == XML_REGEXP_RANGES) ||
2118
0
        (range2->type == XML_REGEXP_SUBREG) ||
2119
0
        (range1->type == XML_REGEXP_SUBREG) ||
2120
0
        (range1->type == XML_REGEXP_STRING) ||
2121
0
        (range2->type == XML_REGEXP_STRING))
2122
0
  return(-1);
2123
2124
    /* put them in order */
2125
0
    if (range1->type > range2->type) {
2126
0
        xmlRegRangePtr tmp;
2127
2128
0
  tmp = range1;
2129
0
  range1 = range2;
2130
0
  range2 = tmp;
2131
0
    }
2132
0
    if ((range1->type == XML_REGEXP_ANYCHAR) ||
2133
0
        (range2->type == XML_REGEXP_ANYCHAR)) {
2134
0
  ret = 1;
2135
0
    } else if ((range1->type == XML_REGEXP_EPSILON) ||
2136
0
               (range2->type == XML_REGEXP_EPSILON)) {
2137
0
  return(0);
2138
0
    } else if (range1->type == range2->type) {
2139
0
        if (range1->type != XML_REGEXP_CHARVAL)
2140
0
            ret = 1;
2141
0
        else if ((range1->end < range2->start) ||
2142
0
           (range2->end < range1->start))
2143
0
      ret = 0;
2144
0
  else
2145
0
      ret = 1;
2146
0
    } else if (range1->type == XML_REGEXP_CHARVAL) {
2147
0
        int codepoint;
2148
0
  int neg = 0;
2149
2150
  /*
2151
   * just check all codepoints in the range for acceptance,
2152
   * this is usually way cheaper since done only once at
2153
   * compilation than testing over and over at runtime or
2154
   * pushing too many states when evaluating.
2155
   */
2156
0
  if (((range1->neg == 0) && (range2->neg != 0)) ||
2157
0
      ((range1->neg != 0) && (range2->neg == 0)))
2158
0
      neg = 1;
2159
2160
0
  for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
2161
0
      ret = xmlRegCheckCharacterRange(range2->type, codepoint,
2162
0
              0, range2->start, range2->end,
2163
0
              range2->blockName);
2164
0
      if (ret < 0)
2165
0
          return(-1);
2166
0
      if (((neg == 1) && (ret == 0)) ||
2167
0
          ((neg == 0) && (ret == 1)))
2168
0
    return(1);
2169
0
  }
2170
0
  return(0);
2171
0
    } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
2172
0
               (range2->type == XML_REGEXP_BLOCK_NAME)) {
2173
0
  if (range1->type == range2->type) {
2174
0
      ret = xmlStrEqual(range1->blockName, range2->blockName);
2175
0
  } else {
2176
      /*
2177
       * comparing a block range with anything else is way
2178
       * too costly, and maintaining the table is like too much
2179
       * memory too, so let's force the automata to save state
2180
       * here.
2181
       */
2182
0
      return(1);
2183
0
  }
2184
0
    } else if ((range1->type < XML_REGEXP_LETTER) ||
2185
0
               (range2->type < XML_REGEXP_LETTER)) {
2186
0
  if ((range1->type == XML_REGEXP_ANYSPACE) &&
2187
0
      (range2->type == XML_REGEXP_NOTSPACE))
2188
0
      ret = 0;
2189
0
  else if ((range1->type == XML_REGEXP_INITNAME) &&
2190
0
           (range2->type == XML_REGEXP_NOTINITNAME))
2191
0
      ret = 0;
2192
0
  else if ((range1->type == XML_REGEXP_NAMECHAR) &&
2193
0
           (range2->type == XML_REGEXP_NOTNAMECHAR))
2194
0
      ret = 0;
2195
0
  else if ((range1->type == XML_REGEXP_DECIMAL) &&
2196
0
           (range2->type == XML_REGEXP_NOTDECIMAL))
2197
0
      ret = 0;
2198
0
  else if ((range1->type == XML_REGEXP_REALCHAR) &&
2199
0
           (range2->type == XML_REGEXP_NOTREALCHAR))
2200
0
      ret = 0;
2201
0
  else {
2202
      /* same thing to limit complexity */
2203
0
      return(1);
2204
0
  }
2205
0
    } else {
2206
0
        ret = 0;
2207
        /* range1->type < range2->type here */
2208
0
        switch (range1->type) {
2209
0
      case XML_REGEXP_LETTER:
2210
           /* all disjoint except in the subgroups */
2211
0
           if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
2212
0
         (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
2213
0
         (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
2214
0
         (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
2215
0
         (range2->type == XML_REGEXP_LETTER_OTHERS))
2216
0
         ret = 1;
2217
0
     break;
2218
0
      case XML_REGEXP_MARK:
2219
0
           if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
2220
0
         (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
2221
0
         (range2->type == XML_REGEXP_MARK_ENCLOSING))
2222
0
         ret = 1;
2223
0
     break;
2224
0
      case XML_REGEXP_NUMBER:
2225
0
           if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
2226
0
         (range2->type == XML_REGEXP_NUMBER_LETTER) ||
2227
0
         (range2->type == XML_REGEXP_NUMBER_OTHERS))
2228
0
         ret = 1;
2229
0
     break;
2230
0
      case XML_REGEXP_PUNCT:
2231
0
           if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
2232
0
         (range2->type == XML_REGEXP_PUNCT_DASH) ||
2233
0
         (range2->type == XML_REGEXP_PUNCT_OPEN) ||
2234
0
         (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
2235
0
         (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
2236
0
         (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
2237
0
         (range2->type == XML_REGEXP_PUNCT_OTHERS))
2238
0
         ret = 1;
2239
0
     break;
2240
0
      case XML_REGEXP_SEPAR:
2241
0
           if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
2242
0
         (range2->type == XML_REGEXP_SEPAR_LINE) ||
2243
0
         (range2->type == XML_REGEXP_SEPAR_PARA))
2244
0
         ret = 1;
2245
0
     break;
2246
0
      case XML_REGEXP_SYMBOL:
2247
0
           if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
2248
0
         (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
2249
0
         (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
2250
0
         (range2->type == XML_REGEXP_SYMBOL_OTHERS))
2251
0
         ret = 1;
2252
0
     break;
2253
0
      case XML_REGEXP_OTHER:
2254
0
           if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
2255
0
         (range2->type == XML_REGEXP_OTHER_FORMAT) ||
2256
0
         (range2->type == XML_REGEXP_OTHER_PRIVATE))
2257
0
         ret = 1;
2258
0
     break;
2259
0
            default:
2260
0
           if ((range2->type >= XML_REGEXP_LETTER) &&
2261
0
         (range2->type < XML_REGEXP_BLOCK_NAME))
2262
0
         ret = 0;
2263
0
     else {
2264
         /* safety net ! */
2265
0
         return(1);
2266
0
     }
2267
0
  }
2268
0
    }
2269
0
    if (((range1->neg == 0) && (range2->neg != 0)) ||
2270
0
        ((range1->neg != 0) && (range2->neg == 0)))
2271
0
  ret = !ret;
2272
0
    return(ret);
2273
0
}
2274
2275
/**
2276
 * xmlFACompareAtomTypes:
2277
 * @type1:  an atom type
2278
 * @type2:  an atom type
2279
 *
2280
 * Compares two atoms type to check whether they intersect in some ways,
2281
 * this is used by xmlFACompareAtoms only
2282
 *
2283
 * Returns 1 if they may intersect and 0 otherwise
2284
 */
2285
static int
2286
0
xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
2287
0
    if ((type1 == XML_REGEXP_EPSILON) ||
2288
0
        (type1 == XML_REGEXP_CHARVAL) ||
2289
0
  (type1 == XML_REGEXP_RANGES) ||
2290
0
  (type1 == XML_REGEXP_SUBREG) ||
2291
0
  (type1 == XML_REGEXP_STRING) ||
2292
0
  (type1 == XML_REGEXP_ANYCHAR))
2293
0
  return(1);
2294
0
    if ((type2 == XML_REGEXP_EPSILON) ||
2295
0
        (type2 == XML_REGEXP_CHARVAL) ||
2296
0
  (type2 == XML_REGEXP_RANGES) ||
2297
0
  (type2 == XML_REGEXP_SUBREG) ||
2298
0
  (type2 == XML_REGEXP_STRING) ||
2299
0
  (type2 == XML_REGEXP_ANYCHAR))
2300
0
  return(1);
2301
2302
0
    if (type1 == type2) return(1);
2303
2304
    /* simplify subsequent compares by making sure type1 < type2 */
2305
0
    if (type1 > type2) {
2306
0
        xmlRegAtomType tmp = type1;
2307
0
  type1 = type2;
2308
0
  type2 = tmp;
2309
0
    }
2310
0
    switch (type1) {
2311
0
        case XML_REGEXP_ANYSPACE: /* \s */
2312
      /* can't be a letter, number, mark, punctuation, symbol */
2313
0
      if ((type2 == XML_REGEXP_NOTSPACE) ||
2314
0
    ((type2 >= XML_REGEXP_LETTER) &&
2315
0
     (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2316
0
          ((type2 >= XML_REGEXP_NUMBER) &&
2317
0
     (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2318
0
          ((type2 >= XML_REGEXP_MARK) &&
2319
0
     (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2320
0
          ((type2 >= XML_REGEXP_PUNCT) &&
2321
0
     (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2322
0
          ((type2 >= XML_REGEXP_SYMBOL) &&
2323
0
     (type2 <= XML_REGEXP_SYMBOL_OTHERS))
2324
0
          ) return(0);
2325
0
      break;
2326
0
        case XML_REGEXP_NOTSPACE: /* \S */
2327
0
      break;
2328
0
        case XML_REGEXP_INITNAME: /* \l */
2329
      /* can't be a number, mark, separator, punctuation, symbol or other */
2330
0
      if ((type2 == XML_REGEXP_NOTINITNAME) ||
2331
0
          ((type2 >= XML_REGEXP_NUMBER) &&
2332
0
     (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2333
0
          ((type2 >= XML_REGEXP_MARK) &&
2334
0
     (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2335
0
          ((type2 >= XML_REGEXP_SEPAR) &&
2336
0
     (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2337
0
          ((type2 >= XML_REGEXP_PUNCT) &&
2338
0
     (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2339
0
          ((type2 >= XML_REGEXP_SYMBOL) &&
2340
0
     (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2341
0
          ((type2 >= XML_REGEXP_OTHER) &&
2342
0
     (type2 <= XML_REGEXP_OTHER_NA))
2343
0
    ) return(0);
2344
0
      break;
2345
0
        case XML_REGEXP_NOTINITNAME: /* \L */
2346
0
      break;
2347
0
        case XML_REGEXP_NAMECHAR: /* \c */
2348
      /* can't be a mark, separator, punctuation, symbol or other */
2349
0
      if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
2350
0
          ((type2 >= XML_REGEXP_MARK) &&
2351
0
     (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2352
0
          ((type2 >= XML_REGEXP_PUNCT) &&
2353
0
     (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2354
0
          ((type2 >= XML_REGEXP_SEPAR) &&
2355
0
     (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2356
0
          ((type2 >= XML_REGEXP_SYMBOL) &&
2357
0
     (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2358
0
          ((type2 >= XML_REGEXP_OTHER) &&
2359
0
     (type2 <= XML_REGEXP_OTHER_NA))
2360
0
    ) return(0);
2361
0
      break;
2362
0
        case XML_REGEXP_NOTNAMECHAR: /* \C */
2363
0
      break;
2364
0
        case XML_REGEXP_DECIMAL: /* \d */
2365
      /* can't be a letter, mark, separator, punctuation, symbol or other */
2366
0
      if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2367
0
          (type2 == XML_REGEXP_REALCHAR) ||
2368
0
    ((type2 >= XML_REGEXP_LETTER) &&
2369
0
     (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2370
0
          ((type2 >= XML_REGEXP_MARK) &&
2371
0
     (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2372
0
          ((type2 >= XML_REGEXP_PUNCT) &&
2373
0
     (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2374
0
          ((type2 >= XML_REGEXP_SEPAR) &&
2375
0
     (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2376
0
          ((type2 >= XML_REGEXP_SYMBOL) &&
2377
0
     (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2378
0
          ((type2 >= XML_REGEXP_OTHER) &&
2379
0
     (type2 <= XML_REGEXP_OTHER_NA))
2380
0
    )return(0);
2381
0
      break;
2382
0
        case XML_REGEXP_NOTDECIMAL: /* \D */
2383
0
      break;
2384
0
        case XML_REGEXP_REALCHAR: /* \w */
2385
      /* can't be a mark, separator, punctuation, symbol or other */
2386
0
      if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2387
0
          ((type2 >= XML_REGEXP_MARK) &&
2388
0
     (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2389
0
          ((type2 >= XML_REGEXP_PUNCT) &&
2390
0
     (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2391
0
          ((type2 >= XML_REGEXP_SEPAR) &&
2392
0
     (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2393
0
          ((type2 >= XML_REGEXP_SYMBOL) &&
2394
0
     (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2395
0
          ((type2 >= XML_REGEXP_OTHER) &&
2396
0
     (type2 <= XML_REGEXP_OTHER_NA))
2397
0
    )return(0);
2398
0
      break;
2399
0
        case XML_REGEXP_NOTREALCHAR: /* \W */
2400
0
      break;
2401
  /*
2402
   * at that point we know both type 1 and type2 are from
2403
   * character categories are ordered and are different,
2404
   * it becomes simple because this is a partition
2405
   */
2406
0
        case XML_REGEXP_LETTER:
2407
0
      if (type2 <= XML_REGEXP_LETTER_OTHERS)
2408
0
          return(1);
2409
0
      return(0);
2410
0
        case XML_REGEXP_LETTER_UPPERCASE:
2411
0
        case XML_REGEXP_LETTER_LOWERCASE:
2412
0
        case XML_REGEXP_LETTER_TITLECASE:
2413
0
        case XML_REGEXP_LETTER_MODIFIER:
2414
0
        case XML_REGEXP_LETTER_OTHERS:
2415
0
      return(0);
2416
0
        case XML_REGEXP_MARK:
2417
0
      if (type2 <= XML_REGEXP_MARK_ENCLOSING)
2418
0
          return(1);
2419
0
      return(0);
2420
0
        case XML_REGEXP_MARK_NONSPACING:
2421
0
        case XML_REGEXP_MARK_SPACECOMBINING:
2422
0
        case XML_REGEXP_MARK_ENCLOSING:
2423
0
      return(0);
2424
0
        case XML_REGEXP_NUMBER:
2425
0
      if (type2 <= XML_REGEXP_NUMBER_OTHERS)
2426
0
          return(1);
2427
0
      return(0);
2428
0
        case XML_REGEXP_NUMBER_DECIMAL:
2429
0
        case XML_REGEXP_NUMBER_LETTER:
2430
0
        case XML_REGEXP_NUMBER_OTHERS:
2431
0
      return(0);
2432
0
        case XML_REGEXP_PUNCT:
2433
0
      if (type2 <= XML_REGEXP_PUNCT_OTHERS)
2434
0
          return(1);
2435
0
      return(0);
2436
0
        case XML_REGEXP_PUNCT_CONNECTOR:
2437
0
        case XML_REGEXP_PUNCT_DASH:
2438
0
        case XML_REGEXP_PUNCT_OPEN:
2439
0
        case XML_REGEXP_PUNCT_CLOSE:
2440
0
        case XML_REGEXP_PUNCT_INITQUOTE:
2441
0
        case XML_REGEXP_PUNCT_FINQUOTE:
2442
0
        case XML_REGEXP_PUNCT_OTHERS:
2443
0
      return(0);
2444
0
        case XML_REGEXP_SEPAR:
2445
0
      if (type2 <= XML_REGEXP_SEPAR_PARA)
2446
0
          return(1);
2447
0
      return(0);
2448
0
        case XML_REGEXP_SEPAR_SPACE:
2449
0
        case XML_REGEXP_SEPAR_LINE:
2450
0
        case XML_REGEXP_SEPAR_PARA:
2451
0
      return(0);
2452
0
        case XML_REGEXP_SYMBOL:
2453
0
      if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
2454
0
          return(1);
2455
0
      return(0);
2456
0
        case XML_REGEXP_SYMBOL_MATH:
2457
0
        case XML_REGEXP_SYMBOL_CURRENCY:
2458
0
        case XML_REGEXP_SYMBOL_MODIFIER:
2459
0
        case XML_REGEXP_SYMBOL_OTHERS:
2460
0
      return(0);
2461
0
        case XML_REGEXP_OTHER:
2462
0
      if (type2 <= XML_REGEXP_OTHER_NA)
2463
0
          return(1);
2464
0
      return(0);
2465
0
        case XML_REGEXP_OTHER_CONTROL:
2466
0
        case XML_REGEXP_OTHER_FORMAT:
2467
0
        case XML_REGEXP_OTHER_PRIVATE:
2468
0
        case XML_REGEXP_OTHER_NA:
2469
0
      return(0);
2470
0
  default:
2471
0
      break;
2472
0
    }
2473
0
    return(1);
2474
0
}
2475
2476
/**
2477
 * xmlFAEqualAtoms:
2478
 * @atom1:  an atom
2479
 * @atom2:  an atom
2480
 * @deep: if not set only compare string pointers
2481
 *
2482
 * Compares two atoms to check whether they are the same exactly
2483
 * this is used to remove equivalent transitions
2484
 *
2485
 * Returns 1 if same and 0 otherwise
2486
 */
2487
static int
2488
6.30M
xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
2489
6.30M
    int ret = 0;
2490
2491
6.30M
    if (atom1 == atom2)
2492
0
  return(1);
2493
6.30M
    if ((atom1 == NULL) || (atom2 == NULL))
2494
0
  return(0);
2495
2496
6.30M
    if (atom1->type != atom2->type)
2497
0
        return(0);
2498
6.30M
    switch (atom1->type) {
2499
0
        case XML_REGEXP_EPSILON:
2500
0
      ret = 0;
2501
0
      break;
2502
6.30M
        case XML_REGEXP_STRING:
2503
6.30M
            if (!deep)
2504
0
                ret = (atom1->valuep == atom2->valuep);
2505
6.30M
            else
2506
6.30M
                ret = xmlStrEqual((xmlChar *)atom1->valuep,
2507
6.30M
                                  (xmlChar *)atom2->valuep);
2508
6.30M
      break;
2509
0
        case XML_REGEXP_CHARVAL:
2510
0
      ret = (atom1->codepoint == atom2->codepoint);
2511
0
      break;
2512
0
  case XML_REGEXP_RANGES:
2513
      /* too hard to do in the general case */
2514
0
      ret = 0;
2515
0
  default:
2516
0
      break;
2517
6.30M
    }
2518
6.30M
    return(ret);
2519
6.30M
}
2520
2521
/**
2522
 * xmlFACompareAtoms:
2523
 * @atom1:  an atom
2524
 * @atom2:  an atom
2525
 * @deep: if not set only compare string pointers
2526
 *
2527
 * Compares two atoms to check whether they intersect in some ways,
2528
 * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
2529
 *
2530
 * Returns 1 if yes and 0 otherwise
2531
 */
2532
static int
2533
393M
xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
2534
393M
    int ret = 1;
2535
2536
393M
    if (atom1 == atom2)
2537
0
  return(1);
2538
393M
    if ((atom1 == NULL) || (atom2 == NULL))
2539
0
  return(0);
2540
2541
393M
    if ((atom1->type == XML_REGEXP_ANYCHAR) ||
2542
393M
        (atom2->type == XML_REGEXP_ANYCHAR))
2543
0
  return(1);
2544
2545
393M
    if (atom1->type > atom2->type) {
2546
0
  xmlRegAtomPtr tmp;
2547
0
  tmp = atom1;
2548
0
  atom1 = atom2;
2549
0
  atom2 = tmp;
2550
0
    }
2551
393M
    if (atom1->type != atom2->type) {
2552
0
        ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
2553
  /* if they can't intersect at the type level break now */
2554
0
  if (ret == 0)
2555
0
      return(0);
2556
0
    }
2557
393M
    switch (atom1->type) {
2558
393M
        case XML_REGEXP_STRING:
2559
393M
            if (!deep)
2560
0
                ret = (atom1->valuep != atom2->valuep);
2561
393M
            else {
2562
393M
                xmlChar *val1 = (xmlChar *)atom1->valuep;
2563
393M
                xmlChar *val2 = (xmlChar *)atom2->valuep;
2564
393M
                int compound1 = (xmlStrchr(val1, '|') != NULL);
2565
393M
                int compound2 = (xmlStrchr(val2, '|') != NULL);
2566
2567
                /* Ignore negative match flag for ##other namespaces */
2568
393M
                if (compound1 != compound2)
2569
0
                    return(0);
2570
2571
393M
                ret = xmlRegStrEqualWildcard(val1, val2);
2572
393M
            }
2573
393M
      break;
2574
393M
        case XML_REGEXP_EPSILON:
2575
0
      goto not_determinist;
2576
0
        case XML_REGEXP_CHARVAL:
2577
0
      if (atom2->type == XML_REGEXP_CHARVAL) {
2578
0
    ret = (atom1->codepoint == atom2->codepoint);
2579
0
      } else {
2580
0
          ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
2581
0
    if (ret < 0)
2582
0
        ret = 1;
2583
0
      }
2584
0
      break;
2585
0
        case XML_REGEXP_RANGES:
2586
0
      if (atom2->type == XML_REGEXP_RANGES) {
2587
0
          int i, j, res;
2588
0
    xmlRegRangePtr r1, r2;
2589
2590
    /*
2591
     * need to check that none of the ranges eventually matches
2592
     */
2593
0
    for (i = 0;i < atom1->nbRanges;i++) {
2594
0
        for (j = 0;j < atom2->nbRanges;j++) {
2595
0
      r1 = atom1->ranges[i];
2596
0
      r2 = atom2->ranges[j];
2597
0
      res = xmlFACompareRanges(r1, r2);
2598
0
      if (res == 1) {
2599
0
          ret = 1;
2600
0
          goto done;
2601
0
      }
2602
0
        }
2603
0
    }
2604
0
    ret = 0;
2605
0
      }
2606
0
      break;
2607
0
  default:
2608
0
      goto not_determinist;
2609
393M
    }
2610
393M
done:
2611
393M
    if (atom1->neg != atom2->neg) {
2612
0
        ret = !ret;
2613
0
    }
2614
393M
    if (ret == 0)
2615
299M
        return(0);
2616
94.3M
not_determinist:
2617
94.3M
    return(1);
2618
393M
}
2619
2620
/**
2621
 * xmlFARecurseDeterminism:
2622
 * @ctxt:  a regexp parser context
2623
 *
2624
 * Check whether the associated regexp is determinist,
2625
 * should be called after xmlFAEliminateEpsilonTransitions()
2626
 *
2627
 */
2628
static int
2629
xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
2630
2.24M
                   int to, xmlRegAtomPtr atom) {
2631
2.24M
    int ret = 1;
2632
2.24M
    int res;
2633
2.24M
    int transnr, nbTrans;
2634
2.24M
    xmlRegTransPtr t1;
2635
2.24M
    int deep = 1;
2636
2637
2.24M
    if (state == NULL)
2638
0
  return(ret);
2639
2.24M
    if (state->markd == XML_REGEXP_MARK_VISITED)
2640
0
  return(ret);
2641
2642
2.24M
    if (ctxt->flags & AM_AUTOMATA_RNG)
2643
0
        deep = 0;
2644
2645
    /*
2646
     * don't recurse on transitions potentially added in the course of
2647
     * the elimination.
2648
     */
2649
2.24M
    nbTrans = state->nbTrans;
2650
777M
    for (transnr = 0;transnr < nbTrans;transnr++) {
2651
775M
  t1 = &(state->trans[transnr]);
2652
  /*
2653
   * check transitions conflicting with the one looked at
2654
   */
2655
775M
  if (t1->atom == NULL) {
2656
4.44M
      if (t1->to < 0)
2657
4.44M
    continue;
2658
0
      state->markd = XML_REGEXP_MARK_VISITED;
2659
0
      res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2660
0
                               to, atom);
2661
0
      if (res == 0) {
2662
0
          ret = 0;
2663
    /* t1->nd = 1; */
2664
0
      }
2665
0
      continue;
2666
4.44M
  }
2667
770M
  if (t1->to != to)
2668
770M
      continue;
2669
0
  if (xmlFACompareAtoms(t1->atom, atom, deep)) {
2670
0
      ret = 0;
2671
      /* mark the transition as non-deterministic */
2672
0
      t1->nd = 1;
2673
0
  }
2674
0
    }
2675
2.24M
    return(ret);
2676
2.24M
}
2677
2678
/**
2679
 * xmlFAFinishRecurseDeterminism:
2680
 * @ctxt:  a regexp parser context
2681
 *
2682
 * Reset flags after checking determinism.
2683
 */
2684
static void
2685
2.24M
xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
2686
2.24M
    int transnr, nbTrans;
2687
2688
2.24M
    if (state == NULL)
2689
0
  return;
2690
2.24M
    if (state->markd != XML_REGEXP_MARK_VISITED)
2691
2.24M
  return;
2692
0
    state->markd = 0;
2693
2694
0
    nbTrans = state->nbTrans;
2695
0
    for (transnr = 0; transnr < nbTrans; transnr++) {
2696
0
  xmlRegTransPtr t1 = &state->trans[transnr];
2697
0
  if ((t1->atom == NULL) && (t1->to >= 0))
2698
0
      xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
2699
0
    }
2700
0
}
2701
2702
/**
2703
 * xmlFAComputesDeterminism:
2704
 * @ctxt:  a regexp parser context
2705
 *
2706
 * Check whether the associated regexp is determinist,
2707
 * should be called after xmlFAEliminateEpsilonTransitions()
2708
 *
2709
 */
2710
static int
2711
71.2k
xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
2712
71.2k
    int statenr, transnr;
2713
71.2k
    xmlRegStatePtr state;
2714
71.2k
    xmlRegTransPtr t1, t2, last;
2715
71.2k
    int i;
2716
71.2k
    int ret = 1;
2717
71.2k
    int deep = 1;
2718
2719
#ifdef DEBUG_REGEXP_GRAPH
2720
    printf("xmlFAComputesDeterminism\n");
2721
    xmlRegPrintCtxt(stdout, ctxt);
2722
#endif
2723
71.2k
    if (ctxt->determinist != -1)
2724
0
  return(ctxt->determinist);
2725
2726
71.2k
    if (ctxt->flags & AM_AUTOMATA_RNG)
2727
0
        deep = 0;
2728
2729
    /*
2730
     * First cleanup the automata removing cancelled transitions
2731
     */
2732
1.15M
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2733
1.08M
  state = ctxt->states[statenr];
2734
1.08M
  if (state == NULL)
2735
437k
      continue;
2736
650k
  if (state->nbTrans < 2)
2737
60.7k
      continue;
2738
4.14M
  for (transnr = 0;transnr < state->nbTrans;transnr++) {
2739
3.55M
      t1 = &(state->trans[transnr]);
2740
      /*
2741
       * Determinism checks in case of counted or all transitions
2742
       * will have to be handled separately
2743
       */
2744
3.55M
      if (t1->atom == NULL) {
2745
    /* t1->nd = 1; */
2746
1.09M
    continue;
2747
1.09M
      }
2748
2.46M
      if (t1->to == -1) /* eliminated */
2749
20.7k
    continue;
2750
435M
      for (i = 0;i < transnr;i++) {
2751
432M
    t2 = &(state->trans[i]);
2752
432M
    if (t2->to == -1) /* eliminated */
2753
19.1M
        continue;
2754
413M
    if (t2->atom != NULL) {
2755
411M
        if (t1->to == t2->to) {
2756
                        /*
2757
                         * Here we use deep because we want to keep the
2758
                         * transitions which indicate a conflict
2759
                         */
2760
6.30M
      if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
2761
6.30M
                            (t1->counter == t2->counter) &&
2762
6.30M
                            (t1->count == t2->count))
2763
239k
          t2->to = -1; /* eliminated */
2764
6.30M
        }
2765
411M
    }
2766
413M
      }
2767
2.44M
  }
2768
590k
    }
2769
2770
    /*
2771
     * Check for all states that there aren't 2 transitions
2772
     * with the same atom and a different target.
2773
     */
2774
1.15M
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2775
1.07M
  state = ctxt->states[statenr];
2776
1.07M
  if (state == NULL)
2777
437k
      continue;
2778
641k
  if (state->nbTrans < 2)
2779
60.7k
      continue;
2780
581k
  last = NULL;
2781
4.09M
  for (transnr = 0;transnr < state->nbTrans;transnr++) {
2782
3.51M
      t1 = &(state->trans[transnr]);
2783
      /*
2784
       * Determinism checks in case of counted or all transitions
2785
       * will have to be handled separately
2786
       */
2787
3.51M
      if (t1->atom == NULL) {
2788
1.07M
    continue;
2789
1.07M
      }
2790
2.43M
      if (t1->to == -1) /* eliminated */
2791
260k
    continue;
2792
403M
      for (i = 0;i < transnr;i++) {
2793
401M
    t2 = &(state->trans[i]);
2794
401M
    if (t2->to == -1) /* eliminated */
2795
5.82M
        continue;
2796
395M
    if (t2->atom != NULL) {
2797
                    /*
2798
                     * But here we don't use deep because we want to
2799
                     * find transitions which indicate a conflict
2800
                     */
2801
393M
        if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
2802
94.3M
      ret = 0;
2803
      /* mark the transitions as non-deterministic ones */
2804
94.3M
      t1->nd = 1;
2805
94.3M
      t2->nd = 1;
2806
94.3M
      last = t1;
2807
94.3M
        }
2808
393M
    } else if (t1->to != -1) {
2809
        /*
2810
         * do the closure in case of remaining specific
2811
         * epsilon transitions like choices or all
2812
         */
2813
2.24M
        ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2814
2.24M
               t2->to, t2->atom);
2815
2.24M
                    xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
2816
        /* don't shortcut the computation so all non deterministic
2817
           transition get marked down
2818
        if (ret == 0)
2819
      return(0);
2820
         */
2821
2.24M
        if (ret == 0) {
2822
0
      t1->nd = 1;
2823
      /* t2->nd = 1; */
2824
0
      last = t1;
2825
0
        }
2826
2.24M
    }
2827
395M
      }
2828
      /* don't shortcut the computation so all non deterministic
2829
         transition get marked down
2830
      if (ret == 0)
2831
    break; */
2832
2.17M
  }
2833
2834
  /*
2835
   * mark specifically the last non-deterministic transition
2836
   * from a state since there is no need to set-up rollback
2837
   * from it
2838
   */
2839
581k
  if (last != NULL) {
2840
9.07k
      last->nd = 2;
2841
9.07k
  }
2842
2843
  /* don't shortcut the computation so all non deterministic
2844
     transition get marked down
2845
  if (ret == 0)
2846
      break; */
2847
581k
    }
2848
2849
71.2k
    ctxt->determinist = ret;
2850
71.2k
    return(ret);
2851
71.2k
}
2852
2853
/************************************************************************
2854
 *                  *
2855
 *  Routines to check input against transition atoms    *
2856
 *                  *
2857
 ************************************************************************/
2858
2859
static int
2860
xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
2861
0
                    int start, int end, const xmlChar *blockName) {
2862
0
    int ret = 0;
2863
2864
0
    switch (type) {
2865
0
        case XML_REGEXP_STRING:
2866
0
        case XML_REGEXP_SUBREG:
2867
0
        case XML_REGEXP_RANGES:
2868
0
        case XML_REGEXP_EPSILON:
2869
0
      return(-1);
2870
0
        case XML_REGEXP_ANYCHAR:
2871
0
      ret = ((codepoint != '\n') && (codepoint != '\r'));
2872
0
      break;
2873
0
        case XML_REGEXP_CHARVAL:
2874
0
      ret = ((codepoint >= start) && (codepoint <= end));
2875
0
      break;
2876
0
        case XML_REGEXP_NOTSPACE:
2877
0
      neg = !neg;
2878
            /* Falls through. */
2879
0
        case XML_REGEXP_ANYSPACE:
2880
0
      ret = ((codepoint == '\n') || (codepoint == '\r') ||
2881
0
       (codepoint == '\t') || (codepoint == ' '));
2882
0
      break;
2883
0
        case XML_REGEXP_NOTINITNAME:
2884
0
      neg = !neg;
2885
            /* Falls through. */
2886
0
        case XML_REGEXP_INITNAME:
2887
0
      ret = (IS_LETTER(codepoint) ||
2888
0
       (codepoint == '_') || (codepoint == ':'));
2889
0
      break;
2890
0
        case XML_REGEXP_NOTNAMECHAR:
2891
0
      neg = !neg;
2892
            /* Falls through. */
2893
0
        case XML_REGEXP_NAMECHAR:
2894
0
      ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
2895
0
       (codepoint == '.') || (codepoint == '-') ||
2896
0
       (codepoint == '_') || (codepoint == ':') ||
2897
0
       IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
2898
0
      break;
2899
0
        case XML_REGEXP_NOTDECIMAL:
2900
0
      neg = !neg;
2901
            /* Falls through. */
2902
0
        case XML_REGEXP_DECIMAL:
2903
0
      ret = xmlUCSIsCatNd(codepoint);
2904
0
      break;
2905
0
        case XML_REGEXP_REALCHAR:
2906
0
      neg = !neg;
2907
            /* Falls through. */
2908
0
        case XML_REGEXP_NOTREALCHAR:
2909
0
      ret = xmlUCSIsCatP(codepoint);
2910
0
      if (ret == 0)
2911
0
    ret = xmlUCSIsCatZ(codepoint);
2912
0
      if (ret == 0)
2913
0
    ret = xmlUCSIsCatC(codepoint);
2914
0
      break;
2915
0
        case XML_REGEXP_LETTER:
2916
0
      ret = xmlUCSIsCatL(codepoint);
2917
0
      break;
2918
0
        case XML_REGEXP_LETTER_UPPERCASE:
2919
0
      ret = xmlUCSIsCatLu(codepoint);
2920
0
      break;
2921
0
        case XML_REGEXP_LETTER_LOWERCASE:
2922
0
      ret = xmlUCSIsCatLl(codepoint);
2923
0
      break;
2924
0
        case XML_REGEXP_LETTER_TITLECASE:
2925
0
      ret = xmlUCSIsCatLt(codepoint);
2926
0
      break;
2927
0
        case XML_REGEXP_LETTER_MODIFIER:
2928
0
      ret = xmlUCSIsCatLm(codepoint);
2929
0
      break;
2930
0
        case XML_REGEXP_LETTER_OTHERS:
2931
0
      ret = xmlUCSIsCatLo(codepoint);
2932
0
      break;
2933
0
        case XML_REGEXP_MARK:
2934
0
      ret = xmlUCSIsCatM(codepoint);
2935
0
      break;
2936
0
        case XML_REGEXP_MARK_NONSPACING:
2937
0
      ret = xmlUCSIsCatMn(codepoint);
2938
0
      break;
2939
0
        case XML_REGEXP_MARK_SPACECOMBINING:
2940
0
      ret = xmlUCSIsCatMc(codepoint);
2941
0
      break;
2942
0
        case XML_REGEXP_MARK_ENCLOSING:
2943
0
      ret = xmlUCSIsCatMe(codepoint);
2944
0
      break;
2945
0
        case XML_REGEXP_NUMBER:
2946
0
      ret = xmlUCSIsCatN(codepoint);
2947
0
      break;
2948
0
        case XML_REGEXP_NUMBER_DECIMAL:
2949
0
      ret = xmlUCSIsCatNd(codepoint);
2950
0
      break;
2951
0
        case XML_REGEXP_NUMBER_LETTER:
2952
0
      ret = xmlUCSIsCatNl(codepoint);
2953
0
      break;
2954
0
        case XML_REGEXP_NUMBER_OTHERS:
2955
0
      ret = xmlUCSIsCatNo(codepoint);
2956
0
      break;
2957
0
        case XML_REGEXP_PUNCT:
2958
0
      ret = xmlUCSIsCatP(codepoint);
2959
0
      break;
2960
0
        case XML_REGEXP_PUNCT_CONNECTOR:
2961
0
      ret = xmlUCSIsCatPc(codepoint);
2962
0
      break;
2963
0
        case XML_REGEXP_PUNCT_DASH:
2964
0
      ret = xmlUCSIsCatPd(codepoint);
2965
0
      break;
2966
0
        case XML_REGEXP_PUNCT_OPEN:
2967
0
      ret = xmlUCSIsCatPs(codepoint);
2968
0
      break;
2969
0
        case XML_REGEXP_PUNCT_CLOSE:
2970
0
      ret = xmlUCSIsCatPe(codepoint);
2971
0
      break;
2972
0
        case XML_REGEXP_PUNCT_INITQUOTE:
2973
0
      ret = xmlUCSIsCatPi(codepoint);
2974
0
      break;
2975
0
        case XML_REGEXP_PUNCT_FINQUOTE:
2976
0
      ret = xmlUCSIsCatPf(codepoint);
2977
0
      break;
2978
0
        case XML_REGEXP_PUNCT_OTHERS:
2979
0
      ret = xmlUCSIsCatPo(codepoint);
2980
0
      break;
2981
0
        case XML_REGEXP_SEPAR:
2982
0
      ret = xmlUCSIsCatZ(codepoint);
2983
0
      break;
2984
0
        case XML_REGEXP_SEPAR_SPACE:
2985
0
      ret = xmlUCSIsCatZs(codepoint);
2986
0
      break;
2987
0
        case XML_REGEXP_SEPAR_LINE:
2988
0
      ret = xmlUCSIsCatZl(codepoint);
2989
0
      break;
2990
0
        case XML_REGEXP_SEPAR_PARA:
2991
0
      ret = xmlUCSIsCatZp(codepoint);
2992
0
      break;
2993
0
        case XML_REGEXP_SYMBOL:
2994
0
      ret = xmlUCSIsCatS(codepoint);
2995
0
      break;
2996
0
        case XML_REGEXP_SYMBOL_MATH:
2997
0
      ret = xmlUCSIsCatSm(codepoint);
2998
0
      break;
2999
0
        case XML_REGEXP_SYMBOL_CURRENCY:
3000
0
      ret = xmlUCSIsCatSc(codepoint);
3001
0
      break;
3002
0
        case XML_REGEXP_SYMBOL_MODIFIER:
3003
0
      ret = xmlUCSIsCatSk(codepoint);
3004
0
      break;
3005
0
        case XML_REGEXP_SYMBOL_OTHERS:
3006
0
      ret = xmlUCSIsCatSo(codepoint);
3007
0
      break;
3008
0
        case XML_REGEXP_OTHER:
3009
0
      ret = xmlUCSIsCatC(codepoint);
3010
0
      break;
3011
0
        case XML_REGEXP_OTHER_CONTROL:
3012
0
      ret = xmlUCSIsCatCc(codepoint);
3013
0
      break;
3014
0
        case XML_REGEXP_OTHER_FORMAT:
3015
0
      ret = xmlUCSIsCatCf(codepoint);
3016
0
      break;
3017
0
        case XML_REGEXP_OTHER_PRIVATE:
3018
0
      ret = xmlUCSIsCatCo(codepoint);
3019
0
      break;
3020
0
        case XML_REGEXP_OTHER_NA:
3021
      /* ret = xmlUCSIsCatCn(codepoint); */
3022
      /* Seems it doesn't exist anymore in recent Unicode releases */
3023
0
      ret = 0;
3024
0
      break;
3025
0
        case XML_REGEXP_BLOCK_NAME:
3026
0
      ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
3027
0
      break;
3028
0
    }
3029
0
    if (neg)
3030
0
  return(!ret);
3031
0
    return(ret);
3032
0
}
3033
3034
static int
3035
0
xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
3036
0
    int i, ret = 0;
3037
0
    xmlRegRangePtr range;
3038
3039
0
    if ((atom == NULL) || (!IS_CHAR(codepoint)))
3040
0
  return(-1);
3041
3042
0
    switch (atom->type) {
3043
0
        case XML_REGEXP_SUBREG:
3044
0
        case XML_REGEXP_EPSILON:
3045
0
      return(-1);
3046
0
        case XML_REGEXP_CHARVAL:
3047
0
            return(codepoint == atom->codepoint);
3048
0
        case XML_REGEXP_RANGES: {
3049
0
      int accept = 0;
3050
3051
0
      for (i = 0;i < atom->nbRanges;i++) {
3052
0
    range = atom->ranges[i];
3053
0
    if (range->neg == 2) {
3054
0
        ret = xmlRegCheckCharacterRange(range->type, codepoint,
3055
0
            0, range->start, range->end,
3056
0
            range->blockName);
3057
0
        if (ret != 0)
3058
0
      return(0); /* excluded char */
3059
0
    } else if (range->neg) {
3060
0
        ret = xmlRegCheckCharacterRange(range->type, codepoint,
3061
0
            0, range->start, range->end,
3062
0
            range->blockName);
3063
0
        if (ret == 0)
3064
0
            accept = 1;
3065
0
        else
3066
0
            return(0);
3067
0
    } else {
3068
0
        ret = xmlRegCheckCharacterRange(range->type, codepoint,
3069
0
            0, range->start, range->end,
3070
0
            range->blockName);
3071
0
        if (ret != 0)
3072
0
      accept = 1; /* might still be excluded */
3073
0
    }
3074
0
      }
3075
0
      return(accept);
3076
0
  }
3077
0
        case XML_REGEXP_STRING:
3078
0
      printf("TODO: XML_REGEXP_STRING\n");
3079
0
      return(-1);
3080
0
        case XML_REGEXP_ANYCHAR:
3081
0
        case XML_REGEXP_ANYSPACE:
3082
0
        case XML_REGEXP_NOTSPACE:
3083
0
        case XML_REGEXP_INITNAME:
3084
0
        case XML_REGEXP_NOTINITNAME:
3085
0
        case XML_REGEXP_NAMECHAR:
3086
0
        case XML_REGEXP_NOTNAMECHAR:
3087
0
        case XML_REGEXP_DECIMAL:
3088
0
        case XML_REGEXP_NOTDECIMAL:
3089
0
        case XML_REGEXP_REALCHAR:
3090
0
        case XML_REGEXP_NOTREALCHAR:
3091
0
        case XML_REGEXP_LETTER:
3092
0
        case XML_REGEXP_LETTER_UPPERCASE:
3093
0
        case XML_REGEXP_LETTER_LOWERCASE:
3094
0
        case XML_REGEXP_LETTER_TITLECASE:
3095
0
        case XML_REGEXP_LETTER_MODIFIER:
3096
0
        case XML_REGEXP_LETTER_OTHERS:
3097
0
        case XML_REGEXP_MARK:
3098
0
        case XML_REGEXP_MARK_NONSPACING:
3099
0
        case XML_REGEXP_MARK_SPACECOMBINING:
3100
0
        case XML_REGEXP_MARK_ENCLOSING:
3101
0
        case XML_REGEXP_NUMBER:
3102
0
        case XML_REGEXP_NUMBER_DECIMAL:
3103
0
        case XML_REGEXP_NUMBER_LETTER:
3104
0
        case XML_REGEXP_NUMBER_OTHERS:
3105
0
        case XML_REGEXP_PUNCT:
3106
0
        case XML_REGEXP_PUNCT_CONNECTOR:
3107
0
        case XML_REGEXP_PUNCT_DASH:
3108
0
        case XML_REGEXP_PUNCT_OPEN:
3109
0
        case XML_REGEXP_PUNCT_CLOSE:
3110
0
        case XML_REGEXP_PUNCT_INITQUOTE:
3111
0
        case XML_REGEXP_PUNCT_FINQUOTE:
3112
0
        case XML_REGEXP_PUNCT_OTHERS:
3113
0
        case XML_REGEXP_SEPAR:
3114
0
        case XML_REGEXP_SEPAR_SPACE:
3115
0
        case XML_REGEXP_SEPAR_LINE:
3116
0
        case XML_REGEXP_SEPAR_PARA:
3117
0
        case XML_REGEXP_SYMBOL:
3118
0
        case XML_REGEXP_SYMBOL_MATH:
3119
0
        case XML_REGEXP_SYMBOL_CURRENCY:
3120
0
        case XML_REGEXP_SYMBOL_MODIFIER:
3121
0
        case XML_REGEXP_SYMBOL_OTHERS:
3122
0
        case XML_REGEXP_OTHER:
3123
0
        case XML_REGEXP_OTHER_CONTROL:
3124
0
        case XML_REGEXP_OTHER_FORMAT:
3125
0
        case XML_REGEXP_OTHER_PRIVATE:
3126
0
        case XML_REGEXP_OTHER_NA:
3127
0
  case XML_REGEXP_BLOCK_NAME:
3128
0
      ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
3129
0
                                (const xmlChar *)atom->valuep);
3130
0
      if (atom->neg)
3131
0
    ret = !ret;
3132
0
      break;
3133
0
    }
3134
0
    return(ret);
3135
0
}
3136
3137
/************************************************************************
3138
 *                  *
3139
 *  Saving and restoring state of an execution context    *
3140
 *                  *
3141
 ************************************************************************/
3142
3143
#ifdef DEBUG_REGEXP_EXEC
/*
 * xmlFARegDebugExec:
 * @exec:  a regexp execution context
 *
 * Debugging helper: print on stdout the current state number, the
 * transition number and the input index, followed either by up to three
 * values from the top of the input stack or, when there is no input
 * stack, by the remaining input string.
 */
static void
xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
    if (exec->inputStack == NULL) {
        printf(": %s", &(exec->inputString[exec->index]));
    } else {
        int shown;
        int top = exec->inputStackNr - 1;

        printf(": ");
        /* walk down from the top of the stack, three entries at most */
        for (shown = 0; (shown < 3) && (top >= 0); shown++, top--)
            printf("%s ", (const char *) exec->inputStack[top].value);
    }
    printf("\n");
}
#endif
3159
3160
static void
3161
334
xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
3162
#ifdef DEBUG_REGEXP_EXEC
3163
    printf("saving ");
3164
    exec->transno++;
3165
    xmlFARegDebugExec(exec);
3166
    exec->transno--;
3167
#endif
3168
334
#ifdef MAX_PUSH
3169
334
    if (exec->nbPush > MAX_PUSH) {
3170
0
        return;
3171
0
    }
3172
334
    exec->nbPush++;
3173
334
#endif
3174
3175
334
    if (exec->maxRollbacks == 0) {
3176
333
  exec->maxRollbacks = 4;
3177
333
  exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
3178
333
                                 sizeof(xmlRegExecRollback));
3179
333
  if (exec->rollbacks == NULL) {
3180
0
      xmlRegexpErrMemory(NULL, "saving regexp");
3181
0
      exec->maxRollbacks = 0;
3182
0
      return;
3183
0
  }
3184
333
  memset(exec->rollbacks, 0,
3185
333
         exec->maxRollbacks * sizeof(xmlRegExecRollback));
3186
333
    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
3187
0
  xmlRegExecRollback *tmp;
3188
0
  int len = exec->maxRollbacks;
3189
3190
0
  exec->maxRollbacks *= 2;
3191
0
  tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
3192
0
      exec->maxRollbacks * sizeof(xmlRegExecRollback));
3193
0
  if (tmp == NULL) {
3194
0
      xmlRegexpErrMemory(NULL, "saving regexp");
3195
0
      exec->maxRollbacks /= 2;
3196
0
      return;
3197
0
  }
3198
0
  exec->rollbacks = tmp;
3199
0
  tmp = &exec->rollbacks[len];
3200
0
  memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
3201
0
    }
3202
334
    exec->rollbacks[exec->nbRollbacks].state = exec->state;
3203
334
    exec->rollbacks[exec->nbRollbacks].index = exec->index;
3204
334
    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
3205
334
    if (exec->comp->nbCounters > 0) {
3206
0
  if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3207
0
      exec->rollbacks[exec->nbRollbacks].counts = (int *)
3208
0
    xmlMalloc(exec->comp->nbCounters * sizeof(int));
3209
0
      if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3210
0
    xmlRegexpErrMemory(NULL, "saving regexp");
3211
0
    exec->status = -5;
3212
0
    return;
3213
0
      }
3214
0
  }
3215
0
  memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
3216
0
         exec->comp->nbCounters * sizeof(int));
3217
0
    }
3218
334
    exec->nbRollbacks++;
3219
334
}
3220
3221
static void
3222
2.34k
xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
3223
2.34k
    if (exec->nbRollbacks <= 0) {
3224
2.03k
  exec->status = -1;
3225
#ifdef DEBUG_REGEXP_EXEC
3226
  printf("rollback failed on empty stack\n");
3227
#endif
3228
2.03k
  return;
3229
2.03k
    }
3230
319
    exec->nbRollbacks--;
3231
319
    exec->state = exec->rollbacks[exec->nbRollbacks].state;
3232
319
    exec->index = exec->rollbacks[exec->nbRollbacks].index;
3233
319
    exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
3234
319
    if (exec->comp->nbCounters > 0) {
3235
0
  if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3236
0
      fprintf(stderr, "exec save: allocation failed");
3237
0
      exec->status = -6;
3238
0
      return;
3239
0
  }
3240
0
  if (exec->counts) {
3241
0
      memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
3242
0
         exec->comp->nbCounters * sizeof(int));
3243
0
  }
3244
0
    }
3245
3246
#ifdef DEBUG_REGEXP_EXEC
3247
    printf("restored ");
3248
    xmlFARegDebugExec(exec);
3249
#endif
3250
319
}
3251
3252
/************************************************************************
3253
 *                  *
3254
 *  Verifier, running an input against a compiled regexp    *
3255
 *                  *
3256
 ************************************************************************/
3257
3258
static int
3259
0
xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
3260
0
    xmlRegExecCtxt execval;
3261
0
    xmlRegExecCtxtPtr exec = &execval;
3262
0
    int ret, codepoint = 0, len, deter;
3263
3264
0
    exec->inputString = content;
3265
0
    exec->index = 0;
3266
0
    exec->nbPush = 0;
3267
0
    exec->determinist = 1;
3268
0
    exec->maxRollbacks = 0;
3269
0
    exec->nbRollbacks = 0;
3270
0
    exec->rollbacks = NULL;
3271
0
    exec->status = 0;
3272
0
    exec->comp = comp;
3273
0
    exec->state = comp->states[0];
3274
0
    exec->transno = 0;
3275
0
    exec->transcount = 0;
3276
0
    exec->inputStack = NULL;
3277
0
    exec->inputStackMax = 0;
3278
0
    if (comp->nbCounters > 0) {
3279
0
  exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
3280
0
  if (exec->counts == NULL) {
3281
0
      xmlRegexpErrMemory(NULL, "running regexp");
3282
0
      return(-1);
3283
0
  }
3284
0
        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
3285
0
    } else
3286
0
  exec->counts = NULL;
3287
0
    while ((exec->status == 0) && (exec->state != NULL) &&
3288
0
     ((exec->inputString[exec->index] != 0) ||
3289
0
      ((exec->state != NULL) &&
3290
0
       (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3291
0
  xmlRegTransPtr trans;
3292
0
  xmlRegAtomPtr atom;
3293
3294
  /*
3295
   * If end of input on non-terminal state, rollback, however we may
3296
   * still have epsilon like transition for counted transitions
3297
   * on counters, in that case don't break too early.  Additionally,
3298
   * if we are working on a range like "AB{0,2}", where B is not present,
3299
   * we don't want to break.
3300
   */
3301
0
  len = 1;
3302
0
  if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
3303
      /*
3304
       * if there is a transition, we must check if
3305
       *  atom allows minOccurs of 0
3306
       */
3307
0
      if (exec->transno < exec->state->nbTrans) {
3308
0
          trans = &exec->state->trans[exec->transno];
3309
0
    if (trans->to >=0) {
3310
0
        atom = trans->atom;
3311
0
        if (!((atom->min == 0) && (atom->max > 0)))
3312
0
            goto rollback;
3313
0
    }
3314
0
      } else
3315
0
          goto rollback;
3316
0
  }
3317
3318
0
  exec->transcount = 0;
3319
0
  for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3320
0
      trans = &exec->state->trans[exec->transno];
3321
0
      if (trans->to < 0)
3322
0
    continue;
3323
0
      atom = trans->atom;
3324
0
      ret = 0;
3325
0
      deter = 1;
3326
0
      if (trans->count >= 0) {
3327
0
    int count;
3328
0
    xmlRegCounterPtr counter;
3329
3330
0
    if (exec->counts == NULL) {
3331
0
        exec->status = -1;
3332
0
        goto error;
3333
0
    }
3334
    /*
3335
     * A counted transition.
3336
     */
3337
3338
0
    count = exec->counts[trans->count];
3339
0
    counter = &exec->comp->counters[trans->count];
3340
#ifdef DEBUG_REGEXP_EXEC
3341
    printf("testing count %d: val %d, min %d, max %d\n",
3342
           trans->count, count, counter->min,  counter->max);
3343
#endif
3344
0
    ret = ((count >= counter->min) && (count <= counter->max));
3345
0
    if ((ret) && (counter->min != counter->max))
3346
0
        deter = 0;
3347
0
      } else if (atom == NULL) {
3348
0
    fprintf(stderr, "epsilon transition left at runtime\n");
3349
0
    exec->status = -2;
3350
0
    break;
3351
0
      } else if (exec->inputString[exec->index] != 0) {
3352
0
                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
3353
0
    ret = xmlRegCheckCharacter(atom, codepoint);
3354
0
    if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
3355
0
        xmlRegStatePtr to = comp->states[trans->to];
3356
3357
        /*
3358
         * this is a multiple input sequence
3359
         * If there is a counter associated increment it now.
3360
         * do not increment if the counter is already over the
3361
         * maximum limit in which case get to next transition
3362
         */
3363
0
        if (trans->counter >= 0) {
3364
0
      xmlRegCounterPtr counter;
3365
3366
0
      if ((exec->counts == NULL) ||
3367
0
          (exec->comp == NULL) ||
3368
0
          (exec->comp->counters == NULL)) {
3369
0
          exec->status = -1;
3370
0
          goto error;
3371
0
      }
3372
0
      counter = &exec->comp->counters[trans->counter];
3373
0
      if (exec->counts[trans->counter] >= counter->max)
3374
0
          continue; /* for loop on transitions */
3375
0
                    }
3376
                    /* Save before incrementing */
3377
0
        if (exec->state->nbTrans > exec->transno + 1) {
3378
0
      xmlFARegExecSave(exec);
3379
0
        }
3380
0
        if (trans->counter >= 0) {
3381
#ifdef DEBUG_REGEXP_EXEC
3382
      printf("Increasing count %d\n", trans->counter);
3383
#endif
3384
0
      exec->counts[trans->counter]++;
3385
0
        }
3386
0
        exec->transcount = 1;
3387
0
        do {
3388
      /*
3389
       * Try to progress as much as possible on the input
3390
       */
3391
0
      if (exec->transcount == atom->max) {
3392
0
          break;
3393
0
      }
3394
0
      exec->index += len;
3395
      /*
3396
       * End of input: stop here
3397
       */
3398
0
      if (exec->inputString[exec->index] == 0) {
3399
0
          exec->index -= len;
3400
0
          break;
3401
0
      }
3402
0
      if (exec->transcount >= atom->min) {
3403
0
          int transno = exec->transno;
3404
0
          xmlRegStatePtr state = exec->state;
3405
3406
          /*
3407
           * The transition is acceptable save it
3408
           */
3409
0
          exec->transno = -1; /* trick */
3410
0
          exec->state = to;
3411
0
          xmlFARegExecSave(exec);
3412
0
          exec->transno = transno;
3413
0
          exec->state = state;
3414
0
      }
3415
0
      codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
3416
0
                      len);
3417
0
      ret = xmlRegCheckCharacter(atom, codepoint);
3418
0
      exec->transcount++;
3419
0
        } while (ret == 1);
3420
0
        if (exec->transcount < atom->min)
3421
0
      ret = 0;
3422
3423
        /*
3424
         * If the last check failed but one transition was found
3425
         * possible, rollback
3426
         */
3427
0
        if (ret < 0)
3428
0
      ret = 0;
3429
0
        if (ret == 0) {
3430
0
      goto rollback;
3431
0
        }
3432
0
        if (trans->counter >= 0) {
3433
0
      if (exec->counts == NULL) {
3434
0
          exec->status = -1;
3435
0
          goto error;
3436
0
      }
3437
#ifdef DEBUG_REGEXP_EXEC
3438
      printf("Decreasing count %d\n", trans->counter);
3439
#endif
3440
0
      exec->counts[trans->counter]--;
3441
0
        }
3442
0
    } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
3443
        /*
3444
         * we don't match on the codepoint, but minOccurs of 0
3445
         * says that's ok.  Setting len to 0 inhibits stepping
3446
         * over the codepoint.
3447
         */
3448
0
        exec->transcount = 1;
3449
0
        len = 0;
3450
0
        ret = 1;
3451
0
    }
3452
0
      } else if ((atom->min == 0) && (atom->max > 0)) {
3453
          /* another spot to match when minOccurs is 0 */
3454
0
    exec->transcount = 1;
3455
0
    len = 0;
3456
0
    ret = 1;
3457
0
      }
3458
0
      if (ret == 1) {
3459
0
    if ((trans->nd == 1) ||
3460
0
        ((trans->count >= 0) && (deter == 0) &&
3461
0
         (exec->state->nbTrans > exec->transno + 1))) {
3462
#ifdef DEBUG_REGEXP_EXEC
3463
        if (trans->nd == 1)
3464
            printf("Saving on nd transition atom %d for %c at %d\n",
3465
             trans->atom->no, codepoint, exec->index);
3466
        else
3467
            printf("Saving on counted transition count %d for %c at %d\n",
3468
             trans->count, codepoint, exec->index);
3469
#endif
3470
0
        xmlFARegExecSave(exec);
3471
0
    }
3472
0
    if (trans->counter >= 0) {
3473
0
        xmlRegCounterPtr counter;
3474
3475
                    /* make sure we don't go over the counter maximum value */
3476
0
        if ((exec->counts == NULL) ||
3477
0
      (exec->comp == NULL) ||
3478
0
      (exec->comp->counters == NULL)) {
3479
0
      exec->status = -1;
3480
0
      goto error;
3481
0
        }
3482
0
        counter = &exec->comp->counters[trans->counter];
3483
0
        if (exec->counts[trans->counter] >= counter->max)
3484
0
      continue; /* for loop on transitions */
3485
#ifdef DEBUG_REGEXP_EXEC
3486
        printf("Increasing count %d\n", trans->counter);
3487
#endif
3488
0
        exec->counts[trans->counter]++;
3489
0
    }
3490
0
    if ((trans->count >= 0) &&
3491
0
        (trans->count < REGEXP_ALL_COUNTER)) {
3492
0
        if (exec->counts == NULL) {
3493
0
            exec->status = -1;
3494
0
      goto error;
3495
0
        }
3496
#ifdef DEBUG_REGEXP_EXEC
3497
        printf("resetting count %d on transition\n",
3498
               trans->count);
3499
#endif
3500
0
        exec->counts[trans->count] = 0;
3501
0
    }
3502
#ifdef DEBUG_REGEXP_EXEC
3503
    printf("entering state %d\n", trans->to);
3504
#endif
3505
0
    exec->state = comp->states[trans->to];
3506
0
    exec->transno = 0;
3507
0
    if (trans->atom != NULL) {
3508
0
        exec->index += len;
3509
0
    }
3510
0
    goto progress;
3511
0
      } else if (ret < 0) {
3512
0
    exec->status = -4;
3513
0
    break;
3514
0
      }
3515
0
  }
3516
0
  if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3517
0
rollback:
3518
      /*
3519
       * Failed to find a way out
3520
       */
3521
0
      exec->determinist = 0;
3522
#ifdef DEBUG_REGEXP_EXEC
3523
      printf("rollback from state %d on %d:%c\n", exec->state->no,
3524
             codepoint,codepoint);
3525
#endif
3526
0
      xmlFARegExecRollBack(exec);
3527
0
  }
3528
0
progress:
3529
0
  continue;
3530
0
    }
3531
0
error:
3532
0
    if (exec->rollbacks != NULL) {
3533
0
  if (exec->counts != NULL) {
3534
0
      int i;
3535
3536
0
      for (i = 0;i < exec->maxRollbacks;i++)
3537
0
    if (exec->rollbacks[i].counts != NULL)
3538
0
        xmlFree(exec->rollbacks[i].counts);
3539
0
  }
3540
0
  xmlFree(exec->rollbacks);
3541
0
    }
3542
0
    if (exec->state == NULL)
3543
0
        return(-1);
3544
0
    if (exec->counts != NULL)
3545
0
  xmlFree(exec->counts);
3546
0
    if (exec->status == 0)
3547
0
  return(1);
3548
0
    if (exec->status == -1) {
3549
0
  if (exec->nbPush > MAX_PUSH)
3550
0
      return(-1);
3551
0
  return(0);
3552
0
    }
3553
0
    return(exec->status);
3554
0
}
3555
3556
/************************************************************************
3557
 *                  *
3558
 *  Progressive interface to the verifier one atom at a time  *
3559
 *                  *
3560
 ************************************************************************/
3561
#ifdef DEBUG_ERR
3562
static void testerr(xmlRegExecCtxtPtr exec);
3563
#endif
3564
3565
/**
3566
 * xmlRegNewExecCtxt:
3567
 * @comp: a precompiled regular expression
3568
 * @callback: a callback function used for handling progresses in the
3569
 *            automata matching phase
3570
 * @data: the context data associated to the callback in this context
3571
 *
3572
 * Build a context used for progressive evaluation of a regexp.
3573
 *
3574
 * Returns the new context
3575
 */
3576
xmlRegExecCtxtPtr
3577
137k
xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
3578
137k
    xmlRegExecCtxtPtr exec;
3579
3580
137k
    if (comp == NULL)
3581
0
  return(NULL);
3582
137k
    if ((comp->compact == NULL) && (comp->states == NULL))
3583
0
        return(NULL);
3584
137k
    exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
3585
137k
    if (exec == NULL) {
3586
0
  xmlRegexpErrMemory(NULL, "creating execution context");
3587
0
  return(NULL);
3588
0
    }
3589
137k
    memset(exec, 0, sizeof(xmlRegExecCtxt));
3590
137k
    exec->inputString = NULL;
3591
137k
    exec->index = 0;
3592
137k
    exec->determinist = 1;
3593
137k
    exec->maxRollbacks = 0;
3594
137k
    exec->nbRollbacks = 0;
3595
137k
    exec->rollbacks = NULL;
3596
137k
    exec->status = 0;
3597
137k
    exec->comp = comp;
3598
137k
    if (comp->compact == NULL)
3599
2.04k
  exec->state = comp->states[0];
3600
137k
    exec->transno = 0;
3601
137k
    exec->transcount = 0;
3602
137k
    exec->callback = callback;
3603
137k
    exec->data = data;
3604
137k
    if (comp->nbCounters > 0) {
3605
        /*
3606
   * For error handling, exec->counts is allocated twice the size
3607
   * the second half is used to store the data in case of rollback
3608
   */
3609
0
  exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
3610
0
                                   * 2);
3611
0
  if (exec->counts == NULL) {
3612
0
      xmlRegexpErrMemory(NULL, "creating execution context");
3613
0
      xmlFree(exec);
3614
0
      return(NULL);
3615
0
  }
3616
0
        memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
3617
0
  exec->errCounts = &exec->counts[comp->nbCounters];
3618
137k
    } else {
3619
137k
  exec->counts = NULL;
3620
137k
  exec->errCounts = NULL;
3621
137k
    }
3622
137k
    exec->inputStackMax = 0;
3623
137k
    exec->inputStackNr = 0;
3624
137k
    exec->inputStack = NULL;
3625
137k
    exec->errStateNo = -1;
3626
137k
    exec->errString = NULL;
3627
137k
    exec->nbPush = 0;
3628
137k
    return(exec);
3629
137k
}
3630
3631
/**
3632
 * xmlRegFreeExecCtxt:
3633
 * @exec: a regular expression evaluation context
3634
 *
3635
 * Free the structures associated to a regular expression evaluation context.
3636
 */
3637
void
3638
137k
xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
3639
137k
    if (exec == NULL)
3640
0
  return;
3641
3642
137k
    if (exec->rollbacks != NULL) {
3643
333
  if (exec->counts != NULL) {
3644
0
      int i;
3645
3646
0
      for (i = 0;i < exec->maxRollbacks;i++)
3647
0
    if (exec->rollbacks[i].counts != NULL)
3648
0
        xmlFree(exec->rollbacks[i].counts);
3649
0
  }
3650
333
  xmlFree(exec->rollbacks);
3651
333
    }
3652
137k
    if (exec->counts != NULL)
3653
0
  xmlFree(exec->counts);
3654
137k
    if (exec->inputStack != NULL) {
3655
333
  int i;
3656
3657
979
  for (i = 0;i < exec->inputStackNr;i++) {
3658
646
      if (exec->inputStack[i].value != NULL)
3659
646
    xmlFree(exec->inputStack[i].value);
3660
646
  }
3661
333
  xmlFree(exec->inputStack);
3662
333
    }
3663
137k
    if (exec->errString != NULL)
3664
33.9k
        xmlFree(exec->errString);
3665
137k
    xmlFree(exec);
3666
137k
}
3667
3668
static void
3669
xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
3670
646
                      void *data) {
3671
#ifdef DEBUG_PUSH
3672
    printf("saving value: %d:%s\n", exec->inputStackNr, value);
3673
#endif
3674
646
    if (exec->inputStackMax == 0) {
3675
333
  exec->inputStackMax = 4;
3676
333
  exec->inputStack = (xmlRegInputTokenPtr)
3677
333
      xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
3678
333
  if (exec->inputStack == NULL) {
3679
0
      xmlRegexpErrMemory(NULL, "pushing input string");
3680
0
      exec->inputStackMax = 0;
3681
0
      return;
3682
0
  }
3683
333
    } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
3684
0
  xmlRegInputTokenPtr tmp;
3685
3686
0
  exec->inputStackMax *= 2;
3687
0
  tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
3688
0
      exec->inputStackMax * sizeof(xmlRegInputToken));
3689
0
  if (tmp == NULL) {
3690
0
      xmlRegexpErrMemory(NULL, "pushing input string");
3691
0
      exec->inputStackMax /= 2;
3692
0
      return;
3693
0
  }
3694
0
  exec->inputStack = tmp;
3695
0
    }
3696
646
    exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
3697
646
    exec->inputStack[exec->inputStackNr].data = data;
3698
646
    exec->inputStackNr++;
3699
646
    exec->inputStack[exec->inputStackNr].value = NULL;
3700
646
    exec->inputStack[exec->inputStackNr].data = NULL;
3701
646
}
3702
3703
/**
3704
 * xmlRegStrEqualWildcard:
3705
 * @expStr:  the string to be evaluated
3706
 * @valStr:  the validation string
3707
 *
3708
 * Checks if both strings are equal or have the same content. "*"
3709
 * can be used as a wildcard in @valStr; "|" is used as a separator of
3710
 * substrings in both @expStr and @valStr.
3711
 *
3712
 * Returns 1 if the comparison is satisfied and the number of substrings
3713
 * is equal, 0 otherwise.
3714
 */
3715
3716
static int
3717
394M
xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
3718
394M
    if (expStr == valStr) return(1);
3719
394M
    if (expStr == NULL) return(0);
3720
394M
    if (valStr == NULL) return(0);
3721
28.0G
    do {
3722
  /*
3723
  * Eval if we have a wildcard for the current item.
3724
  */
3725
28.0G
        if (*expStr != *valStr) {
3726
      /* if one of them starts with a wildcard make valStr be it */
3727
270M
      if (*valStr == '*') {
3728
0
          const xmlChar *tmp;
3729
3730
0
    tmp = valStr;
3731
0
    valStr = expStr;
3732
0
    expStr = tmp;
3733
0
      }
3734
270M
      if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
3735
0
    do {
3736
0
        if (*valStr == XML_REG_STRING_SEPARATOR)
3737
0
      break;
3738
0
        valStr++;
3739
0
    } while (*valStr != 0);
3740
0
    continue;
3741
0
      } else
3742
270M
    return(0);
3743
270M
  }
3744
27.8G
  expStr++;
3745
27.8G
  valStr++;
3746
27.8G
    } while (*valStr != 0);
3747
123M
    if (*expStr != 0)
3748
28.8M
  return (0);
3749
94.4M
    else
3750
94.4M
  return (1);
3751
123M
}
3752
3753
/**
3754
 * xmlRegCompactPushString:
3755
 * @exec: a regexp execution context
3756
 * @comp:  the precompiled exec with a compact table
3757
 * @value: a string token input
3758
 * @data: data associated to the token to reuse in callbacks
3759
 *
3760
 * Push one input token in the execution context
3761
 *
3762
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3763
 *     a negative value in case of error.
3764
 */
3765
static int
3766
xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
3767
                  xmlRegexpPtr comp,
3768
                  const xmlChar *value,
3769
277k
                  void *data) {
3770
277k
    int state = exec->index;
3771
277k
    int i, target;
3772
3773
277k
    if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
3774
0
  return(-1);
3775
3776
277k
    if (value == NULL) {
3777
  /*
3778
   * are we at a final state ?
3779
   */
3780
96.2k
  if (comp->compact[state * (comp->nbstrings + 1)] ==
3781
96.2k
            XML_REGEXP_FINAL_STATE)
3782
90.6k
      return(1);
3783
5.68k
  return(0);
3784
96.2k
    }
3785
3786
#ifdef DEBUG_PUSH
3787
    printf("value pushed: %s\n", value);
3788
#endif
3789
3790
    /*
3791
     * Examine all outside transitions from current state
3792
     */
3793
817k
    for (i = 0;i < comp->nbstrings;i++) {
3794
785k
  target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3795
785k
  if ((target > 0) && (target <= comp->nbstates)) {
3796
634k
      target--; /* to avoid 0 */
3797
634k
      if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
3798
149k
    exec->index = target;
3799
149k
    if ((exec->callback != NULL) && (comp->transdata != NULL)) {
3800
0
        exec->callback(exec->data, value,
3801
0
        comp->transdata[state * comp->nbstrings + i], data);
3802
0
    }
3803
#ifdef DEBUG_PUSH
3804
    printf("entering state %d\n", target);
3805
#endif
3806
149k
    if (comp->compact[target * (comp->nbstrings + 1)] ==
3807
149k
        XML_REGEXP_SINK_STATE)
3808
0
        goto error;
3809
3810
149k
    if (comp->compact[target * (comp->nbstrings + 1)] ==
3811
149k
        XML_REGEXP_FINAL_STATE)
3812
131k
        return(1);
3813
18.2k
    return(0);
3814
149k
      }
3815
634k
  }
3816
785k
    }
3817
    /*
3818
     * Failed to find an exit transition out from current state for the
3819
     * current token
3820
     */
3821
#ifdef DEBUG_PUSH
3822
    printf("failed to find a transition for %s on state %d\n", value, state);
3823
#endif
3824
31.9k
error:
3825
31.9k
    if (exec->errString != NULL)
3826
0
        xmlFree(exec->errString);
3827
31.9k
    exec->errString = xmlStrdup(value);
3828
31.9k
    exec->errStateNo = state;
3829
31.9k
    exec->status = -1;
3830
#ifdef DEBUG_ERR
3831
    testerr(exec);
3832
#endif
3833
31.9k
    return(-1);
3834
181k
}
3835
3836
/**
3837
 * xmlRegExecPushStringInternal:
3838
 * @exec: a regexp execution context or NULL to indicate the end
3839
 * @value: a string token input
3840
 * @data: data associated to the token to reuse in callbacks
3841
 * @compound: value was assembled from 2 strings
3842
 *
3843
 * Push one input token in the execution context
3844
 *
3845
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3846
 *     a negative value in case of error.
3847
 */
3848
static int
3849
xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
3850
430k
                       void *data, int compound) {
3851
430k
    xmlRegTransPtr trans;
3852
430k
    xmlRegAtomPtr atom;
3853
430k
    int ret;
3854
430k
    int final = 0;
3855
430k
    int progress = 1;
3856
3857
430k
    if (exec == NULL)
3858
0
  return(-1);
3859
430k
    if (exec->comp == NULL)
3860
0
  return(-1);
3861
430k
    if (exec->status != 0)
3862
150k
  return(exec->status);
3863
3864
280k
    if (exec->comp->compact != NULL)
3865
277k
  return(xmlRegCompactPushString(exec, exec->comp, value, data));
3866
3867
3.06k
    if (value == NULL) {
3868
71
        if (exec->state->type == XML_REGEXP_FINAL_STATE)
3869
18
      return(1);
3870
53
  final = 1;
3871
53
    }
3872
3873
#ifdef DEBUG_PUSH
3874
    printf("value pushed: %s\n", value);
3875
#endif
3876
    /*
3877
     * If we have an active rollback stack push the new value there
3878
     * and get back to where we were left
3879
     */
3880
3.04k
    if ((value != NULL) && (exec->inputStackNr > 0)) {
3881
313
  xmlFARegExecSaveInputString(exec, value, data);
3882
313
  value = exec->inputStack[exec->index].value;
3883
313
  data = exec->inputStack[exec->index].data;
3884
#ifdef DEBUG_PUSH
3885
  printf("value loaded: %s\n", value);
3886
#endif
3887
313
    }
3888
3889
6.43k
    while ((exec->status == 0) &&
3890
6.43k
     ((value != NULL) ||
3891
4.40k
      ((final == 1) &&
3892
3.38k
       (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3893
3894
  /*
3895
   * End of input on non-terminal state, rollback, however we may
3896
   * still have epsilon like transition for counted transitions
3897
   * on counters, in that case don't break too early.
3898
   */
3899
3.38k
  if ((value == NULL) && (exec->counts == NULL))
3900
61
      goto rollback;
3901
3902
3.32k
  exec->transcount = 0;
3903
152k
  for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3904
150k
      trans = &exec->state->trans[exec->transno];
3905
150k
      if (trans->to < 0)
3906
3.11k
    continue;
3907
147k
      atom = trans->atom;
3908
147k
      ret = 0;
3909
147k
      if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3910
0
    int i;
3911
0
    int count;
3912
0
    xmlRegTransPtr t;
3913
0
    xmlRegCounterPtr counter;
3914
3915
0
    ret = 0;
3916
3917
#ifdef DEBUG_PUSH
3918
    printf("testing all lax %d\n", trans->count);
3919
#endif
3920
    /*
3921
     * Check all counted transitions from the current state
3922
     */
3923
0
    if ((value == NULL) && (final)) {
3924
0
        ret = 1;
3925
0
    } else if (value != NULL) {
3926
0
        for (i = 0;i < exec->state->nbTrans;i++) {
3927
0
      t = &exec->state->trans[i];
3928
0
      if ((t->counter < 0) || (t == trans))
3929
0
          continue;
3930
0
      counter = &exec->comp->counters[t->counter];
3931
0
      count = exec->counts[t->counter];
3932
0
      if ((count < counter->max) &&
3933
0
                (t->atom != NULL) &&
3934
0
          (xmlStrEqual(value, t->atom->valuep))) {
3935
0
          ret = 0;
3936
0
          break;
3937
0
      }
3938
0
      if ((count >= counter->min) &&
3939
0
          (count < counter->max) &&
3940
0
          (t->atom != NULL) &&
3941
0
          (xmlStrEqual(value, t->atom->valuep))) {
3942
0
          ret = 1;
3943
0
          break;
3944
0
      }
3945
0
        }
3946
0
    }
3947
147k
      } else if (trans->count == REGEXP_ALL_COUNTER) {
3948
0
    int i;
3949
0
    int count;
3950
0
    xmlRegTransPtr t;
3951
0
    xmlRegCounterPtr counter;
3952
3953
0
    ret = 1;
3954
3955
#ifdef DEBUG_PUSH
3956
    printf("testing all %d\n", trans->count);
3957
#endif
3958
    /*
3959
     * Check all counted transitions from the current state
3960
     */
3961
0
    for (i = 0;i < exec->state->nbTrans;i++) {
3962
0
                    t = &exec->state->trans[i];
3963
0
        if ((t->counter < 0) || (t == trans))
3964
0
      continue;
3965
0
                    counter = &exec->comp->counters[t->counter];
3966
0
        count = exec->counts[t->counter];
3967
0
        if ((count < counter->min) || (count > counter->max)) {
3968
0
      ret = 0;
3969
0
      break;
3970
0
        }
3971
0
    }
3972
147k
      } else if (trans->count >= 0) {
3973
0
    int count;
3974
0
    xmlRegCounterPtr counter;
3975
3976
    /*
3977
     * A counted transition.
3978
     */
3979
3980
0
    count = exec->counts[trans->count];
3981
0
    counter = &exec->comp->counters[trans->count];
3982
#ifdef DEBUG_PUSH
3983
    printf("testing count %d: val %d, min %d, max %d\n",
3984
           trans->count, count, counter->min,  counter->max);
3985
#endif
3986
0
    ret = ((count >= counter->min) && (count <= counter->max));
3987
147k
      } else if (atom == NULL) {
3988
0
    fprintf(stderr, "epsilon transition left at runtime\n");
3989
0
    exec->status = -2;
3990
0
    break;
3991
147k
      } else if (value != NULL) {
3992
147k
    ret = xmlRegStrEqualWildcard(atom->valuep, value);
3993
147k
    if (atom->neg) {
3994
0
        ret = !ret;
3995
0
        if (!compound)
3996
0
            ret = 0;
3997
0
    }
3998
147k
    if ((ret == 1) && (trans->counter >= 0)) {
3999
0
        xmlRegCounterPtr counter;
4000
0
        int count;
4001
4002
0
        count = exec->counts[trans->counter];
4003
0
        counter = &exec->comp->counters[trans->counter];
4004
0
        if (count >= counter->max)
4005
0
      ret = 0;
4006
0
    }
4007
4008
147k
    if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4009
0
        xmlRegStatePtr to = exec->comp->states[trans->to];
4010
4011
        /*
4012
         * this is a multiple input sequence
4013
         */
4014
0
        if (exec->state->nbTrans > exec->transno + 1) {
4015
0
      if (exec->inputStackNr <= 0) {
4016
0
          xmlFARegExecSaveInputString(exec, value, data);
4017
0
      }
4018
0
      xmlFARegExecSave(exec);
4019
0
        }
4020
0
        exec->transcount = 1;
4021
0
        do {
4022
      /*
4023
       * Try to progress as much as possible on the input
4024
       */
4025
0
      if (exec->transcount == atom->max) {
4026
0
          break;
4027
0
      }
4028
0
      exec->index++;
4029
0
      value = exec->inputStack[exec->index].value;
4030
0
      data = exec->inputStack[exec->index].data;
4031
#ifdef DEBUG_PUSH
4032
      printf("value loaded: %s\n", value);
4033
#endif
4034
4035
      /*
4036
       * End of input: stop here
4037
       */
4038
0
      if (value == NULL) {
4039
0
          exec->index --;
4040
0
          break;
4041
0
      }
4042
0
      if (exec->transcount >= atom->min) {
4043
0
          int transno = exec->transno;
4044
0
          xmlRegStatePtr state = exec->state;
4045
4046
          /*
4047
           * The transition is acceptable save it
4048
           */
4049
0
          exec->transno = -1; /* trick */
4050
0
          exec->state = to;
4051
0
          if (exec->inputStackNr <= 0) {
4052
0
        xmlFARegExecSaveInputString(exec, value, data);
4053
0
          }
4054
0
          xmlFARegExecSave(exec);
4055
0
          exec->transno = transno;
4056
0
          exec->state = state;
4057
0
      }
4058
0
      ret = xmlStrEqual(value, atom->valuep);
4059
0
      exec->transcount++;
4060
0
        } while (ret == 1);
4061
0
        if (exec->transcount < atom->min)
4062
0
      ret = 0;
4063
4064
        /*
4065
         * If the last check failed but one transition was found
4066
         * possible, rollback
4067
         */
4068
0
        if (ret < 0)
4069
0
      ret = 0;
4070
0
        if (ret == 0) {
4071
0
      goto rollback;
4072
0
        }
4073
0
    }
4074
147k
      }
4075
147k
      if (ret == 1) {
4076
1.04k
    if ((exec->callback != NULL) && (atom != NULL) &&
4077
1.04k
      (data != NULL)) {
4078
0
        exec->callback(exec->data, atom->valuep,
4079
0
                 atom->data, data);
4080
0
    }
4081
1.04k
    if (exec->state->nbTrans > exec->transno + 1) {
4082
334
        if (exec->inputStackNr <= 0) {
4083
333
      xmlFARegExecSaveInputString(exec, value, data);
4084
333
        }
4085
334
        xmlFARegExecSave(exec);
4086
334
    }
4087
1.04k
    if (trans->counter >= 0) {
4088
#ifdef DEBUG_PUSH
4089
        printf("Increasing count %d\n", trans->counter);
4090
#endif
4091
0
        exec->counts[trans->counter]++;
4092
0
    }
4093
1.04k
    if ((trans->count >= 0) &&
4094
1.04k
        (trans->count < REGEXP_ALL_COUNTER)) {
4095
#ifdef DEBUG_REGEXP_EXEC
4096
        printf("resetting count %d on transition\n",
4097
               trans->count);
4098
#endif
4099
0
        exec->counts[trans->count] = 0;
4100
0
    }
4101
#ifdef DEBUG_PUSH
4102
    printf("entering state %d\n", trans->to);
4103
#endif
4104
1.04k
                if ((exec->comp->states[trans->to] != NULL) &&
4105
1.04k
        (exec->comp->states[trans->to]->type ==
4106
1.04k
         XML_REGEXP_SINK_STATE)) {
4107
        /*
4108
         * entering a sink state, save the current state as error
4109
         * state.
4110
         */
4111
0
        if (exec->errString != NULL)
4112
0
      xmlFree(exec->errString);
4113
0
        exec->errString = xmlStrdup(value);
4114
0
        exec->errState = exec->state;
4115
0
        memcpy(exec->errCounts, exec->counts,
4116
0
         exec->comp->nbCounters * sizeof(int));
4117
0
    }
4118
1.04k
    exec->state = exec->comp->states[trans->to];
4119
1.04k
    exec->transno = 0;
4120
1.04k
    if (trans->atom != NULL) {
4121
1.04k
        if (exec->inputStack != NULL) {
4122
364
      exec->index++;
4123
364
      if (exec->index < exec->inputStackNr) {
4124
18
          value = exec->inputStack[exec->index].value;
4125
18
          data = exec->inputStack[exec->index].data;
4126
#ifdef DEBUG_PUSH
4127
          printf("value loaded: %s\n", value);
4128
#endif
4129
346
      } else {
4130
346
          value = NULL;
4131
346
          data = NULL;
4132
#ifdef DEBUG_PUSH
4133
          printf("end of input\n");
4134
#endif
4135
346
      }
4136
676
        } else {
4137
676
      value = NULL;
4138
676
      data = NULL;
4139
#ifdef DEBUG_PUSH
4140
      printf("end of input\n");
4141
#endif
4142
676
        }
4143
1.04k
    }
4144
1.04k
    goto progress;
4145
146k
      } else if (ret < 0) {
4146
0
    exec->status = -4;
4147
0
    break;
4148
0
      }
4149
147k
  }
4150
2.28k
  if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4151
2.34k
rollback:
4152
            /*
4153
       * if we didn't yet rollback on the current input
4154
       * store the current state as the error state.
4155
       */
4156
2.34k
      if ((progress) && (exec->state != NULL) &&
4157
2.34k
          (exec->state->type != XML_REGEXP_SINK_STATE)) {
4158
2.05k
          progress = 0;
4159
2.05k
    if (exec->errString != NULL)
4160
16
        xmlFree(exec->errString);
4161
2.05k
    exec->errString = xmlStrdup(value);
4162
2.05k
    exec->errState = exec->state;
4163
2.05k
                if (exec->comp->nbCounters)
4164
0
                    memcpy(exec->errCounts, exec->counts,
4165
0
                           exec->comp->nbCounters * sizeof(int));
4166
2.05k
      }
4167
4168
      /*
4169
       * Failed to find a way out
4170
       */
4171
2.34k
      exec->determinist = 0;
4172
2.34k
      xmlFARegExecRollBack(exec);
4173
2.34k
      if ((exec->inputStack != NULL ) && (exec->status == 0)) {
4174
319
    value = exec->inputStack[exec->index].value;
4175
319
    data = exec->inputStack[exec->index].data;
4176
#ifdef DEBUG_PUSH
4177
    printf("value loaded: %s\n", value);
4178
#endif
4179
319
      }
4180
2.34k
  }
4181
2.34k
  continue;
4182
2.34k
progress:
4183
1.04k
        progress = 1;
4184
1.04k
  continue;
4185
2.28k
    }
4186
3.04k
    if (exec->status == 0) {
4187
1.01k
        return(exec->state->type == XML_REGEXP_FINAL_STATE);
4188
1.01k
    }
4189
#ifdef DEBUG_ERR
4190
    if (exec->status < 0) {
4191
  testerr(exec);
4192
    }
4193
#endif
4194
2.03k
    return(exec->status);
4195
3.04k
}
4196
4197
/**
4198
 * xmlRegExecPushString:
4199
 * @exec: a regexp execution context or NULL to indicate the end
4200
 * @value: a string token input
4201
 * @data: data associated to the token to reuse in callbacks
4202
 *
4203
 * Push one input token in the execution context
4204
 *
4205
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
4206
 *     a negative value in case of error.
4207
 */
4208
int
4209
xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
4210
430k
               void *data) {
4211
430k
    return(xmlRegExecPushStringInternal(exec, value, data, 0));
4212
430k
}
4213
4214
/**
4215
 * xmlRegExecPushString2:
4216
 * @exec: a regexp execution context or NULL to indicate the end
4217
 * @value: the first string token input
4218
 * @value2: the second string token input
4219
 * @data: data associated to the token to reuse in callbacks
4220
 *
4221
 * Push one input token in the execution context
4222
 *
4223
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
4224
 *     a negative value in case of error.
4225
 */
4226
int
4227
xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
4228
0
                      const xmlChar *value2, void *data) {
4229
0
    xmlChar buf[150];
4230
0
    int lenn, lenp, ret;
4231
0
    xmlChar *str;
4232
4233
0
    if (exec == NULL)
4234
0
  return(-1);
4235
0
    if (exec->comp == NULL)
4236
0
  return(-1);
4237
0
    if (exec->status != 0)
4238
0
  return(exec->status);
4239
4240
0
    if (value2 == NULL)
4241
0
        return(xmlRegExecPushString(exec, value, data));
4242
4243
0
    lenn = strlen((char *) value2);
4244
0
    lenp = strlen((char *) value);
4245
4246
0
    if (150 < lenn + lenp + 2) {
4247
0
  str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
4248
0
  if (str == NULL) {
4249
0
      exec->status = -1;
4250
0
      return(-1);
4251
0
  }
4252
0
    } else {
4253
0
  str = buf;
4254
0
    }
4255
0
    memcpy(&str[0], value, lenp);
4256
0
    str[lenp] = XML_REG_STRING_SEPARATOR;
4257
0
    memcpy(&str[lenp + 1], value2, lenn);
4258
0
    str[lenn + lenp + 1] = 0;
4259
4260
0
    if (exec->comp->compact != NULL)
4261
0
  ret = xmlRegCompactPushString(exec, exec->comp, str, data);
4262
0
    else
4263
0
        ret = xmlRegExecPushStringInternal(exec, str, data, 1);
4264
4265
0
    if (str != buf)
4266
0
        xmlFree(str);
4267
0
    return(ret);
4268
0
}
4269
4270
/**
4271
 * xmlRegExecGetValues:
4272
 * @exec: a regexp execution context
4273
 * @err: error extraction or normal one
4274
 * @nbval: pointer to the number of accepted values IN/OUT
4275
 * @nbneg: return number of negative transitions
4276
 * @values: pointer to the array of acceptable values
4277
 * @terminal: return value if this was a terminal state
4278
 *
4279
 * Extract information from the regexp execution, internal routine to
4280
 * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
4281
 *
4282
 * Returns: 0 in case of success or -1 in case of error.
4283
 */
4284
static int
4285
xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
4286
                    int *nbval, int *nbneg,
4287
0
        xmlChar **values, int *terminal) {
4288
0
    int maxval;
4289
0
    int nb = 0;
4290
4291
0
    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
4292
0
        (values == NULL) || (*nbval <= 0))
4293
0
        return(-1);
4294
4295
0
    maxval = *nbval;
4296
0
    *nbval = 0;
4297
0
    *nbneg = 0;
4298
0
    if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
4299
0
        xmlRegexpPtr comp;
4300
0
  int target, i, state;
4301
4302
0
        comp = exec->comp;
4303
4304
0
  if (err) {
4305
0
      if (exec->errStateNo == -1) return(-1);
4306
0
      state = exec->errStateNo;
4307
0
  } else {
4308
0
      state = exec->index;
4309
0
  }
4310
0
  if (terminal != NULL) {
4311
0
      if (comp->compact[state * (comp->nbstrings + 1)] ==
4312
0
          XML_REGEXP_FINAL_STATE)
4313
0
    *terminal = 1;
4314
0
      else
4315
0
    *terminal = 0;
4316
0
  }
4317
0
  for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4318
0
      target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4319
0
      if ((target > 0) && (target <= comp->nbstates) &&
4320
0
          (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
4321
0
     XML_REGEXP_SINK_STATE)) {
4322
0
          values[nb++] = comp->stringMap[i];
4323
0
    (*nbval)++;
4324
0
      }
4325
0
  }
4326
0
  for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4327
0
      target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4328
0
      if ((target > 0) && (target <= comp->nbstates) &&
4329
0
          (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
4330
0
     XML_REGEXP_SINK_STATE)) {
4331
0
          values[nb++] = comp->stringMap[i];
4332
0
    (*nbneg)++;
4333
0
      }
4334
0
  }
4335
0
    } else {
4336
0
        int transno;
4337
0
  xmlRegTransPtr trans;
4338
0
  xmlRegAtomPtr atom;
4339
0
  xmlRegStatePtr state;
4340
4341
0
  if (terminal != NULL) {
4342
0
      if (exec->state->type == XML_REGEXP_FINAL_STATE)
4343
0
    *terminal = 1;
4344
0
      else
4345
0
    *terminal = 0;
4346
0
  }
4347
4348
0
  if (err) {
4349
0
      if (exec->errState == NULL) return(-1);
4350
0
      state = exec->errState;
4351
0
  } else {
4352
0
      if (exec->state == NULL) return(-1);
4353
0
      state = exec->state;
4354
0
  }
4355
0
  for (transno = 0;
4356
0
       (transno < state->nbTrans) && (nb < maxval);
4357
0
       transno++) {
4358
0
      trans = &state->trans[transno];
4359
0
      if (trans->to < 0)
4360
0
    continue;
4361
0
      atom = trans->atom;
4362
0
      if ((atom == NULL) || (atom->valuep == NULL))
4363
0
    continue;
4364
0
      if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4365
          /* this should not be reached but ... */
4366
0
          TODO;
4367
0
      } else if (trans->count == REGEXP_ALL_COUNTER) {
4368
          /* this should not be reached but ... */
4369
0
          TODO;
4370
0
      } else if (trans->counter >= 0) {
4371
0
    xmlRegCounterPtr counter = NULL;
4372
0
    int count;
4373
4374
0
    if (err)
4375
0
        count = exec->errCounts[trans->counter];
4376
0
    else
4377
0
        count = exec->counts[trans->counter];
4378
0
    if (exec->comp != NULL)
4379
0
        counter = &exec->comp->counters[trans->counter];
4380
0
    if ((counter == NULL) || (count < counter->max)) {
4381
0
        if (atom->neg)
4382
0
      values[nb++] = (xmlChar *) atom->valuep2;
4383
0
        else
4384
0
      values[nb++] = (xmlChar *) atom->valuep;
4385
0
        (*nbval)++;
4386
0
    }
4387
0
      } else {
4388
0
                if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NULL) &&
4389
0
        (exec->comp->states[trans->to]->type !=
4390
0
         XML_REGEXP_SINK_STATE)) {
4391
0
        if (atom->neg)
4392
0
      values[nb++] = (xmlChar *) atom->valuep2;
4393
0
        else
4394
0
      values[nb++] = (xmlChar *) atom->valuep;
4395
0
        (*nbval)++;
4396
0
    }
4397
0
      }
4398
0
  }
4399
0
  for (transno = 0;
4400
0
       (transno < state->nbTrans) && (nb < maxval);
4401
0
       transno++) {
4402
0
      trans = &state->trans[transno];
4403
0
      if (trans->to < 0)
4404
0
    continue;
4405
0
      atom = trans->atom;
4406
0
      if ((atom == NULL) || (atom->valuep == NULL))
4407
0
    continue;
4408
0
      if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4409
0
          continue;
4410
0
      } else if (trans->count == REGEXP_ALL_COUNTER) {
4411
0
          continue;
4412
0
      } else if (trans->counter >= 0) {
4413
0
          continue;
4414
0
      } else {
4415
0
                if ((exec->comp->states[trans->to] != NULL) &&
4416
0
        (exec->comp->states[trans->to]->type ==
4417
0
         XML_REGEXP_SINK_STATE)) {
4418
0
        if (atom->neg)
4419
0
      values[nb++] = (xmlChar *) atom->valuep2;
4420
0
        else
4421
0
      values[nb++] = (xmlChar *) atom->valuep;
4422
0
        (*nbneg)++;
4423
0
    }
4424
0
      }
4425
0
  }
4426
0
    }
4427
0
    return(0);
4428
0
}
4429
4430
/**
4431
 * xmlRegExecNextValues:
4432
 * @exec: a regexp execution context
4433
 * @nbval: pointer to the number of accepted values IN/OUT
4434
 * @nbneg: return number of negative transitions
4435
 * @values: pointer to the array of acceptable values
4436
 * @terminal: return value if this was a terminal state
4437
 *
4438
 * Extract information from the regexp execution,
4439
 * the parameter @values must point to an array of @nbval string pointers
4440
 * on return nbval will contain the number of possible strings in that
4441
 * state and the @values array will be updated with them. The string values
4442
 * returned will be freed with the @exec context and don't need to be
4443
 * deallocated.
4444
 *
4445
 * Returns: 0 in case of success or -1 in case of error.
4446
 */
4447
int
4448
xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
4449
0
                     xmlChar **values, int *terminal) {
4450
0
    return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
4451
0
}
4452
4453
/**
4454
 * xmlRegExecErrInfo:
4455
 * @exec: a regexp execution context generating an error
4456
 * @string: return value for the error string
4457
 * @nbval: pointer to the number of accepted values IN/OUT
4458
 * @nbneg: return number of negative transitions
4459
 * @values: pointer to the array of acceptable values
4460
 * @terminal: return value if this was a terminal state
4461
 *
4462
 * Extract error information from the regexp execution, the parameter
4463
 * @string will be updated with the value pushed and not accepted,
4464
 * the parameter @values must point to an array of @nbval string pointers
4465
 * on return nbval will contain the number of possible strings in that
4466
 * state and the @values array will be updated with them. The string values
4467
 * returned will be freed with the @exec context and don't need to be
4468
 * deallocated.
4469
 *
4470
 * Returns: 0 in case of success or -1 in case of error.
4471
 */
4472
int
4473
xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
4474
0
                  int *nbval, int *nbneg, xmlChar **values, int *terminal) {
4475
0
    if (exec == NULL)
4476
0
        return(-1);
4477
0
    if (string != NULL) {
4478
0
        if (exec->status != 0)
4479
0
      *string = exec->errString;
4480
0
  else
4481
0
      *string = NULL;
4482
0
    }
4483
0
    return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
4484
0
}
4485
4486
#ifdef DEBUG_ERR
/*
 * Debug-only helper (compiled when DEBUG_ERR is defined): queries the
 * error information of @exec with room for five values and discards
 * the result.
 */
static void testerr(xmlRegExecCtxtPtr exec) {
    xmlChar *accepted[5];
    const xmlChar *rejected;
    int capacity = 5;
    int negatives;
    int isTerminal;

    xmlRegExecErrInfo(exec, &rejected, &capacity, &negatives, accepted,
                      &isTerminal);
}
#endif
4496
4497
#if 0
/*
 * xmlRegExecPushChar:
 *
 * Disabled code: this whole definition sits inside "#if 0" and is never
 * compiled. It loops over the transitions of the current state, testing
 * counted transitions against exec->counts and atoms against the
 * character decoded from exec->inputString at exec->index, saving
 * alternatives with xmlFARegExecSave and backtracking with
 * xmlFARegExecRollBack.
 *
 * NOTE(review): the function is declared to return int but has no
 * return statement after the main loop, and the UCS parameter is not
 * referenced in the body; both would need attention before this code
 * could be re-enabled.
 */
4498
static int
4499
xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
4500
    xmlRegTransPtr trans;
4501
    xmlRegAtomPtr atom;
4502
    int ret;
4503
    int codepoint, len;
4504
4505
    if (exec == NULL)
4506
  return(-1);
4507
    if (exec->status != 0)
4508
  return(exec->status);
4509
4510
    while ((exec->status == 0) &&
4511
     ((exec->inputString[exec->index] != 0) ||
4512
      (exec->state->type != XML_REGEXP_FINAL_STATE))) {
4513
4514
  /*
4515
   * End of input on non-terminal state, rollback, however we may
4516
   * still have epsilon like transition for counted transitions
4517
   * on counters, in that case don't break too early.
4518
   */
4519
  if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
4520
      goto rollback;
4521
4522
  exec->transcount = 0;
4523
  for (;exec->transno < exec->state->nbTrans;exec->transno++) {
4524
      trans = &exec->state->trans[exec->transno];
4525
      if (trans->to < 0)
4526
    continue;
4527
      atom = trans->atom;
4528
      ret = 0;
4529
      if (trans->count >= 0) {
4530
    int count;
4531
    xmlRegCounterPtr counter;
4532
4533
    /*
4534
     * A counted transition.
4535
     */
4536
4537
    count = exec->counts[trans->count];
4538
    counter = &exec->comp->counters[trans->count];
4539
#ifdef DEBUG_REGEXP_EXEC
4540
    printf("testing count %d: val %d, min %d, max %d\n",
4541
           trans->count, count, counter->min,  counter->max);
4542
#endif
4543
    ret = ((count >= counter->min) && (count <= counter->max));
4544
      } else if (atom == NULL) {
4545
    fprintf(stderr, "epsilon transition left at runtime\n");
4546
    exec->status = -2;
4547
    break;
4548
      } else if (exec->inputString[exec->index] != 0) {
4549
                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
4550
    ret = xmlRegCheckCharacter(atom, codepoint);
4551
    if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4552
        xmlRegStatePtr to = exec->comp->states[trans->to];
4553
4554
        /*
4555
         * this is a multiple input sequence
4556
         */
4557
        if (exec->state->nbTrans > exec->transno + 1) {
4558
      xmlFARegExecSave(exec);
4559
        }
4560
        exec->transcount = 1;
4561
        do {
4562
      /*
4563
       * Try to progress as much as possible on the input
4564
       */
4565
      if (exec->transcount == atom->max) {
4566
          break;
4567
      }
4568
      exec->index += len;
4569
      /*
4570
       * End of input: stop here
4571
       */
4572
      if (exec->inputString[exec->index] == 0) {
4573
          exec->index -= len;
4574
          break;
4575
      }
4576
      if (exec->transcount >= atom->min) {
4577
          int transno = exec->transno;
4578
          xmlRegStatePtr state = exec->state;
4579
4580
          /*
4581
           * The transition is acceptable save it
4582
           */
4583
          exec->transno = -1; /* trick */
4584
          exec->state = to;
4585
          xmlFARegExecSave(exec);
4586
          exec->transno = transno;
4587
          exec->state = state;
4588
      }
4589
      codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
4590
                      len);
4591
      ret = xmlRegCheckCharacter(atom, codepoint);
4592
      exec->transcount++;
4593
        } while (ret == 1);
4594
        if (exec->transcount < atom->min)
4595
      ret = 0;
4596
4597
        /*
4598
         * If the last check failed but one transition was found
4599
         * possible, rollback
4600
         */
4601
        if (ret < 0)
4602
      ret = 0;
4603
        if (ret == 0) {
4604
      goto rollback;
4605
        }
4606
    }
4607
      }
4608
      if (ret == 1) {
4609
    if (exec->state->nbTrans > exec->transno + 1) {
4610
        xmlFARegExecSave(exec);
4611
    }
4612
    /*
4613
     * restart count for expressions like this ((abc){2})*
4614
     */
4615
    if (trans->count >= 0) {
4616
#ifdef DEBUG_REGEXP_EXEC
4617
        printf("Reset count %d\n", trans->count);
4618
#endif
4619
        exec->counts[trans->count] = 0;
4620
    }
4621
    if (trans->counter >= 0) {
4622
#ifdef DEBUG_REGEXP_EXEC
4623
        printf("Increasing count %d\n", trans->counter);
4624
#endif
4625
        exec->counts[trans->counter]++;
4626
    }
4627
#ifdef DEBUG_REGEXP_EXEC
4628
    printf("entering state %d\n", trans->to);
4629
#endif
4630
    exec->state = exec->comp->states[trans->to];
4631
    exec->transno = 0;
4632
    if (trans->atom != NULL) {
4633
        exec->index += len;
4634
    }
4635
    goto progress;
4636
      } else if (ret < 0) {
4637
    exec->status = -4;
4638
    break;
4639
      }
4640
  }
4641
  if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4642
rollback:
4643
      /*
4644
       * Failed to find a way out
4645
       */
4646
      exec->determinist = 0;
4647
      xmlFARegExecRollBack(exec);
4648
  }
4649
progress:
4650
  continue;
4651
    }
4652
}
4653
#endif
4654
/************************************************************************
4655
 *                  *
4656
 *  Parser for the Schemas Datatype Regular Expressions   *
4657
 *  http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs  *
4658
 *                  *
4659
 ************************************************************************/
4660
4661
/**
4662
 * xmlFAIsChar:
4663
 * @ctxt:  a regexp parser context
4664
 *
4665
 * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
4666
 */
4667
static int
4668
0
xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
4669
0
    int cur;
4670
0
    int len;
4671
4672
0
    cur = CUR_SCHAR(ctxt->cur, len);
4673
0
    if ((cur == '.') || (cur == '\\') || (cur == '?') ||
4674
0
  (cur == '*') || (cur == '+') || (cur == '(') ||
4675
0
  (cur == ')') || (cur == '|') || (cur == 0x5B) ||
4676
0
  (cur == 0x5D) || (cur == 0))
4677
0
  return(-1);
4678
0
    return(cur);
4679
0
}
4680
4681
/**
4682
 * xmlFAParseCharProp:
4683
 * @ctxt:  a regexp parser context
4684
 *
4685
 * [27]   charProp   ::=   IsCategory | IsBlock
4686
 * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
4687
 *                       Separators | Symbols | Others
4688
 * [29]   Letters   ::=   'L' [ultmo]?
4689
 * [30]   Marks   ::=   'M' [nce]?
4690
 * [31]   Numbers   ::=   'N' [dlo]?
4691
 * [32]   Punctuation   ::=   'P' [cdseifo]?
4692
 * [33]   Separators   ::=   'Z' [slp]?
4693
 * [34]   Symbols   ::=   'S' [mcko]?
4694
 * [35]   Others   ::=   'C' [cfon]?
4695
 * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
4696
 */
4697
static void
4698
0
xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
4699
0
    int cur;
4700
0
    xmlRegAtomType type = (xmlRegAtomType) 0;
4701
0
    xmlChar *blockName = NULL;
4702
4703
0
    cur = CUR;
4704
0
    if (cur == 'L') {
4705
0
  NEXT;
4706
0
  cur = CUR;
4707
0
  if (cur == 'u') {
4708
0
      NEXT;
4709
0
      type = XML_REGEXP_LETTER_UPPERCASE;
4710
0
  } else if (cur == 'l') {
4711
0
      NEXT;
4712
0
      type = XML_REGEXP_LETTER_LOWERCASE;
4713
0
  } else if (cur == 't') {
4714
0
      NEXT;
4715
0
      type = XML_REGEXP_LETTER_TITLECASE;
4716
0
  } else if (cur == 'm') {
4717
0
      NEXT;
4718
0
      type = XML_REGEXP_LETTER_MODIFIER;
4719
0
  } else if (cur == 'o') {
4720
0
      NEXT;
4721
0
      type = XML_REGEXP_LETTER_OTHERS;
4722
0
  } else {
4723
0
      type = XML_REGEXP_LETTER;
4724
0
  }
4725
0
    } else if (cur == 'M') {
4726
0
  NEXT;
4727
0
  cur = CUR;
4728
0
  if (cur == 'n') {
4729
0
      NEXT;
4730
      /* nonspacing */
4731
0
      type = XML_REGEXP_MARK_NONSPACING;
4732
0
  } else if (cur == 'c') {
4733
0
      NEXT;
4734
      /* spacing combining */
4735
0
      type = XML_REGEXP_MARK_SPACECOMBINING;
4736
0
  } else if (cur == 'e') {
4737
0
      NEXT;
4738
      /* enclosing */
4739
0
      type = XML_REGEXP_MARK_ENCLOSING;
4740
0
  } else {
4741
      /* all marks */
4742
0
      type = XML_REGEXP_MARK;
4743
0
  }
4744
0
    } else if (cur == 'N') {
4745
0
  NEXT;
4746
0
  cur = CUR;
4747
0
  if (cur == 'd') {
4748
0
      NEXT;
4749
      /* digital */
4750
0
      type = XML_REGEXP_NUMBER_DECIMAL;
4751
0
  } else if (cur == 'l') {
4752
0
      NEXT;
4753
      /* letter */
4754
0
      type = XML_REGEXP_NUMBER_LETTER;
4755
0
  } else if (cur == 'o') {
4756
0
      NEXT;
4757
      /* other */
4758
0
      type = XML_REGEXP_NUMBER_OTHERS;
4759
0
  } else {
4760
      /* all numbers */
4761
0
      type = XML_REGEXP_NUMBER;
4762
0
  }
4763
0
    } else if (cur == 'P') {
4764
0
  NEXT;
4765
0
  cur = CUR;
4766
0
  if (cur == 'c') {
4767
0
      NEXT;
4768
      /* connector */
4769
0
      type = XML_REGEXP_PUNCT_CONNECTOR;
4770
0
  } else if (cur == 'd') {
4771
0
      NEXT;
4772
      /* dash */
4773
0
      type = XML_REGEXP_PUNCT_DASH;
4774
0
  } else if (cur == 's') {
4775
0
      NEXT;
4776
      /* open */
4777
0
      type = XML_REGEXP_PUNCT_OPEN;
4778
0
  } else if (cur == 'e') {
4779
0
      NEXT;
4780
      /* close */
4781
0
      type = XML_REGEXP_PUNCT_CLOSE;
4782
0
  } else if (cur == 'i') {
4783
0
      NEXT;
4784
      /* initial quote */
4785
0
      type = XML_REGEXP_PUNCT_INITQUOTE;
4786
0
  } else if (cur == 'f') {
4787
0
      NEXT;
4788
      /* final quote */
4789
0
      type = XML_REGEXP_PUNCT_FINQUOTE;
4790
0
  } else if (cur == 'o') {
4791
0
      NEXT;
4792
      /* other */
4793
0
      type = XML_REGEXP_PUNCT_OTHERS;
4794
0
  } else {
4795
      /* all punctuation */
4796
0
      type = XML_REGEXP_PUNCT;
4797
0
  }
4798
0
    } else if (cur == 'Z') {
4799
0
  NEXT;
4800
0
  cur = CUR;
4801
0
  if (cur == 's') {
4802
0
      NEXT;
4803
      /* space */
4804
0
      type = XML_REGEXP_SEPAR_SPACE;
4805
0
  } else if (cur == 'l') {
4806
0
      NEXT;
4807
      /* line */
4808
0
      type = XML_REGEXP_SEPAR_LINE;
4809
0
  } else if (cur == 'p') {
4810
0
      NEXT;
4811
      /* paragraph */
4812
0
      type = XML_REGEXP_SEPAR_PARA;
4813
0
  } else {
4814
      /* all separators */
4815
0
      type = XML_REGEXP_SEPAR;
4816
0
  }
4817
0
    } else if (cur == 'S') {
4818
0
  NEXT;
4819
0
  cur = CUR;
4820
0
  if (cur == 'm') {
4821
0
      NEXT;
4822
0
      type = XML_REGEXP_SYMBOL_MATH;
4823
      /* math */
4824
0
  } else if (cur == 'c') {
4825
0
      NEXT;
4826
0
      type = XML_REGEXP_SYMBOL_CURRENCY;
4827
      /* currency */
4828
0
  } else if (cur == 'k') {
4829
0
      NEXT;
4830
0
      type = XML_REGEXP_SYMBOL_MODIFIER;
4831
      /* modifiers */
4832
0
  } else if (cur == 'o') {
4833
0
      NEXT;
4834
0
      type = XML_REGEXP_SYMBOL_OTHERS;
4835
      /* other */
4836
0
  } else {
4837
      /* all symbols */
4838
0
      type = XML_REGEXP_SYMBOL;
4839
0
  }
4840
0
    } else if (cur == 'C') {
4841
0
  NEXT;
4842
0
  cur = CUR;
4843
0
  if (cur == 'c') {
4844
0
      NEXT;
4845
      /* control */
4846
0
      type = XML_REGEXP_OTHER_CONTROL;
4847
0
  } else if (cur == 'f') {
4848
0
      NEXT;
4849
      /* format */
4850
0
      type = XML_REGEXP_OTHER_FORMAT;
4851
0
  } else if (cur == 'o') {
4852
0
      NEXT;
4853
      /* private use */
4854
0
      type = XML_REGEXP_OTHER_PRIVATE;
4855
0
  } else if (cur == 'n') {
4856
0
      NEXT;
4857
      /* not assigned */
4858
0
      type = XML_REGEXP_OTHER_NA;
4859
0
  } else {
4860
      /* all others */
4861
0
      type = XML_REGEXP_OTHER;
4862
0
  }
4863
0
    } else if (cur == 'I') {
4864
0
  const xmlChar *start;
4865
0
  NEXT;
4866
0
  cur = CUR;
4867
0
  if (cur != 's') {
4868
0
      ERROR("IsXXXX expected");
4869
0
      return;
4870
0
  }
4871
0
  NEXT;
4872
0
  start = ctxt->cur;
4873
0
  cur = CUR;
4874
0
  if (((cur >= 'a') && (cur <= 'z')) ||
4875
0
      ((cur >= 'A') && (cur <= 'Z')) ||
4876
0
      ((cur >= '0') && (cur <= '9')) ||
4877
0
      (cur == 0x2D)) {
4878
0
      NEXT;
4879
0
      cur = CUR;
4880
0
      while (((cur >= 'a') && (cur <= 'z')) ||
4881
0
    ((cur >= 'A') && (cur <= 'Z')) ||
4882
0
    ((cur >= '0') && (cur <= '9')) ||
4883
0
    (cur == 0x2D)) {
4884
0
    NEXT;
4885
0
    cur = CUR;
4886
0
      }
4887
0
  }
4888
0
  type = XML_REGEXP_BLOCK_NAME;
4889
0
  blockName = xmlStrndup(start, ctxt->cur - start);
4890
0
    } else {
4891
0
  ERROR("Unknown char property");
4892
0
  return;
4893
0
    }
4894
0
    if (ctxt->atom == NULL) {
4895
0
  ctxt->atom = xmlRegNewAtom(ctxt, type);
4896
0
  if (ctxt->atom != NULL)
4897
0
      ctxt->atom->valuep = blockName;
4898
0
    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4899
0
        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4900
0
               type, 0, 0, blockName);
4901
0
    }
4902
0
}
4903
4904
/*
 * parse_escaped_codeunit:
 * @ctxt:  a regexp parser context
 *
 * Reads the four hexadecimal digits that follow the current position
 * (the current character itself is skipped first). On success the
 * position is left on the last digit.
 *
 * Returns the 16-bit value, or -1 after raising a compile error if a
 * non hexadecimal character is met.
 */
static int parse_escaped_codeunit(xmlRegParserCtxtPtr ctxt)
{
    int remaining;
    int value = 0;

    for (remaining = 4; remaining > 0; remaining--) {
        int digit;
        int c;

        NEXT;
        c = CUR;
        if ((c >= '0') && (c <= '9')) {
            digit = c - '0';
        } else if ((c >= 'A') && (c <= 'F')) {
            digit = c - 'A' + 10;
        } else if ((c >= 'a') && (c <= 'f')) {
            digit = c - 'a' + 10;
        } else {
            ERROR("Expecting hex digit");
            return -1;
        }
        value = value * 16 + digit;
    }
    return value;
}
4924
4925
/*
 * parse_escaped_codepoint:
 * @ctxt:  a regexp parser context
 *
 * Reads one escaped code unit and, when it is a high surrogate
 * (0xD800-0xDBFF), requires a following "\u" escape holding a low
 * surrogate (0xDC00-0xDFFF) and combines both into one code point.
 *
 * Returns the code point, or -1 after raising a compile error.
 */
static int parse_escaped_codepoint(xmlRegParserCtxtPtr ctxt)
{
    int high = parse_escaped_codeunit(ctxt);

    /* Anything outside the high surrogate range is returned untouched */
    if ((high < 0xD800) || (high > 0xDBFF))
        return high;

    NEXT;
    if (CUR == '\\') {
        NEXT;
        if (CUR == 'u') {
            int low = parse_escaped_codeunit(ctxt);

            if ((low >= 0xDC00) && (low <= 0xDFFF))
                return (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
        }
    }
    ERROR("Invalid low surrogate pair code unit");
    return -1;
}
4944
4945
/**
4946
 * xmlFAParseCharClassEsc:
4947
 * @ctxt:  a regexp parser context
4948
 *
4949
 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
4950
 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
4951
 * [25] catEsc   ::=   '\p{' charProp '}'
4952
 * [26] complEsc ::=   '\P{' charProp '}'
4953
 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
4954
 */
4955
static void
4956
0
xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
4957
0
    int cur;
4958
4959
0
    if (CUR == '.') {
4960
0
  if (ctxt->atom == NULL) {
4961
0
      ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
4962
0
  } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4963
0
      xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4964
0
             XML_REGEXP_ANYCHAR, 0, 0, NULL);
4965
0
  }
4966
0
  NEXT;
4967
0
  return;
4968
0
    }
4969
0
    if (CUR != '\\') {
4970
0
  ERROR("Escaped sequence: expecting \\");
4971
0
  return;
4972
0
    }
4973
0
    NEXT;
4974
0
    cur = CUR;
4975
0
    if (cur == 'p') {
4976
0
  NEXT;
4977
0
  if (CUR != '{') {
4978
0
      ERROR("Expecting '{'");
4979
0
      return;
4980
0
  }
4981
0
  NEXT;
4982
0
  xmlFAParseCharProp(ctxt);
4983
0
  if (CUR != '}') {
4984
0
      ERROR("Expecting '}'");
4985
0
      return;
4986
0
  }
4987
0
  NEXT;
4988
0
    } else if (cur == 'P') {
4989
0
  NEXT;
4990
0
  if (CUR != '{') {
4991
0
      ERROR("Expecting '{'");
4992
0
      return;
4993
0
  }
4994
0
  NEXT;
4995
0
  xmlFAParseCharProp(ctxt);
4996
0
        if (ctxt->atom != NULL)
4997
0
      ctxt->atom->neg = 1;
4998
0
  if (CUR != '}') {
4999
0
      ERROR("Expecting '}'");
5000
0
      return;
5001
0
  }
5002
0
  NEXT;
5003
0
    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
5004
0
  (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
5005
0
  (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
5006
0
  (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
5007
0
  (cur == 0x5E) ||
5008
5009
  /* Non-standard escape sequences:
5010
   *                  Java 1.8|.NET Core 3.1|MSXML 6 */
5011
0
  (cur == '!') ||     /*   +  |     +       |    +   */
5012
0
  (cur == '"') ||     /*   +  |     +       |    +   */
5013
0
  (cur == '#') ||     /*   +  |     +       |    +   */
5014
0
  (cur == '$') ||     /*   +  |     +       |    +   */
5015
0
  (cur == '%') ||     /*   +  |     +       |    +   */
5016
0
  (cur == ',') ||     /*   +  |     +       |    +   */
5017
0
  (cur == '/') ||     /*   +  |     +       |    +   */
5018
0
  (cur == ':') ||     /*   +  |     +       |    +   */
5019
0
  (cur == ';') ||     /*   +  |     +       |    +   */
5020
0
  (cur == '=') ||     /*   +  |     +       |    +   */
5021
0
  (cur == '>') ||     /*      |     +       |    +   */
5022
0
  (cur == '@') ||     /*   +  |     +       |    +   */
5023
0
  (cur == '`') ||     /*   +  |     +       |    +   */
5024
0
  (cur == '~') ||     /*   +  |     +       |    +   */
5025
0
  (cur == 'u')) {     /*      |     +       |    +   */
5026
0
  if (ctxt->atom == NULL) {
5027
0
      ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5028
0
      if (ctxt->atom != NULL) {
5029
0
          switch (cur) {
5030
0
        case 'n':
5031
0
            ctxt->atom->codepoint = '\n';
5032
0
      break;
5033
0
        case 'r':
5034
0
            ctxt->atom->codepoint = '\r';
5035
0
      break;
5036
0
        case 't':
5037
0
            ctxt->atom->codepoint = '\t';
5038
0
      break;
5039
0
        case 'u':
5040
0
      cur = parse_escaped_codepoint(ctxt);
5041
0
      if (cur < 0) {
5042
0
          return;
5043
0
      }
5044
0
      ctxt->atom->codepoint = cur;
5045
0
      break;
5046
0
        default:
5047
0
      ctxt->atom->codepoint = cur;
5048
0
    }
5049
0
      }
5050
0
  } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
5051
0
            switch (cur) {
5052
0
                case 'n':
5053
0
                    cur = '\n';
5054
0
                    break;
5055
0
                case 'r':
5056
0
                    cur = '\r';
5057
0
                    break;
5058
0
                case 't':
5059
0
                    cur = '\t';
5060
0
                    break;
5061
0
            }
5062
0
      xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5063
0
             XML_REGEXP_CHARVAL, cur, cur, NULL);
5064
0
  }
5065
0
  NEXT;
5066
0
    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
5067
0
  (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
5068
0
  (cur == 'w') || (cur == 'W')) {
5069
0
  xmlRegAtomType type = XML_REGEXP_ANYSPACE;
5070
5071
0
  switch (cur) {
5072
0
      case 's':
5073
0
    type = XML_REGEXP_ANYSPACE;
5074
0
    break;
5075
0
      case 'S':
5076
0
    type = XML_REGEXP_NOTSPACE;
5077
0
    break;
5078
0
      case 'i':
5079
0
    type = XML_REGEXP_INITNAME;
5080
0
    break;
5081
0
      case 'I':
5082
0
    type = XML_REGEXP_NOTINITNAME;
5083
0
    break;
5084
0
      case 'c':
5085
0
    type = XML_REGEXP_NAMECHAR;
5086
0
    break;
5087
0
      case 'C':
5088
0
    type = XML_REGEXP_NOTNAMECHAR;
5089
0
    break;
5090
0
      case 'd':
5091
0
    type = XML_REGEXP_DECIMAL;
5092
0
    break;
5093
0
      case 'D':
5094
0
    type = XML_REGEXP_NOTDECIMAL;
5095
0
    break;
5096
0
      case 'w':
5097
0
    type = XML_REGEXP_REALCHAR;
5098
0
    break;
5099
0
      case 'W':
5100
0
    type = XML_REGEXP_NOTREALCHAR;
5101
0
    break;
5102
0
  }
5103
0
  NEXT;
5104
0
  if (ctxt->atom == NULL) {
5105
0
      ctxt->atom = xmlRegNewAtom(ctxt, type);
5106
0
  } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
5107
0
      xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5108
0
             type, 0, 0, NULL);
5109
0
  }
5110
0
    } else {
5111
0
  ERROR("Wrong escape sequence, misuse of character '\\'");
5112
0
    }
5113
0
}
5114
5115
/**
5116
 * xmlFAParseCharRange:
5117
 * @ctxt:  a regexp parser context
5118
 *
5119
 * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
5120
 * [18]   seRange   ::=   charOrEsc '-' charOrEsc
5121
 * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
5122
 * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
5123
 * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
5124
 */
5125
static void
5126
0
xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
5127
0
    int cur, len;
5128
0
    int start = -1;
5129
0
    int end = -1;
5130
5131
0
    if (CUR == '\0') {
5132
0
        ERROR("Expecting ']'");
5133
0
  return;
5134
0
    }
5135
5136
0
    cur = CUR;
5137
0
    if (cur == '\\') {
5138
0
  NEXT;
5139
0
  cur = CUR;
5140
0
  switch (cur) {
5141
0
      case 'n': start = 0xA; break;
5142
0
      case 'r': start = 0xD; break;
5143
0
      case 't': start = 0x9; break;
5144
0
      case '\\': case '|': case '.': case '-': case '^': case '?':
5145
0
      case '*': case '+': case '{': case '}': case '(': case ')':
5146
0
      case '[': case ']':
5147
0
    start = cur; break;
5148
0
      default:
5149
0
    ERROR("Invalid escape value");
5150
0
    return;
5151
0
  }
5152
0
  end = start;
5153
0
        len = 1;
5154
0
    } else if ((cur != 0x5B) && (cur != 0x5D)) {
5155
0
        end = start = CUR_SCHAR(ctxt->cur, len);
5156
0
    } else {
5157
0
  ERROR("Expecting a char range");
5158
0
  return;
5159
0
    }
5160
    /*
5161
     * Since we are "inside" a range, we can assume ctxt->cur is past
5162
     * the start of ctxt->string, and PREV should be safe
5163
     */
5164
0
    if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
5165
0
  NEXTL(len);
5166
0
  return;
5167
0
    }
5168
0
    NEXTL(len);
5169
0
    cur = CUR;
5170
0
    if ((cur != '-') || (NXT(1) == '[') || (NXT(1) == ']')) {
5171
0
        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5172
0
                  XML_REGEXP_CHARVAL, start, end, NULL);
5173
0
  return;
5174
0
    }
5175
0
    NEXT;
5176
0
    cur = CUR;
5177
0
    if (cur == '\\') {
5178
0
  NEXT;
5179
0
  cur = CUR;
5180
0
  switch (cur) {
5181
0
      case 'n': end = 0xA; break;
5182
0
      case 'r': end = 0xD; break;
5183
0
      case 't': end = 0x9; break;
5184
0
      case '\\': case '|': case '.': case '-': case '^': case '?':
5185
0
      case '*': case '+': case '{': case '}': case '(': case ')':
5186
0
      case '[': case ']':
5187
0
    end = cur; break;
5188
0
      default:
5189
0
    ERROR("Invalid escape value");
5190
0
    return;
5191
0
  }
5192
0
        len = 1;
5193
0
    } else if ((cur != '\0') && (cur != 0x5B) && (cur != 0x5D)) {
5194
0
        end = CUR_SCHAR(ctxt->cur, len);
5195
0
    } else {
5196
0
  ERROR("Expecting the end of a char range");
5197
0
  return;
5198
0
    }
5199
5200
    /* TODO check that the values are acceptable character ranges for XML */
5201
0
    if (end < start) {
5202
0
  ERROR("End of range is before start of range");
5203
0
    } else {
5204
0
        NEXTL(len);
5205
0
        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5206
0
               XML_REGEXP_CHARVAL, start, end, NULL);
5207
0
    }
5208
0
    return;
5209
0
}
5210
5211
/**
5212
 * xmlFAParsePosCharGroup:
5213
 * @ctxt:  a regexp parser context
5214
 *
5215
 * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
5216
 */
5217
static void
5218
0
xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
5219
0
    do {
5220
0
  if (CUR == '\\') {
5221
0
      xmlFAParseCharClassEsc(ctxt);
5222
0
  } else {
5223
0
      xmlFAParseCharRange(ctxt);
5224
0
  }
5225
0
    } while ((CUR != ']') && (CUR != '-') &&
5226
0
             (CUR != 0) && (ctxt->error == 0));
5227
0
}
5228
5229
/**
5230
 * xmlFAParseCharGroup:
5231
 * @ctxt:  a regexp parser context
5232
 *
5233
 * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
5234
 * [15]   negCharGroup ::= '^' posCharGroup
5235
 * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
5236
 * [12]   charClassExpr ::= '[' charGroup ']'
5237
 */
5238
static void
5239
0
xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
5240
0
    int neg = ctxt->neg;
5241
5242
0
    if (CUR == '^') {
5243
0
  NEXT;
5244
0
  ctxt->neg = !ctxt->neg;
5245
0
  xmlFAParsePosCharGroup(ctxt);
5246
0
  ctxt->neg = neg;
5247
0
    }
5248
0
    while ((CUR != ']') && (ctxt->error == 0)) {
5249
0
  if ((CUR == '-') && (NXT(1) == '[')) {
5250
0
      NEXT; /* eat the '-' */
5251
0
      NEXT; /* eat the '[' */
5252
0
      ctxt->neg = 2;
5253
0
      xmlFAParseCharGroup(ctxt);
5254
0
      ctxt->neg = neg;
5255
0
      if (CUR == ']') {
5256
0
    NEXT;
5257
0
      } else {
5258
0
    ERROR("charClassExpr: ']' expected");
5259
0
      }
5260
0
      break;
5261
0
  } else {
5262
0
      xmlFAParsePosCharGroup(ctxt);
5263
0
  }
5264
0
    }
5265
0
}
5266
5267
/**
5268
 * xmlFAParseCharClass:
5269
 * @ctxt:  a regexp parser context
5270
 *
5271
 * [11]   charClass   ::=     charClassEsc | charClassExpr
5272
 * [12]   charClassExpr   ::=   '[' charGroup ']'
5273
 */
5274
static void
5275
0
xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
5276
0
    if (CUR == '[') {
5277
0
  NEXT;
5278
0
  ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
5279
0
  if (ctxt->atom == NULL)
5280
0
      return;
5281
0
  xmlFAParseCharGroup(ctxt);
5282
0
  if (CUR == ']') {
5283
0
      NEXT;
5284
0
  } else {
5285
0
      ERROR("xmlFAParseCharClass: ']' expected");
5286
0
  }
5287
0
    } else {
5288
0
  xmlFAParseCharClassEsc(ctxt);
5289
0
    }
5290
0
}
5291
5292
/**
5293
 * xmlFAParseQuantExact:
5294
 * @ctxt:  a regexp parser context
5295
 *
5296
 * [8]   QuantExact   ::=   [0-9]+
5297
 *
5298
 * Returns 0 if success or -1 in case of error
5299
 */
5300
static int
5301
0
xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
5302
0
    int ret = 0;
5303
0
    int ok = 0;
5304
0
    int overflow = 0;
5305
5306
0
    while ((CUR >= '0') && (CUR <= '9')) {
5307
0
        if (ret > INT_MAX / 10) {
5308
0
            overflow = 1;
5309
0
        } else {
5310
0
            int digit = CUR - '0';
5311
5312
0
            ret *= 10;
5313
0
            if (ret > INT_MAX - digit)
5314
0
                overflow = 1;
5315
0
            else
5316
0
                ret += digit;
5317
0
        }
5318
0
  ok = 1;
5319
0
  NEXT;
5320
0
    }
5321
0
    if ((ok != 1) || (overflow == 1)) {
5322
0
  return(-1);
5323
0
    }
5324
0
    return(ret);
5325
0
}
5326
5327
/**
5328
 * xmlFAParseQuantifier:
5329
 * @ctxt:  a regexp parser context
5330
 *
5331
 * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
5332
 * [5]   quantity   ::=   quantRange | quantMin | QuantExact
5333
 * [6]   quantRange   ::=   QuantExact ',' QuantExact
5334
 * [7]   quantMin   ::=   QuantExact ','
5335
 * [8]   QuantExact   ::=   [0-9]+
5336
 */
5337
static int
5338
0
xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
5339
0
    int cur;
5340
5341
0
    cur = CUR;
5342
0
    if ((cur == '?') || (cur == '*') || (cur == '+')) {
5343
0
  if (ctxt->atom != NULL) {
5344
0
      if (cur == '?')
5345
0
    ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
5346
0
      else if (cur == '*')
5347
0
    ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
5348
0
      else if (cur == '+')
5349
0
    ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
5350
0
  }
5351
0
  NEXT;
5352
0
  return(1);
5353
0
    }
5354
0
    if (cur == '{') {
5355
0
  int min = 0, max = 0;
5356
5357
0
  NEXT;
5358
0
  cur = xmlFAParseQuantExact(ctxt);
5359
0
  if (cur >= 0)
5360
0
      min = cur;
5361
0
        else {
5362
0
            ERROR("Improper quantifier");
5363
0
        }
5364
0
  if (CUR == ',') {
5365
0
      NEXT;
5366
0
      if (CUR == '}')
5367
0
          max = INT_MAX;
5368
0
      else {
5369
0
          cur = xmlFAParseQuantExact(ctxt);
5370
0
          if (cur >= 0)
5371
0
        max = cur;
5372
0
    else {
5373
0
        ERROR("Improper quantifier");
5374
0
    }
5375
0
      }
5376
0
  }
5377
0
  if (CUR == '}') {
5378
0
      NEXT;
5379
0
  } else {
5380
0
      ERROR("Unterminated quantifier");
5381
0
  }
5382
0
  if (max == 0)
5383
0
      max = min;
5384
0
  if (ctxt->atom != NULL) {
5385
0
      ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
5386
0
      ctxt->atom->min = min;
5387
0
      ctxt->atom->max = max;
5388
0
  }
5389
0
  return(1);
5390
0
    }
5391
0
    return(0);
5392
0
}
5393
5394
/**
5395
 * xmlFAParseAtom:
5396
 * @ctxt:  a regexp parser context
5397
 *
5398
 * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
5399
 */
5400
static int
5401
0
xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
5402
0
    int codepoint, len;
5403
5404
0
    codepoint = xmlFAIsChar(ctxt);
5405
0
    if (codepoint > 0) {
5406
0
  ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5407
0
  if (ctxt->atom == NULL)
5408
0
      return(-1);
5409
0
  codepoint = CUR_SCHAR(ctxt->cur, len);
5410
0
  ctxt->atom->codepoint = codepoint;
5411
0
  NEXTL(len);
5412
0
  return(1);
5413
0
    } else if (CUR == '|') {
5414
0
  return(0);
5415
0
    } else if (CUR == 0) {
5416
0
  return(0);
5417
0
    } else if (CUR == ')') {
5418
0
  return(0);
5419
0
    } else if (CUR == '(') {
5420
0
  xmlRegStatePtr start, oldend, start0;
5421
5422
0
  NEXT;
5423
0
        if (ctxt->depth >= 50) {
5424
0
      ERROR("xmlFAParseAtom: maximum nesting depth exceeded");
5425
0
            return(-1);
5426
0
        }
5427
  /*
5428
   * this extra Epsilon transition is needed if we count with 0 allowed
5429
   * unfortunately this can't be known at that point
5430
   */
5431
0
  xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5432
0
  start0 = ctxt->state;
5433
0
  xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5434
0
  start = ctxt->state;
5435
0
  oldend = ctxt->end;
5436
0
  ctxt->end = NULL;
5437
0
  ctxt->atom = NULL;
5438
0
        ctxt->depth++;
5439
0
  xmlFAParseRegExp(ctxt, 0);
5440
0
        ctxt->depth--;
5441
0
  if (CUR == ')') {
5442
0
      NEXT;
5443
0
  } else {
5444
0
      ERROR("xmlFAParseAtom: expecting ')'");
5445
0
  }
5446
0
  ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
5447
0
  if (ctxt->atom == NULL)
5448
0
      return(-1);
5449
0
  ctxt->atom->start = start;
5450
0
  ctxt->atom->start0 = start0;
5451
0
  ctxt->atom->stop = ctxt->state;
5452
0
  ctxt->end = oldend;
5453
0
  return(1);
5454
0
    } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
5455
0
  xmlFAParseCharClass(ctxt);
5456
0
  return(1);
5457
0
    }
5458
0
    return(0);
5459
0
}
5460
5461
/**
5462
 * xmlFAParsePiece:
5463
 * @ctxt:  a regexp parser context
5464
 *
5465
 * [3]   piece   ::=   atom quantifier?
5466
 */
5467
static int
5468
0
xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
5469
0
    int ret;
5470
5471
0
    ctxt->atom = NULL;
5472
0
    ret = xmlFAParseAtom(ctxt);
5473
0
    if (ret == 0)
5474
0
  return(0);
5475
0
    if (ctxt->atom == NULL) {
5476
0
  ERROR("internal: no atom generated");
5477
0
    }
5478
0
    xmlFAParseQuantifier(ctxt);
5479
0
    return(1);
5480
0
}
5481
5482
/**
5483
 * xmlFAParseBranch:
5484
 * @ctxt:  a regexp parser context
5485
 * @to: optional target to the end of the branch
5486
 *
5487
 * @to is used to optimize by removing duplicate path in automata
5488
 * in expressions like (a|b)(c|d)
5489
 *
5490
 * [2]   branch   ::=   piece*
5491
 */
5492
static int
5493
0
xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
5494
0
    xmlRegStatePtr previous;
5495
0
    int ret;
5496
5497
0
    previous = ctxt->state;
5498
0
    ret = xmlFAParsePiece(ctxt);
5499
0
    if (ret == 0) {
5500
        /* Empty branch */
5501
0
  xmlFAGenerateEpsilonTransition(ctxt, previous, to);
5502
0
    } else {
5503
0
  if (xmlFAGenerateTransitions(ctxt, previous,
5504
0
          (CUR=='|' || CUR==')' || CUR==0) ? to : NULL, ctxt->atom) < 0)
5505
0
      return(-1);
5506
0
  previous = ctxt->state;
5507
0
  ctxt->atom = NULL;
5508
0
    }
5509
0
    while ((ret != 0) && (ctxt->error == 0)) {
5510
0
  ret = xmlFAParsePiece(ctxt);
5511
0
  if (ret != 0) {
5512
0
      if (xmlFAGenerateTransitions(ctxt, previous,
5513
0
              (CUR=='|' || CUR==')' || CUR==0) ? to : NULL,
5514
0
                    ctxt->atom) < 0)
5515
0
        return(-1);
5516
0
      previous = ctxt->state;
5517
0
      ctxt->atom = NULL;
5518
0
  }
5519
0
    }
5520
0
    return(0);
5521
0
}
5522
5523
/**
5524
 * xmlFAParseRegExp:
5525
 * @ctxt:  a regexp parser context
5526
 * @top:  is this the top-level expression ?
5527
 *
5528
 * [1]   regExp   ::=     branch  ( '|' branch )*
5529
 */
5530
static void
5531
0
xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
5532
0
    xmlRegStatePtr start, end;
5533
5534
    /* if not top start should have been generated by an epsilon trans */
5535
0
    start = ctxt->state;
5536
0
    ctxt->end = NULL;
5537
0
    xmlFAParseBranch(ctxt, NULL);
5538
0
    if (top) {
5539
#ifdef DEBUG_REGEXP_GRAPH
5540
  printf("State %d is final\n", ctxt->state->no);
5541
#endif
5542
0
  ctxt->state->type = XML_REGEXP_FINAL_STATE;
5543
0
    }
5544
0
    if (CUR != '|') {
5545
0
  ctxt->end = ctxt->state;
5546
0
  return;
5547
0
    }
5548
0
    end = ctxt->state;
5549
0
    while ((CUR == '|') && (ctxt->error == 0)) {
5550
0
  NEXT;
5551
0
  ctxt->state = start;
5552
0
  ctxt->end = NULL;
5553
0
  xmlFAParseBranch(ctxt, end);
5554
0
    }
5555
0
    if (!top) {
5556
0
  ctxt->state = end;
5557
0
  ctxt->end = end;
5558
0
    }
5559
0
}
5560
5561
/************************************************************************
5562
 *                  *
5563
 *      The basic API         *
5564
 *                  *
5565
 ************************************************************************/
5566
5567
/**
5568
 * xmlRegexpPrint:
5569
 * @output: the file for the output debug
5570
 * @regexp: the compiled regexp
5571
 *
5572
 * Print the content of the compiled regular expression
5573
 */
5574
void
5575
0
xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
5576
0
    int i;
5577
5578
0
    if (output == NULL)
5579
0
        return;
5580
0
    fprintf(output, " regexp: ");
5581
0
    if (regexp == NULL) {
5582
0
  fprintf(output, "NULL\n");
5583
0
  return;
5584
0
    }
5585
0
    fprintf(output, "'%s' ", regexp->string);
5586
0
    fprintf(output, "\n");
5587
0
    fprintf(output, "%d atoms:\n", regexp->nbAtoms);
5588
0
    for (i = 0;i < regexp->nbAtoms; i++) {
5589
0
  fprintf(output, " %02d ", i);
5590
0
  xmlRegPrintAtom(output, regexp->atoms[i]);
5591
0
    }
5592
0
    fprintf(output, "%d states:", regexp->nbStates);
5593
0
    fprintf(output, "\n");
5594
0
    for (i = 0;i < regexp->nbStates; i++) {
5595
0
  xmlRegPrintState(output, regexp->states[i]);
5596
0
    }
5597
0
    fprintf(output, "%d counters:\n", regexp->nbCounters);
5598
0
    for (i = 0;i < regexp->nbCounters; i++) {
5599
0
  fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
5600
0
                                    regexp->counters[i].max);
5601
0
    }
5602
0
}
5603
5604
/**
5605
 * xmlRegexpCompile:
5606
 * @regexp:  a regular expression string
5607
 *
5608
 * Parses a regular expression conforming to XML Schemas Part 2 Datatype
5609
 * Appendix F and builds an automata suitable for testing strings against
5610
 * that regular expression
5611
 *
5612
 * Returns the compiled expression or NULL in case of error
5613
 */
5614
xmlRegexpPtr
5615
0
xmlRegexpCompile(const xmlChar *regexp) {
5616
0
    xmlRegexpPtr ret;
5617
0
    xmlRegParserCtxtPtr ctxt;
5618
5619
0
    ctxt = xmlRegNewParserCtxt(regexp);
5620
0
    if (ctxt == NULL)
5621
0
  return(NULL);
5622
5623
    /* initialize the parser */
5624
0
    ctxt->end = NULL;
5625
0
    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5626
0
    xmlRegStatePush(ctxt, ctxt->start);
5627
5628
    /* parse the expression building an automata */
5629
0
    xmlFAParseRegExp(ctxt, 1);
5630
0
    if (CUR != 0) {
5631
0
  ERROR("xmlFAParseRegExp: extra characters");
5632
0
    }
5633
0
    if (ctxt->error != 0) {
5634
0
  xmlRegFreeParserCtxt(ctxt);
5635
0
  return(NULL);
5636
0
    }
5637
0
    ctxt->end = ctxt->state;
5638
0
    ctxt->start->type = XML_REGEXP_START_STATE;
5639
0
    ctxt->end->type = XML_REGEXP_FINAL_STATE;
5640
5641
    /* remove the Epsilon except for counted transitions */
5642
0
    xmlFAEliminateEpsilonTransitions(ctxt);
5643
5644
5645
0
    if (ctxt->error != 0) {
5646
0
  xmlRegFreeParserCtxt(ctxt);
5647
0
  return(NULL);
5648
0
    }
5649
0
    ret = xmlRegEpxFromParse(ctxt);
5650
0
    xmlRegFreeParserCtxt(ctxt);
5651
0
    return(ret);
5652
0
}
5653
5654
/**
5655
 * xmlRegexpExec:
5656
 * @comp:  the compiled regular expression
5657
 * @content:  the value to check against the regular expression
5658
 *
5659
 * Check if the regular expression generates the value
5660
 *
5661
 * Returns 1 if it matches, 0 if not and a negative value in case of error
5662
 */
5663
int
5664
0
xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
5665
0
    if ((comp == NULL) || (content == NULL))
5666
0
  return(-1);
5667
0
    return(xmlFARegExec(comp, content));
5668
0
}
5669
5670
/**
5671
 * xmlRegexpIsDeterminist:
5672
 * @comp:  the compiled regular expression
5673
 *
5674
 * Check if the regular expression is determinist
5675
 *
5676
 * Returns 1 if it yes, 0 if not and a negative value in case of error
5677
 */
5678
int
5679
238k
xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
5680
238k
    xmlAutomataPtr am;
5681
238k
    int ret;
5682
5683
238k
    if (comp == NULL)
5684
0
  return(-1);
5685
238k
    if (comp->determinist != -1)
5686
166k
  return(comp->determinist);
5687
5688
71.2k
    am = xmlNewAutomata();
5689
71.2k
    if (am == NULL)
5690
0
        return(-1);
5691
71.2k
    if (am->states != NULL) {
5692
71.2k
  int i;
5693
5694
142k
  for (i = 0;i < am->nbStates;i++)
5695
71.2k
      xmlRegFreeState(am->states[i]);
5696
71.2k
  xmlFree(am->states);
5697
71.2k
    }
5698
71.2k
    am->nbAtoms = comp->nbAtoms;
5699
71.2k
    am->atoms = comp->atoms;
5700
71.2k
    am->nbStates = comp->nbStates;
5701
71.2k
    am->states = comp->states;
5702
71.2k
    am->determinist = -1;
5703
71.2k
    am->flags = comp->flags;
5704
71.2k
    ret = xmlFAComputesDeterminism(am);
5705
71.2k
    am->atoms = NULL;
5706
71.2k
    am->states = NULL;
5707
71.2k
    xmlFreeAutomata(am);
5708
71.2k
    comp->determinist = ret;
5709
71.2k
    return(ret);
5710
71.2k
}
5711
5712
/**
5713
 * xmlRegFreeRegexp:
5714
 * @regexp:  the regexp
5715
 *
5716
 * Free a regexp
5717
 */
5718
void
5719
71.2k
xmlRegFreeRegexp(xmlRegexpPtr regexp) {
5720
71.2k
    int i;
5721
71.2k
    if (regexp == NULL)
5722
0
  return;
5723
5724
71.2k
    if (regexp->string != NULL)
5725
0
  xmlFree(regexp->string);
5726
71.2k
    if (regexp->states != NULL) {
5727
481k
  for (i = 0;i < regexp->nbStates;i++)
5728
475k
      xmlRegFreeState(regexp->states[i]);
5729
5.40k
  xmlFree(regexp->states);
5730
5.40k
    }
5731
71.2k
    if (regexp->atoms != NULL) {
5732
454k
  for (i = 0;i < regexp->nbAtoms;i++)
5733
449k
      xmlRegFreeAtom(regexp->atoms[i]);
5734
5.40k
  xmlFree(regexp->atoms);
5735
5.40k
    }
5736
71.2k
    if (regexp->counters != NULL)
5737
0
  xmlFree(regexp->counters);
5738
71.2k
    if (regexp->compact != NULL)
5739
65.8k
  xmlFree(regexp->compact);
5740
71.2k
    if (regexp->transdata != NULL)
5741
0
  xmlFree(regexp->transdata);
5742
71.2k
    if (regexp->stringMap != NULL) {
5743
370k
  for (i = 0; i < regexp->nbstrings;i++)
5744
304k
      xmlFree(regexp->stringMap[i]);
5745
65.8k
  xmlFree(regexp->stringMap);
5746
65.8k
    }
5747
5748
71.2k
    xmlFree(regexp);
5749
71.2k
}
5750
5751
#ifdef LIBXML_AUTOMATA_ENABLED
5752
/************************************************************************
5753
 *                  *
5754
 *      The Automata interface        *
5755
 *                  *
5756
 ************************************************************************/
5757
5758
/**
5759
 * xmlNewAutomata:
5760
 *
5761
 * Create a new automata
5762
 *
5763
 * Returns the new object or NULL in case of failure
5764
 */
5765
xmlAutomataPtr
5766
142k
xmlNewAutomata(void) {
5767
142k
    xmlAutomataPtr ctxt;
5768
5769
142k
    ctxt = xmlRegNewParserCtxt(NULL);
5770
142k
    if (ctxt == NULL)
5771
0
  return(NULL);
5772
5773
    /* initialize the parser */
5774
142k
    ctxt->end = NULL;
5775
142k
    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5776
142k
    if (ctxt->start == NULL) {
5777
0
  xmlFreeAutomata(ctxt);
5778
0
  return(NULL);
5779
0
    }
5780
142k
    ctxt->start->type = XML_REGEXP_START_STATE;
5781
142k
    if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
5782
0
        xmlRegFreeState(ctxt->start);
5783
0
  xmlFreeAutomata(ctxt);
5784
0
  return(NULL);
5785
0
    }
5786
142k
    ctxt->flags = 0;
5787
5788
142k
    return(ctxt);
5789
142k
}
5790
5791
/**
5792
 * xmlFreeAutomata:
5793
 * @am: an automata
5794
 *
5795
 * Free an automata
5796
 */
5797
void
5798
142k
xmlFreeAutomata(xmlAutomataPtr am) {
5799
142k
    if (am == NULL)
5800
0
  return;
5801
142k
    xmlRegFreeParserCtxt(am);
5802
142k
}
5803
5804
/**
5805
 * xmlAutomataSetFlags:
5806
 * @am: an automata
5807
 * @flags:  a set of internal flags
5808
 *
5809
 * Set some flags on the automata
5810
 */
5811
void
5812
0
xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
5813
0
    if (am == NULL)
5814
0
  return;
5815
0
    am->flags |= flags;
5816
0
}
5817
5818
/**
5819
 * xmlAutomataGetInitState:
5820
 * @am: an automata
5821
 *
5822
 * Initial state lookup
5823
 *
5824
 * Returns the initial state of the automata
5825
 */
5826
xmlAutomataStatePtr
5827
71.2k
xmlAutomataGetInitState(xmlAutomataPtr am) {
5828
71.2k
    if (am == NULL)
5829
0
  return(NULL);
5830
71.2k
    return(am->start);
5831
71.2k
}
5832
5833
/**
5834
 * xmlAutomataSetFinalState:
5835
 * @am: an automata
5836
 * @state: a state in this automata
5837
 *
5838
 * Makes that state a final state
5839
 *
5840
 * Returns 0 or -1 in case of error
5841
 */
5842
int
5843
71.2k
xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
5844
71.2k
    if ((am == NULL) || (state == NULL))
5845
0
  return(-1);
5846
71.2k
    state->type = XML_REGEXP_FINAL_STATE;
5847
71.2k
    return(0);
5848
71.2k
}
5849
5850
/**
5851
 * xmlAutomataNewTransition:
5852
 * @am: an automata
5853
 * @from: the starting point of the transition
5854
 * @to: the target point of the transition or NULL
5855
 * @token: the input string associated to that transition
5856
 * @data: data passed to the callback function if the transition is activated
5857
 *
5858
 * If @to is NULL, this creates first a new target state in the automata
5859
 * and then adds a transition from the @from state to the target state
5860
 * activated by the value of @token
5861
 *
5862
 * Returns the target state or NULL in case of error
5863
 */
5864
xmlAutomataStatePtr
5865
xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
5866
       xmlAutomataStatePtr to, const xmlChar *token,
5867
890k
       void *data) {
5868
890k
    xmlRegAtomPtr atom;
5869
5870
890k
    if ((am == NULL) || (from == NULL) || (token == NULL))
5871
0
  return(NULL);
5872
890k
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5873
890k
    if (atom == NULL)
5874
0
        return(NULL);
5875
890k
    atom->data = data;
5876
890k
    atom->valuep = xmlStrdup(token);
5877
5878
890k
    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5879
0
        xmlRegFreeAtom(atom);
5880
0
  return(NULL);
5881
0
    }
5882
890k
    if (to == NULL)
5883
847k
  return(am->state);
5884
42.5k
    return(to);
5885
890k
}
5886
5887
/**
5888
 * xmlAutomataNewTransition2:
5889
 * @am: an automata
5890
 * @from: the starting point of the transition
5891
 * @to: the target point of the transition or NULL
5892
 * @token: the first input string associated to that transition
5893
 * @token2: the second input string associated to that transition
5894
 * @data: data passed to the callback function if the transition is activated
5895
 *
5896
 * If @to is NULL, this creates first a new target state in the automata
5897
 * and then adds a transition from the @from state to the target state
5898
 * activated by the value of @token
5899
 *
5900
 * Returns the target state or NULL in case of error
5901
 */
5902
xmlAutomataStatePtr
5903
xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5904
        xmlAutomataStatePtr to, const xmlChar *token,
5905
0
        const xmlChar *token2, void *data) {
5906
0
    xmlRegAtomPtr atom;
5907
5908
0
    if ((am == NULL) || (from == NULL) || (token == NULL))
5909
0
  return(NULL);
5910
0
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5911
0
    if (atom == NULL)
5912
0
  return(NULL);
5913
0
    atom->data = data;
5914
0
    if ((token2 == NULL) || (*token2 == 0)) {
5915
0
  atom->valuep = xmlStrdup(token);
5916
0
    } else {
5917
0
  int lenn, lenp;
5918
0
  xmlChar *str;
5919
5920
0
  lenn = strlen((char *) token2);
5921
0
  lenp = strlen((char *) token);
5922
5923
0
  str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5924
0
  if (str == NULL) {
5925
0
      xmlRegFreeAtom(atom);
5926
0
      return(NULL);
5927
0
  }
5928
0
  memcpy(&str[0], token, lenp);
5929
0
  str[lenp] = '|';
5930
0
  memcpy(&str[lenp + 1], token2, lenn);
5931
0
  str[lenn + lenp + 1] = 0;
5932
5933
0
  atom->valuep = str;
5934
0
    }
5935
5936
0
    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5937
0
        xmlRegFreeAtom(atom);
5938
0
  return(NULL);
5939
0
    }
5940
0
    if (to == NULL)
5941
0
  return(am->state);
5942
0
    return(to);
5943
0
}
5944
5945
/**
5946
 * xmlAutomataNewNegTrans:
5947
 * @am: an automata
5948
 * @from: the starting point of the transition
5949
 * @to: the target point of the transition or NULL
5950
 * @token: the first input string associated to that transition
5951
 * @token2: the second input string associated to that transition
5952
 * @data: data passed to the callback function if the transition is activated
5953
 *
5954
 * If @to is NULL, this creates first a new target state in the automata
5955
 * and then adds a transition from the @from state to the target state
5956
 * activated by any value except (@token,@token2)
5957
 * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
5958
 # the semantic of XSD ##other
5959
 *
5960
 * Returns the target state or NULL in case of error
5961
 */
5962
xmlAutomataStatePtr
5963
xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5964
           xmlAutomataStatePtr to, const xmlChar *token,
5965
0
           const xmlChar *token2, void *data) {
5966
0
    xmlRegAtomPtr atom;
5967
0
    xmlChar err_msg[200];
5968
5969
0
    if ((am == NULL) || (from == NULL) || (token == NULL))
5970
0
  return(NULL);
5971
0
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5972
0
    if (atom == NULL)
5973
0
  return(NULL);
5974
0
    atom->data = data;
5975
0
    atom->neg = 1;
5976
0
    if ((token2 == NULL) || (*token2 == 0)) {
5977
0
  atom->valuep = xmlStrdup(token);
5978
0
    } else {
5979
0
  int lenn, lenp;
5980
0
  xmlChar *str;
5981
5982
0
  lenn = strlen((char *) token2);
5983
0
  lenp = strlen((char *) token);
5984
5985
0
  str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5986
0
  if (str == NULL) {
5987
0
      xmlRegFreeAtom(atom);
5988
0
      return(NULL);
5989
0
  }
5990
0
  memcpy(&str[0], token, lenp);
5991
0
  str[lenp] = '|';
5992
0
  memcpy(&str[lenp + 1], token2, lenn);
5993
0
  str[lenn + lenp + 1] = 0;
5994
5995
0
  atom->valuep = str;
5996
0
    }
5997
0
    snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
5998
0
    err_msg[199] = 0;
5999
0
    atom->valuep2 = xmlStrdup(err_msg);
6000
6001
0
    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
6002
0
        xmlRegFreeAtom(atom);
6003
0
  return(NULL);
6004
0
    }
6005
0
    am->negs++;
6006
0
    if (to == NULL)
6007
0
  return(am->state);
6008
0
    return(to);
6009
0
}
6010
6011
/**
6012
 * xmlAutomataNewCountTrans2:
6013
 * @am: an automata
6014
 * @from: the starting point of the transition
6015
 * @to: the target point of the transition or NULL
6016
 * @token: the input string associated to that transition
6017
 * @token2: the second input string associated to that transition
6018
 * @min:  the minimum successive occurrences of token
6019
 * @max:  the maximum successive occurrences of token
6020
 * @data:  data associated to the transition
6021
 *
6022
 * If @to is NULL, this creates first a new target state in the automata
6023
 * and then adds a transition from the @from state to the target state
6024
 * activated by a succession of input of value @token and @token2 and
6025
 * whose number is between @min and @max
6026
 *
6027
 * Returns the target state or NULL in case of error
6028
 */
6029
xmlAutomataStatePtr
6030
xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
6031
       xmlAutomataStatePtr to, const xmlChar *token,
6032
       const xmlChar *token2,
6033
0
       int min, int max, void *data) {
6034
0
    xmlRegAtomPtr atom;
6035
0
    int counter;
6036
6037
0
    if ((am == NULL) || (from == NULL) || (token == NULL))
6038
0
  return(NULL);
6039
0
    if (min < 0)
6040
0
  return(NULL);
6041
0
    if ((max < min) || (max < 1))
6042
0
  return(NULL);
6043
0
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6044
0
    if (atom == NULL)
6045
0
  return(NULL);
6046
0
    if ((token2 == NULL) || (*token2 == 0)) {
6047
0
  atom->valuep = xmlStrdup(token);
6048
0
    } else {
6049
0
  int lenn, lenp;
6050
0
  xmlChar *str;
6051
6052
0
  lenn = strlen((char *) token2);
6053
0
  lenp = strlen((char *) token);
6054
6055
0
  str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
6056
0
  if (str == NULL) {
6057
0
      xmlRegFreeAtom(atom);
6058
0
      return(NULL);
6059
0
  }
6060
0
  memcpy(&str[0], token, lenp);
6061
0
  str[lenp] = '|';
6062
0
  memcpy(&str[lenp + 1], token2, lenn);
6063
0
  str[lenn + lenp + 1] = 0;
6064
6065
0
  atom->valuep = str;
6066
0
    }
6067
0
    atom->data = data;
6068
0
    if (min == 0)
6069
0
  atom->min = 1;
6070
0
    else
6071
0
  atom->min = min;
6072
0
    atom->max = max;
6073
6074
    /*
6075
     * associate a counter to the transition.
6076
     */
6077
0
    counter = xmlRegGetCounter(am);
6078
0
    am->counters[counter].min = min;
6079
0
    am->counters[counter].max = max;
6080
6081
    /* xmlFAGenerateTransitions(am, from, to, atom); */
6082
0
    if (to == NULL) {
6083
0
        to = xmlRegNewState(am);
6084
0
  xmlRegStatePush(am, to);
6085
0
    }
6086
0
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6087
0
    xmlRegAtomPush(am, atom);
6088
0
    am->state = to;
6089
6090
0
    if (to == NULL)
6091
0
  to = am->state;
6092
0
    if (to == NULL)
6093
0
  return(NULL);
6094
0
    if (min == 0)
6095
0
  xmlFAGenerateEpsilonTransition(am, from, to);
6096
0
    return(to);
6097
0
}
6098
6099
/**
6100
 * xmlAutomataNewCountTrans:
6101
 * @am: an automata
6102
 * @from: the starting point of the transition
6103
 * @to: the target point of the transition or NULL
6104
 * @token: the input string associated to that transition
6105
 * @min:  the minimum successive occurrences of token
6106
 * @max:  the maximum successive occurrences of token
6107
 * @data:  data associated to the transition
6108
 *
6109
 * If @to is NULL, this creates first a new target state in the automata
6110
 * and then adds a transition from the @from state to the target state
6111
 * activated by a succession of input of value @token and whose number
6112
 * is between @min and @max
6113
 *
6114
 * Returns the target state or NULL in case of error
6115
 */
6116
xmlAutomataStatePtr
6117
xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6118
       xmlAutomataStatePtr to, const xmlChar *token,
6119
0
       int min, int max, void *data) {
6120
0
    xmlRegAtomPtr atom;
6121
0
    int counter;
6122
6123
0
    if ((am == NULL) || (from == NULL) || (token == NULL))
6124
0
  return(NULL);
6125
0
    if (min < 0)
6126
0
  return(NULL);
6127
0
    if ((max < min) || (max < 1))
6128
0
  return(NULL);
6129
0
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6130
0
    if (atom == NULL)
6131
0
  return(NULL);
6132
0
    atom->valuep = xmlStrdup(token);
6133
0
    atom->data = data;
6134
0
    if (min == 0)
6135
0
  atom->min = 1;
6136
0
    else
6137
0
  atom->min = min;
6138
0
    atom->max = max;
6139
6140
    /*
6141
     * associate a counter to the transition.
6142
     */
6143
0
    counter = xmlRegGetCounter(am);
6144
0
    am->counters[counter].min = min;
6145
0
    am->counters[counter].max = max;
6146
6147
    /* xmlFAGenerateTransitions(am, from, to, atom); */
6148
0
    if (to == NULL) {
6149
0
        to = xmlRegNewState(am);
6150
0
  xmlRegStatePush(am, to);
6151
0
    }
6152
0
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6153
0
    xmlRegAtomPush(am, atom);
6154
0
    am->state = to;
6155
6156
0
    if (to == NULL)
6157
0
  to = am->state;
6158
0
    if (to == NULL)
6159
0
  return(NULL);
6160
0
    if (min == 0)
6161
0
  xmlFAGenerateEpsilonTransition(am, from, to);
6162
0
    return(to);
6163
0
}
6164
6165
/**
6166
 * xmlAutomataNewOnceTrans2:
6167
 * @am: an automata
6168
 * @from: the starting point of the transition
6169
 * @to: the target point of the transition or NULL
6170
 * @token: the input string associated to that transition
6171
 * @token2: the second input string associated to that transition
6172
 * @min:  the minimum successive occurrences of token
6173
 * @max:  the maximum successive occurrences of token
6174
 * @data:  data associated to the transition
6175
 *
6176
 * If @to is NULL, this creates first a new target state in the automata
6177
 * and then adds a transition from the @from state to the target state
6178
 * activated by a succession of input of value @token and @token2 and whose
6179
 * number is between @min and @max, moreover that transition can only be
6180
 * crossed once.
6181
 *
6182
 * Returns the target state or NULL in case of error
6183
 */
6184
xmlAutomataStatePtr
6185
xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
6186
       xmlAutomataStatePtr to, const xmlChar *token,
6187
       const xmlChar *token2,
6188
0
       int min, int max, void *data) {
6189
0
    xmlRegAtomPtr atom;
6190
0
    int counter;
6191
6192
0
    if ((am == NULL) || (from == NULL) || (token == NULL))
6193
0
  return(NULL);
6194
0
    if (min < 1)
6195
0
  return(NULL);
6196
0
    if (max < min)
6197
0
  return(NULL);
6198
0
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6199
0
    if (atom == NULL)
6200
0
  return(NULL);
6201
0
    if ((token2 == NULL) || (*token2 == 0)) {
6202
0
  atom->valuep = xmlStrdup(token);
6203
0
    } else {
6204
0
  int lenn, lenp;
6205
0
  xmlChar *str;
6206
6207
0
  lenn = strlen((char *) token2);
6208
0
  lenp = strlen((char *) token);
6209
6210
0
  str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
6211
0
  if (str == NULL) {
6212
0
      xmlRegFreeAtom(atom);
6213
0
      return(NULL);
6214
0
  }
6215
0
  memcpy(&str[0], token, lenp);
6216
0
  str[lenp] = '|';
6217
0
  memcpy(&str[lenp + 1], token2, lenn);
6218
0
  str[lenn + lenp + 1] = 0;
6219
6220
0
  atom->valuep = str;
6221
0
    }
6222
0
    atom->data = data;
6223
0
    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6224
0
    atom->min = min;
6225
0
    atom->max = max;
6226
    /*
6227
     * associate a counter to the transition.
6228
     */
6229
0
    counter = xmlRegGetCounter(am);
6230
0
    am->counters[counter].min = 1;
6231
0
    am->counters[counter].max = 1;
6232
6233
    /* xmlFAGenerateTransitions(am, from, to, atom); */
6234
0
    if (to == NULL) {
6235
0
  to = xmlRegNewState(am);
6236
0
  xmlRegStatePush(am, to);
6237
0
    }
6238
0
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6239
0
    xmlRegAtomPush(am, atom);
6240
0
    am->state = to;
6241
0
    return(to);
6242
0
}
6243
6244
6245
6246
/**
6247
 * xmlAutomataNewOnceTrans:
6248
 * @am: an automata
6249
 * @from: the starting point of the transition
6250
 * @to: the target point of the transition or NULL
6251
 * @token: the input string associated to that transition
6252
 * @min:  the minimum successive occurrences of token
6253
 * @max:  the maximum successive occurrences of token
6254
 * @data:  data associated to the transition
6255
 *
6256
 * If @to is NULL, this creates first a new target state in the automata
6257
 * and then adds a transition from the @from state to the target state
6258
 * activated by a succession of input of value @token and whose number
6259
 * is between @min and @max, moreover that transition can only be crossed
6260
 * once.
6261
 *
6262
 * Returns the target state or NULL in case of error
6263
 */
6264
xmlAutomataStatePtr
6265
xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6266
       xmlAutomataStatePtr to, const xmlChar *token,
6267
0
       int min, int max, void *data) {
6268
0
    xmlRegAtomPtr atom;
6269
0
    int counter;
6270
6271
0
    if ((am == NULL) || (from == NULL) || (token == NULL))
6272
0
  return(NULL);
6273
0
    if (min < 1)
6274
0
  return(NULL);
6275
0
    if (max < min)
6276
0
  return(NULL);
6277
0
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6278
0
    if (atom == NULL)
6279
0
  return(NULL);
6280
0
    atom->valuep = xmlStrdup(token);
6281
0
    atom->data = data;
6282
0
    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6283
0
    atom->min = min;
6284
0
    atom->max = max;
6285
    /*
6286
     * associate a counter to the transition.
6287
     */
6288
0
    counter = xmlRegGetCounter(am);
6289
0
    am->counters[counter].min = 1;
6290
0
    am->counters[counter].max = 1;
6291
6292
    /* xmlFAGenerateTransitions(am, from, to, atom); */
6293
0
    if (to == NULL) {
6294
0
  to = xmlRegNewState(am);
6295
0
  xmlRegStatePush(am, to);
6296
0
    }
6297
0
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6298
0
    xmlRegAtomPush(am, atom);
6299
0
    am->state = to;
6300
0
    return(to);
6301
0
}
6302
6303
/**
6304
 * xmlAutomataNewState:
6305
 * @am: an automata
6306
 *
6307
 * Create a new disconnected state in the automata
6308
 *
6309
 * Returns the new state or NULL in case of error
6310
 */
6311
xmlAutomataStatePtr
6312
41.2k
xmlAutomataNewState(xmlAutomataPtr am) {
6313
41.2k
    xmlAutomataStatePtr to;
6314
6315
41.2k
    if (am == NULL)
6316
0
  return(NULL);
6317
41.2k
    to = xmlRegNewState(am);
6318
41.2k
    xmlRegStatePush(am, to);
6319
41.2k
    return(to);
6320
41.2k
}
6321
6322
/**
6323
 * xmlAutomataNewEpsilon:
6324
 * @am: an automata
6325
 * @from: the starting point of the transition
6326
 * @to: the target point of the transition or NULL
6327
 *
6328
 * If @to is NULL, this creates first a new target state in the automata
6329
 * and then adds an epsilon transition from the @from state to the
6330
 * target state
6331
 *
6332
 * Returns the target state or NULL in case of error
6333
 */
6334
xmlAutomataStatePtr
6335
xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
6336
1.00M
          xmlAutomataStatePtr to) {
6337
1.00M
    if ((am == NULL) || (from == NULL))
6338
0
  return(NULL);
6339
1.00M
    xmlFAGenerateEpsilonTransition(am, from, to);
6340
1.00M
    if (to == NULL)
6341
127k
  return(am->state);
6342
874k
    return(to);
6343
1.00M
}
6344
6345
/**
6346
 * xmlAutomataNewAllTrans:
6347
 * @am: an automata
6348
 * @from: the starting point of the transition
6349
 * @to: the target point of the transition or NULL
6350
 * @lax: allow to transition if not all all transitions have been activated
6351
 *
6352
 * If @to is NULL, this creates first a new target state in the automata
6353
 * and then adds a an ALL transition from the @from state to the
6354
 * target state. That transition is an epsilon transition allowed only when
6355
 * all transitions from the @from node have been activated.
6356
 *
6357
 * Returns the target state or NULL in case of error
6358
 */
6359
xmlAutomataStatePtr
6360
xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6361
0
           xmlAutomataStatePtr to, int lax) {
6362
0
    if ((am == NULL) || (from == NULL))
6363
0
  return(NULL);
6364
0
    xmlFAGenerateAllTransition(am, from, to, lax);
6365
0
    if (to == NULL)
6366
0
  return(am->state);
6367
0
    return(to);
6368
0
}
6369
6370
/**
6371
 * xmlAutomataNewCounter:
6372
 * @am: an automata
6373
 * @min:  the minimal value on the counter
6374
 * @max:  the maximal value on the counter
6375
 *
6376
 * Create a new counter
6377
 *
6378
 * Returns the counter number or -1 in case of error
6379
 */
6380
int
6381
0
xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
6382
0
    int ret;
6383
6384
0
    if (am == NULL)
6385
0
  return(-1);
6386
6387
0
    ret = xmlRegGetCounter(am);
6388
0
    if (ret < 0)
6389
0
  return(-1);
6390
0
    am->counters[ret].min = min;
6391
0
    am->counters[ret].max = max;
6392
0
    return(ret);
6393
0
}
6394
6395
/**
6396
 * xmlAutomataNewCountedTrans:
6397
 * @am: an automata
6398
 * @from: the starting point of the transition
6399
 * @to: the target point of the transition or NULL
6400
 * @counter: the counter associated to that transition
6401
 *
6402
 * If @to is NULL, this creates first a new target state in the automata
6403
 * and then adds an epsilon transition from the @from state to the target state
6404
 * which will increment the counter provided
6405
 *
6406
 * Returns the target state or NULL in case of error
6407
 */
6408
xmlAutomataStatePtr
6409
xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6410
0
    xmlAutomataStatePtr to, int counter) {
6411
0
    if ((am == NULL) || (from == NULL) || (counter < 0))
6412
0
  return(NULL);
6413
0
    xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
6414
0
    if (to == NULL)
6415
0
  return(am->state);
6416
0
    return(to);
6417
0
}
6418
6419
/**
6420
 * xmlAutomataNewCounterTrans:
6421
 * @am: an automata
6422
 * @from: the starting point of the transition
6423
 * @to: the target point of the transition or NULL
6424
 * @counter: the counter associated to that transition
6425
 *
6426
 * If @to is NULL, this creates first a new target state in the automata
6427
 * and then adds an epsilon transition from the @from state to the target state
6428
 * which will be allowed only if the counter is within the right range.
6429
 *
6430
 * Returns the target state or NULL in case of error
6431
 */
6432
xmlAutomataStatePtr
6433
xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6434
0
    xmlAutomataStatePtr to, int counter) {
6435
0
    if ((am == NULL) || (from == NULL) || (counter < 0))
6436
0
  return(NULL);
6437
0
    xmlFAGenerateCountedTransition(am, from, to, counter);
6438
0
    if (to == NULL)
6439
0
  return(am->state);
6440
0
    return(to);
6441
0
}
6442
6443
/**
6444
 * xmlAutomataCompile:
6445
 * @am: an automata
6446
 *
6447
 * Compile the automata into a Reg Exp ready for being executed.
6448
 * The automata should be free after this point.
6449
 *
6450
 * Returns the compiled regexp or NULL in case of error
6451
 */
6452
xmlRegexpPtr
6453
71.2k
xmlAutomataCompile(xmlAutomataPtr am) {
6454
71.2k
    xmlRegexpPtr ret;
6455
6456
71.2k
    if ((am == NULL) || (am->error != 0)) return(NULL);
6457
71.2k
    xmlFAEliminateEpsilonTransitions(am);
6458
    /* xmlFAComputesDeterminism(am); */
6459
71.2k
    ret = xmlRegEpxFromParse(am);
6460
6461
71.2k
    return(ret);
6462
71.2k
}
6463
6464
/**
6465
 * xmlAutomataIsDeterminist:
6466
 * @am: an automata
6467
 *
6468
 * Checks if an automata is determinist.
6469
 *
6470
 * Returns 1 if true, 0 if not, and -1 in case of error
6471
 */
6472
int
6473
0
xmlAutomataIsDeterminist(xmlAutomataPtr am) {
6474
0
    int ret;
6475
6476
0
    if (am == NULL)
6477
0
  return(-1);
6478
6479
0
    ret = xmlFAComputesDeterminism(am);
6480
0
    return(ret);
6481
0
}
6482
#endif /* LIBXML_AUTOMATA_ENABLED */
6483
6484
#ifdef LIBXML_EXPR_ENABLED
6485
/************************************************************************
6486
 *                  *
6487
 *    Formal Expression handling code       *
6488
 *                  *
6489
 ************************************************************************/
6490
/************************************************************************
6491
 *                  *
6492
 *    Expression handling context       *
6493
 *                  *
6494
 ************************************************************************/
6495
6496
struct _xmlExpCtxt { /* state shared by all expressions built in one context */
6497
    xmlDictPtr dict; /* dictionary used to intern atom names */
6498
    xmlExpNodePtr *table; /* hash table buckets holding the unique nodes */
6499
    int size; /* number of buckets in table */
6500
    int nbElems; /* incremented each time a node is inserted in table */
6501
    int nb_nodes; /* nodes currently allocated, see xmlExpNewNode/xmlExpFree */
6502
    int maxNodes; /* limit given to xmlExpNewCtxt (at least 4096); NOTE(review): xmlExpNewNode checks MAX_NODES instead - confirm */
6503
    const char *expr; /* NOTE(review): presumably the expression string being parsed - not used in this part of the file */
6504
    const char *cur; /* NOTE(review): presumably the current position in expr - not used in this part of the file */
6505
    int nb_cons; /* incremented on every node construction */
6506
    int tabSize; /* NOTE(review): not referenced in this part of the file */
6507
};
6508
6509
/**
6510
 * xmlExpNewCtxt:
6511
 * @maxNodes:  the maximum number of nodes
6512
 * @dict:  optional dictionary to use internally
6513
 *
6514
 * Creates a new context for manipulating expressions
6515
 *
6516
 * Returns the context or NULL in case of error
6517
 */
6518
xmlExpCtxtPtr
6519
xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
6520
    xmlExpCtxtPtr ret;
6521
    int size = 256;
6522
6523
    if (maxNodes <= 4096)
6524
        maxNodes = 4096;
6525
6526
    ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
6527
    if (ret == NULL)
6528
        return(NULL);
6529
    memset(ret, 0, sizeof(xmlExpCtxt));
6530
    ret->size = size;
6531
    ret->nbElems = 0;
6532
    ret->maxNodes = maxNodes;
6533
    ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
6534
    if (ret->table == NULL) {
6535
        xmlFree(ret);
6536
  return(NULL);
6537
    }
6538
    memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
6539
    if (dict == NULL) {
6540
        ret->dict = xmlDictCreate();
6541
  if (ret->dict == NULL) {
6542
      xmlFree(ret->table);
6543
      xmlFree(ret);
6544
      return(NULL);
6545
  }
6546
    } else {
6547
        ret->dict = dict;
6548
  xmlDictReference(ret->dict);
6549
    }
6550
    return(ret);
6551
}
6552
6553
/**
6554
 * xmlExpFreeCtxt:
6555
 * @ctxt:  an expression context
6556
 *
6557
 * Free an expression context
6558
 */
6559
void
6560
xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
6561
    if (ctxt == NULL)
6562
        return;
6563
    xmlDictFree(ctxt->dict);
6564
    if (ctxt->table != NULL)
6565
  xmlFree(ctxt->table);
6566
    xmlFree(ctxt);
6567
}
6568
6569
/************************************************************************
6570
 *                  *
6571
 *    Structure associated to an expression node    *
6572
 *                  *
6573
 ************************************************************************/
6574
#define MAX_NODES 10000 /* cap on live nodes per context, checked in xmlExpNewNode() */
6575
6576
/* #define DEBUG_DERIV */
6577
6578
/*
6579
 * TODO:
6580
 * - Wildcards
6581
 * - public API for creation
6582
 *
6583
 * Started
6584
 * - regression testing
6585
 *
6586
 * Done
6587
 * - split into module and test tool
6588
 * - memleaks
6589
 */
6590
6591
typedef enum { /* bit flags stored in xmlExpNode.info */
6592
    XML_EXP_NILABLE = (1 << 0) /* the expression accepts the empty sequence */
6593
} xmlExpNodeInfo;
6594
6595
#define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE) /* non-zero when the nillable flag is set */
6596
6597
struct _xmlExpNode { /* one hash-consed expression node */
6598
    unsigned char type;/* xmlExpNodeType */
6599
    unsigned char info;/* OR of xmlExpNodeInfo */
6600
    unsigned short key; /* the hash key */
6601
    unsigned int ref; /* The number of references */
6602
    int c_max;    /* the maximum length it can consume, -1 when unbounded */
6603
    xmlExpNodePtr exp_left; /* left operand (SEQ, OR) or repeated expression (COUNT) */
6604
    xmlExpNodePtr next;/* the next node in the hash table or free list */
6605
    union { /* which member is valid depends on type */
6606
  struct {
6607
      int f_min; /* lower repetition bound */
6608
      int f_max; /* upper repetition bound, -1 when unbounded */
6609
  } count; /* used by XML_EXP_COUNT nodes */
6610
  struct {
6611
      xmlExpNodePtr f_right; /* right operand */
6612
  } children; /* used by XML_EXP_SEQ and XML_EXP_OR nodes */
6613
        const xmlChar *f_str; /* atom name, compared by pointer (XML_EXP_ATOM) */
6614
    } field;
6615
};
6616
6617
#define exp_min field.count.f_min /* shorthand accessors for the union members */
6618
#define exp_max field.count.f_max
6619
/* #define exp_left field.children.f_left */
6620
#define exp_right field.children.f_right
6621
#define exp_str field.f_str
6622
6623
static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
6624
static xmlExpNode forbiddenExpNode = { /* statically allocated FORBID node, info = 0 (not nillable) */
6625
    XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6626
};
6627
xmlExpNodePtr forbiddenExp = &forbiddenExpNode; /* shared sentinel, ignored by xmlExpFree() */
6628
static xmlExpNode emptyExpNode = { /* statically allocated EMPTY node, info = 1 (XML_EXP_NILABLE) */
6629
    XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6630
};
6631
xmlExpNodePtr emptyExp = &emptyExpNode; /* shared sentinel, ignored by xmlExpFree() */
6632
6633
/************************************************************************
6634
 *                  *
6635
 *  The custom hash table for unicity and canonicalization    *
6636
 *  of sub-expressions pointers           *
6637
 *                  *
6638
 ************************************************************************/
6639
/*
6640
 * xmlExpHashNameComputeKey:
6641
 * Calculate the hash key for a token
6642
 */
6643
static unsigned short
6644
xmlExpHashNameComputeKey(const xmlChar *name) {
6645
    unsigned short value = 0L;
6646
    char ch;
6647
6648
    if (name != NULL) {
6649
  value += 30 * (*name);
6650
  while ((ch = *name++) != 0) {
6651
      value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
6652
  }
6653
    }
6654
    return (value);
6655
}
6656
6657
/*
6658
 * xmlExpHashComputeKey:
6659
 * Calculate the hash key for a compound expression
6660
 */
6661
static unsigned short
6662
xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
6663
                     xmlExpNodePtr right) {
6664
    unsigned long value;
6665
    unsigned short ret;
6666
6667
    switch (type) {
6668
        case XML_EXP_SEQ:
6669
      value = left->key;
6670
      value += right->key;
6671
      value *= 3;
6672
      ret = (unsigned short) value;
6673
      break;
6674
        case XML_EXP_OR:
6675
      value = left->key;
6676
      value += right->key;
6677
      value *= 7;
6678
      ret = (unsigned short) value;
6679
      break;
6680
        case XML_EXP_COUNT:
6681
      value = left->key;
6682
      value += right->key;
6683
      ret = (unsigned short) value;
6684
      break;
6685
  default:
6686
      ret = 0;
6687
    }
6688
    return(ret);
6689
}
6690
6691
6692
static xmlExpNodePtr
6693
xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
6694
    xmlExpNodePtr ret;
6695
6696
    if (ctxt->nb_nodes >= MAX_NODES)
6697
        return(NULL);
6698
    ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
6699
    if (ret == NULL)
6700
        return(NULL);
6701
    memset(ret, 0, sizeof(xmlExpNode));
6702
    ret->type = type;
6703
    ret->next = NULL;
6704
    ctxt->nb_nodes++;
6705
    ctxt->nb_cons++;
6706
    return(ret);
6707
}
6708
6709
/**
6710
 * xmlExpHashGetEntry:
6711
 * @table: the hash table
6712
 *
6713
 * Get the unique entry from the hash table. The entry is created if
6714
 * needed. @left and @right are consumed, i.e. their ref count will
6715
 * be decremented by the operation.
6716
 *
6717
 * Returns the pointer or NULL in case of error
6718
 */
6719
static xmlExpNodePtr
6720
xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
6721
                   xmlExpNodePtr left, xmlExpNodePtr right,
6722
       const xmlChar *name, int min, int max) {
6723
    unsigned short kbase, key;
6724
    xmlExpNodePtr entry;
6725
    xmlExpNodePtr insert;
6726
6727
    if (ctxt == NULL)
6728
  return(NULL);
6729
6730
    /*
6731
     * Check for duplicate and insertion location.
6732
     */
6733
    if (type == XML_EXP_ATOM) {
6734
  kbase = xmlExpHashNameComputeKey(name);
6735
    } else if (type == XML_EXP_COUNT) {
6736
        /* COUNT reduction rule 1 */
6737
  /* a{1} -> a */
6738
  if (min == max) {
6739
      if (min == 1) {
6740
    return(left);
6741
      }
6742
      if (min == 0) {
6743
    xmlExpFree(ctxt, left);
6744
          return(emptyExp);
6745
      }
6746
  }
6747
  if (min < 0) {
6748
      xmlExpFree(ctxt, left);
6749
      return(forbiddenExp);
6750
  }
6751
        if (max == -1)
6752
      kbase = min + 79;
6753
  else
6754
      kbase = max - min;
6755
  kbase += left->key;
6756
    } else if (type == XML_EXP_OR) {
6757
        /* Forbid reduction rules */
6758
        if (left->type == XML_EXP_FORBID) {
6759
      xmlExpFree(ctxt, left);
6760
      return(right);
6761
  }
6762
        if (right->type == XML_EXP_FORBID) {
6763
      xmlExpFree(ctxt, right);
6764
      return(left);
6765
  }
6766
6767
        /* OR reduction rule 1 */
6768
  /* a | a reduced to a */
6769
        if (left == right) {
6770
      xmlExpFree(ctxt, right);
6771
      return(left);
6772
  }
6773
        /* OR canonicalization rule 1 */
6774
  /* linearize (a | b) | c into a | (b | c) */
6775
        if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
6776
      xmlExpNodePtr tmp = left;
6777
            left = right;
6778
      right = tmp;
6779
  }
6780
        /* OR reduction rule 2 */
6781
  /* a | (a | b) and b | (a | b) are reduced to a | b */
6782
        if (right->type == XML_EXP_OR) {
6783
      if ((left == right->exp_left) ||
6784
          (left == right->exp_right)) {
6785
    xmlExpFree(ctxt, left);
6786
    return(right);
6787
      }
6788
  }
6789
        /* OR canonicalization rule 2 */
6790
  /* linearize (a | b) | c into a | (b | c) */
6791
        if (left->type == XML_EXP_OR) {
6792
      xmlExpNodePtr tmp;
6793
6794
      /* OR canonicalization rule 2 */
6795
      if ((left->exp_right->type != XML_EXP_OR) &&
6796
          (left->exp_right->key < left->exp_left->key)) {
6797
          tmp = left->exp_right;
6798
    left->exp_right = left->exp_left;
6799
    left->exp_left = tmp;
6800
      }
6801
      left->exp_right->ref++;
6802
      tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
6803
                               NULL, 0, 0);
6804
      left->exp_left->ref++;
6805
      tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
6806
                               NULL, 0, 0);
6807
6808
      xmlExpFree(ctxt, left);
6809
      return(tmp);
6810
  }
6811
  if (right->type == XML_EXP_OR) {
6812
      /* Ordering in the tree */
6813
      /* C | (A | B) -> A | (B | C) */
6814
      if (left->key > right->exp_right->key) {
6815
    xmlExpNodePtr tmp;
6816
    right->exp_right->ref++;
6817
    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
6818
                             left, NULL, 0, 0);
6819
    right->exp_left->ref++;
6820
    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6821
                             tmp, NULL, 0, 0);
6822
    xmlExpFree(ctxt, right);
6823
    return(tmp);
6824
      }
6825
      /* Ordering in the tree */
6826
      /* B | (A | C) -> A | (B | C) */
6827
      if (left->key > right->exp_left->key) {
6828
    xmlExpNodePtr tmp;
6829
    right->exp_right->ref++;
6830
    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
6831
                             right->exp_right, NULL, 0, 0);
6832
    right->exp_left->ref++;
6833
    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6834
                             tmp, NULL, 0, 0);
6835
    xmlExpFree(ctxt, right);
6836
    return(tmp);
6837
      }
6838
  }
6839
  /* we know both types are != XML_EXP_OR here */
6840
        else if (left->key > right->key) {
6841
      xmlExpNodePtr tmp = left;
6842
            left = right;
6843
      right = tmp;
6844
  }
6845
  kbase = xmlExpHashComputeKey(type, left, right);
6846
    } else if (type == XML_EXP_SEQ) {
6847
        /* Forbid reduction rules */
6848
        if (left->type == XML_EXP_FORBID) {
6849
      xmlExpFree(ctxt, right);
6850
      return(left);
6851
  }
6852
        if (right->type == XML_EXP_FORBID) {
6853
      xmlExpFree(ctxt, left);
6854
      return(right);
6855
  }
6856
        /* Empty reduction rules */
6857
        if (right->type == XML_EXP_EMPTY) {
6858
      return(left);
6859
  }
6860
        if (left->type == XML_EXP_EMPTY) {
6861
      return(right);
6862
  }
6863
  kbase = xmlExpHashComputeKey(type, left, right);
6864
    } else
6865
        return(NULL);
6866
6867
    key = kbase % ctxt->size;
6868
    if (ctxt->table[key] != NULL) {
6869
  for (insert = ctxt->table[key]; insert != NULL;
6870
       insert = insert->next) {
6871
      if ((insert->key == kbase) &&
6872
          (insert->type == type)) {
6873
    if (type == XML_EXP_ATOM) {
6874
        if (name == insert->exp_str) {
6875
      insert->ref++;
6876
      return(insert);
6877
        }
6878
    } else if (type == XML_EXP_COUNT) {
6879
        if ((insert->exp_min == min) && (insert->exp_max == max) &&
6880
            (insert->exp_left == left)) {
6881
      insert->ref++;
6882
      left->ref--;
6883
      return(insert);
6884
        }
6885
    } else if ((insert->exp_left == left) &&
6886
         (insert->exp_right == right)) {
6887
        insert->ref++;
6888
        left->ref--;
6889
        right->ref--;
6890
        return(insert);
6891
    }
6892
      }
6893
  }
6894
    }
6895
6896
    entry = xmlExpNewNode(ctxt, type);
6897
    if (entry == NULL)
6898
        return(NULL);
6899
    entry->key = kbase;
6900
    if (type == XML_EXP_ATOM) {
6901
  entry->exp_str = name;
6902
  entry->c_max = 1;
6903
    } else if (type == XML_EXP_COUNT) {
6904
        entry->exp_min = min;
6905
        entry->exp_max = max;
6906
  entry->exp_left = left;
6907
  if ((min == 0) || (IS_NILLABLE(left)))
6908
      entry->info |= XML_EXP_NILABLE;
6909
  if (max < 0)
6910
      entry->c_max = -1;
6911
  else
6912
      entry->c_max = max * entry->exp_left->c_max;
6913
    } else {
6914
  entry->exp_left = left;
6915
  entry->exp_right = right;
6916
  if (type == XML_EXP_OR) {
6917
      if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
6918
    entry->info |= XML_EXP_NILABLE;
6919
      if ((entry->exp_left->c_max == -1) ||
6920
          (entry->exp_right->c_max == -1))
6921
    entry->c_max = -1;
6922
      else if (entry->exp_left->c_max > entry->exp_right->c_max)
6923
          entry->c_max = entry->exp_left->c_max;
6924
      else
6925
          entry->c_max = entry->exp_right->c_max;
6926
  } else {
6927
      if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
6928
    entry->info |= XML_EXP_NILABLE;
6929
      if ((entry->exp_left->c_max == -1) ||
6930
          (entry->exp_right->c_max == -1))
6931
    entry->c_max = -1;
6932
      else
6933
          entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
6934
  }
6935
    }
6936
    entry->ref = 1;
6937
    if (ctxt->table[key] != NULL)
6938
        entry->next = ctxt->table[key];
6939
6940
    ctxt->table[key] = entry;
6941
    ctxt->nbElems++;
6942
6943
    return(entry);
6944
}
6945
6946
/**
6947
 * xmlExpFree:
6948
 * @ctxt: the expression context
6949
 * @exp: the expression
6950
 *
6951
 * Dereference the expression
6952
 */
6953
void
6954
xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
6955
    if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
6956
        return;
6957
    exp->ref--;
6958
    if (exp->ref == 0) {
6959
        unsigned short key;
6960
6961
        /* Unlink it first from the hash table */
6962
  key = exp->key % ctxt->size;
6963
  if (ctxt->table[key] == exp) {
6964
      ctxt->table[key] = exp->next;
6965
  } else {
6966
      xmlExpNodePtr tmp;
6967
6968
      tmp = ctxt->table[key];
6969
      while (tmp != NULL) {
6970
          if (tmp->next == exp) {
6971
        tmp->next = exp->next;
6972
        break;
6973
    }
6974
          tmp = tmp->next;
6975
      }
6976
  }
6977
6978
        if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
6979
      xmlExpFree(ctxt, exp->exp_left);
6980
      xmlExpFree(ctxt, exp->exp_right);
6981
  } else if (exp->type == XML_EXP_COUNT) {
6982
      xmlExpFree(ctxt, exp->exp_left);
6983
  }
6984
        xmlFree(exp);
6985
  ctxt->nb_nodes--;
6986
    }
6987
}
6988
6989
/**
6990
 * xmlExpRef:
6991
 * @exp: the expression
6992
 *
6993
 * Increase the reference count of the expression
6994
 */
6995
void
6996
xmlExpRef(xmlExpNodePtr exp) {
6997
    if (exp != NULL)
6998
        exp->ref++;
6999
}
7000
7001
/**
7002
 * xmlExpNewAtom:
7003
 * @ctxt: the expression context
7004
 * @name: the atom name
7005
 * @len: the atom name length in byte (or -1);
7006
 *
7007
 * Get the atom associated to this name from that context
7008
 *
7009
 * Returns the node or NULL in case of error
7010
 */
7011
xmlExpNodePtr
7012
xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
7013
    if ((ctxt == NULL) || (name == NULL))
7014
        return(NULL);
7015
    name = xmlDictLookup(ctxt->dict, name, len);
7016
    if (name == NULL)
7017
        return(NULL);
7018
    return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
7019
}
7020
7021
/**
7022
 * xmlExpNewOr:
7023
 * @ctxt: the expression context
7024
 * @left: left expression
7025
 * @right: right expression
7026
 *
7027
 * Get the atom associated to the choice @left | @right
7028
 * Note that @left and @right are consumed in the operation, to keep
7029
 * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
7030
 * this is true even in case of failure (unless ctxt == NULL).
7031
 *
7032
 * Returns the node or NULL in case of error
7033
 */
7034
xmlExpNodePtr
7035
xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
7036
    if (ctxt == NULL)
7037
        return(NULL);
7038
    if ((left == NULL) || (right == NULL)) {
7039
        xmlExpFree(ctxt, left);
7040
        xmlExpFree(ctxt, right);
7041
        return(NULL);
7042
    }
7043
    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
7044
}
7045
7046
/**
7047
 * xmlExpNewSeq:
7048
 * @ctxt: the expression context
7049
 * @left: left expression
7050
 * @right: right expression
7051
 *
7052
 * Get the atom associated to the sequence @left , @right
7053
 * Note that @left and @right are consumed in the operation, to keep
7054
 * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
7055
 * this is true even in case of failure (unless ctxt == NULL).
7056
 *
7057
 * Returns the node or NULL in case of error
7058
 */
7059
xmlExpNodePtr
7060
xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
7061
    if (ctxt == NULL)
7062
        return(NULL);
7063
    if ((left == NULL) || (right == NULL)) {
7064
        xmlExpFree(ctxt, left);
7065
        xmlExpFree(ctxt, right);
7066
        return(NULL);
7067
    }
7068
    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
7069
}
7070
7071
/**
7072
 * xmlExpNewRange:
7073
 * @ctxt: the expression context
7074
 * @subset: the expression to be repeated
7075
 * @min: the lower bound for the repetition
7076
 * @max: the upper bound for the repetition, -1 means infinite
7077
 *
7078
 * Get the atom associated to the range (@subset){@min, @max}
7079
 * Note that @subset is consumed in the operation, to keep
7080
 * an handle on it use xmlExpRef() and use xmlExpFree() to release it,
7081
 * this is true even in case of failure (unless ctxt == NULL).
7082
 *
7083
 * Returns the node or NULL in case of error
7084
 */
7085
xmlExpNodePtr
7086
xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
7087
    if (ctxt == NULL)
7088
        return(NULL);
7089
    if ((subset == NULL) || (min < 0) || (max < -1) ||
7090
        ((max >= 0) && (min > max))) {
7091
  xmlExpFree(ctxt, subset);
7092
        return(NULL);
7093
    }
7094
    return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
7095
                              NULL, NULL, min, max));
7096
}
7097
7098
/************************************************************************
7099
 *                  *
7100
 *    Public API for operations on expressions    *
7101
 *                  *
7102
 ************************************************************************/
7103
7104
static int
7105
xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7106
                     const xmlChar**list, int len, int nb) {
7107
    int tmp, tmp2;
7108
tail:
7109
    switch (exp->type) {
7110
        case XML_EXP_EMPTY:
7111
      return(0);
7112
        case XML_EXP_ATOM:
7113
      for (tmp = 0;tmp < nb;tmp++)
7114
          if (list[tmp] == exp->exp_str)
7115
        return(0);
7116
            if (nb >= len)
7117
          return(-2);
7118
      list[nb] = exp->exp_str;
7119
      return(1);
7120
        case XML_EXP_COUNT:
7121
      exp = exp->exp_left;
7122
      goto tail;
7123
        case XML_EXP_SEQ:
7124
        case XML_EXP_OR:
7125
      tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
7126
      if (tmp < 0)
7127
          return(tmp);
7128
      tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
7129
                                  nb + tmp);
7130
      if (tmp2 < 0)
7131
          return(tmp2);
7132
            return(tmp + tmp2);
7133
    }
7134
    return(-1);
7135
}
7136
7137
/**
7138
 * xmlExpGetLanguage:
7139
 * @ctxt: the expression context
7140
 * @exp: the expression
7141
 * @langList: where to store the tokens
7142
 * @len: the allocated length of @list
7143
 *
7144
 * Find all the strings used in @exp and store them in @list
7145
 *
7146
 * Returns the number of unique strings found, -1 in case of errors and
7147
 *         -2 if there is more than @len strings
7148
 */
7149
int
7150
xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7151
                  const xmlChar**langList, int len) {
7152
    if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
7153
        return(-1);
7154
    return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
7155
}
7156
7157
static int
7158
xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7159
                  const xmlChar**list, int len, int nb) {
7160
    int tmp, tmp2;
7161
tail:
7162
    switch (exp->type) {
7163
        case XML_EXP_FORBID:
7164
      return(0);
7165
        case XML_EXP_EMPTY:
7166
      return(0);
7167
        case XML_EXP_ATOM:
7168
      for (tmp = 0;tmp < nb;tmp++)
7169
          if (list[tmp] == exp->exp_str)
7170
        return(0);
7171
            if (nb >= len)
7172
          return(-2);
7173
      list[nb] = exp->exp_str;
7174
      return(1);
7175
        case XML_EXP_COUNT:
7176
      exp = exp->exp_left;
7177
      goto tail;
7178
        case XML_EXP_SEQ:
7179
      tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
7180
      if (tmp < 0)
7181
          return(tmp);
7182
      if (IS_NILLABLE(exp->exp_left)) {
7183
    tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
7184
              nb + tmp);
7185
    if (tmp2 < 0)
7186
        return(tmp2);
7187
    tmp += tmp2;
7188
      }
7189
            return(tmp);
7190
        case XML_EXP_OR:
7191
      tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
7192
      if (tmp < 0)
7193
          return(tmp);
7194
      tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
7195
                                  nb + tmp);
7196
      if (tmp2 < 0)
7197
          return(tmp2);
7198
            return(tmp + tmp2);
7199
    }
7200
    return(-1);
7201
}
7202
7203
/**
7204
 * xmlExpGetStart:
7205
 * @ctxt: the expression context
7206
 * @exp: the expression
7207
 * @tokList: where to store the tokens
7208
 * @len: the allocated length of @list
7209
 *
7210
 * Find all the strings that appears at the start of the languages
7211
 * accepted by @exp and store them in @list. E.g. for (a, b) | c
7212
 * it will return the list [a, c]
7213
 *
7214
 * Returns the number of unique strings found, -1 in case of errors and
7215
 *         -2 if there is more than @len strings
7216
 */
7217
int
7218
xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7219
               const xmlChar**tokList, int len) {
7220
    if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
7221
        return(-1);
7222
    return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
7223
}
7224
7225
/**
7226
 * xmlExpIsNillable:
7227
 * @exp: the expression
7228
 *
7229
 * Finds if the expression is nillable, i.e. if it accepts the empty sequence
7230
 *
7231
 * Returns 1 if nillable, 0 if not and -1 in case of error
7232
 */
7233
int
7234
xmlExpIsNillable(xmlExpNodePtr exp) {
7235
    if (exp == NULL)
7236
        return(-1);
7237
    return(IS_NILLABLE(exp) != 0);
7238
}
7239
7240
static xmlExpNodePtr
7241
xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
7242
{
7243
    xmlExpNodePtr ret;
7244
7245
    switch (exp->type) {
7246
  case XML_EXP_EMPTY:
7247
      return(forbiddenExp);
7248
  case XML_EXP_FORBID:
7249
      return(forbiddenExp);
7250
  case XML_EXP_ATOM:
7251
      if (exp->exp_str == str) {
7252
#ifdef DEBUG_DERIV
7253
    printf("deriv atom: equal => Empty\n");
7254
#endif
7255
          ret = emptyExp;
7256
      } else {
7257
#ifdef DEBUG_DERIV
7258
    printf("deriv atom: mismatch => forbid\n");
7259
#endif
7260
          /* TODO wildcards here */
7261
    ret = forbiddenExp;
7262
      }
7263
      return(ret);
7264
  case XML_EXP_OR: {
7265
      xmlExpNodePtr tmp;
7266
7267
#ifdef DEBUG_DERIV
7268
      printf("deriv or: => or(derivs)\n");
7269
#endif
7270
      tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7271
      if (tmp == NULL) {
7272
    return(NULL);
7273
      }
7274
      ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7275
      if (ret == NULL) {
7276
          xmlExpFree(ctxt, tmp);
7277
    return(NULL);
7278
      }
7279
            ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
7280
           NULL, 0, 0);
7281
      return(ret);
7282
  }
7283
  case XML_EXP_SEQ:
7284
#ifdef DEBUG_DERIV
7285
      printf("deriv seq: starting with left\n");
7286
#endif
7287
      ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7288
      if (ret == NULL) {
7289
          return(NULL);
7290
      } else if (ret == forbiddenExp) {
7291
          if (IS_NILLABLE(exp->exp_left)) {
7292
#ifdef DEBUG_DERIV
7293
        printf("deriv seq: left failed but nillable\n");
7294
#endif
7295
        ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7296
    }
7297
      } else {
7298
#ifdef DEBUG_DERIV
7299
    printf("deriv seq: left match => sequence\n");
7300
#endif
7301
          exp->exp_right->ref++;
7302
          ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
7303
                             NULL, 0, 0);
7304
      }
7305
      return(ret);
7306
  case XML_EXP_COUNT: {
7307
      int min, max;
7308
      xmlExpNodePtr tmp;
7309
7310
      if (exp->exp_max == 0)
7311
    return(forbiddenExp);
7312
      ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7313
      if (ret == NULL)
7314
          return(NULL);
7315
      if (ret == forbiddenExp) {
7316
#ifdef DEBUG_DERIV
7317
    printf("deriv count: pattern mismatch => forbid\n");
7318
#endif
7319
          return(ret);
7320
      }
7321
      if (exp->exp_max == 1)
7322
    return(ret);
7323
      if (exp->exp_max < 0) /* unbounded */
7324
    max = -1;
7325
      else
7326
    max = exp->exp_max - 1;
7327
      if (exp->exp_min > 0)
7328
    min = exp->exp_min - 1;
7329
      else
7330
    min = 0;
7331
      exp->exp_left->ref++;
7332
      tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
7333
             NULL, min, max);
7334
      if (ret == emptyExp) {
7335
#ifdef DEBUG_DERIV
7336
    printf("deriv count: match to empty => new count\n");
7337
#endif
7338
          return(tmp);
7339
      }
7340
#ifdef DEBUG_DERIV
7341
      printf("deriv count: match => sequence with new count\n");
7342
#endif
7343
      return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
7344
                                NULL, 0, 0));
7345
  }
7346
    }
7347
    return(NULL);
7348
}
7349
7350
/**
7351
 * xmlExpStringDerive:
7352
 * @ctxt: the expression context
7353
 * @exp: the expression
7354
 * @str: the string
7355
 * @len: the string len in bytes if available
7356
 *
7357
 * Do one step of Brzozowski derivation of the expression @exp with
7358
 * respect to the input string
7359
 *
7360
 * Returns the resulting expression or NULL in case of internal error
7361
 */
7362
xmlExpNodePtr
7363
xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7364
                   const xmlChar *str, int len) {
7365
    const xmlChar *input;
7366
7367
    if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
7368
        return(NULL);
7369
    }
7370
    /*
7371
     * check the string is in the dictionary, if yes use an interned
7372
     * copy, otherwise we know it's not an acceptable input
7373
     */
7374
    input = xmlDictExists(ctxt->dict, str, len);
7375
    if (input == NULL) {
7376
        return(forbiddenExp);
7377
    }
7378
    return(xmlExpStringDeriveInt(ctxt, exp, input));
7379
}
7380
7381
static int
7382
xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
7383
    int ret = 1;
7384
7385
    if (sub->c_max == -1) {
7386
        if (exp->c_max != -1)
7387
      ret = 0;
7388
    } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
7389
        ret = 0;
7390
    }
7391
#if 0
7392
    if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
7393
        ret = 0;
7394
#endif
7395
    return(ret);
7396
}
7397
7398
static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                                        xmlExpNodePtr sub);
/**
 * xmlExpDivide:
 * @ctxt: the expressions context
 * @exp: the englobing expression
 * @sub: the subexpression
 * @mult: if not NULL, receives the multiple expression on success
 * @remain: if not NULL, receives the remain from the derivation of
 *          the multiple on success
 *
 * Check if exp is a multiple of sub, i.e. if there is a finite number n
 * so that sub{n} subsume exp. Each n from 1 up to the maximum length of
 * @exp is tried in turn; n is accepted when deriving sub{n} by @exp
 * gives a nillable expression. Both outputs are reset to NULL first.
 *
 * Returns the multiple value if successful, 0 if it is not a multiple
 *         and -1 in case of internal error.
 */

static int
xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
             xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
    int n;
    xmlExpNodePtr power, rest;

    if (mult != NULL)
        *mult = NULL;
    if (remain != NULL)
        *remain = NULL;
    /* an unbounded expression cannot be a finite multiple */
    if (exp->c_max == -1)
        return(0);
    if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub)))
        return(0);

    for (n = 1; n <= exp->c_max; n++) {
        /* the count node consumes one reference on sub */
        sub->ref++;
        power = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
                                   sub, NULL, NULL, n, n);
        if (power == NULL) {
            return(-1);
        }
        if (!xmlExpCheckCard(power, exp)) {
            xmlExpFree(ctxt, power);
            continue;
        }
        rest = xmlExpExpDeriveInt(ctxt, power, exp);
        if (rest == NULL) {
            xmlExpFree(ctxt, power);
            return(-1);
        }
        if ((rest == forbiddenExp) || (!IS_NILLABLE(rest))) {
            /* sub{n} does not cover exp, try the next multiple */
            xmlExpFree(ctxt, power);
            xmlExpFree(ctxt, rest);
            continue;
        }

        /* hand the results over, or drop those the caller ignores */
        if (remain != NULL)
            *remain = rest;
        else
            xmlExpFree(ctxt, rest);
        if (mult != NULL)
            *mult = power;
        else
            xmlExpFree(ctxt, power);
#ifdef DEBUG_DERIV
        printf("Divide succeeded %d\n", n);
#endif
        return(n);
    }
#ifdef DEBUG_DERIV
    printf("Divide failed\n");
#endif
    return(0);
}
7464
7465
/**
7466
 * xmlExpExpDeriveInt:
7467
 * @ctxt: the expressions context
7468
 * @exp: the englobing expression
7469
 * @sub: the subexpression
7470
 *
7471
 * Try to do a step of Brzozowski derivation but at a higher level
7472
 * the input being a subexpression.
7473
 *
7474
 * Returns the resulting expression or NULL in case of internal error
7475
 */
7476
static xmlExpNodePtr
7477
xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7478
    xmlExpNodePtr ret, tmp, tmp2, tmp3;
7479
    const xmlChar **tab;
7480
    int len, i;
7481
7482
    /*
7483
     * In case of equality and if the expression can only consume a finite
7484
     * amount, then the derivation is empty
7485
     */
7486
    if ((exp == sub) && (exp->c_max >= 0)) {
7487
#ifdef DEBUG_DERIV
7488
        printf("Equal(exp, sub) and finite -> Empty\n");
7489
#endif
7490
        return(emptyExp);
7491
    }
7492
    /*
7493
     * decompose sub sequence first
7494
     */
7495
    if (sub->type == XML_EXP_EMPTY) {
7496
#ifdef DEBUG_DERIV
7497
        printf("Empty(sub) -> Empty\n");
7498
#endif
7499
  exp->ref++;
7500
        return(exp);
7501
    }
7502
    if (sub->type == XML_EXP_SEQ) {
7503
#ifdef DEBUG_DERIV
7504
        printf("Seq(sub) -> decompose\n");
7505
#endif
7506
        tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7507
  if (tmp == NULL)
7508
      return(NULL);
7509
  if (tmp == forbiddenExp)
7510
      return(tmp);
7511
  ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
7512
  xmlExpFree(ctxt, tmp);
7513
  return(ret);
7514
    }
7515
    if (sub->type == XML_EXP_OR) {
7516
#ifdef DEBUG_DERIV
7517
        printf("Or(sub) -> decompose\n");
7518
#endif
7519
        tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7520
  if (tmp == forbiddenExp)
7521
      return(tmp);
7522
  if (tmp == NULL)
7523
      return(NULL);
7524
  ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
7525
  if ((ret == NULL) || (ret == forbiddenExp)) {
7526
      xmlExpFree(ctxt, tmp);
7527
      return(ret);
7528
  }
7529
  return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
7530
    }
7531
    if (!xmlExpCheckCard(exp, sub)) {
7532
#ifdef DEBUG_DERIV
7533
        printf("CheckCard(exp, sub) failed -> Forbid\n");
7534
#endif
7535
        return(forbiddenExp);
7536
    }
7537
    switch (exp->type) {
7538
        case XML_EXP_EMPTY:
7539
      if (sub == emptyExp)
7540
          return(emptyExp);
7541
#ifdef DEBUG_DERIV
7542
      printf("Empty(exp) -> Forbid\n");
7543
#endif
7544
      return(forbiddenExp);
7545
        case XML_EXP_FORBID:
7546
#ifdef DEBUG_DERIV
7547
      printf("Forbid(exp) -> Forbid\n");
7548
#endif
7549
      return(forbiddenExp);
7550
        case XML_EXP_ATOM:
7551
      if (sub->type == XML_EXP_ATOM) {
7552
          /* TODO: handle wildcards */
7553
          if (exp->exp_str == sub->exp_str) {
7554
#ifdef DEBUG_DERIV
7555
        printf("Atom match -> Empty\n");
7556
#endif
7557
        return(emptyExp);
7558
                }
7559
#ifdef DEBUG_DERIV
7560
    printf("Atom mismatch -> Forbid\n");
7561
#endif
7562
          return(forbiddenExp);
7563
      }
7564
      if ((sub->type == XML_EXP_COUNT) &&
7565
          (sub->exp_max == 1) &&
7566
          (sub->exp_left->type == XML_EXP_ATOM)) {
7567
          /* TODO: handle wildcards */
7568
          if (exp->exp_str == sub->exp_left->exp_str) {
7569
#ifdef DEBUG_DERIV
7570
        printf("Atom match -> Empty\n");
7571
#endif
7572
        return(emptyExp);
7573
    }
7574
#ifdef DEBUG_DERIV
7575
    printf("Atom mismatch -> Forbid\n");
7576
#endif
7577
          return(forbiddenExp);
7578
      }
7579
#ifdef DEBUG_DERIV
7580
      printf("Complex exp vs Atom -> Forbid\n");
7581
#endif
7582
      return(forbiddenExp);
7583
        case XML_EXP_SEQ:
7584
      /* try to get the sequence consumed only if possible */
7585
      if (xmlExpCheckCard(exp->exp_left, sub)) {
7586
    /* See if the sequence can be consumed directly */
7587
#ifdef DEBUG_DERIV
7588
    printf("Seq trying left only\n");
7589
#endif
7590
    ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7591
    if ((ret != forbiddenExp) && (ret != NULL)) {
7592
#ifdef DEBUG_DERIV
7593
        printf("Seq trying left only worked\n");
7594
#endif
7595
        /*
7596
         * TODO: assumption here that we are determinist
7597
         *       i.e. we won't get to a nillable exp left
7598
         *       subset which could be matched by the right
7599
         *       part too.
7600
         * e.g.: (a | b)+,(a | c) and 'a+,a'
7601
         */
7602
        exp->exp_right->ref++;
7603
        return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7604
                exp->exp_right, NULL, 0, 0));
7605
    }
7606
#ifdef DEBUG_DERIV
7607
      } else {
7608
    printf("Seq: left too short\n");
7609
#endif
7610
      }
7611
      /* Try instead to decompose */
7612
      if (sub->type == XML_EXP_COUNT) {
7613
    int min, max;
7614
7615
#ifdef DEBUG_DERIV
7616
    printf("Seq: sub is a count\n");
7617
#endif
7618
          ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7619
    if (ret == NULL)
7620
        return(NULL);
7621
    if (ret != forbiddenExp) {
7622
#ifdef DEBUG_DERIV
7623
        printf("Seq , Count match on left\n");
7624
#endif
7625
        if (sub->exp_max < 0)
7626
            max = -1;
7627
              else
7628
            max = sub->exp_max -1;
7629
        if (sub->exp_min > 0)
7630
            min = sub->exp_min -1;
7631
        else
7632
            min = 0;
7633
        exp->exp_right->ref++;
7634
        tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7635
                                 exp->exp_right, NULL, 0, 0);
7636
        if (tmp == NULL)
7637
            return(NULL);
7638
7639
        sub->exp_left->ref++;
7640
        tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7641
              sub->exp_left, NULL, NULL, min, max);
7642
        if (tmp2 == NULL) {
7643
            xmlExpFree(ctxt, tmp);
7644
      return(NULL);
7645
        }
7646
        ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7647
        xmlExpFree(ctxt, tmp);
7648
        xmlExpFree(ctxt, tmp2);
7649
        return(ret);
7650
    }
7651
      }
7652
      /* we made no progress on structured operations */
7653
      break;
7654
        case XML_EXP_OR:
7655
#ifdef DEBUG_DERIV
7656
      printf("Or , trying both side\n");
7657
#endif
7658
      ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7659
      if (ret == NULL)
7660
          return(NULL);
7661
      tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
7662
      if (tmp == NULL) {
7663
    xmlExpFree(ctxt, ret);
7664
          return(NULL);
7665
      }
7666
      return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
7667
        case XML_EXP_COUNT: {
7668
      int min, max;
7669
7670
      if (sub->type == XML_EXP_COUNT) {
7671
          /*
7672
     * Try to see if the loop is completely subsumed
7673
     */
7674
          tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7675
    if (tmp == NULL)
7676
        return(NULL);
7677
    if (tmp == forbiddenExp) {
7678
        int mult;
7679
7680
#ifdef DEBUG_DERIV
7681
        printf("Count, Count inner don't subsume\n");
7682
#endif
7683
        mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
7684
                            NULL, &tmp);
7685
        if (mult <= 0) {
7686
#ifdef DEBUG_DERIV
7687
      printf("Count, Count not multiple => forbidden\n");
7688
#endif
7689
                        return(forbiddenExp);
7690
        }
7691
        if (sub->exp_max == -1) {
7692
            max = -1;
7693
      if (exp->exp_max == -1) {
7694
          if (exp->exp_min <= sub->exp_min * mult)
7695
              min = 0;
7696
          else
7697
              min = exp->exp_min - sub->exp_min * mult;
7698
      } else {
7699
#ifdef DEBUG_DERIV
7700
          printf("Count, Count finite can't subsume infinite\n");
7701
#endif
7702
                            xmlExpFree(ctxt, tmp);
7703
          return(forbiddenExp);
7704
      }
7705
        } else {
7706
      if (exp->exp_max == -1) {
7707
#ifdef DEBUG_DERIV
7708
          printf("Infinite loop consume mult finite loop\n");
7709
#endif
7710
          if (exp->exp_min > sub->exp_min * mult) {
7711
        max = -1;
7712
        min = exp->exp_min - sub->exp_min * mult;
7713
          } else {
7714
        max = -1;
7715
        min = 0;
7716
          }
7717
      } else {
7718
          if (exp->exp_max < sub->exp_max * mult) {
7719
#ifdef DEBUG_DERIV
7720
        printf("loops max mult mismatch => forbidden\n");
7721
#endif
7722
        xmlExpFree(ctxt, tmp);
7723
        return(forbiddenExp);
7724
          }
7725
          if (sub->exp_max * mult > exp->exp_min)
7726
        min = 0;
7727
          else
7728
        min = exp->exp_min - sub->exp_max * mult;
7729
          max = exp->exp_max - sub->exp_max * mult;
7730
      }
7731
        }
7732
    } else if (!IS_NILLABLE(tmp)) {
7733
        /*
7734
         * TODO: loop here to try to grow if working on finite
7735
         *       blocks.
7736
         */
7737
#ifdef DEBUG_DERIV
7738
        printf("Count, Count remain not nillable => forbidden\n");
7739
#endif
7740
        xmlExpFree(ctxt, tmp);
7741
        return(forbiddenExp);
7742
    } else if (sub->exp_max == -1) {
7743
        if (exp->exp_max == -1) {
7744
            if (exp->exp_min <= sub->exp_min) {
7745
#ifdef DEBUG_DERIV
7746
          printf("Infinite loops Okay => COUNT(0,Inf)\n");
7747
#endif
7748
                            max = -1;
7749
          min = 0;
7750
      } else {
7751
#ifdef DEBUG_DERIV
7752
          printf("Infinite loops min => Count(X,Inf)\n");
7753
#endif
7754
                            max = -1;
7755
          min = exp->exp_min - sub->exp_min;
7756
      }
7757
        } else if (exp->exp_min > sub->exp_min) {
7758
#ifdef DEBUG_DERIV
7759
      printf("loops min mismatch 1 => forbidden ???\n");
7760
#endif
7761
            xmlExpFree(ctxt, tmp);
7762
            return(forbiddenExp);
7763
        } else {
7764
      max = -1;
7765
      min = 0;
7766
        }
7767
    } else {
7768
        if (exp->exp_max == -1) {
7769
#ifdef DEBUG_DERIV
7770
      printf("Infinite loop consume finite loop\n");
7771
#endif
7772
            if (exp->exp_min > sub->exp_min) {
7773
          max = -1;
7774
          min = exp->exp_min - sub->exp_min;
7775
      } else {
7776
          max = -1;
7777
          min = 0;
7778
      }
7779
        } else {
7780
            if (exp->exp_max < sub->exp_max) {
7781
#ifdef DEBUG_DERIV
7782
          printf("loops max mismatch => forbidden\n");
7783
#endif
7784
          xmlExpFree(ctxt, tmp);
7785
          return(forbiddenExp);
7786
      }
7787
      if (sub->exp_max > exp->exp_min)
7788
          min = 0;
7789
      else
7790
          min = exp->exp_min - sub->exp_max;
7791
      max = exp->exp_max - sub->exp_max;
7792
        }
7793
    }
7794
#ifdef DEBUG_DERIV
7795
    printf("loops match => SEQ(COUNT())\n");
7796
#endif
7797
    exp->exp_left->ref++;
7798
    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7799
                              NULL, NULL, min, max);
7800
    if (tmp2 == NULL) {
7801
        return(NULL);
7802
    }
7803
                ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7804
                             NULL, 0, 0);
7805
    return(ret);
7806
      }
7807
      tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7808
      if (tmp == NULL)
7809
    return(NULL);
7810
      if (tmp == forbiddenExp) {
7811
#ifdef DEBUG_DERIV
7812
    printf("loop mismatch => forbidden\n");
7813
#endif
7814
    return(forbiddenExp);
7815
      }
7816
      if (exp->exp_min > 0)
7817
    min = exp->exp_min - 1;
7818
      else
7819
    min = 0;
7820
      if (exp->exp_max < 0)
7821
    max = -1;
7822
      else
7823
    max = exp->exp_max - 1;
7824
7825
#ifdef DEBUG_DERIV
7826
      printf("loop match => SEQ(COUNT())\n");
7827
#endif
7828
      exp->exp_left->ref++;
7829
      tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7830
              NULL, NULL, min, max);
7831
      if (tmp2 == NULL)
7832
    return(NULL);
7833
      ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7834
             NULL, 0, 0);
7835
      return(ret);
7836
  }
7837
    }
7838
7839
#ifdef DEBUG_DERIV
7840
    printf("Fallback to derivative\n");
7841
#endif
7842
    if (IS_NILLABLE(sub)) {
7843
        if (!(IS_NILLABLE(exp)))
7844
      return(forbiddenExp);
7845
  else
7846
      ret = emptyExp;
7847
    } else
7848
  ret = NULL;
7849
    /*
7850
     * here the structured derivation made no progress so
7851
     * we use the default token based derivation to force one more step
7852
     */
7853
    if (ctxt->tabSize == 0)
7854
        ctxt->tabSize = 40;
7855
7856
    tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
7857
                                 sizeof(const xmlChar *));
7858
    if (tab == NULL) {
7859
  return(NULL);
7860
    }
7861
7862
    /*
7863
     * collect all the strings accepted by the subexpression on input
7864
     */
7865
    len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7866
    while (len < 0) {
7867
        const xmlChar **temp;
7868
  temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
7869
                                       sizeof(const xmlChar *));
7870
  if (temp == NULL) {
7871
      xmlFree((xmlChar **) tab);
7872
      return(NULL);
7873
  }
7874
  tab = temp;
7875
  ctxt->tabSize *= 2;
7876
  len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7877
    }
7878
    for (i = 0;i < len;i++) {
7879
        tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
7880
  if ((tmp == NULL) || (tmp == forbiddenExp)) {
7881
      xmlExpFree(ctxt, ret);
7882
      xmlFree((xmlChar **) tab);
7883
      return(tmp);
7884
  }
7885
  tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
7886
  if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
7887
      xmlExpFree(ctxt, tmp);
7888
      xmlExpFree(ctxt, ret);
7889
      xmlFree((xmlChar **) tab);
7890
      return(tmp);
7891
  }
7892
  tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7893
  xmlExpFree(ctxt, tmp);
7894
  xmlExpFree(ctxt, tmp2);
7895
7896
  if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
7897
      xmlExpFree(ctxt, ret);
7898
      xmlFree((xmlChar **) tab);
7899
      return(tmp3);
7900
  }
7901
7902
  if (ret == NULL)
7903
      ret = tmp3;
7904
  else {
7905
      ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
7906
      if (ret == NULL) {
7907
    xmlFree((xmlChar **) tab);
7908
          return(NULL);
7909
      }
7910
  }
7911
    }
7912
    xmlFree((xmlChar **) tab);
7913
    return(ret);
7914
}
7915
7916
/**
7917
 * xmlExpExpDerive:
7918
 * @ctxt: the expressions context
7919
 * @exp: the englobing expression
7920
 * @sub: the subexpression
7921
 *
7922
 * Evaluates the expression resulting from @exp consuming a sub expression @sub
7923
 * Based on algebraic derivation and sometimes direct Brzozowski derivation
7924
 * it usually takes less than linear time and can handle expressions generating
7925
 * infinite languages.
7926
 *
7927
 * Returns the resulting expression or NULL in case of internal error, the
7928
 *         result must be freed
7929
 */
7930
xmlExpNodePtr
7931
xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7932
    if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7933
        return(NULL);
7934
7935
    /*
7936
     * O(1) speedups
7937
     */
7938
    if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7939
#ifdef DEBUG_DERIV
7940
  printf("Sub nillable and not exp : can't subsume\n");
7941
#endif
7942
        return(forbiddenExp);
7943
    }
7944
    if (xmlExpCheckCard(exp, sub) == 0) {
7945
#ifdef DEBUG_DERIV
7946
  printf("sub generate longer sequences than exp : can't subsume\n");
7947
#endif
7948
        return(forbiddenExp);
7949
    }
7950
    return(xmlExpExpDeriveInt(ctxt, exp, sub));
7951
}
7952
7953
/**
7954
 * xmlExpSubsume:
7955
 * @ctxt: the expressions context
7956
 * @exp: the englobing expression
7957
 * @sub: the subexpression
7958
 *
7959
 * Check whether @exp accepts all the languages accepted by @sub
7960
 * the input being a subexpression.
7961
 *
7962
 * Returns 1 if true 0 if false and -1 in case of failure.
7963
 */
7964
int
7965
xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7966
    xmlExpNodePtr tmp;
7967
7968
    if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7969
        return(-1);
7970
7971
    /*
7972
     * TODO: speedup by checking the language of sub is a subset of the
7973
     *       language of exp
7974
     */
7975
    /*
7976
     * O(1) speedups
7977
     */
7978
    if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7979
#ifdef DEBUG_DERIV
7980
  printf("Sub nillable and not exp : can't subsume\n");
7981
#endif
7982
        return(0);
7983
    }
7984
    if (xmlExpCheckCard(exp, sub) == 0) {
7985
#ifdef DEBUG_DERIV
7986
  printf("sub generate longer sequences than exp : can't subsume\n");
7987
#endif
7988
        return(0);
7989
    }
7990
    tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
7991
#ifdef DEBUG_DERIV
7992
    printf("Result derivation :\n");
7993
    PRINT_EXP(tmp);
7994
#endif
7995
    if (tmp == NULL)
7996
        return(-1);
7997
    if (tmp == forbiddenExp)
7998
  return(0);
7999
    if (tmp == emptyExp)
8000
  return(1);
8001
    if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
8002
        xmlExpFree(ctxt, tmp);
8003
        return(1);
8004
    }
8005
    xmlExpFree(ctxt, tmp);
8006
    return(0);
8007
}
8008
8009
/************************************************************************
8010
 *                  *
8011
 *      Parsing expression        *
8012
 *                  *
8013
 ************************************************************************/
8014
8015
/* Forward declaration: the parser below is mutually recursive. */
static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);

/*
 * Scanning helpers for the expression parser, all working on ctxt->cur.
 * NEXT and SKIP_BLANKS carry their own trailing ';' because the parser
 * uses them as bare statements.
 */
#undef CUR
#define CUR (*ctxt->cur)
#undef NEXT
#define NEXT ctxt->cur++;
#undef IS_BLANK
/* The argument is parenthesized so that any expression can be passed. */
#define IS_BLANK(c) (((c) == ' ') || ((c) == '\n') || ((c) == '\r') || ((c) == '\t'))
#define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
8024
8025
static int
8026
xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
8027
    int ret = 0;
8028
8029
    SKIP_BLANKS
8030
    if (CUR == '*') {
8031
  NEXT
8032
  return(-1);
8033
    }
8034
    if ((CUR < '0') || (CUR > '9'))
8035
        return(-1);
8036
    while ((CUR >= '0') && (CUR <= '9')) {
8037
        ret = ret * 10 + (CUR - '0');
8038
  NEXT
8039
    }
8040
    return(ret);
8041
}
8042
8043
static xmlExpNodePtr
8044
xmlExpParseOr(xmlExpCtxtPtr ctxt) {
8045
    const char *base;
8046
    xmlExpNodePtr ret;
8047
    const xmlChar *val;
8048
8049
    SKIP_BLANKS
8050
    base = ctxt->cur;
8051
    if (*ctxt->cur == '(') {
8052
        NEXT
8053
  ret = xmlExpParseExpr(ctxt);
8054
  SKIP_BLANKS
8055
  if (*ctxt->cur != ')') {
8056
      fprintf(stderr, "unbalanced '(' : %s\n", base);
8057
      xmlExpFree(ctxt, ret);
8058
      return(NULL);
8059
  }
8060
  NEXT;
8061
  SKIP_BLANKS
8062
  goto parse_quantifier;
8063
    }
8064
    while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
8065
           (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
8066
     (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
8067
  NEXT;
8068
    val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
8069
    if (val == NULL)
8070
        return(NULL);
8071
    ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
8072
    if (ret == NULL)
8073
        return(NULL);
8074
    SKIP_BLANKS
8075
parse_quantifier:
8076
    if (CUR == '{') {
8077
        int min, max;
8078
8079
        NEXT
8080
  min = xmlExpParseNumber(ctxt);
8081
  if (min < 0) {
8082
      xmlExpFree(ctxt, ret);
8083
      return(NULL);
8084
  }
8085
  SKIP_BLANKS
8086
  if (CUR == ',') {
8087
      NEXT
8088
      max = xmlExpParseNumber(ctxt);
8089
      SKIP_BLANKS
8090
  } else
8091
      max = min;
8092
  if (CUR != '}') {
8093
      xmlExpFree(ctxt, ret);
8094
      return(NULL);
8095
  }
8096
        NEXT
8097
  ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
8098
                           min, max);
8099
  SKIP_BLANKS
8100
    } else if (CUR == '?') {
8101
        NEXT
8102
  ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
8103
                           0, 1);
8104
  SKIP_BLANKS
8105
    } else if (CUR == '+') {
8106
        NEXT
8107
  ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
8108
                           1, -1);
8109
  SKIP_BLANKS
8110
    } else if (CUR == '*') {
8111
        NEXT
8112
  ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
8113
                           0, -1);
8114
  SKIP_BLANKS
8115
    }
8116
    return(ret);
8117
}
8118
8119
8120
static xmlExpNodePtr
8121
xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
8122
    xmlExpNodePtr ret, right;
8123
8124
    ret = xmlExpParseOr(ctxt);
8125
    SKIP_BLANKS
8126
    while (CUR == '|') {
8127
        NEXT
8128
  right = xmlExpParseOr(ctxt);
8129
  if (right == NULL) {
8130
      xmlExpFree(ctxt, ret);
8131
      return(NULL);
8132
  }
8133
  ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
8134
  if (ret == NULL)
8135
      return(NULL);
8136
    }
8137
    return(ret);
8138
}
8139
8140
static xmlExpNodePtr
8141
xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
8142
    xmlExpNodePtr ret, right;
8143
8144
    ret = xmlExpParseSeq(ctxt);
8145
    SKIP_BLANKS
8146
    while (CUR == ',') {
8147
        NEXT
8148
  right = xmlExpParseSeq(ctxt);
8149
  if (right == NULL) {
8150
      xmlExpFree(ctxt, ret);
8151
      return(NULL);
8152
  }
8153
  ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
8154
  if (ret == NULL)
8155
      return(NULL);
8156
    }
8157
    return(ret);
8158
}
8159
8160
/**
8161
 * xmlExpParse:
8162
 * @ctxt: the expressions context
8163
 * @expr: the 0 terminated string
8164
 *
8165
 * Minimal parser for regexps, it understand the following constructs
8166
 *  - string terminals
8167
 *  - choice operator |
8168
 *  - sequence operator ,
8169
 *  - subexpressions (...)
8170
 *  - usual cardinality operators + * and ?
8171
 *  - finite sequences  { min, max }
8172
 *  - infinite sequences { min, * }
8173
 * There is minimal checkings made especially no checking on strings values
8174
 *
8175
 * Returns a new expression or NULL in case of failure
8176
 */
8177
xmlExpNodePtr
8178
xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
8179
    xmlExpNodePtr ret;
8180
8181
    ctxt->expr = expr;
8182
    ctxt->cur = expr;
8183
8184
    ret = xmlExpParseExpr(ctxt);
8185
    SKIP_BLANKS
8186
    if (*ctxt->cur != 0) {
8187
        xmlExpFree(ctxt, ret);
8188
        return(NULL);
8189
    }
8190
    return(ret);
8191
}
8192
8193
static void
8194
xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
8195
    xmlExpNodePtr c;
8196
8197
    if (expr == NULL) return;
8198
    if (glob) xmlBufferWriteChar(buf, "(");
8199
    switch (expr->type) {
8200
        case XML_EXP_EMPTY:
8201
      xmlBufferWriteChar(buf, "empty");
8202
      break;
8203
        case XML_EXP_FORBID:
8204
      xmlBufferWriteChar(buf, "forbidden");
8205
      break;
8206
        case XML_EXP_ATOM:
8207
      xmlBufferWriteCHAR(buf, expr->exp_str);
8208
      break;
8209
        case XML_EXP_SEQ:
8210
      c = expr->exp_left;
8211
      if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8212
          xmlExpDumpInt(buf, c, 1);
8213
      else
8214
          xmlExpDumpInt(buf, c, 0);
8215
      xmlBufferWriteChar(buf, " , ");
8216
      c = expr->exp_right;
8217
      if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8218
          xmlExpDumpInt(buf, c, 1);
8219
      else
8220
          xmlExpDumpInt(buf, c, 0);
8221
            break;
8222
        case XML_EXP_OR:
8223
      c = expr->exp_left;
8224
      if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8225
          xmlExpDumpInt(buf, c, 1);
8226
      else
8227
          xmlExpDumpInt(buf, c, 0);
8228
      xmlBufferWriteChar(buf, " | ");
8229
      c = expr->exp_right;
8230
      if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8231
          xmlExpDumpInt(buf, c, 1);
8232
      else
8233
          xmlExpDumpInt(buf, c, 0);
8234
            break;
8235
        case XML_EXP_COUNT: {
8236
      char rep[40];
8237
8238
      c = expr->exp_left;
8239
      if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8240
          xmlExpDumpInt(buf, c, 1);
8241
      else
8242
          xmlExpDumpInt(buf, c, 0);
8243
      if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
8244
    rep[0] = '?';
8245
    rep[1] = 0;
8246
      } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
8247
    rep[0] = '*';
8248
    rep[1] = 0;
8249
      } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
8250
    rep[0] = '+';
8251
    rep[1] = 0;
8252
      } else if (expr->exp_max == expr->exp_min) {
8253
          snprintf(rep, 39, "{%d}", expr->exp_min);
8254
      } else if (expr->exp_max < 0) {
8255
          snprintf(rep, 39, "{%d,inf}", expr->exp_min);
8256
      } else {
8257
          snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
8258
      }
8259
      rep[39] = 0;
8260
      xmlBufferWriteChar(buf, rep);
8261
      break;
8262
  }
8263
  default:
8264
      fprintf(stderr, "Error in tree\n");
8265
    }
8266
    if (glob)
8267
        xmlBufferWriteChar(buf, ")");
8268
}
8269
/**
8270
 * xmlExpDump:
8271
 * @buf:  a buffer to receive the output
8272
 * @expr:  the compiled expression
8273
 *
8274
 * Serialize the expression as compiled to the buffer
8275
 */
8276
void
8277
xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
8278
    if ((buf == NULL) || (expr == NULL))
8279
        return;
8280
    xmlExpDumpInt(buf, expr, 0);
8281
}
8282
8283
/**
8284
 * xmlExpMaxToken:
8285
 * @expr: a compiled expression
8286
 *
8287
 * Indicate the maximum number of input a expression can accept
8288
 *
8289
 * Returns the maximum length or -1 in case of error
8290
 */
8291
int
8292
xmlExpMaxToken(xmlExpNodePtr expr) {
8293
    if (expr == NULL)
8294
        return(-1);
8295
    return(expr->c_max);
8296
}
8297
8298
/**
8299
 * xmlExpCtxtNbNodes:
8300
 * @ctxt: an expression context
8301
 *
8302
 * Debugging facility provides the number of allocated nodes at a that point
8303
 *
8304
 * Returns the number of nodes in use or -1 in case of error
8305
 */
8306
int
8307
xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
8308
    if (ctxt == NULL)
8309
        return(-1);
8310
    return(ctxt->nb_nodes);
8311
}
8312
8313
/**
8314
 * xmlExpCtxtNbCons:
8315
 * @ctxt: an expression context
8316
 *
8317
 * Debugging facility provides the number of allocated nodes over lifetime
8318
 *
8319
 * Returns the number of nodes ever allocated or -1 in case of error
8320
 */
8321
int
8322
xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
8323
    if (ctxt == NULL)
8324
        return(-1);
8325
    return(ctxt->nb_cons);
8326
}
8327
8328
#endif /* LIBXML_EXPR_ENABLED */
8329
8330
#endif /* LIBXML_REGEXP_ENABLED */