Coverage Report

Created: 2023-11-19 06:57

/src/jq/modules/oniguruma/src/regparse.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regparse.c -  Oniguruma (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2023  K.Kosako
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#ifdef DEBUG_ND_FREE
31
#ifndef NEED_TO_INCLUDE_STDIO
32
#define NEED_TO_INCLUDE_STDIO
33
#endif
34
#endif
35
36
#include "regparse.h"
37
#include "st.h"
38
39
0
#define INIT_TAG_NAMES_ALLOC_NUM   5
40
41
0
#define WARN_BUFSIZE    256
42
43
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
44
45
/* True when c is a character permitted in a callout name: an ASCII
   letter, an ASCII digit or '_'.  Every use of the argument is
   parenthesized so that an expression argument is compared as a whole.
   The argument is still evaluated several times, so it must not have
   side effects. */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
/* True when c is a character permitted in a callout tag name (same
   character set as a callout name). */
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_')
49
50
0
#define OPTON_SINGLELINE(option)     ((option) & ONIG_OPTION_SINGLELINE)
51
0
#define OPTON_MULTILINE(option)      ((option) & ONIG_OPTION_MULTILINE)
52
0
#define OPTON_IGNORECASE(option)     ((option) & ONIG_OPTION_IGNORECASE)
53
0
#define OPTON_EXTEND(option)         ((option) & ONIG_OPTION_EXTEND)
54
#define OPTON_WORD_ASCII(option) \
55
0
  ((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
56
#define OPTON_DIGIT_ASCII(option) \
57
0
  ((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
58
#define OPTON_SPACE_ASCII(option) \
59
0
  ((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
60
0
#define OPTON_POSIX_ASCII(option)    ((option) & ONIG_OPTION_POSIX_IS_ASCII)
61
0
#define OPTON_TEXT_SEGMENT_WORD(option)  ((option) & ONIG_OPTION_TEXT_SEGMENT_WORD)
62
63
#define OPTON_IS_ASCII_MODE_CTYPE(ctype, options) \
64
0
  ((ctype) >= 0 && \
65
0
  (((ctype) < ONIGENC_CTYPE_ASCII  && OPTON_POSIX_ASCII(options)) ||\
66
0
   ((ctype) == ONIGENC_CTYPE_WORD  && OPTON_WORD_ASCII(options))  ||\
67
0
   ((ctype) == ONIGENC_CTYPE_DIGIT && OPTON_DIGIT_ASCII(options)) ||\
68
0
   ((ctype) == ONIGENC_CTYPE_SPACE && OPTON_SPACE_ASCII(options))))
69
70
71
OnigSyntaxType OnigSyntaxOniguruma = {
72
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
73
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
74
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
75
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
76
     ONIG_SYN_OP_ESC_C_CONTROL )
77
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
78
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
79
      ONIG_SYN_OP2_OPTION_ONIGURUMA |
80
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
81
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
82
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
83
      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
84
      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME    |
85
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
86
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
87
      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
88
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
89
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
90
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
91
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
92
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
93
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
94
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
95
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
96
  , ( SYN_GNU_REGEX_BV |
97
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
98
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
99
      ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND |
100
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
101
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
102
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
103
      ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
104
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
105
#ifdef USE_WHOLE_OPTIONS
106
      ONIG_SYN_WHOLE_OPTIONS |
107
#endif
108
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
109
    )
110
  , ONIG_OPTION_NONE
111
  ,
112
  {
113
      (OnigCodePoint )'\\'                       /* esc */
114
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
115
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
116
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
117
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
118
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
119
  }
120
};
121
122
OnigSyntaxType OnigSyntaxRuby = {
123
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
124
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
125
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
126
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
127
     ONIG_SYN_OP_ESC_C_CONTROL )
128
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
129
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
130
      ONIG_SYN_OP2_OPTION_RUBY |
131
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
132
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
133
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
134
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
135
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
136
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
137
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
138
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
139
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
140
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
141
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
142
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
143
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
144
  , ( SYN_GNU_REGEX_BV |
145
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
146
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
147
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
148
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
149
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
150
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
151
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
152
  , ONIG_OPTION_NONE
153
  ,
154
  {
155
      (OnigCodePoint )'\\'                       /* esc */
156
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
157
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
158
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
159
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
160
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
161
  }
162
};
163
164
OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;
165
166
167
0
#define BB_INIT(buf,size)    bbuf_init((BBuf* )(buf), (size))
168
169
0
/* Grow the storage of buf until at least `low` bytes are allocated.
   The capacity is doubled at least once; a zero capacity is treated as 1
   so that doubling makes progress.  If the capacity cannot be doubled
   without overflow, or the reallocation fails, the enclosing function
   returns ONIGERR_MEMORY and buf->p / buf->alloc keep their previous
   values (the old block is not lost). */
#define BB_EXPAND(buf,low) do{\
  unsigned int bb_new_alloc_ = (buf)->alloc;\
  UChar* bb_new_p_;\
  if (bb_new_alloc_ == 0) bb_new_alloc_ = 1;\
  do {\
    if (bb_new_alloc_ > (~((unsigned int )0)) / 2) return(ONIGERR_MEMORY);\
    bb_new_alloc_ *= 2;\
  } while (bb_new_alloc_ < (unsigned int )(low));\
  bb_new_p_ = (UChar* )xrealloc((buf)->p, bb_new_alloc_);\
  if (IS_NULL(bb_new_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bb_new_p_;\
  (buf)->alloc = bb_new_alloc_;\
} while (0)
174
175
0
/* Make sure buf has at least `size` bytes allocated, doubling the
   capacity as often as needed (a zero capacity starts from 1).  Nothing
   is reallocated when the capacity is already sufficient.  On overflow
   of the doubled capacity or on reallocation failure the enclosing
   function returns ONIGERR_MEMORY and buf->p / buf->alloc are left
   unchanged. */
#define BB_ENSURE_SIZE(buf,size) do{\
  unsigned int new_alloc = (buf)->alloc;\
  if (new_alloc == 0 && (unsigned int )(size) > 0) new_alloc = 1;\
  while (new_alloc < (unsigned int )(size)) {\
    if (new_alloc > (~((unsigned int )0)) / 2) return(ONIGERR_MEMORY);\
    new_alloc *= 2;\
  }\
  if ((buf)->alloc != new_alloc) {\
    UChar* bb_new_p_ = (UChar* )xrealloc((buf)->p, new_alloc);\
    if (IS_NULL(bb_new_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bb_new_p_;\
    (buf)->alloc = new_alloc;\
  }\
} while (0)
184
185
0
#define BB_WRITE(buf,pos,bytes,n) do{\
186
0
  int used = (pos) + (n);\
187
0
  if ((buf)->alloc < (unsigned int )used) BB_EXPAND((buf),used);\
188
0
  xmemcpy((buf)->p + (pos), (bytes), (n));\
189
0
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
190
0
} while (0)
191
192
#define BB_WRITE1(buf,pos,byte) do{\
193
  int used = (pos) + 1;\
194
  if ((buf)->alloc < (unsigned int )used) BB_EXPAND((buf),used);\
195
  (buf)->p[(pos)] = (byte);\
196
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
197
} while (0)
198
199
#define BB_ADD(buf,bytes,n)       BB_WRITE((buf),(buf)->used,(bytes),(n))
200
#define BB_ADD1(buf,byte)         BB_WRITE1((buf),(buf)->used,(byte))
201
#define BB_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
202
#define BB_GET_OFFSET_POS(buf)    ((buf)->used)
203
204
/* from < to */
205
0
#define BB_MOVE_RIGHT(buf,from,to,n) do {\
206
0
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BB_EXPAND((buf),(to) + (n));\
207
0
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
208
0
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
209
0
} while (0)
210
211
/* from > to */
212
#define BB_MOVE_LEFT(buf,from,to,n) do {\
213
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
214
} while (0)
215
216
/* from > to */
217
0
/* Move the bytes [from, used) of buf down to offset `to` and shrink
   buf->used by (from - to).  Both offsets are parenthesized so that
   expression arguments are subtracted as whole values. */
#define BB_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)
221
222
/* Insert n bytes at offset pos of buf.  When pos is at or beyond the
   used length the bytes are simply written there; otherwise the tail
   starting at pos is first shifted right by n bytes.  All arguments are
   parenthesized at each use and when forwarded to the helper macros. */
#define BB_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BB_WRITE((buf),(pos),(bytes),(n));\
  }\
  else {\
    BB_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)
231
232
#define BB_GET_BYTE(buf, pos) (buf)->p[(pos)]
233
234
235
typedef enum {
236
  CS_VALUE,
237
  CS_RANGE,
238
  CS_COMPLETE,
239
  CS_START
240
} CSTATE;
241
242
typedef enum {
243
  CV_UNDEF,
244
  CV_SB,
245
  CV_MB,
246
  CV_CPROP
247
} CVAL;
248
249
0
/* Warning handler that ignores its message and does nothing. */
extern void onig_null_warn(const char* s ARG_UNUSED) { }
250
251
#ifdef DEFAULT_WARN_FUNCTION
252
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
253
#else
254
static OnigWarnFunc onig_warn = onig_null_warn;
255
#endif
256
257
#ifdef DEFAULT_VERB_WARN_FUNCTION
258
static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
259
#else
260
static OnigWarnFunc onig_verb_warn = onig_null_warn;
261
#endif
262
263
/* Store f as the handler invoked through onig_warn. */
extern void
onig_set_warn_func(OnigWarnFunc f)
{
  onig_warn = f;
}
267
268
/* Store f as the handler invoked through onig_verb_warn. */
extern void
onig_set_verb_warn_func(OnigWarnFunc f)
{
  onig_verb_warn = f;
}
272
273
extern void
274
onig_warning(const char* s)
275
0
{
276
0
  if (onig_warn == onig_null_warn) return ;
277
278
0
  (*onig_warn)(s);
279
0
}
280
281
#define DEFAULT_MAX_CAPTURE_NUM   32767

static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/* Store num in MaxCaptureNum.
   Returns 0 on success; a negative num is rejected with -1 and the
   stored value is left unchanged. */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
293
294
static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
295
296
extern unsigned int
297
onig_get_parse_depth_limit(void)
298
0
{
299
0
  return ParseDepthLimit;
300
0
}
301
302
extern int
303
onig_set_parse_depth_limit(unsigned int depth)
304
0
{
305
0
  if (depth == 0)
306
0
    ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
307
0
  else
308
0
    ParseDepthLimit = depth;
309
0
  return 0;
310
0
}
311
312
#ifdef ONIG_DEBUG_PARSE
313
#define INC_PARSE_DEPTH(d) do {\
314
  (d)++;\
315
  if (env->max_parse_depth < (d)) env->max_parse_depth = d;\
316
  if ((d) > ParseDepthLimit) \
317
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
318
} while (0)
319
#else
320
0
#define INC_PARSE_DEPTH(d) do {\
321
0
  (d)++;\
322
0
  if ((d) > ParseDepthLimit) \
323
0
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
324
0
} while (0)
325
#endif
326
327
0
#define DEC_PARSE_DEPTH(d)  (d)--
328
329
330
static int
331
bbuf_init(BBuf* buf, int size)
332
0
{
333
0
  if (size <= 0) {
334
0
    size   = 0;
335
0
    buf->p = NULL;
336
0
  }
337
0
  else {
338
0
    buf->p = (UChar* )xmalloc(size);
339
0
    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
340
0
  }
341
342
0
  buf->alloc = size;
343
0
  buf->used  = 0;
344
0
  return 0;
345
0
}
346
347
static void
348
bbuf_free(BBuf* bbuf)
349
0
{
350
0
  if (IS_NOT_NULL(bbuf)) {
351
0
    if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
352
0
    xfree(bbuf);
353
0
  }
354
0
}
355
356
static int
357
bbuf_clone(BBuf** rto, BBuf* from)
358
0
{
359
0
  int r;
360
0
  BBuf *to;
361
362
0
  *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
363
0
  CHECK_NULL_RETURN_MEMERR(to);
364
0
  r = BB_INIT(to, from->alloc);
365
0
  if (r != 0) {
366
0
    bbuf_free(to);
367
0
    *rto = 0;
368
0
    return r;
369
0
  }
370
0
  to->used = from->used;
371
0
  xmemcpy(to->p, from->p, from->used);
372
0
  return 0;
373
0
}
374
375
static int
376
backref_rel_to_abs(int rel_no, ParseEnv* env)
377
0
{
378
0
  if (rel_no > 0) {
379
0
    if (rel_no > ONIG_INT_MAX - env->num_mem)
380
0
      return ONIGERR_INVALID_BACKREF;
381
0
    return env->num_mem + rel_no;
382
0
  }
383
0
  else {
384
0
    return env->num_mem + 1 + rel_no;
385
0
  }
386
0
}
387
388
#define OPTION_ON(v,f)     ((v) |= (f))
389
#define OPTION_OFF(v,f)    ((v) &= ~(f))
390
391
0
/* Clear the bits f in v when `negative` is true, set them otherwise.
   The whole expansion is parenthesized so that it can be used safely
   inside a larger expression. */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
392
393
#define MBCODE_START_POS(enc) \
394
0
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
395
396
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
397
0
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
398
399
0
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
400
0
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
401
0
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
402
0
    if (r != 0) return r;\
403
0
  }\
404
0
} while (0)
405
406
407
0
#define BITSET_IS_EMPTY(bs,empty) do {\
408
0
  int i;\
409
0
  empty = 1;\
410
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {\
411
0
    if ((bs)[i] != 0) {\
412
0
      empty = 0; break;\
413
0
    }\
414
0
  }\
415
0
} while (0)
416
417
static void
418
bitset_set_range(BitSetRef bs, int from, int to)
419
0
{
420
0
  int i;
421
0
  for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
422
0
    BITSET_SET_BIT(bs, i);
423
0
  }
424
0
}
425
426
static void
427
bitset_invert(BitSetRef bs)
428
0
{
429
0
  int i;
430
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { bs[i] = ~(bs[i]); }
431
0
}
432
433
static void
434
bitset_invert_to(BitSetRef from, BitSetRef to)
435
0
{
436
0
  int i;
437
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { to[i] = ~(from[i]); }
438
0
}
439
440
static void
441
bitset_and(BitSetRef dest, BitSetRef bs)
442
0
{
443
0
  int i;
444
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] &= bs[i]; }
445
0
}
446
447
static void
448
bitset_or(BitSetRef dest, BitSetRef bs)
449
0
{
450
0
  int i;
451
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] |= bs[i]; }
452
0
}
453
454
static void
455
bitset_copy(BitSetRef dest, BitSetRef bs)
456
0
{
457
0
  int i;
458
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] = bs[i]; }
459
0
}
460
461
extern int
462
onig_strncmp(const UChar* s1, const UChar* s2, int n)
463
0
{
464
0
  int x;
465
466
0
  while (n-- > 0) {
467
0
    x = *s2++ - *s1++;
468
0
    if (x) return x;
469
0
  }
470
0
  return 0;
471
0
}
472
473
extern void
474
onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
475
0
{
476
0
  int len = (int )(end - src);
477
0
  if (len > 0) {
478
0
    xmemcpy(dest, src, len);
479
0
    dest[len] = (UChar )0;
480
0
  }
481
0
}
482
483
/* scan pattern methods */
484
0
#define PEND_VALUE   0
485
486
0
#define PFETCH_READY  UChar* pfetch_prev
487
0
#define PEND         (p < end ?  0 : 1)
488
0
#define PUNFETCH     p = pfetch_prev
489
0
#define PPREV        pfetch_prev
490
0
#define PINC       do { \
491
0
  pfetch_prev = p; \
492
0
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
493
0
} while (0)
494
0
#define PFETCH(c)  do { \
495
0
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
496
0
  pfetch_prev = p; \
497
0
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
498
0
} while (0)
499
500
0
#define PINC_S     do { \
501
0
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
502
0
} while (0)
503
0
#define PFETCH_S(c) do { \
504
0
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
505
0
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
506
0
} while (0)
507
508
0
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
509
0
/* True when the next code point (PPEEK) equals c.  The argument is
   parenthesized so the cast applies to the whole expression. */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
510
511
/* Resize dest (or allocate a fresh block when dest is NULL) to capa + 1
   bytes and copy [src, src_end) to the position that corresponds to
   dest_end in the new block.  Returns the new block, or NULL when the
   allocation fails. */
static UChar*
strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
            int capa)
{
  UChar* buf;
  ptrdiff_t used = dest_end - dest;  /* taken before dest may move */

  if (dest) {
    buf = (UChar* )xrealloc(dest, capa + 1);
  }
  else {
    buf = (UChar* )xmalloc(capa + 1);
  }
  CHECK_NULL_RETURN(buf);

  onig_strcpy(buf + used, src, src_end);
  return buf;
}
527
528
/* dest on static area */
529
/* Allocate capa + 1 bytes, copy [dest, dest_end) into the new block and
   then [src, src_end) right behind it.  dest itself is not freed.
   Returns the new block, or NULL when the allocation fails. */
static UChar*
strcat_capa_from_static(UChar* dest, UChar* dest_end,
                        const UChar* src, const UChar* src_end, int capa)
{
  UChar* buf = (UChar* )xmalloc(capa + 1);

  CHECK_NULL_RETURN(buf);

  onig_strcpy(buf, dest, dest_end);
  onig_strcpy(buf + (dest_end - dest), src, src_end);
  return buf;
}
541
542
543
#ifdef USE_ST_LIBRARY
544
545
typedef struct {
546
  UChar* s;
547
  UChar* end;
548
} st_str_end_key;
549
550
static int
551
str_end_cmp(st_str_end_key* x, st_str_end_key* y)
552
0
{
553
0
  UChar *p, *q;
554
0
  int c;
555
556
0
  if ((x->end - x->s) != (y->end - y->s))
557
0
    return 1;
558
559
0
  p = x->s;
560
0
  q = y->s;
561
0
  while (p < x->end) {
562
0
    c = (int )*p - (int )*q;
563
0
    if (c != 0) return c;
564
565
0
    p++; q++;
566
0
  }
567
568
0
  return 0;
569
0
}
570
571
static int
572
str_end_hash(st_str_end_key* x)
573
0
{
574
0
  UChar *p;
575
0
  unsigned val = 0;
576
577
0
  p = x->s;
578
0
  while (p < x->end) {
579
0
    val = val * 997 + (unsigned )*p++;
580
0
  }
581
582
0
  return (int) (val + (val >> 5));
583
0
}
584
585
extern hash_table_type
586
onig_st_init_strend_table_with_size(int size)
587
0
{
588
0
  static struct st_hash_type hashType = {
589
0
    str_end_cmp,
590
0
    str_end_hash,
591
0
  };
592
593
0
  return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
594
0
}
595
596
extern int
597
onig_st_lookup_strend(hash_table_type table, const UChar* str_key,
598
                      const UChar* end_key, hash_data_type *value)
599
0
{
600
0
  st_str_end_key key;
601
602
0
  key.s   = (UChar* )str_key;
603
0
  key.end = (UChar* )end_key;
604
605
0
  return onig_st_lookup(table, (st_data_t )(&key), value);
606
0
}
607
608
extern int
609
onig_st_insert_strend(hash_table_type table, const UChar* str_key,
610
                      const UChar* end_key, hash_data_type value)
611
0
{
612
0
  st_str_end_key* key;
613
0
  int result;
614
615
0
  key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
616
0
  CHECK_NULL_RETURN_MEMERR(key);
617
618
0
  key->s   = (UChar* )str_key;
619
0
  key->end = (UChar* )end_key;
620
0
  result = onig_st_insert(table, (st_data_t )key, value);
621
0
  if (result) {
622
0
    xfree(key);
623
0
  }
624
0
  return result;
625
0
}
626
627
628
#ifdef USE_CALLOUT
629
630
typedef struct {
631
  OnigEncoding enc;
632
  int    type; /* callout type: single or not */
633
  UChar* s;
634
  UChar* end;
635
} st_callout_name_key;
636
637
static int
638
callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
639
0
{
640
0
  UChar *p, *q;
641
0
  int c;
642
643
0
  if (x->enc  != y->enc)  return 1;
644
0
  if (x->type != y->type) return 1;
645
0
  if ((x->end - x->s) != (y->end - y->s))
646
0
    return 1;
647
648
0
  p = x->s;
649
0
  q = y->s;
650
0
  while (p < x->end) {
651
0
    c = (int )*p - (int )*q;
652
0
    if (c != 0) return c;
653
654
0
    p++; q++;
655
0
  }
656
657
0
  return 0;
658
0
}
659
660
static int
661
callout_name_table_hash(st_callout_name_key* x)
662
0
{
663
0
  UChar *p;
664
0
  unsigned int val = 0;
665
666
0
  p = x->s;
667
0
  while (p < x->end) {
668
0
    val = val * 997 + (unsigned int )*p++;
669
0
  }
670
671
  /* use intptr_t for escape warning in Windows */
672
0
  return (int )(val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type);
673
0
}
674
675
extern hash_table_type
676
onig_st_init_callout_name_table_with_size(int size)
677
0
{
678
0
  static struct st_hash_type hashType = {
679
0
    callout_name_table_cmp,
680
0
    callout_name_table_hash,
681
0
  };
682
683
0
  return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
684
0
}
685
686
extern int
687
onig_st_lookup_callout_name_table(hash_table_type table,
688
                                  OnigEncoding enc,
689
                                  int type,
690
                                  const UChar* str_key,
691
                                  const UChar* end_key,
692
                                  hash_data_type *value)
693
0
{
694
0
  st_callout_name_key key;
695
696
0
  key.enc  = enc;
697
0
  key.type = type;
698
0
  key.s    = (UChar* )str_key;
699
0
  key.end  = (UChar* )end_key;
700
701
0
  return onig_st_lookup(table, (st_data_t )(&key), value);
702
0
}
703
704
static int
705
st_insert_callout_name_table(hash_table_type table,
706
                             OnigEncoding enc, int type,
707
                             UChar* str_key, UChar* end_key,
708
                             hash_data_type value)
709
0
{
710
0
  st_callout_name_key* key;
711
0
  int result;
712
713
0
  key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
714
0
  CHECK_NULL_RETURN_MEMERR(key);
715
716
  /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
717
0
  key->enc  = enc;
718
0
  key->type = type;
719
0
  key->s    = str_key;
720
0
  key->end  = end_key;
721
0
  result = onig_st_insert(table, (st_data_t )key, value);
722
0
  if (result) {
723
0
    xfree(key);
724
0
  }
725
0
  return result;
726
0
}
727
#endif
728
729
#endif /* USE_ST_LIBRARY */
730
731
732
0
#define INIT_NAME_BACKREFS_ALLOC_NUM   8
733
734
typedef struct {
735
  UChar* name;
736
  int    name_len;   /* byte length */
737
  int    back_num;   /* number of backrefs */
738
  int    back_alloc;
739
  int    back_ref1;
740
  int*   back_refs;
741
} NameEntry;
742
743
#ifdef USE_ST_LIBRARY
744
745
0
#define INIT_NAMES_ALLOC_NUM    5
746
747
typedef st_table  NameTable;
748
typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
749
750
#define NAMEBUF_SIZE    24
751
#define NAMEBUF_SIZE_1  25
752
753
#ifdef ONIG_DEBUG
754
static int
755
i_print_name_entry(UChar* key, NameEntry* e, void* arg)
756
{
757
  int i;
758
  FILE* fp = (FILE* )arg;
759
760
  fprintf(fp, "%s: ", e->name);
761
  if (e->back_num == 0)
762
    fputs("-", fp);
763
  else if (e->back_num == 1)
764
    fprintf(fp, "%d", e->back_ref1);
765
  else {
766
    for (i = 0; i < e->back_num; i++) {
767
      if (i > 0) fprintf(fp, ", ");
768
      fprintf(fp, "%d", e->back_refs[i]);
769
    }
770
  }
771
  fputs("\n", fp);
772
  return ST_CONTINUE;
773
}
774
775
extern int
776
onig_print_names(FILE* fp, regex_t* reg)
777
{
778
  NameTable* t = (NameTable* )reg->name_table;
779
780
  if (IS_NOT_NULL(t)) {
781
    fprintf(fp, "name table\n");
782
    onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
783
    fputs("\n", fp);
784
  }
785
  return 0;
786
}
787
#endif /* ONIG_DEBUG */
788
789
static int
790
i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
791
0
{
792
0
  xfree(e->name);
793
0
  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
794
0
  xfree(key);
795
0
  xfree(e);
796
0
  return ST_DELETE;
797
0
}
798
799
static int
800
names_clear(regex_t* reg)
801
0
{
802
0
  NameTable* t = (NameTable* )reg->name_table;
803
804
0
  if (IS_NOT_NULL(t)) {
805
0
    onig_st_foreach(t, i_free_name_entry, 0);
806
0
  }
807
0
  return 0;
808
0
}
809
810
extern int
811
onig_names_free(regex_t* reg)
812
0
{
813
0
  int r;
814
0
  NameTable* t;
815
816
0
  r = names_clear(reg);
817
0
  if (r != 0) return r;
818
819
0
  t = (NameTable* )reg->name_table;
820
0
  if (IS_NOT_NULL(t)) onig_st_free_table(t);
821
0
  reg->name_table = (void* )NULL;
822
0
  return 0;
823
0
}
824
825
static NameEntry*
826
name_find(regex_t* reg, const UChar* name, const UChar* name_end)
827
0
{
828
0
  NameEntry* e;
829
0
  NameTable* t = (NameTable* )reg->name_table;
830
831
0
  e = (NameEntry* )NULL;
832
0
  if (IS_NOT_NULL(t)) {
833
0
    onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
834
0
  }
835
0
  return e;
836
0
}
837
838
typedef struct {
839
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
840
  regex_t* reg;
841
  void* arg;
842
  int ret;
843
  OnigEncoding enc;
844
} INamesArg;
845
846
static int
847
i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
848
0
{
849
0
  int r = (*(arg->func))(e->name,
850
0
                         e->name + e->name_len,
851
0
                         e->back_num,
852
0
                         (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
853
0
                         arg->reg, arg->arg);
854
0
  if (r != 0) {
855
0
    arg->ret = r;
856
0
    return ST_STOP;
857
0
  }
858
0
  return ST_CONTINUE;
859
0
}
860
861
extern int
862
onig_foreach_name(regex_t* reg,
863
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
864
0
{
865
0
  INamesArg narg;
866
0
  NameTable* t = (NameTable* )reg->name_table;
867
868
0
  narg.ret = 0;
869
0
  if (IS_NOT_NULL(t)) {
870
0
    narg.func = func;
871
0
    narg.reg  = reg;
872
0
    narg.arg  = arg;
873
0
    narg.enc  = reg->enc; /* should be pattern encoding. */
874
0
    onig_st_foreach(t, i_names, (HashDataType )&narg);
875
0
  }
876
0
  return narg.ret;
877
0
}
878
879
static int
880
i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumMap* map)
881
0
{
882
0
  int i;
883
884
0
  if (e->back_num > 1) {
885
0
    for (i = 0; i < e->back_num; i++) {
886
0
      e->back_refs[i] = map[e->back_refs[i]].new_val;
887
0
    }
888
0
  }
889
0
  else if (e->back_num == 1) {
890
0
    e->back_ref1 = map[e->back_ref1].new_val;
891
0
  }
892
893
0
  return ST_CONTINUE;
894
0
}
895
896
extern int
897
onig_renumber_name_table(regex_t* reg, GroupNumMap* map)
898
0
{
899
0
  NameTable* t = (NameTable* )reg->name_table;
900
901
0
  if (IS_NOT_NULL(t)) {
902
0
    onig_st_foreach(t, i_renumber_name, (HashDataType )map);
903
0
  }
904
0
  return 0;
905
0
}
906
907
908
extern int
909
onig_number_of_names(regex_t* reg)
910
0
{
911
0
  NameTable* t = (NameTable* )reg->name_table;
912
913
0
  if (IS_NOT_NULL(t))
914
0
    return t->num_entries;
915
0
  else
916
0
    return 0;
917
0
}
918
919
#else  /* USE_ST_LIBRARY */
920
921
#define INIT_NAMES_ALLOC_NUM    8
922
923
typedef struct {
924
  NameEntry* e;
925
  int        num;
926
  int        alloc;
927
} NameTable;
928
929
#ifdef ONIG_DEBUG
930
extern int
931
onig_print_names(FILE* fp, regex_t* reg)
932
{
933
  int i, j;
934
  NameEntry* e;
935
  NameTable* t = (NameTable* )reg->name_table;
936
937
  if (IS_NOT_NULL(t) && t->num > 0) {
938
    fprintf(fp, "name table\n");
939
    for (i = 0; i < t->num; i++) {
940
      e = &(t->e[i]);
941
      fprintf(fp, "%s: ", e->name);
942
      if (e->back_num == 0) {
943
        fputs("-", fp);
944
      }
945
      else if (e->back_num == 1) {
946
        fprintf(fp, "%d", e->back_ref1);
947
      }
948
      else {
949
        for (j = 0; j < e->back_num; j++) {
950
          if (j > 0) fprintf(fp, ", ");
951
          fprintf(fp, "%d", e->back_refs[j]);
952
        }
953
      }
954
      fputs("\n", fp);
955
    }
956
    fputs("\n", fp);
957
  }
958
  return 0;
959
}
960
#endif
961
962
static int
963
names_clear(regex_t* reg)
964
{
965
  int i;
966
  NameEntry* e;
967
  NameTable* t = (NameTable* )reg->name_table;
968
969
  if (IS_NOT_NULL(t)) {
970
    for (i = 0; i < t->num; i++) {
971
      e = &(t->e[i]);
972
      if (IS_NOT_NULL(e->name)) {
973
        xfree(e->name);
974
        e->name       = NULL;
975
        e->name_len   = 0;
976
        e->back_num   = 0;
977
        e->back_alloc = 0;
978
        if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
979
        e->back_refs = (int* )NULL;
980
      }
981
    }
982
    if (IS_NOT_NULL(t->e)) {
983
      xfree(t->e);
984
      t->e = NULL;
985
    }
986
    t->num = 0;
987
  }
988
  return 0;
989
}
990
991
extern int
992
onig_names_free(regex_t* reg)
993
{
994
  int r;
995
  NameTable* t;
996
997
  r = names_clear(reg);
998
  if (r != 0) return r;
999
1000
  t = (NameTable* )reg->name_table;
1001
  if (IS_NOT_NULL(t)) xfree(t);
1002
  reg->name_table = NULL;
1003
  return 0;
1004
}
1005
1006
static NameEntry*
1007
name_find(regex_t* reg, UChar* name, UChar* name_end)
1008
{
1009
  int i, len;
1010
  NameEntry* e;
1011
  NameTable* t = (NameTable* )reg->name_table;
1012
1013
  if (IS_NOT_NULL(t)) {
1014
    len = name_end - name;
1015
    for (i = 0; i < t->num; i++) {
1016
      e = &(t->e[i]);
1017
      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1018
        return e;
1019
    }
1020
  }
1021
  return (NameEntry* )NULL;
1022
}
1023
1024
extern int
1025
onig_foreach_name(regex_t* reg,
1026
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
1027
{
1028
  int i, r;
1029
  NameEntry* e;
1030
  NameTable* t = (NameTable* )reg->name_table;
1031
1032
  if (IS_NOT_NULL(t)) {
1033
    for (i = 0; i < t->num; i++) {
1034
      e = &(t->e[i]);
1035
      r = (*func)(e->name, e->name + e->name_len, e->back_num,
1036
                  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
1037
                  reg, arg);
1038
      if (r != 0) return r;
1039
    }
1040
  }
1041
  return 0;
1042
}
1043
1044
extern int
1045
onig_number_of_names(regex_t* reg)
1046
{
1047
  NameTable* t = (NameTable* )reg->name_table;
1048
1049
  if (IS_NOT_NULL(t))
1050
    return t->num;
1051
  else
1052
    return 0;
1053
}
1054
1055
#endif /* else USE_ST_LIBRARY */
1056
1057
static int
1058
name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ParseEnv* env)
1059
0
{
1060
0
  int r;
1061
0
  int alloc;
1062
0
  NameEntry* e;
1063
0
  NameTable* t = (NameTable* )reg->name_table;
1064
1065
0
  if (name_end - name <= 0)
1066
0
    return ONIGERR_EMPTY_GROUP_NAME;
1067
1068
0
  e = name_find(reg, name, name_end);
1069
0
  if (IS_NULL(e)) {
1070
0
#ifdef USE_ST_LIBRARY
1071
0
    if (IS_NULL(t)) {
1072
0
      t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
1073
0
      CHECK_NULL_RETURN_MEMERR(t);
1074
0
      reg->name_table = (void* )t;
1075
0
    }
1076
0
    e = (NameEntry* )xmalloc(sizeof(NameEntry));
1077
0
    CHECK_NULL_RETURN_MEMERR(e);
1078
1079
0
    e->name = onigenc_strdup(reg->enc, name, name_end);
1080
0
    if (IS_NULL(e->name)) {
1081
0
      xfree(e);  return ONIGERR_MEMORY;
1082
0
    }
1083
0
    r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
1084
0
                              (HashDataType )e);
1085
0
    if (r < 0) return r;
1086
1087
0
    e->name_len   = (int )(name_end - name);
1088
0
    e->back_num   = 0;
1089
0
    e->back_alloc = 0;
1090
0
    e->back_refs  = (int* )NULL;
1091
1092
#else
1093
1094
    if (IS_NULL(t)) {
1095
      alloc = INIT_NAMES_ALLOC_NUM;
1096
      t = (NameTable* )xmalloc(sizeof(NameTable));
1097
      CHECK_NULL_RETURN_MEMERR(t);
1098
      t->e     = NULL;
1099
      t->alloc = 0;
1100
      t->num   = 0;
1101
1102
      t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
1103
      if (IS_NULL(t->e)) {
1104
        xfree(t);
1105
        return ONIGERR_MEMORY;
1106
      }
1107
      t->alloc = alloc;
1108
      reg->name_table = t;
1109
      goto clear;
1110
    }
1111
    else if (t->num == t->alloc) {
1112
      int i;
1113
1114
      alloc = t->alloc * 2;
1115
      t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
1116
      CHECK_NULL_RETURN_MEMERR(t->e);
1117
      t->alloc = alloc;
1118
1119
    clear:
1120
      for (i = t->num; i < t->alloc; i++) {
1121
        t->e[i].name       = NULL;
1122
        t->e[i].name_len   = 0;
1123
        t->e[i].back_num   = 0;
1124
        t->e[i].back_alloc = 0;
1125
        t->e[i].back_refs  = (int* )NULL;
1126
      }
1127
    }
1128
    e = &(t->e[t->num]);
1129
    t->num++;
1130
    e->name = onigenc_strdup(reg->enc, name, name_end);
1131
    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1132
    e->name_len = name_end - name;
1133
#endif
1134
0
  }
1135
1136
0
  if (e->back_num >= 1 &&
1137
0
      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
1138
0
    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
1139
0
                                   name, name_end);
1140
0
    return ONIGERR_MULTIPLEX_DEFINED_NAME;
1141
0
  }
1142
1143
0
  e->back_num++;
1144
0
  if (e->back_num == 1) {
1145
0
    e->back_ref1 = backref;
1146
0
  }
1147
0
  else {
1148
0
    if (e->back_num == 2) {
1149
0
      alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
1150
0
      e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
1151
0
      CHECK_NULL_RETURN_MEMERR(e->back_refs);
1152
0
      e->back_alloc = alloc;
1153
0
      e->back_refs[0] = e->back_ref1;
1154
0
      e->back_refs[1] = backref;
1155
0
    }
1156
0
    else {
1157
0
      if (e->back_num > e->back_alloc) {
1158
0
        alloc = e->back_alloc * 2;
1159
0
        e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
1160
0
        CHECK_NULL_RETURN_MEMERR(e->back_refs);
1161
0
        e->back_alloc = alloc;
1162
0
      }
1163
0
      e->back_refs[e->back_num - 1] = backref;
1164
0
    }
1165
0
  }
1166
1167
0
  return 0;
1168
0
}
1169
1170
extern int
1171
onig_name_to_group_numbers(regex_t* reg, const UChar* name,
1172
                           const UChar* name_end, int** nums)
1173
0
{
1174
0
  NameEntry* e = name_find(reg, name, name_end);
1175
1176
0
  if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
1177
1178
0
  switch (e->back_num) {
1179
0
  case 0:
1180
0
    break;
1181
0
  case 1:
1182
0
    *nums = &(e->back_ref1);
1183
0
    break;
1184
0
  default:
1185
0
    *nums = e->back_refs;
1186
0
    break;
1187
0
  }
1188
0
  return e->back_num;
1189
0
}
1190
1191
static int
1192
name_to_group_numbers(ParseEnv* env, const UChar* name, const UChar* name_end,
1193
                      int** nums)
1194
0
{
1195
0
  regex_t* reg;
1196
0
  NameEntry* e;
1197
1198
0
  reg = env->reg;
1199
0
  e = name_find(reg, name, name_end);
1200
1201
0
  if (IS_NULL(e)) {
1202
0
    onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
1203
0
                                   (UChar* )name, (UChar* )name_end);
1204
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
1205
0
  }
1206
1207
0
  switch (e->back_num) {
1208
0
  case 0:
1209
0
    break;
1210
0
  case 1:
1211
0
    *nums = &(e->back_ref1);
1212
0
    break;
1213
0
  default:
1214
0
    *nums = e->back_refs;
1215
0
    break;
1216
0
  }
1217
0
  return e->back_num;
1218
0
}
1219
1220
extern int
1221
onig_name_to_backref_number(regex_t* reg, const UChar* name,
1222
                            const UChar* name_end, OnigRegion *region)
1223
0
{
1224
0
  int i, n, *nums;
1225
1226
0
  n = onig_name_to_group_numbers(reg, name, name_end, &nums);
1227
0
  if (n < 0)
1228
0
    return n;
1229
0
  else if (n == 0)
1230
0
    return ONIGERR_PARSER_BUG;
1231
0
  else if (n == 1)
1232
0
    return nums[0];
1233
0
  else {
1234
0
    if (IS_NOT_NULL(region)) {
1235
0
      for (i = n - 1; i >= 0; i--) {
1236
0
        if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
1237
0
          return nums[i];
1238
0
      }
1239
0
    }
1240
0
    return nums[n - 1];
1241
0
  }
1242
0
}
1243
1244
extern int
1245
onig_noname_group_capture_is_active(regex_t* reg)
1246
0
{
1247
0
  if (OPTON_DONT_CAPTURE_GROUP(reg->options))
1248
0
    return 0;
1249
1250
0
  if (onig_number_of_names(reg) > 0 &&
1251
0
      IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
1252
0
      ! OPTON_CAPTURE_GROUP(reg->options)) {
1253
0
    return 0;
1254
0
  }
1255
1256
0
  return 1;
1257
0
}
1258
1259
#ifdef USE_CALLOUT
1260
1261
/*
 * Definition of one named callout, stored in GlobalCalloutNameList and
 * indexed by the callout name id.  Filled in by onig_set_callout_of_name().
 * arg_types[] holds arg_num entries; the last opt_arg_num of them are
 * optional, and opt_defaults[] holds their default values at the same
 * argument index.  String defaults are heap copies released by
 * free_callout_func_list().
 */
typedef struct {
1262
  OnigCalloutType type;
1263
  int             in;
1264
  OnigCalloutFunc start_func;
1265
  OnigCalloutFunc end_func;
1266
  int             arg_num;
1267
  int             opt_arg_num;
1268
  unsigned int    arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
1269
  OnigValue       opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
1270
  UChar*          name; /* reference to GlobalCalloutNameTable entry: e->name */
1271
} CalloutNameListEntry;
1272
1273
/*
 * Growable array of CalloutNameListEntry: n entries in use out of alloc
 * allocated slots in v.  GlobalCalloutNameList is the single instance; it
 * stays NULL until onig_set_callout_of_name() creates it.
 */
typedef struct {
1274
  int  n;
1275
  int  alloc;
1276
  CalloutNameListEntry* v;
1277
} CalloutNameListType;
1278
1279
static CalloutNameListType* GlobalCalloutNameList;
1280
1281
static int
1282
make_callout_func_list(CalloutNameListType** rs, int init_size)
1283
0
{
1284
0
  CalloutNameListType* s;
1285
0
  CalloutNameListEntry* v;
1286
1287
0
  *rs = 0;
1288
1289
0
  s = xmalloc(sizeof(*s));
1290
0
  if (IS_NULL(s)) return ONIGERR_MEMORY;
1291
1292
0
  v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
1293
0
  if (IS_NULL(v)) {
1294
0
    xfree(s);
1295
0
    return ONIGERR_MEMORY;
1296
0
  }
1297
1298
0
  s->n = 0;
1299
0
  s->alloc = init_size;
1300
0
  s->v = v;
1301
1302
0
  *rs = s;
1303
0
  return ONIG_NORMAL;
1304
0
}
1305
1306
static void
1307
free_callout_func_list(CalloutNameListType* s)
1308
0
{
1309
0
  if (IS_NOT_NULL(s)) {
1310
0
    if (IS_NOT_NULL(s->v)) {
1311
0
      int i, j;
1312
1313
0
      for (i = 0; i < s->n; i++) {
1314
0
        CalloutNameListEntry* e = s->v + i;
1315
0
        for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {
1316
0
          if (e->arg_types[j] == ONIG_TYPE_STRING) {
1317
0
            UChar* p = e->opt_defaults[j].s.start;
1318
0
            if (IS_NOT_NULL(p)) xfree(p);
1319
0
          }
1320
0
        }
1321
0
      }
1322
0
      xfree(s->v);
1323
0
    }
1324
0
    xfree(s);
1325
0
  }
1326
0
}
1327
1328
static int
1329
callout_func_list_add(CalloutNameListType* s, int* rid)
1330
0
{
1331
0
  if (s->n >= s->alloc) {
1332
0
    int new_size = s->alloc * 2;
1333
0
    CalloutNameListEntry* nv = (CalloutNameListEntry* )
1334
0
      xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size);
1335
0
    if (IS_NULL(nv)) return ONIGERR_MEMORY;
1336
1337
0
    s->alloc = new_size;
1338
0
    s->v = nv;
1339
0
  }
1340
1341
0
  *rid = s->n;
1342
1343
0
  xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
1344
0
  s->n++;
1345
0
  return ONIG_NORMAL;
1346
0
}
1347
1348
1349
/*
 * Name table record for a registered callout name.  `name` is a heap copy
 * of the name made by callout_name_entry(); `id` is the value of
 * CalloutNameIDCounter assigned at registration and is used as the index
 * into GlobalCalloutNameList.
 */
typedef struct {
1350
  UChar* name;
1351
  int    name_len;   /* byte length */
1352
  int    id;
1353
} CalloutNameEntry;
1354
1355
/*
 * Container mapping callout names to CalloutNameEntry records: an st hash
 * table when USE_ST_LIBRARY is defined, otherwise a plain growable array.
 * GlobalCalloutNameTable is the single instance.  CalloutNameIDCounter is
 * incremented before each new id is assigned (so the first id is 1) and is
 * reset to 0 when the table is freed.
 */
#ifdef USE_ST_LIBRARY
1356
typedef st_table  CalloutNameTable;
1357
#else
1358
typedef struct {
1359
  CalloutNameEntry* e;
1360
  int               num;
1361
  int               alloc;
1362
} CalloutNameTable;
1363
#endif
1364
1365
static CalloutNameTable* GlobalCalloutNameTable;
1366
static int CalloutNameIDCounter;
1367
1368
#ifdef USE_ST_LIBRARY
1369
1370
static int
1371
i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
1372
                          void* arg ARG_UNUSED)
1373
0
{
1374
0
  if (IS_NOT_NULL(e)) {
1375
0
    xfree(e->name);
1376
0
  }
1377
  /*xfree(key->s); */ /* is same as e->name */
1378
0
  xfree(key);
1379
0
  xfree(e);
1380
0
  return ST_DELETE;
1381
0
}
1382
1383
static int
1384
callout_name_table_clear(CalloutNameTable* t)
1385
0
{
1386
0
  if (IS_NOT_NULL(t)) {
1387
0
    onig_st_foreach(t, i_free_callout_name_entry, 0);
1388
0
  }
1389
0
  return 0;
1390
0
}
1391
1392
static int
1393
global_callout_name_table_free(void)
1394
0
{
1395
0
  if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1396
0
    int r = callout_name_table_clear(GlobalCalloutNameTable);
1397
0
    if (r != 0) return r;
1398
1399
0
    onig_st_free_table(GlobalCalloutNameTable);
1400
0
    GlobalCalloutNameTable = 0;
1401
0
    CalloutNameIDCounter = 0;
1402
0
  }
1403
1404
0
  return 0;
1405
0
}
1406
1407
static CalloutNameEntry*
1408
callout_name_find(OnigEncoding enc, int is_not_single,
1409
                  const UChar* name, const UChar* name_end)
1410
0
{
1411
0
  int r;
1412
0
  CalloutNameEntry* e;
1413
0
  CalloutNameTable* t = GlobalCalloutNameTable;
1414
1415
0
  e = (CalloutNameEntry* )NULL;
1416
0
  if (IS_NOT_NULL(t)) {
1417
0
    r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1418
0
                                          (HashDataType* )((void* )(&e)));
1419
0
    if (r == 0) { /* not found */
1420
0
      if (enc != ONIG_ENCODING_ASCII &&
1421
0
          ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
1422
0
        enc = ONIG_ENCODING_ASCII;
1423
0
        onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1424
0
                                          (HashDataType* )((void* )(&e)));
1425
0
      }
1426
0
    }
1427
0
  }
1428
0
  return e;
1429
0
}
1430
1431
#else
1432
1433
static int
1434
callout_name_table_clear(CalloutNameTable* t)
1435
{
1436
  int i;
1437
  CalloutNameEntry* e;
1438
1439
  if (IS_NOT_NULL(t)) {
1440
    for (i = 0; i < t->num; i++) {
1441
      e = &(t->e[i]);
1442
      if (IS_NOT_NULL(e->name)) {
1443
        xfree(e->name);
1444
        e->name     = NULL;
1445
        e->name_len = 0;
1446
        e->id       = 0;
1447
        e->func     = 0;
1448
      }
1449
    }
1450
    if (IS_NOT_NULL(t->e)) {
1451
      xfree(t->e);
1452
      t->e = NULL;
1453
    }
1454
    t->num = 0;
1455
  }
1456
  return 0;
1457
}
1458
1459
static int
1460
global_callout_name_table_free(void)
1461
{
1462
  if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1463
    int r = callout_name_table_clear(GlobalCalloutNameTable);
1464
    if (r != 0) return r;
1465
1466
    xfree(GlobalCalloutNameTable);
1467
    GlobalCalloutNameTable = 0;
1468
    CalloutNameIDCounter = 0;
1469
  }
1470
  return 0;
1471
}
1472
1473
static CalloutNameEntry*
1474
callout_name_find(UChar* name, UChar* name_end)
1475
{
1476
  int i, len;
1477
  CalloutNameEntry* e;
1478
  CalloutNameTable* t = Calloutnames;
1479
1480
  if (IS_NOT_NULL(t)) {
1481
    len = name_end - name;
1482
    for (i = 0; i < t->num; i++) {
1483
      e = &(t->e[i]);
1484
      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1485
        return e;
1486
    }
1487
  }
1488
  return (CalloutNameEntry* )NULL;
1489
}
1490
1491
#endif
1492
1493
/* name string must be single byte char string. */
1494
static int
1495
callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
1496
                   int is_not_single, UChar* name, UChar* name_end)
1497
0
{
1498
0
  int r;
1499
0
  CalloutNameEntry* e;
1500
0
  CalloutNameTable* t = GlobalCalloutNameTable;
1501
1502
0
  *rentry = 0;
1503
0
  if (name_end - name <= 0)
1504
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1505
1506
0
  e = callout_name_find(enc, is_not_single, name, name_end);
1507
0
  if (IS_NULL(e)) {
1508
0
#ifdef USE_ST_LIBRARY
1509
0
    if (IS_NULL(t)) {
1510
0
      t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
1511
0
      CHECK_NULL_RETURN_MEMERR(t);
1512
0
      GlobalCalloutNameTable = t;
1513
0
    }
1514
0
    e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
1515
0
    CHECK_NULL_RETURN_MEMERR(e);
1516
1517
0
    e->name = onigenc_strdup(enc, name, name_end);
1518
0
    if (IS_NULL(e->name)) {
1519
0
      xfree(e);  return ONIGERR_MEMORY;
1520
0
    }
1521
1522
0
    r = st_insert_callout_name_table(t, enc, is_not_single,
1523
0
                                     e->name, (e->name + (name_end - name)),
1524
0
                                     (HashDataType )e);
1525
0
    if (r < 0) return r;
1526
1527
#else
1528
1529
    int alloc;
1530
1531
    if (IS_NULL(t)) {
1532
      alloc = INIT_NAMES_ALLOC_NUM;
1533
      t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
1534
      CHECK_NULL_RETURN_MEMERR(t);
1535
      t->e     = NULL;
1536
      t->alloc = 0;
1537
      t->num   = 0;
1538
1539
      t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
1540
      if (IS_NULL(t->e)) {
1541
        xfree(t);
1542
        return ONIGERR_MEMORY;
1543
      }
1544
      t->alloc = alloc;
1545
      GlobalCalloutNameTable = t;
1546
      goto clear;
1547
    }
1548
    else if (t->num == t->alloc) {
1549
      int i;
1550
1551
      alloc = t->alloc * 2;
1552
      t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc);
1553
      CHECK_NULL_RETURN_MEMERR(t->e);
1554
      t->alloc = alloc;
1555
1556
    clear:
1557
      for (i = t->num; i < t->alloc; i++) {
1558
        t->e[i].name       = NULL;
1559
        t->e[i].name_len   = 0;
1560
        t->e[i].id         = 0;
1561
      }
1562
    }
1563
    e = &(t->e[t->num]);
1564
    t->num++;
1565
    e->name = onigenc_strdup(enc, name, name_end);
1566
    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1567
#endif
1568
1569
0
    CalloutNameIDCounter++;
1570
0
    e->id = CalloutNameIDCounter;
1571
0
    e->name_len = (int )(name_end - name);
1572
0
  }
1573
1574
0
  *rentry = e;
1575
0
  return e->id;
1576
0
}
1577
1578
static int
1579
is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
1580
0
{
1581
0
  UChar* p;
1582
0
  OnigCodePoint c;
1583
1584
0
  if (name >= name_end) return 0;
1585
1586
0
  p = name;
1587
0
  while (p < name_end) {
1588
0
    c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1589
0
    if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
1590
0
      return 0;
1591
1592
0
    if (p == name) {
1593
0
      if (c >= '0' && c <= '9') return 0;
1594
0
    }
1595
1596
0
    p += ONIGENC_MBC_ENC_LEN(enc, p);
1597
0
  }
1598
1599
0
  return 1;
1600
0
}
1601
1602
static int
1603
is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
1604
0
{
1605
0
  UChar* p;
1606
0
  OnigCodePoint c;
1607
1608
0
  if (name >= name_end) return 0;
1609
1610
0
  p = name;
1611
0
  while (p < name_end) {
1612
0
    c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1613
0
    if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))
1614
0
      return 0;
1615
1616
0
    if (p == name) {
1617
0
      if (c >= '0' && c <= '9') return 0;
1618
0
    }
1619
1620
0
    p += ONIGENC_MBC_ENC_LEN(enc, p);
1621
0
  }
1622
1623
0
  return 1;
1624
0
}
1625
1626
extern int
1627
onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
1628
                         UChar* name, UChar* name_end, int in,
1629
                         OnigCalloutFunc start_func,
1630
                         OnigCalloutFunc end_func,
1631
                         int arg_num, unsigned int arg_types[],
1632
                         int opt_arg_num, OnigValue opt_defaults[])
1633
0
{
1634
0
  int r;
1635
0
  int i;
1636
0
  int j;
1637
0
  int id;
1638
0
  int is_not_single;
1639
0
  CalloutNameEntry* e;
1640
0
  CalloutNameListEntry* fe;
1641
1642
0
  if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
1643
0
    return ONIGERR_INVALID_ARGUMENT;
1644
1645
0
  if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
1646
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1647
1648
0
  if (opt_arg_num < 0 || opt_arg_num > arg_num)
1649
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1650
1651
0
  if (start_func == 0 && end_func == 0)
1652
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1653
1654
0
  if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
1655
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1656
1657
0
  for (i = 0; i < arg_num; i++) {
1658
0
    unsigned int t = arg_types[i];
1659
0
    if (t == ONIG_TYPE_VOID)
1660
0
      return ONIGERR_INVALID_CALLOUT_ARG;
1661
0
    else {
1662
0
      if (i >= arg_num - opt_arg_num) {
1663
0
        if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
1664
0
            t != ONIG_TYPE_TAG)
1665
0
          return ONIGERR_INVALID_CALLOUT_ARG;
1666
0
      }
1667
0
      else {
1668
0
        if (t != ONIG_TYPE_LONG) {
1669
0
          t = t & ~ONIG_TYPE_LONG;
1670
0
          if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
1671
0
            return ONIGERR_INVALID_CALLOUT_ARG;
1672
0
        }
1673
0
      }
1674
0
    }
1675
0
  }
1676
1677
0
  if (! is_allowed_callout_name(enc, name, name_end)) {
1678
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1679
0
  }
1680
1681
0
  is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
1682
0
  id = callout_name_entry(&e, enc, is_not_single, name, name_end);
1683
0
  if (id < 0) return id;
1684
1685
0
  r = ONIG_NORMAL;
1686
0
  if (IS_NULL(GlobalCalloutNameList)) {
1687
0
    r = make_callout_func_list(&GlobalCalloutNameList, 10);
1688
0
    if (r != ONIG_NORMAL) return r;
1689
0
  }
1690
1691
0
  while (id >= GlobalCalloutNameList->n) {
1692
0
    int rid;
1693
0
    r = callout_func_list_add(GlobalCalloutNameList, &rid);
1694
0
    if (r != ONIG_NORMAL) return r;
1695
0
  }
1696
1697
0
  fe = GlobalCalloutNameList->v + id;
1698
0
  fe->type         = callout_type;
1699
0
  fe->in           = in;
1700
0
  fe->start_func   = start_func;
1701
0
  fe->end_func     = end_func;
1702
0
  fe->arg_num      = arg_num;
1703
0
  fe->opt_arg_num  = opt_arg_num;
1704
0
  fe->name         = e->name;
1705
1706
0
  for (i = 0; i < arg_num; i++) {
1707
0
    fe->arg_types[i] = arg_types[i];
1708
0
  }
1709
0
  for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
1710
0
    if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
1711
0
    if (fe->arg_types[i] == ONIG_TYPE_STRING) {
1712
0
      OnigValue* val;
1713
0
      UChar* ds;
1714
1715
0
      val = opt_defaults + j;
1716
0
      ds = onigenc_strdup(enc, val->s.start, val->s.end);
1717
0
      CHECK_NULL_RETURN_MEMERR(ds);
1718
1719
0
      fe->opt_defaults[i].s.start = ds;
1720
0
      fe->opt_defaults[i].s.end   = ds + (val->s.end - val->s.start);
1721
0
    }
1722
0
    else {
1723
0
      fe->opt_defaults[i] = opt_defaults[j];
1724
0
    }
1725
0
  }
1726
1727
0
  r = id;
1728
0
  return r;
1729
0
}
1730
1731
static int
1732
get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
1733
                            UChar* name, UChar* name_end, int* rid)
1734
0
{
1735
0
  int r;
1736
0
  CalloutNameEntry* e;
1737
1738
0
  if (! is_allowed_callout_name(enc, name, name_end)) {
1739
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1740
0
  }
1741
1742
0
  e = callout_name_find(enc, is_not_single, name, name_end);
1743
0
  if (IS_NULL(e)) {
1744
0
    return ONIGERR_UNDEFINED_CALLOUT_NAME;
1745
0
  }
1746
1747
0
  r = ONIG_NORMAL;
1748
0
  *rid = e->id;
1749
1750
0
  return r;
1751
0
}
1752
1753
extern OnigCalloutFunc
1754
onig_get_callout_start_func(regex_t* reg, int callout_num)
1755
0
{
1756
  /* If used for callouts of contents, return 0. */
1757
0
  CalloutListEntry* e;
1758
1759
0
  e = onig_reg_callout_list_at(reg, callout_num);
1760
0
  CHECK_NULL_RETURN(e);
1761
0
  return e->start_func;
1762
0
}
1763
1764
extern const UChar*
1765
onig_get_callout_tag_start(regex_t* reg, int callout_num)
1766
0
{
1767
0
  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1768
0
  CHECK_NULL_RETURN(e);
1769
0
  return e->tag_start;
1770
0
}
1771
1772
extern const UChar*
1773
onig_get_callout_tag_end(regex_t* reg, int callout_num)
1774
0
{
1775
0
  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1776
0
  CHECK_NULL_RETURN(e);
1777
0
  return e->tag_end;
1778
0
}
1779
1780
1781
extern OnigCalloutType
1782
onig_get_callout_type_by_name_id(int name_id)
1783
0
{
1784
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1785
0
    return 0;
1786
1787
0
  return GlobalCalloutNameList->v[name_id].type;
1788
0
}
1789
1790
extern OnigCalloutFunc
1791
onig_get_callout_start_func_by_name_id(int name_id)
1792
0
{
1793
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1794
0
    return 0;
1795
1796
0
  return GlobalCalloutNameList->v[name_id].start_func;
1797
0
}
1798
1799
extern OnigCalloutFunc
1800
onig_get_callout_end_func_by_name_id(int name_id)
1801
0
{
1802
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1803
0
    return 0;
1804
1805
0
  return GlobalCalloutNameList->v[name_id].end_func;
1806
0
}
1807
1808
extern int
1809
onig_get_callout_in_by_name_id(int name_id)
1810
0
{
1811
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1812
0
    return 0;
1813
1814
0
  return GlobalCalloutNameList->v[name_id].in;
1815
0
}
1816
1817
static int
1818
get_callout_arg_num_by_name_id(int name_id)
1819
0
{
1820
0
  return GlobalCalloutNameList->v[name_id].arg_num;
1821
0
}
1822
1823
static int
1824
get_callout_opt_arg_num_by_name_id(int name_id)
1825
0
{
1826
0
  return GlobalCalloutNameList->v[name_id].opt_arg_num;
1827
0
}
1828
1829
static unsigned int
1830
get_callout_arg_type_by_name_id(int name_id, int index)
1831
0
{
1832
0
  return GlobalCalloutNameList->v[name_id].arg_types[index];
1833
0
}
1834
1835
static OnigValue
1836
get_callout_opt_default_by_name_id(int name_id, int index)
1837
0
{
1838
0
  return GlobalCalloutNameList->v[name_id].opt_defaults[index];
1839
0
}
1840
1841
extern UChar*
1842
onig_get_callout_name_by_name_id(int name_id)
1843
0
{
1844
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1845
0
    return 0;
1846
1847
0
  return GlobalCalloutNameList->v[name_id].name;
1848
0
}
1849
1850
extern int
1851
onig_global_callout_names_free(void)
1852
0
{
1853
0
  free_callout_func_list(GlobalCalloutNameList);
1854
0
  GlobalCalloutNameList = 0;
1855
1856
0
  global_callout_name_table_free();
1857
0
  return ONIG_NORMAL;
1858
0
}
1859
1860
1861
/*
 * Per-regex callout tag table: an st table whose values (CalloutTagVal)
 * are converted to callout_list indexes as (value - 1) by
 * i_callout_callout_list_set().  CALLOUT_TAG_LIST_FLAG_TAG_EXIST is the
 * flag bit that function sets on a callout list entry that has a tag.
 */
typedef st_table   CalloutTagTable;
1862
typedef intptr_t   CalloutTagVal;
1863
1864
0
#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST     (1<<0)
1865
1866
static int
1867
i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
1868
0
{
1869
0
  int num;
1870
0
  RegexExt* ext = (RegexExt* )arg;
1871
1872
0
  num = (int )e - 1;
1873
0
  ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
1874
0
  return ST_CONTINUE;
1875
0
}
1876
1877
static int
1878
setup_ext_callout_list_values(regex_t* reg)
1879
0
{
1880
0
  int i, j;
1881
0
  RegexExt* ext;
1882
1883
0
  ext = reg->extp;
1884
0
  if (IS_NOT_NULL(ext->tag_table)) {
1885
0
    onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
1886
0
                    (st_data_t )ext);
1887
0
  }
1888
1889
0
  for (i = 0; i < ext->callout_num; i++) {
1890
0
    CalloutListEntry* e = ext->callout_list + i;
1891
0
    if (e->of == ONIG_CALLOUT_OF_NAME) {
1892
0
      for (j = 0; j < e->u.arg.num; j++) {
1893
0
        if (e->u.arg.types[j] == ONIG_TYPE_TAG) {
1894
0
          UChar* start;
1895
0
          UChar* end;
1896
0
          int num;
1897
0
          start = e->u.arg.vals[j].s.start;
1898
0
          end   = e->u.arg.vals[j].s.end;
1899
0
          num = onig_get_callout_num_by_tag(reg, start, end);
1900
0
          if (num < 0) return num;
1901
0
          e->u.arg.vals[j].tag = num;
1902
0
        }
1903
0
      }
1904
0
    }
1905
0
  }
1906
1907
0
  return ONIG_NORMAL;
1908
0
}
1909
1910
extern int
1911
onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
1912
0
{
1913
0
  RegexExt* ext = reg->extp;
1914
1915
0
  if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
1916
0
  if (callout_num > ext->callout_num) return 0;
1917
1918
0
  return (ext->callout_list[callout_num].flag &
1919
0
          CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0;
1920
0
}
1921
1922
static int
1923
i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
1924
0
{
1925
0
  xfree(key);
1926
0
  return ST_DELETE;
1927
0
}
1928
1929
static int
1930
callout_tag_table_clear(CalloutTagTable* t)
1931
0
{
1932
0
  if (IS_NOT_NULL(t)) {
1933
0
    onig_st_foreach(t, i_free_callout_tag_entry, 0);
1934
0
  }
1935
0
  return 0;
1936
0
}
1937
1938
extern int
1939
onig_callout_tag_table_free(void* table)
1940
0
{
1941
0
  CalloutTagTable* t = (CalloutTagTable* )table;
1942
1943
0
  if (IS_NOT_NULL(t)) {
1944
0
    int r = callout_tag_table_clear(t);
1945
0
    if (r != 0) return r;
1946
1947
0
    onig_st_free_table(t);
1948
0
  }
1949
1950
0
  return 0;
1951
0
}
1952
1953
extern int
1954
onig_get_callout_num_by_tag(regex_t* reg,
1955
                            const UChar* tag, const UChar* tag_end)
1956
0
{
1957
0
  int r;
1958
0
  RegexExt* ext;
1959
0
  CalloutTagVal e;
1960
1961
0
  ext = reg->extp;
1962
0
  if (IS_NULL(ext) || IS_NULL(ext->tag_table))
1963
0
    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1964
1965
0
  r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
1966
0
                            (HashDataType* )((void* )(&e)));
1967
0
  if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1968
0
  return (int )e;
1969
0
}
1970
1971
static CalloutTagVal
1972
callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
1973
0
{
1974
0
  CalloutTagVal e;
1975
1976
0
  e = -1;
1977
0
  if (IS_NOT_NULL(t)) {
1978
0
    onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
1979
0
  }
1980
0
  return e;
1981
0
}
1982
1983
static int
1984
callout_tag_table_new(CalloutTagTable** rt)
1985
0
{
1986
0
  CalloutTagTable* t;
1987
1988
0
  *rt = 0;
1989
0
  t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
1990
0
  CHECK_NULL_RETURN_MEMERR(t);
1991
1992
0
  *rt = t;
1993
0
  return ONIG_NORMAL;
1994
0
}
1995
1996
static int
1997
callout_tag_entry_raw(ParseEnv* env, CalloutTagTable* t, UChar* name,
1998
                      UChar* name_end, CalloutTagVal entry_val)
1999
0
{
2000
0
  int r;
2001
0
  CalloutTagVal val;
2002
2003
0
  if (name_end - name <= 0)
2004
0
    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
2005
2006
0
  val = callout_tag_find(t, name, name_end);
2007
0
  if (val >= 0) {
2008
0
    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
2009
0
                                   name, name_end);
2010
0
    return ONIGERR_MULTIPLEX_DEFINED_NAME;
2011
0
  }
2012
2013
0
  r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
2014
0
  if (r < 0) return r;
2015
2016
0
  return ONIG_NORMAL;
2017
0
}
2018
2019
static int
2020
ext_ensure_tag_table(regex_t* reg)
2021
0
{
2022
0
  int r;
2023
0
  RegexExt* ext;
2024
0
  CalloutTagTable* t;
2025
2026
0
  ext = onig_get_regex_ext(reg);
2027
0
  CHECK_NULL_RETURN_MEMERR(ext);
2028
2029
0
  if (IS_NULL(ext->tag_table)) {
2030
0
    r = callout_tag_table_new(&t);
2031
0
    if (r != ONIG_NORMAL) return r;
2032
2033
0
    ext->tag_table = t;
2034
0
  }
2035
2036
0
  return ONIG_NORMAL;
2037
0
}
2038
2039
static int
2040
callout_tag_entry(ParseEnv* env, regex_t* reg, UChar* name, UChar* name_end,
2041
                  CalloutTagVal entry_val)
2042
0
{
2043
0
  int r;
2044
0
  RegexExt* ext;
2045
0
  CalloutListEntry* e;
2046
2047
0
  r = ext_ensure_tag_table(reg);
2048
0
  if (r != ONIG_NORMAL) return r;
2049
2050
0
  ext = onig_get_regex_ext(reg);
2051
0
  CHECK_NULL_RETURN_MEMERR(ext);
2052
0
  r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
2053
2054
0
  e = onig_reg_callout_list_at(reg, (int )entry_val);
2055
0
  CHECK_NULL_RETURN_MEMERR(e);
2056
0
  e->tag_start = name;
2057
0
  e->tag_end   = name_end;
2058
2059
0
  return r;
2060
0
}
2061
2062
#endif /* USE_CALLOUT */
2063
2064
2065
0
#define INIT_PARSEENV_MEMENV_ALLOC_SIZE   16
2066
2067
static void
2068
scan_env_clear(ParseEnv* env)
2069
0
{
2070
0
  MEM_STATUS_CLEAR(env->cap_history);
2071
0
  MEM_STATUS_CLEAR(env->backtrack_mem);
2072
0
  MEM_STATUS_CLEAR(env->backrefed_mem);
2073
0
  env->error      = (UChar* )NULL;
2074
0
  env->error_end  = (UChar* )NULL;
2075
0
  env->num_call   = 0;
2076
2077
0
#ifdef USE_CALL
2078
0
  env->unset_addr_list = NULL;
2079
0
#endif
2080
2081
0
  env->num_mem    = 0;
2082
0
  env->num_named  = 0;
2083
0
  env->mem_alloc  = 0;
2084
0
  env->mem_env_dynamic = (MemEnv* )NULL;
2085
2086
0
  xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
2087
2088
0
  env->parse_depth      = 0;
2089
#ifdef ONIG_DEBUG_PARSE
2090
  env->max_parse_depth  = 0;
2091
#endif
2092
0
  env->backref_num      = 0;
2093
0
  env->keep_num         = 0;
2094
0
  env->id_num           = 0;
2095
0
  env->save_alloc_num   = 0;
2096
0
  env->saves            = 0;
2097
0
  env->flags            = 0;
2098
0
}
2099
2100
static int
2101
scan_env_add_mem_entry(ParseEnv* env)
2102
0
{
2103
0
  int i, need, alloc;
2104
0
  MemEnv* p;
2105
2106
0
  need = env->num_mem + 1;
2107
0
  if (need > MaxCaptureNum && MaxCaptureNum != 0)
2108
0
    return ONIGERR_TOO_MANY_CAPTURES;
2109
2110
0
  if (need >= PARSEENV_MEMENV_SIZE) {
2111
0
    if (env->mem_alloc <= need) {
2112
0
      if (IS_NULL(env->mem_env_dynamic)) {
2113
0
        alloc = INIT_PARSEENV_MEMENV_ALLOC_SIZE;
2114
0
        p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
2115
0
        CHECK_NULL_RETURN_MEMERR(p);
2116
0
        xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
2117
0
      }
2118
0
      else {
2119
0
        alloc = env->mem_alloc * 2;
2120
0
        p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc);
2121
0
        CHECK_NULL_RETURN_MEMERR(p);
2122
0
      }
2123
2124
0
      for (i = env->num_mem + 1; i < alloc; i++) {
2125
0
        p[i].mem_node = NULL_NODE;
2126
0
        p[i].empty_repeat_node = NULL_NODE;
2127
0
      }
2128
2129
0
      env->mem_env_dynamic = p;
2130
0
      env->mem_alloc = alloc;
2131
0
    }
2132
0
  }
2133
2134
0
  env->num_mem++;
2135
0
  return env->num_mem;
2136
0
}
2137
2138
static int
2139
scan_env_set_mem_node(ParseEnv* env, int num, Node* node)
2140
0
{
2141
0
  if (env->num_mem >= num)
2142
0
    PARSEENV_MEMENV(env)[num].mem_node = node;
2143
0
  else
2144
0
    return ONIGERR_PARSER_BUG;
2145
0
  return 0;
2146
0
}
2147
2148
static void
2149
node_free_body(Node* node)
2150
0
{
2151
0
  if (IS_NULL(node)) return ;
2152
2153
0
  switch (ND_TYPE(node)) {
2154
0
  case ND_STRING:
2155
0
    if (STR_(node)->capacity != 0 &&
2156
0
        IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
2157
0
      xfree(STR_(node)->s);
2158
0
    }
2159
0
    break;
2160
2161
0
  case ND_LIST:
2162
0
  case ND_ALT:
2163
0
    onig_node_free(ND_CAR(node));
2164
0
    node = ND_CDR(node);
2165
0
    while (IS_NOT_NULL(node)) {
2166
0
      Node* next = ND_CDR(node);
2167
0
      onig_node_free(ND_CAR(node));
2168
0
      xfree(node);
2169
0
      node = next;
2170
0
    }
2171
0
    break;
2172
2173
0
  case ND_CCLASS:
2174
0
    {
2175
0
      CClassNode* cc = CCLASS_(node);
2176
2177
0
      if (cc->mbuf)
2178
0
        bbuf_free(cc->mbuf);
2179
0
    }
2180
0
    break;
2181
2182
0
  case ND_BACKREF:
2183
0
    if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
2184
0
      xfree(BACKREF_(node)->back_dynamic);
2185
0
    break;
2186
2187
0
  case ND_BAG:
2188
0
    if (ND_BODY(node))
2189
0
      onig_node_free(ND_BODY(node));
2190
2191
0
    {
2192
0
      BagNode* en = BAG_(node);
2193
0
      if (en->type == BAG_IF_ELSE) {
2194
0
        onig_node_free(en->te.Then);
2195
0
        onig_node_free(en->te.Else);
2196
0
      }
2197
0
    }
2198
0
    break;
2199
2200
0
  case ND_QUANT:
2201
0
    if (ND_BODY(node))
2202
0
      onig_node_free(ND_BODY(node));
2203
0
    break;
2204
2205
0
  case ND_ANCHOR:
2206
0
    if (ND_BODY(node))
2207
0
      onig_node_free(ND_BODY(node));
2208
0
    if (IS_NOT_NULL(ANCHOR_(node)->lead_node))
2209
0
      onig_node_free(ANCHOR_(node)->lead_node);
2210
0
    break;
2211
2212
0
  case ND_CTYPE:
2213
0
  case ND_CALL:
2214
0
  case ND_GIMMICK:
2215
0
    break;
2216
0
  }
2217
0
}
2218
2219
extern void
2220
onig_node_free(Node* node)
2221
0
{
2222
0
  if (IS_NULL(node)) return ;
2223
2224
#ifdef DEBUG_ND_FREE
2225
  fprintf(stderr, "onig_node_free: %p\n", node);
2226
#endif
2227
2228
0
  node_free_body(node);
2229
0
  xfree(node);
2230
0
}
2231
2232
static void
2233
cons_node_free_alone(Node* node)
2234
0
{
2235
0
  ND_CAR(node) = 0;
2236
0
  ND_CDR(node) = 0;
2237
0
  onig_node_free(node);
2238
0
}
2239
2240
static Node*
2241
node_new(void)
2242
0
{
2243
0
  Node* node;
2244
2245
0
  node = (Node* )xmalloc(sizeof(Node));
2246
0
  CHECK_NULL_RETURN(node);
2247
0
  xmemset(node, 0, sizeof(*node));
2248
2249
#ifdef DEBUG_ND_FREE
2250
  fprintf(stderr, "node_new: %p\n", node);
2251
#endif
2252
0
  return node;
2253
0
}
2254
2255
extern int
2256
onig_node_copy(Node** rcopy, Node* from)
2257
0
{
2258
0
  int r;
2259
0
  Node* copy;
2260
2261
0
  *rcopy = NULL_NODE;
2262
2263
0
  switch (ND_TYPE(from)) {
2264
0
  case ND_LIST:
2265
0
  case ND_ALT:
2266
0
  case ND_ANCHOR:
2267
    /* These node's link to other nodes are processed by caller. */
2268
0
    break;
2269
0
  case ND_STRING:
2270
0
  case ND_CCLASS:
2271
0
  case ND_CTYPE:
2272
    /* Fixed contents after copy. */
2273
0
    break;
2274
0
  default:
2275
    /* Not supported yet. */
2276
0
    return ONIGERR_TYPE_BUG;
2277
0
    break;
2278
0
  }
2279
2280
0
  copy = node_new();
2281
0
  CHECK_NULL_RETURN_MEMERR(copy);
2282
0
  xmemcpy(copy, from, sizeof(*copy));
2283
2284
0
  switch (ND_TYPE(copy)) {
2285
0
  case ND_STRING:
2286
0
    r = onig_node_str_set(copy, STR_(from)->s, STR_(from)->end, FALSE);
2287
0
    if (r != 0) {
2288
0
    err:
2289
0
      onig_node_free(copy);
2290
0
      return r;
2291
0
    }
2292
0
    break;
2293
2294
0
  case ND_CCLASS:
2295
0
    {
2296
0
      CClassNode *fcc, *tcc;
2297
2298
0
      fcc = CCLASS_(from);
2299
0
      tcc = CCLASS_(copy);
2300
0
      if (IS_NOT_NULL(fcc->mbuf)) {
2301
0
        r = bbuf_clone(&(tcc->mbuf), fcc->mbuf);
2302
0
        if (r != 0) goto err;
2303
0
      }
2304
0
    }
2305
0
    break;
2306
2307
0
  default:
2308
0
    break;
2309
0
  }
2310
2311
0
  *rcopy = copy;
2312
0
  return ONIG_NORMAL;
2313
0
}
2314
2315
2316
static void
2317
initialize_cclass(CClassNode* cc)
2318
0
{
2319
0
  BITSET_CLEAR(cc->bs);
2320
0
  cc->flags = 0;
2321
0
  cc->mbuf  = NULL;
2322
0
}
2323
2324
static Node*
2325
node_new_cclass(void)
2326
0
{
2327
0
  Node* node = node_new();
2328
0
  CHECK_NULL_RETURN(node);
2329
2330
0
  ND_SET_TYPE(node, ND_CCLASS);
2331
0
  initialize_cclass(CCLASS_(node));
2332
0
  return node;
2333
0
}
2334
2335
static Node*
2336
node_new_ctype(int type, int not, OnigOptionType options)
2337
0
{
2338
0
  Node* node = node_new();
2339
0
  CHECK_NULL_RETURN(node);
2340
2341
0
  ND_SET_TYPE(node, ND_CTYPE);
2342
0
  CTYPE_(node)->ctype   = type;
2343
0
  CTYPE_(node)->not     = not;
2344
0
  CTYPE_(node)->ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(type, options);
2345
0
  return node;
2346
0
}
2347
2348
static Node*
2349
node_new_anychar(OnigOptionType options)
2350
0
{
2351
0
  Node* node;
2352
2353
0
  node = node_new_ctype(CTYPE_ANYCHAR, FALSE, options);
2354
0
  CHECK_NULL_RETURN(node);
2355
2356
0
  if (OPTON_MULTILINE(options))
2357
0
    ND_STATUS_ADD(node, MULTILINE);
2358
0
  return node;
2359
0
}
2360
2361
static int
2362
node_new_no_newline(Node** node, ParseEnv* env)
2363
0
{
2364
0
  Node* n;
2365
2366
0
  n = node_new_anychar(ONIG_OPTION_NONE);
2367
0
  CHECK_NULL_RETURN_MEMERR(n);
2368
0
  *node = n;
2369
0
  return 0;
2370
0
}
2371
2372
static int
2373
node_new_true_anychar(Node** node)
2374
0
{
2375
0
  Node* n;
2376
2377
0
  n = node_new_anychar(ONIG_OPTION_MULTILINE);
2378
0
  CHECK_NULL_RETURN_MEMERR(n);
2379
0
  *node = n;
2380
0
  return 0;
2381
0
}
2382
2383
/* Create one list cell holding `left` as element and `right` as the rest. */
static Node*
node_new_list(Node* left, Node* right)
{
  Node* cell;

  cell = node_new();
  CHECK_NULL_RETURN(cell);

  ND_SET_TYPE(cell, ND_LIST);
  ND_CDR(cell) = right;
  ND_CAR(cell) = left;

  return cell;
}
2394
2395
/* Exported entry point for node_new_list(). */
extern Node*
onig_node_new_list(Node* left, Node* right)
{
  Node* cell = node_new_list(left, right);

  return cell;
}
2400
2401
/* Create one alternation cell holding `left` as element and `right` as the rest. */
extern Node*
onig_node_new_alt(Node* left, Node* right)
{
  Node* cell;

  cell = node_new();
  CHECK_NULL_RETURN(cell);

  ND_SET_TYPE(cell, ND_ALT);
  ND_CDR(cell) = right;
  ND_CAR(cell) = left;

  return cell;
}
2412
2413
static Node*
2414
make_list_or_alt(NodeType type, int n, Node* ns[])
2415
0
{
2416
0
  Node* r;
2417
2418
0
  if (n <= 0) return NULL_NODE;
2419
2420
0
  if (n == 1) {
2421
0
    r = node_new();
2422
0
    CHECK_NULL_RETURN(r);
2423
0
    ND_SET_TYPE(r, type);
2424
0
    ND_CAR(r) = ns[0];
2425
0
    ND_CDR(r) = NULL_NODE;
2426
0
  }
2427
0
  else {
2428
0
    Node* right;
2429
2430
0
    r = node_new();
2431
0
    CHECK_NULL_RETURN(r);
2432
2433
0
    right = make_list_or_alt(type, n - 1, ns + 1);
2434
0
    if (IS_NULL(right)) {
2435
0
      onig_node_free(r);
2436
0
      return NULL_NODE;
2437
0
    }
2438
2439
0
    ND_SET_TYPE(r, type);
2440
0
    ND_CAR(r) = ns[0];
2441
0
    ND_CDR(r) = right;
2442
0
  }
2443
2444
0
  return r;
2445
0
}
2446
2447
/* Chain ns[0..n-1] into ND_LIST cells. */
static Node*
make_list(int n, Node* ns[])
{
  Node* list = make_list_or_alt(ND_LIST, n, ns);

  return list;
}
2452
2453
/* Chain ns[0..n-1] into ND_ALT cells. */
static Node*
make_alt(int n, Node* ns[])
{
  Node* alt = make_list_or_alt(ND_ALT, n, ns);

  return alt;
}
2458
2459
static Node*
2460
node_new_anchor(int type)
2461
0
{
2462
0
  Node* node;
2463
2464
0
  node = node_new();
2465
0
  CHECK_NULL_RETURN(node);
2466
2467
0
  ND_SET_TYPE(node, ND_ANCHOR);
2468
0
  ANCHOR_(node)->type       = type;
2469
0
  ANCHOR_(node)->char_min_len = 0;
2470
0
  ANCHOR_(node)->char_max_len = INFINITE_LEN;
2471
0
  ANCHOR_(node)->ascii_mode = 0;
2472
0
  ANCHOR_(node)->lead_node  = NULL_NODE;
2473
0
  return node;
2474
0
}
2475
2476
static Node*
2477
node_new_anchor_with_options(int type, OnigOptionType options)
2478
0
{
2479
0
  int ascii_mode;
2480
0
  Node* node;
2481
2482
0
  node = node_new_anchor(type);
2483
0
  CHECK_NULL_RETURN(node);
2484
2485
0
  ascii_mode = OPTON_WORD_ASCII(options) && IS_WORD_ANCHOR_TYPE(type) ? 1 : 0;
2486
0
  ANCHOR_(node)->ascii_mode = ascii_mode;
2487
2488
0
  if (type == ANCR_TEXT_SEGMENT_BOUNDARY ||
2489
0
      type == ANCR_NO_TEXT_SEGMENT_BOUNDARY) {
2490
0
    if (OPTON_TEXT_SEGMENT_WORD(options))
2491
0
      ND_STATUS_ADD(node, TEXT_SEGMENT_WORD);
2492
0
  }
2493
2494
0
  return node;
2495
0
}
2496
2497
static Node*
2498
node_new_backref(int back_num, int* backrefs, int by_name,
2499
#ifdef USE_BACKREF_WITH_LEVEL
2500
                 int exist_level, int nest_level,
2501
#endif
2502
                 ParseEnv* env)
2503
0
{
2504
0
  int i;
2505
0
  Node* node;
2506
2507
0
  node = node_new();
2508
0
  CHECK_NULL_RETURN(node);
2509
2510
0
  ND_SET_TYPE(node, ND_BACKREF);
2511
0
  BACKREF_(node)->back_num = back_num;
2512
0
  BACKREF_(node)->back_dynamic = (int* )NULL;
2513
0
  if (by_name != 0)
2514
0
    ND_STATUS_ADD(node, BY_NAME);
2515
2516
0
  if (OPTON_IGNORECASE(env->options))
2517
0
    ND_STATUS_ADD(node, IGNORECASE);
2518
2519
0
#ifdef USE_BACKREF_WITH_LEVEL
2520
0
  if (exist_level != 0) {
2521
0
    ND_STATUS_ADD(node, NEST_LEVEL);
2522
0
    BACKREF_(node)->nest_level  = nest_level;
2523
0
  }
2524
0
#endif
2525
2526
0
  for (i = 0; i < back_num; i++) {
2527
0
    if (backrefs[i] <= env->num_mem &&
2528
0
        IS_NULL(PARSEENV_MEMENV(env)[backrefs[i]].mem_node)) {
2529
0
      ND_STATUS_ADD(node, RECURSION);   /* /...(\1).../ */
2530
0
      break;
2531
0
    }
2532
0
  }
2533
2534
0
  if (back_num <= ND_BACKREFS_SIZE) {
2535
0
    for (i = 0; i < back_num; i++)
2536
0
      BACKREF_(node)->back_static[i] = backrefs[i];
2537
0
  }
2538
0
  else {
2539
0
    int* p = (int* )xmalloc(sizeof(int) * back_num);
2540
0
    if (IS_NULL(p)) {
2541
0
      onig_node_free(node);
2542
0
      return NULL;
2543
0
    }
2544
0
    BACKREF_(node)->back_dynamic = p;
2545
0
    for (i = 0; i < back_num; i++)
2546
0
      p[i] = backrefs[i];
2547
0
  }
2548
2549
0
  env->backref_num++;
2550
0
  return node;
2551
0
}
2552
2553
static Node*
2554
node_new_backref_checker(int back_num, int* backrefs, int by_name,
2555
#ifdef USE_BACKREF_WITH_LEVEL
2556
                         int exist_level, int nest_level,
2557
#endif
2558
                         ParseEnv* env)
2559
0
{
2560
0
  Node* node;
2561
2562
0
  node = node_new_backref(back_num, backrefs, by_name,
2563
0
#ifdef USE_BACKREF_WITH_LEVEL
2564
0
                          exist_level, nest_level,
2565
0
#endif
2566
0
                          env);
2567
0
  CHECK_NULL_RETURN(node);
2568
2569
0
  ND_STATUS_ADD(node, CHECKER);
2570
0
  return node;
2571
0
}
2572
2573
#ifdef USE_CALL
2574
static Node*
2575
node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
2576
0
{
2577
0
  Node* node = node_new();
2578
0
  CHECK_NULL_RETURN(node);
2579
2580
0
  ND_SET_TYPE(node, ND_CALL);
2581
0
  CALL_(node)->by_number   = by_number;
2582
0
  CALL_(node)->name        = name;
2583
0
  CALL_(node)->name_end    = name_end;
2584
0
  CALL_(node)->called_gnum = gnum;
2585
0
  CALL_(node)->entry_count = 1;
2586
0
  return node;
2587
0
}
2588
#endif
2589
2590
static Node*
2591
node_new_quantifier(int lower, int upper, int by_number)
2592
0
{
2593
0
  Node* node = node_new();
2594
0
  CHECK_NULL_RETURN(node);
2595
2596
0
  ND_SET_TYPE(node, ND_QUANT);
2597
0
  QUANT_(node)->lower            = lower;
2598
0
  QUANT_(node)->upper            = upper;
2599
0
  QUANT_(node)->greedy           = 1;
2600
0
  QUANT_(node)->emptiness        = BODY_IS_NOT_EMPTY;
2601
0
  QUANT_(node)->head_exact       = NULL_NODE;
2602
0
  QUANT_(node)->next_head_exact  = NULL_NODE;
2603
0
  QUANT_(node)->include_referred = 0;
2604
0
  QUANT_(node)->empty_status_mem = 0;
2605
0
  if (by_number != 0)
2606
0
    ND_STATUS_ADD(node, BY_NUMBER);
2607
2608
0
  return node;
2609
0
}
2610
2611
static Node*
2612
node_new_bag(enum BagType type)
2613
0
{
2614
0
  Node* node = node_new();
2615
0
  CHECK_NULL_RETURN(node);
2616
2617
0
  ND_SET_TYPE(node, ND_BAG);
2618
0
  BAG_(node)->type = type;
2619
2620
0
  switch (type) {
2621
0
  case BAG_MEMORY:
2622
0
    BAG_(node)->m.regnum       =  0;
2623
0
    BAG_(node)->m.called_addr  = -1;
2624
0
    BAG_(node)->m.entry_count  =  1;
2625
0
    BAG_(node)->m.called_state =  0;
2626
0
    break;
2627
2628
0
  case BAG_OPTION:
2629
0
    BAG_(node)->o.options =  0;
2630
0
    break;
2631
2632
0
  case BAG_STOP_BACKTRACK:
2633
0
    break;
2634
2635
0
  case BAG_IF_ELSE:
2636
0
    BAG_(node)->te.Then = 0;
2637
0
    BAG_(node)->te.Else = 0;
2638
0
    break;
2639
0
  }
2640
2641
0
  BAG_(node)->opt_count = 0;
2642
0
  return node;
2643
0
}
2644
2645
extern Node*
2646
onig_node_new_bag(enum BagType type)
2647
0
{
2648
0
  return node_new_bag(type);
2649
0
}
2650
2651
/*
 * Create an if-else bag: `cond` becomes the body, `Then` and `Else`
 * the two branches.
 */
static Node*
node_new_bag_if_else(Node* cond, Node* Then, Node* Else)
{
  Node* bag = node_new_bag(BAG_IF_ELSE);

  CHECK_NULL_RETURN(bag);

  BAG_(bag)->te.Else = Else;
  BAG_(bag)->te.Then = Then;
  ND_BODY(bag) = cond;

  return bag;
}
2663
2664
static Node*
2665
node_new_memory(int is_named)
2666
0
{
2667
0
  Node* node = node_new_bag(BAG_MEMORY);
2668
0
  CHECK_NULL_RETURN(node);
2669
0
  if (is_named != 0)
2670
0
    ND_STATUS_ADD(node, NAMED_GROUP);
2671
2672
0
  return node;
2673
0
}
2674
2675
static Node*
2676
node_new_option(OnigOptionType option)
2677
0
{
2678
0
  Node* node = node_new_bag(BAG_OPTION);
2679
0
  CHECK_NULL_RETURN(node);
2680
0
  BAG_(node)->o.options = option;
2681
0
  return node;
2682
0
}
2683
2684
/* Wrap `content` in a single list cell with no successor. */
static Node*
node_new_group(Node* content)
{
  Node* group = node_new();

  CHECK_NULL_RETURN(group);

  ND_SET_TYPE(group, ND_LIST);
  ND_CDR(group) = NULL_NODE;
  ND_CAR(group) = content;

  return group;
}
2697
2698
/*
 * Take the element out of a group cell, free the cell, and return the
 * element.  The element link is cleared first so that freeing the cell
 * does not free the element.
 */
static Node*
node_drop_group(Node* group)
{
  Node* inner = ND_CAR(group);

  ND_CAR(group) = NULL_NODE;
  onig_node_free(group);

  return inner;
}
2708
2709
static int
2710
node_set_fail(Node* node)
2711
0
{
2712
0
  ND_SET_TYPE(node, ND_GIMMICK);
2713
0
  GIMMICK_(node)->type = GIMMICK_FAIL;
2714
0
  return ONIG_NORMAL;
2715
0
}
2716
2717
static int
2718
node_new_fail(Node** node, ParseEnv* env)
2719
0
{
2720
0
  *node = node_new();
2721
0
  CHECK_NULL_RETURN_MEMERR(*node);
2722
2723
0
  return node_set_fail(*node);
2724
0
}
2725
2726
extern int
2727
onig_node_reset_fail(Node* node)
2728
0
{
2729
0
  node_free_body(node);
2730
0
  return node_set_fail(node);
2731
0
}
2732
2733
static int
2734
node_new_save_gimmick(Node** node, enum SaveType save_type, ParseEnv* env)
2735
0
{
2736
0
  int id;
2737
2738
0
  ID_ENTRY(env, id);
2739
2740
0
  *node = node_new();
2741
0
  CHECK_NULL_RETURN_MEMERR(*node);
2742
2743
0
  ND_SET_TYPE(*node, ND_GIMMICK);
2744
0
  GIMMICK_(*node)->id   = id;
2745
0
  GIMMICK_(*node)->type = GIMMICK_SAVE;
2746
0
  GIMMICK_(*node)->detail_type = (int )save_type;
2747
2748
0
  return ONIG_NORMAL;
2749
0
}
2750
2751
static int
2752
node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
2753
                            int id, ParseEnv* env)
2754
0
{
2755
0
  *node = node_new();
2756
0
  CHECK_NULL_RETURN_MEMERR(*node);
2757
2758
0
  ND_SET_TYPE(*node, ND_GIMMICK);
2759
0
  GIMMICK_(*node)->id   = id;
2760
0
  GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;
2761
0
  GIMMICK_(*node)->detail_type = (int )update_var_type;
2762
2763
0
  return ONIG_NORMAL;
2764
0
}
2765
2766
static int
2767
node_new_keep(Node** node, ParseEnv* env)
2768
0
{
2769
0
  int r;
2770
2771
0
  r = node_new_save_gimmick(node, SAVE_KEEP, env);
2772
0
  if (r != 0) return r;
2773
2774
0
  env->keep_num++;
2775
0
  return ONIG_NORMAL;
2776
0
}
2777
2778
#ifdef USE_CALLOUT
2779
2780
extern void
2781
onig_free_reg_callout_list(int n, CalloutListEntry* list)
2782
0
{
2783
0
  int i;
2784
0
  int j;
2785
2786
0
  if (IS_NULL(list)) return ;
2787
2788
0
  for (i = 0; i < n; i++) {
2789
0
    if (list[i].of == ONIG_CALLOUT_OF_NAME) {
2790
0
      for (j = 0; j < list[i].u.arg.passed_num; j++) {
2791
0
        if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {
2792
0
          if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))
2793
0
            xfree(list[i].u.arg.vals[j].s.start);
2794
0
        }
2795
0
      }
2796
0
    }
2797
0
    else { /* ONIG_CALLOUT_OF_CONTENTS */
2798
0
      if (IS_NOT_NULL(list[i].u.content.start)) {
2799
0
        xfree((void* )list[i].u.content.start);
2800
0
      }
2801
0
    }
2802
0
  }
2803
2804
0
  xfree(list);
2805
0
}
2806
2807
extern CalloutListEntry*
2808
onig_reg_callout_list_at(regex_t* reg, int num)
2809
0
{
2810
0
  RegexExt* ext = reg->extp;
2811
0
  CHECK_NULL_RETURN(ext);
2812
2813
0
  if (num <= 0 || num > ext->callout_num)
2814
0
    return 0;
2815
2816
0
  num--;
2817
0
  return ext->callout_list + num;
2818
0
}
2819
2820
static int
2821
reg_callout_list_entry(ParseEnv* env, int* rnum)
2822
0
{
2823
0
#define INIT_CALLOUT_LIST_NUM  3
2824
2825
0
  int num;
2826
0
  CalloutListEntry* list;
2827
0
  CalloutListEntry* e;
2828
0
  RegexExt* ext;
2829
2830
0
  ext = onig_get_regex_ext(env->reg);
2831
0
  CHECK_NULL_RETURN_MEMERR(ext);
2832
2833
0
  if (IS_NULL(ext->callout_list)) {
2834
0
    list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);
2835
0
    CHECK_NULL_RETURN_MEMERR(list);
2836
2837
0
    ext->callout_list = list;
2838
0
    ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
2839
0
    ext->callout_num = 0;
2840
0
  }
2841
2842
0
  num = ext->callout_num + 1;
2843
0
  if (num > ext->callout_list_alloc) {
2844
0
    int alloc = ext->callout_list_alloc * 2;
2845
0
    list = (CalloutListEntry* )xrealloc(ext->callout_list,
2846
0
                                        sizeof(CalloutListEntry) * alloc);
2847
0
    CHECK_NULL_RETURN_MEMERR(list);
2848
2849
0
    ext->callout_list       = list;
2850
0
    ext->callout_list_alloc = alloc;
2851
0
  }
2852
2853
0
  e = ext->callout_list + (num - 1);
2854
2855
0
  e->flag             = 0;
2856
0
  e->of               = 0;
2857
0
  e->in               = ONIG_CALLOUT_OF_CONTENTS;
2858
0
  e->type             = 0;
2859
0
  e->tag_start        = 0;
2860
0
  e->tag_end          = 0;
2861
0
  e->start_func       = 0;
2862
0
  e->end_func         = 0;
2863
0
  e->u.arg.num        = 0;
2864
0
  e->u.arg.passed_num = 0;
2865
2866
0
  ext->callout_num = num;
2867
0
  *rnum = num;
2868
0
  return ONIG_NORMAL;
2869
0
}
2870
2871
static int
2872
node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
2873
                 ParseEnv* env)
2874
0
{
2875
0
  *node = node_new();
2876
0
  CHECK_NULL_RETURN_MEMERR(*node);
2877
2878
0
  ND_SET_TYPE(*node, ND_GIMMICK);
2879
0
  GIMMICK_(*node)->id          = id;
2880
0
  GIMMICK_(*node)->num         = num;
2881
0
  GIMMICK_(*node)->type        = GIMMICK_CALLOUT;
2882
0
  GIMMICK_(*node)->detail_type = (int )callout_of;
2883
2884
0
  return ONIG_NORMAL;
2885
0
}
2886
#endif
2887
2888
static int
2889
make_text_segment(Node** node, ParseEnv* env)
2890
0
{
2891
0
  int r;
2892
0
  int i;
2893
0
  Node* x;
2894
0
  Node* ns[2];
2895
2896
  /* \X == (?>\O(?:\Y\O)*) */
2897
2898
0
  ns[1] = NULL_NODE;
2899
2900
0
  r = ONIGERR_MEMORY;
2901
0
  ns[0] = node_new_anchor_with_options(ANCR_NO_TEXT_SEGMENT_BOUNDARY, env->options);
2902
0
  if (IS_NULL(ns[0])) goto err;
2903
2904
0
  r = node_new_true_anychar(&ns[1]);
2905
0
  if (r != 0) goto err1;
2906
2907
0
  x = make_list(2, ns);
2908
0
  if (IS_NULL(x)) goto err;
2909
0
  ns[0] = x;
2910
0
  ns[1] = NULL_NODE;
2911
2912
0
  x = node_new_quantifier(0, INFINITE_REPEAT, TRUE);
2913
0
  if (IS_NULL(x)) goto err;
2914
2915
0
  ND_BODY(x) = ns[0];
2916
0
  ns[0] = NULL_NODE;
2917
0
  ns[1] = x;
2918
2919
0
  r = node_new_true_anychar(&ns[0]);
2920
0
  if (r != 0) goto err1;
2921
2922
0
  x = make_list(2, ns);
2923
0
  if (IS_NULL(x)) goto err;
2924
2925
0
  ns[0] = x;
2926
0
  ns[1] = NULL_NODE;
2927
2928
0
  x = node_new_bag(BAG_STOP_BACKTRACK);
2929
0
  if (IS_NULL(x)) goto err;
2930
2931
0
  ND_BODY(x) = ns[0];
2932
2933
0
  *node = x;
2934
0
  return ONIG_NORMAL;
2935
2936
0
 err:
2937
0
  r = ONIGERR_MEMORY;
2938
0
 err1:
2939
0
  for (i = 0; i < 2; i++) onig_node_free(ns[i]);
2940
0
  return r;
2941
0
}
2942
2943
static int
2944
make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
2945
                   Node* step_one, int lower, int upper, int possessive,
2946
                   int is_range_cutter, ParseEnv* env)
2947
0
{
2948
0
  int r;
2949
0
  int i;
2950
0
  int id;
2951
0
  Node* x;
2952
0
  Node* ns[4];
2953
2954
0
  for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
2955
2956
0
  ns[1] = absent;
2957
0
  ns[3] = step_one; /* for err */
2958
0
  r = node_new_save_gimmick(&ns[0], SAVE_S, env);
2959
0
  if (r != 0) goto err;
2960
2961
0
  id = GIMMICK_(ns[0])->id;
2962
0
  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
2963
0
                                  id, env);
2964
0
  if (r != 0) goto err;
2965
2966
0
  if (is_range_cutter != 0)
2967
0
    ND_STATUS_ADD(ns[2], ABSENT_WITH_SIDE_EFFECTS);
2968
2969
0
  r = node_new_fail(&ns[3], env);
2970
0
  if (r != 0) goto err;
2971
2972
0
  x = make_list(4, ns);
2973
0
  if (IS_NULL(x)) goto err0;
2974
2975
0
  ns[0] = x;
2976
0
  ns[1] = step_one;
2977
0
  ns[2] = ns[3] = NULL_NODE;
2978
2979
0
  x = make_alt(2, ns);
2980
0
  if (IS_NULL(x)) goto err0;
2981
2982
0
  ns[0] = x;
2983
2984
0
  x = node_new_quantifier(lower, upper, FALSE);
2985
0
  if (IS_NULL(x)) goto err0;
2986
2987
0
  ND_BODY(x) = ns[0];
2988
0
  ns[0] = x;
2989
2990
0
  if (possessive != 0) {
2991
0
    x = node_new_bag(BAG_STOP_BACKTRACK);
2992
0
    if (IS_NULL(x)) goto err0;
2993
2994
0
    ND_BODY(x) = ns[0];
2995
0
    ns[0] = x;
2996
0
  }
2997
2998
0
  r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2999
0
                                  pre_save_right_id, env);
3000
0
  if (r != 0) goto err;
3001
3002
0
  r = node_new_fail(&ns[2], env);
3003
0
  if (r != 0) goto err;
3004
3005
0
  x = make_list(2, ns + 1);
3006
0
  if (IS_NULL(x)) goto err0;
3007
3008
0
  ns[1] = x; ns[2] = NULL_NODE;
3009
3010
0
  x = make_alt(2, ns);
3011
0
  if (IS_NULL(x)) goto err0;
3012
3013
0
  if (is_range_cutter != FALSE)
3014
0
    ND_STATUS_ADD(x, SUPER);
3015
3016
0
  *node = x;
3017
0
  return ONIG_NORMAL;
3018
3019
0
 err0:
3020
0
  r = ONIGERR_MEMORY;
3021
0
 err:
3022
0
  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3023
0
  return r;
3024
0
}
3025
3026
static int
3027
make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
3028
                 ParseEnv* env)
3029
0
{
3030
0
  int r;
3031
0
  int id;
3032
0
  Node* save;
3033
0
  Node* x;
3034
0
  Node* ns[2];
3035
3036
0
  *node1 = *node2 = NULL_NODE;
3037
0
  save = ns[0] = ns[1] = NULL_NODE;
3038
3039
0
  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
3040
0
  if (r != 0) goto err;
3041
3042
0
  id = GIMMICK_(save)->id;
3043
0
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3044
0
                                  id, env);
3045
0
  if (r != 0) goto err;
3046
3047
0
  r = node_new_fail(&ns[1], env);
3048
0
  if (r != 0) goto err;
3049
3050
0
  x = make_list(2, ns);
3051
0
  if (IS_NULL(x)) goto err0;
3052
3053
0
  ns[0] = NULL_NODE; ns[1] = x;
3054
3055
0
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3056
0
                                  pre_save_right_id, env);
3057
0
  if (r != 0) goto err;
3058
3059
0
  x = make_alt(2, ns);
3060
0
  if (IS_NULL(x)) goto err0;
3061
3062
0
  *node1 = save;
3063
0
  *node2 = x;
3064
0
  return ONIG_NORMAL;
3065
3066
0
 err0:
3067
0
  r = ONIGERR_MEMORY;
3068
0
 err:
3069
0
  onig_node_free(save);
3070
0
  onig_node_free(ns[0]);
3071
0
  onig_node_free(ns[1]);
3072
0
  return r;
3073
0
}
3074
3075
static int
3076
make_range_clear(Node** node, ParseEnv* env)
3077
0
{
3078
0
  int r;
3079
0
  int id;
3080
0
  Node* save;
3081
0
  Node* x;
3082
0
  Node* ns[2];
3083
3084
0
  *node = NULL_NODE;
3085
0
  save = ns[0] = ns[1] = NULL_NODE;
3086
3087
0
  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
3088
0
  if (r != 0) goto err;
3089
3090
0
  id = GIMMICK_(save)->id;
3091
0
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3092
0
                                  id, env);
3093
0
  if (r != 0) goto err;
3094
3095
0
  r = node_new_fail(&ns[1], env);
3096
0
  if (r != 0) goto err;
3097
3098
0
  x = make_list(2, ns);
3099
0
  if (IS_NULL(x)) goto err0;
3100
3101
0
  ns[0] = NULL_NODE; ns[1] = x;
3102
3103
0
#define ID_NOT_USED_DONT_CARE_ME   0
3104
3105
0
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT,
3106
0
                                  ID_NOT_USED_DONT_CARE_ME, env);
3107
0
  if (r != 0) goto err;
3108
0
  ND_STATUS_ADD(ns[0], ABSENT_WITH_SIDE_EFFECTS);
3109
3110
0
  x = make_alt(2, ns);
3111
0
  if (IS_NULL(x)) goto err0;
3112
3113
0
  ND_STATUS_ADD(x, SUPER);
3114
3115
0
  ns[0] = save;
3116
0
  ns[1] = x;
3117
0
  save = NULL_NODE;
3118
0
  x = make_list(2, ns);
3119
0
  if (IS_NULL(x)) goto err0;
3120
3121
0
  *node = x;
3122
0
  return ONIG_NORMAL;
3123
3124
0
 err0:
3125
0
  r = ONIGERR_MEMORY;
3126
0
 err:
3127
0
  onig_node_free(save);
3128
0
  onig_node_free(ns[0]);
3129
0
  onig_node_free(ns[1]);
3130
0
  return r;
3131
0
}
3132
3133
static int
3134
is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
3135
                          int* is_possessive, ParseEnv* env)
3136
0
{
3137
0
  Node* quant;
3138
0
  Node* body;
3139
3140
0
  *rquant = *rbody = 0;
3141
0
  *is_possessive = 0;
3142
3143
0
  if (ND_TYPE(node) == ND_QUANT) {
3144
0
    quant = node;
3145
0
  }
3146
0
  else {
3147
0
    if (ND_TYPE(node) == ND_BAG) {
3148
0
      BagNode* en = BAG_(node);
3149
0
      if (en->type == BAG_STOP_BACKTRACK) {
3150
0
        *is_possessive = 1;
3151
0
        quant = ND_BAG_BODY(en);
3152
0
        if (ND_TYPE(quant) != ND_QUANT)
3153
0
          return 0;
3154
0
      }
3155
0
      else
3156
0
        return 0;
3157
0
    }
3158
0
    else
3159
0
      return 0;
3160
0
  }
3161
3162
0
  if (QUANT_(quant)->greedy == 0)
3163
0
    return 0;
3164
3165
0
  body = ND_BODY(quant);
3166
0
  switch (ND_TYPE(body)) {
3167
0
  case ND_STRING:
3168
0
    {
3169
0
      int len;
3170
0
      StrNode* sn = STR_(body);
3171
0
      UChar *s = sn->s;
3172
3173
0
      len = 0;
3174
0
      while (s < sn->end) {
3175
0
        s += enclen(env->enc, s);
3176
0
        len++;
3177
0
      }
3178
0
      if (len != 1)
3179
0
        return 0;
3180
0
    }
3181
3182
0
  case ND_CCLASS:
3183
0
    break;
3184
3185
0
  default:
3186
0
    return 0;
3187
0
    break;
3188
0
  }
3189
3190
0
  if (node != quant) {
3191
0
    ND_BODY(node) = 0;
3192
0
    onig_node_free(node);
3193
0
  }
3194
0
  ND_BODY(quant) = NULL_NODE;
3195
0
  *rquant = quant;
3196
0
  *rbody  = body;
3197
0
  return 1;
3198
0
}
3199
3200
static int
3201
make_absent_tree_for_simple_one_char_repeat(Node** node,
3202
  Node* absent, Node* quant, Node* body, int possessive, ParseEnv* env)
3203
0
{
3204
0
  int r;
3205
0
  int i;
3206
0
  int id1;
3207
0
  int lower, upper;
3208
0
  Node* x;
3209
0
  Node* ns[4];
3210
3211
0
  *node = NULL_NODE;
3212
0
  r = ONIGERR_MEMORY;
3213
0
  ns[0] = ns[1] = NULL_NODE;
3214
0
  ns[2] = body, ns[3] = absent;
3215
3216
0
  lower = QUANT_(quant)->lower;
3217
0
  upper = QUANT_(quant)->upper;
3218
3219
0
  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3220
0
  if (r != 0) goto err;
3221
3222
0
  id1 = GIMMICK_(ns[0])->id;
3223
3224
0
  r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
3225
0
                         FALSE, env);
3226
0
  if (r != 0) goto err;
3227
3228
0
  ns[2] = ns[3] = NULL_NODE;
3229
3230
0
  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3231
0
                                  id1, env);
3232
0
  if (r != 0) goto err;
3233
3234
0
  x = make_list(3, ns);
3235
0
  if (IS_NULL(x)) goto err0;
3236
3237
0
  *node = x;
3238
0
  return ONIG_NORMAL;
3239
3240
0
 err0:
3241
0
  r = ONIGERR_MEMORY;
3242
0
 err:
3243
0
  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3244
0
  return r;
3245
0
}
3246
3247
static int
3248
make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
3249
                 ParseEnv* env)
3250
0
{
3251
0
  int r;
3252
0
  int i;
3253
0
  int id1, id2;
3254
0
  int possessive;
3255
0
  Node* x;
3256
0
  Node* ns[7];
3257
3258
0
  r = ONIGERR_MEMORY;
3259
0
  for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
3260
0
  ns[4] = expr; ns[5] = absent;
3261
3262
0
  if (is_range_cutter == 0) {
3263
0
    Node* quant;
3264
0
    Node* body;
3265
3266
0
    if (expr == NULL_NODE) {
3267
      /* default expr \O* */
3268
0
      quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
3269
0
      if (IS_NULL(quant)) goto err0;
3270
3271
0
      r = node_new_true_anychar(&body);
3272
0
      if (r != 0) {
3273
0
        onig_node_free(quant);
3274
0
        goto err;
3275
0
      }
3276
0
      possessive = 0;
3277
0
      goto simple;
3278
0
    }
3279
0
    else {
3280
0
      if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
3281
0
      simple:
3282
0
        r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
3283
0
                                                        body, possessive, env);
3284
0
        onig_node_free(quant);
3285
0
        if (r != 0) {
3286
0
          ns[4] = NULL_NODE;
3287
0
          onig_node_free(body);
3288
0
          goto err;
3289
0
        }
3290
3291
0
        return ONIG_NORMAL;
3292
0
      }
3293
0
    }
3294
0
  }
3295
3296
0
  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3297
0
  if (r != 0) goto err;
3298
3299
0
  id1 = GIMMICK_(ns[0])->id;
3300
3301
0
  r = node_new_save_gimmick(&ns[1], SAVE_S, env);
3302
0
  if (r != 0) goto err;
3303
3304
0
  id2 = GIMMICK_(ns[1])->id;
3305
3306
0
  r = node_new_true_anychar(&ns[3]);
3307
0
  if (r != 0) goto err;
3308
3309
0
  possessive = 1;
3310
0
  r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
3311
0
                         possessive, is_range_cutter, env);
3312
0
  if (r != 0) goto err;
3313
3314
0
  ns[3] = NULL_NODE;
3315
0
  ns[5] = NULL_NODE;
3316
3317
0
  r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
3318
0
  if (r != 0) goto err;
3319
3320
0
  if (is_range_cutter != 0) {
3321
0
    x = make_list(4, ns);
3322
0
    if (IS_NULL(x)) goto err0;
3323
0
  }
3324
0
  else {
3325
0
    r = make_absent_tail(&ns[5], &ns[6], id1, env);
3326
0
    if (r != 0) goto err;
3327
3328
0
    x = make_list(7, ns);
3329
0
    if (IS_NULL(x)) goto err0;
3330
0
  }
3331
3332
0
  *node = x;
3333
0
  return ONIG_NORMAL;
3334
3335
0
 err0:
3336
0
  r = ONIGERR_MEMORY;
3337
0
 err:
3338
0
  for (i = 0; i < 7; i++) onig_node_free(ns[i]);
3339
0
  return r;
3340
0
}
3341
3342
extern int
3343
onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
3344
0
{
3345
0
  int addlen = (int )(end - s);
3346
3347
0
  if (addlen > 0) {
3348
0
    int len  = (int )(STR_(node)->end - STR_(node)->s);
3349
3350
0
    if (STR_(node)->capacity > 0 || (len + addlen > ND_STRING_BUF_SIZE - 1)) {
3351
0
      UChar* p;
3352
0
      int capa = len + addlen + ND_STRING_MARGIN;
3353
3354
0
      if (capa <= STR_(node)->capacity) {
3355
0
        onig_strcpy(STR_(node)->s + len, s, end);
3356
0
      }
3357
0
      else {
3358
0
        if (STR_(node)->s == STR_(node)->buf)
3359
0
          p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
3360
0
                                      s, end, capa);
3361
0
        else
3362
0
          p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa);
3363
3364
0
        CHECK_NULL_RETURN_MEMERR(p);
3365
0
        STR_(node)->s        = p;
3366
0
        STR_(node)->capacity = capa;
3367
0
      }
3368
0
    }
3369
0
    else {
3370
0
      onig_strcpy(STR_(node)->s + len, s, end);
3371
0
    }
3372
0
    STR_(node)->end = STR_(node)->s + len + addlen;
3373
0
  }
3374
3375
0
  return 0;
3376
0
}
3377
3378
extern int
3379
onig_node_str_set(Node* node, const UChar* s, const UChar* end, int need_free)
3380
0
{
3381
0
  onig_node_str_clear(node, need_free);
3382
0
  return onig_node_str_cat(node, s, end);
3383
0
}
3384
3385
static int
3386
node_str_cat_char(Node* node, UChar c)
3387
0
{
3388
0
  UChar s[1];
3389
3390
0
  s[0] = c;
3391
0
  return onig_node_str_cat(node, s, s + 1);
3392
0
}
3393
3394
extern void
3395
onig_node_str_clear(Node* node, int need_free)
3396
0
{
3397
0
  if (need_free != 0 &&
3398
0
      STR_(node)->capacity != 0 &&
3399
0
      IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
3400
0
    xfree(STR_(node)->s);
3401
0
  }
3402
3403
0
  STR_(node)->flag     = 0;
3404
0
  STR_(node)->s        = STR_(node)->buf;
3405
0
  STR_(node)->end      = STR_(node)->buf;
3406
0
  STR_(node)->capacity = 0;
3407
0
}
3408
3409
static int
3410
node_set_str(Node* node, const UChar* s, const UChar* end)
3411
0
{
3412
0
  int r;
3413
3414
0
  ND_SET_TYPE(node, ND_STRING);
3415
0
  STR_(node)->flag     = 0;
3416
0
  STR_(node)->s        = STR_(node)->buf;
3417
0
  STR_(node)->end      = STR_(node)->buf;
3418
0
  STR_(node)->capacity = 0;
3419
3420
0
  r = onig_node_str_cat(node, s, end);
3421
0
  return r;
3422
0
}
3423
3424
static Node*
3425
node_new_str(const UChar* s, const UChar* end)
3426
0
{
3427
0
  int r;
3428
0
  Node* node = node_new();
3429
0
  CHECK_NULL_RETURN(node);
3430
3431
0
  r = node_set_str(node, s, end);
3432
0
  if (r != 0) {
3433
0
    onig_node_free(node);
3434
0
    return NULL;
3435
0
  }
3436
3437
0
  return node;
3438
0
}
3439
3440
static int
3441
node_reset_str(Node* node, const UChar* s, const UChar* end)
3442
0
{
3443
0
  node_free_body(node);
3444
0
  return node_set_str(node, s, end);
3445
0
}
3446
3447
extern int
3448
onig_node_reset_empty(Node* node)
3449
0
{
3450
0
  return node_reset_str(node, NULL, NULL);
3451
0
}
3452
3453
extern Node*
3454
onig_node_new_str(const UChar* s, const UChar* end)
3455
0
{
3456
0
  return node_new_str(s, end);
3457
0
}
3458
3459
static Node*
3460
node_new_str_with_options(const UChar* s, const UChar* end,
3461
                          OnigOptionType options)
3462
0
{
3463
0
  Node* node;
3464
0
  node = node_new_str(s, end);
3465
3466
0
  if (OPTON_IGNORECASE(options))
3467
0
    ND_STATUS_ADD(node, IGNORECASE);
3468
3469
0
  return node;
3470
0
}
3471
3472
static Node*
3473
node_new_str_crude(UChar* s, UChar* end, OnigOptionType options)
3474
0
{
3475
0
  Node* node = node_new_str_with_options(s, end, options);
3476
0
  CHECK_NULL_RETURN(node);
3477
0
  ND_STRING_SET_CRUDE(node);
3478
0
  return node;
3479
0
}
3480
3481
/* Create a string node from a NULL/NULL byte range; NULL on failure. */
static Node*
node_new_empty(void)
{
  Node* node;

  node = node_new_str(NULL, NULL);
  return node;
}
3486
3487
static Node*
3488
node_new_str_crude_char(UChar c, OnigOptionType options)
3489
0
{
3490
0
  int i;
3491
0
  UChar p[1];
3492
0
  Node* node;
3493
3494
0
  p[0] = c;
3495
0
  node = node_new_str_crude(p, p + 1, options);
3496
0
  CHECK_NULL_RETURN(node);
3497
3498
  /* clear buf tail */
3499
0
  for (i = 1; i < ND_STRING_BUF_SIZE; i++)
3500
0
    STR_(node)->buf[i] = '\0';
3501
3502
0
  return node;
3503
0
}
3504
3505
static Node*
3506
str_node_split_last_char(Node* node, OnigEncoding enc)
3507
0
{
3508
0
  const UChar *p;
3509
0
  Node* rn;
3510
0
  StrNode* sn;
3511
3512
0
  sn = STR_(node);
3513
0
  rn = NULL_NODE;
3514
0
  if (sn->end > sn->s) {
3515
0
    p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
3516
0
    if (p && p > sn->s) { /* can be split. */
3517
0
      rn = node_new_str(p, sn->end);
3518
0
      CHECK_NULL_RETURN(rn);
3519
3520
0
      sn->end = (UChar* )p;
3521
0
      STR_(rn)->flag = sn->flag;
3522
0
      ND_STATUS(rn) = ND_STATUS(node);
3523
0
    }
3524
0
  }
3525
3526
0
  return rn;
3527
0
}
3528
3529
static int
3530
str_node_can_be_split(Node* node, OnigEncoding enc)
3531
0
{
3532
0
  StrNode* sn = STR_(node);
3533
0
  if (sn->end > sn->s) {
3534
0
    return ((enclen(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
3535
0
  }
3536
0
  return 0;
3537
0
}
3538
3539
static int
3540
scan_number(UChar** src, const UChar* end, OnigEncoding enc)
3541
0
{
3542
0
  int num, val;
3543
0
  OnigCodePoint c;
3544
0
  UChar* p;
3545
0
  PFETCH_READY;
3546
3547
0
  p = *src;
3548
0
  num = 0;
3549
0
  while (! PEND) {
3550
0
    PFETCH(c);
3551
0
    if (IS_CODE_DIGIT_ASCII(enc, c)) {
3552
0
      val = (int )DIGITVAL(c);
3553
0
      if ((ONIG_INT_MAX - val) / 10 < num)
3554
0
        return -1;  /* overflow */
3555
3556
0
      num = num * 10 + val;
3557
0
    }
3558
0
    else {
3559
0
      PUNFETCH;
3560
0
      break;
3561
0
    }
3562
0
  }
3563
0
  *src = p;
3564
0
  return num;
3565
0
}
3566
3567
static int
3568
scan_hexadecimal_number(UChar** src, UChar* end, int minlen, int maxlen,
3569
                        OnigEncoding enc, OnigCodePoint* rcode)
3570
0
{
3571
0
  OnigCodePoint code;
3572
0
  OnigCodePoint c;
3573
0
  unsigned int val;
3574
0
  int n;
3575
0
  UChar* p;
3576
0
  PFETCH_READY;
3577
3578
0
  p = *src;
3579
0
  code = 0;
3580
0
  n = 0;
3581
0
  while (! PEND && n < maxlen) {
3582
0
    PFETCH(c);
3583
0
    if (IS_CODE_XDIGIT_ASCII(enc, c)) {
3584
0
      n++;
3585
0
      val = (unsigned int )XDIGITVAL(enc, c);
3586
0
      if ((UINT_MAX - val) / 16UL < code)
3587
0
        return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3588
3589
0
      code = (code << 4) + val;
3590
0
    }
3591
0
    else {
3592
0
      PUNFETCH;
3593
0
      break;
3594
0
    }
3595
0
  }
3596
3597
0
  if (n < minlen)
3598
0
    return ONIGERR_INVALID_CODE_POINT_VALUE;
3599
3600
0
  *rcode = code;
3601
0
  *src = p;
3602
0
  return ONIG_NORMAL;
3603
0
}
3604
3605
static int
3606
scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen,
3607
                  OnigEncoding enc, OnigCodePoint* rcode)
3608
0
{
3609
0
  OnigCodePoint code;
3610
0
  OnigCodePoint c;
3611
0
  unsigned int val;
3612
0
  int n;
3613
0
  UChar* p;
3614
0
  PFETCH_READY;
3615
3616
0
  p = *src;
3617
0
  code = 0;
3618
0
  n = 0;
3619
0
  while (! PEND && n < maxlen) {
3620
0
    PFETCH(c);
3621
0
    if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
3622
0
      n++;
3623
0
      val = (unsigned int )ODIGITVAL(c);
3624
0
      if ((UINT_MAX - val) / 8UL < code)
3625
0
        return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3626
3627
0
      code = (code << 3) + val;
3628
0
    }
3629
0
    else {
3630
0
      PUNFETCH;
3631
0
      break;
3632
0
    }
3633
0
  }
3634
3635
0
  if (n < minlen)
3636
0
    return ONIGERR_INVALID_CODE_POINT_VALUE;
3637
3638
0
  *rcode = code;
3639
0
  *src = p;
3640
0
  return ONIG_NORMAL;
3641
0
}
3642
3643
static int
3644
scan_number_of_base(UChar** src, UChar* end, int minlen,
3645
                    OnigEncoding enc, OnigCodePoint* rcode, int base)
3646
0
{
3647
0
  int r;
3648
3649
0
  if (base == 16)
3650
0
    r = scan_hexadecimal_number(src, end, minlen, 8, enc, rcode);
3651
0
  else if (base == 8)
3652
0
    r = scan_octal_number(src, end, minlen, 11, enc, rcode);
3653
0
  else
3654
0
    r = ONIGERR_INVALID_CODE_POINT_VALUE;
3655
3656
0
  return r;
3657
0
}
3658
3659
0
/* True when c is a space or a newline; the code point sequence scanners
   below skip runs of these characters between numbers. */
#define IS_CODE_POINT_DIVIDE(c)  ((c) == ' ' || (c) == '\n')
3660
3661
/* Range-tracking state used by check_code_point_sequence_cc(). */
enum CPS_STATE {
3662
  CPS_EMPTY = 0,  /* set after the number that completes a range (follows '-') */
3663
  CPS_START = 1,  /* set after a number not preceded by '-'; only state in which '-' is accepted */
3664
  CPS_RANGE = 2   /* set after '-'; a closing '}' is rejected in this state */
3665
};
3666
3667
static int
3668
check_code_point_sequence_cc(UChar* p, UChar* end, int base,
3669
                             OnigEncoding enc, int state)
3670
0
{
3671
0
  int r;
3672
0
  int n;
3673
0
  int end_digit;
3674
0
  OnigCodePoint code;
3675
0
  OnigCodePoint c;
3676
0
  PFETCH_READY;
3677
3678
0
  end_digit = FALSE;
3679
0
  n = 0;
3680
0
  while (! PEND) {
3681
0
  start:
3682
0
    PFETCH(c);
3683
0
    if (c == '}') {
3684
0
    end_char:
3685
0
      if (state == CPS_RANGE) return ONIGERR_INVALID_CODE_POINT_VALUE;
3686
0
      return n;
3687
0
    }
3688
3689
0
    if (IS_CODE_POINT_DIVIDE(c)) {
3690
0
      while (! PEND) {
3691
0
        PFETCH(c);
3692
0
        if (! IS_CODE_POINT_DIVIDE(c)) break;
3693
0
      }
3694
0
      if (IS_CODE_POINT_DIVIDE(c))
3695
0
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3696
0
    }
3697
0
    else if (c == '-') {
3698
0
    range:
3699
0
      if (state != CPS_START) return ONIGERR_INVALID_CODE_POINT_VALUE;
3700
0
      if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
3701
0
      end_digit = FALSE;
3702
0
      state = CPS_RANGE;
3703
0
      goto start;
3704
0
    }
3705
0
    else if (end_digit == TRUE) {
3706
0
      if (base == 16) {
3707
0
        if (IS_CODE_XDIGIT_ASCII(enc, c))
3708
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3709
0
      }
3710
0
      else if (base == 8) {
3711
0
        if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3712
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3713
0
      }
3714
3715
0
      return ONIGERR_INVALID_CODE_POINT_VALUE;
3716
0
    }
3717
3718
0
    if (c == '}') goto end_char;
3719
0
    if (c == '-') goto range;
3720
3721
0
    PUNFETCH;
3722
0
    r = scan_number_of_base(&p, end, 1, enc, &code, base);
3723
0
    if (r != 0) return r;
3724
0
    n++;
3725
0
    end_digit = TRUE;
3726
0
    state = (state == CPS_RANGE) ? CPS_EMPTY : CPS_START;
3727
0
  }
3728
3729
0
  return ONIGERR_INVALID_CODE_POINT_VALUE;
3730
0
}
3731
3732
static int
3733
check_code_point_sequence(UChar* p, UChar* end, int base, OnigEncoding enc)
3734
0
{
3735
0
  int r;
3736
0
  int n;
3737
0
  int end_digit;
3738
0
  OnigCodePoint code;
3739
0
  OnigCodePoint c;
3740
0
  PFETCH_READY;
3741
3742
0
  end_digit = FALSE;
3743
0
  n = 0;
3744
0
  while (! PEND) {
3745
0
    PFETCH(c);
3746
0
    if (c == '}') {
3747
0
    end_char:
3748
0
      return n;
3749
0
    }
3750
3751
0
    if (IS_CODE_POINT_DIVIDE(c)) {
3752
0
      while (! PEND) {
3753
0
        PFETCH(c);
3754
0
        if (! IS_CODE_POINT_DIVIDE(c)) break;
3755
0
      }
3756
0
      if (IS_CODE_POINT_DIVIDE(c))
3757
0
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3758
0
    }
3759
0
    else if (end_digit == TRUE) {
3760
0
      if (base == 16) {
3761
0
        if (IS_CODE_XDIGIT_ASCII(enc, c))
3762
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3763
0
      }
3764
0
      else if (base == 8) {
3765
0
        if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3766
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3767
0
      }
3768
3769
0
      return ONIGERR_INVALID_CODE_POINT_VALUE;
3770
0
    }
3771
3772
0
    if (c == '}') goto end_char;
3773
3774
0
    PUNFETCH;
3775
0
    r = scan_number_of_base(&p, end, 1, enc, &code, base);
3776
0
    if (r != 0) return r;
3777
0
    n++;
3778
0
    end_digit = TRUE;
3779
0
  }
3780
3781
0
  return ONIGERR_INVALID_CODE_POINT_VALUE;
3782
0
}
3783
3784
static int
3785
get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_cc, OnigCodePoint* rcode)
3786
0
{
3787
0
  int r;
3788
0
  OnigCodePoint c;
3789
0
  UChar* p;
3790
0
  PFETCH_READY;
3791
3792
0
  p = *src;
3793
0
  while (! PEND) {
3794
0
    PFETCH(c);
3795
0
    if (! IS_CODE_POINT_DIVIDE(c)) {
3796
0
      if (c == '}') {
3797
0
        *src = p;
3798
0
        return 1; /* end of sequence */
3799
0
      }
3800
0
      else if (c == '-' && in_cc == TRUE) {
3801
0
        *src = p;
3802
0
        return 2; /* range */
3803
0
      }
3804
0
      PUNFETCH;
3805
0
      break;
3806
0
    }
3807
0
    else {
3808
0
      if (PEND)
3809
0
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3810
0
    }
3811
0
  }
3812
3813
0
  r = scan_number_of_base(&p, end, 1, enc, rcode, base);
3814
0
  if (r != 0) return r;
3815
3816
0
  *src = p;
3817
0
  return ONIG_NORMAL;
3818
0
}
3819
3820
3821
/* Write SIZE_CODE_POINT bytes taken from the lvalue `code` into bbuf at
   byte offset pos, via BB_WRITE.  `code` must be addressable. */
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
3822
0
    BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
3823
3824
/* data format:
3825
     [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
3826
     (every field has the size of an OnigCodePoint)
3827
 */
3828
static int
3829
new_code_range(BBuf** pbuf)
3830
0
{
3831
0
#define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
3832
0
  int r;
3833
0
  OnigCodePoint n;
3834
0
  BBuf* bbuf;
3835
3836
0
  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
3837
0
  CHECK_NULL_RETURN_MEMERR(bbuf);
3838
0
  r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
3839
0
  if (r != 0) {
3840
0
    xfree(bbuf);
3841
0
    *pbuf = 0;
3842
0
    return r;
3843
0
  }
3844
3845
0
  n = 0;
3846
0
  BB_WRITE_CODE_POINT(bbuf, 0, n);
3847
0
  return 0;
3848
0
}
3849
3850
static int
3851
add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
3852
0
{
3853
0
  int r, inc_n, pos;
3854
0
  int low, high, bound, x;
3855
0
  OnigCodePoint n, *data;
3856
0
  BBuf* bbuf;
3857
3858
0
  if (from > to) {
3859
0
    n = from; from = to; to = n;
3860
0
  }
3861
3862
0
  if (IS_NULL(*pbuf)) {
3863
0
    r = new_code_range(pbuf);
3864
0
    if (r != 0) return r;
3865
0
    bbuf = *pbuf;
3866
0
    n = 0;
3867
0
  }
3868
0
  else {
3869
0
    bbuf = *pbuf;
3870
0
    GET_CODE_POINT(n, bbuf->p);
3871
0
  }
3872
0
  data = (OnigCodePoint* )(bbuf->p);
3873
0
  data++;
3874
3875
0
  for (low = 0, bound = n; low < bound; ) {
3876
0
    x = (low + bound) >> 1;
3877
0
    if (from > data[x*2 + 1])
3878
0
      low = x + 1;
3879
0
    else
3880
0
      bound = x;
3881
0
  }
3882
3883
0
  high = (to == ~((OnigCodePoint )0)) ? n : low;
3884
0
  for (bound = n; high < bound; ) {
3885
0
    x = (high + bound) >> 1;
3886
0
    if (to + 1 >= data[x*2])
3887
0
      high = x + 1;
3888
0
    else
3889
0
      bound = x;
3890
0
  }
3891
3892
0
  inc_n = low + 1 - high;
3893
0
  if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
3894
0
    return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
3895
3896
0
  if (inc_n != 1) {
3897
0
    if (from > data[low*2])
3898
0
      from = data[low*2];
3899
0
    if (to < data[(high - 1)*2 + 1])
3900
0
      to = data[(high - 1)*2 + 1];
3901
0
  }
3902
3903
0
  if (inc_n != 0 && (OnigCodePoint )high < n) {
3904
0
    int from_pos = SIZE_CODE_POINT * (1 + high * 2);
3905
0
    int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
3906
0
    int size = (n - high) * 2 * SIZE_CODE_POINT;
3907
3908
0
    if (inc_n > 0) {
3909
0
      BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
3910
0
    }
3911
0
    else {
3912
0
      BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
3913
0
    }
3914
0
  }
3915
3916
0
  pos = SIZE_CODE_POINT * (1 + low * 2);
3917
0
  BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
3918
0
  BB_WRITE_CODE_POINT(bbuf, pos, from);
3919
0
  BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
3920
0
  n += inc_n;
3921
0
  BB_WRITE_CODE_POINT(bbuf, 0, n);
3922
3923
0
  return 0;
3924
0
}
3925
3926
static int
3927
add_code_range(BBuf** pbuf, ParseEnv* env, OnigCodePoint from, OnigCodePoint to)
3928
0
{
3929
0
  if (from > to) {
3930
0
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
3931
0
      return 0;
3932
0
    else
3933
0
      return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
3934
0
  }
3935
3936
0
  return add_code_range_to_buf(pbuf, from, to);
3937
0
}
3938
3939
static int
3940
not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
3941
0
{
3942
0
  int r, i, n;
3943
0
  OnigCodePoint pre, from, *data, to = 0;
3944
3945
0
  *pbuf = (BBuf* )NULL;
3946
0
  if (IS_NULL(bbuf)) {
3947
0
  set_all:
3948
0
    return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3949
0
  }
3950
3951
0
  data = (OnigCodePoint* )(bbuf->p);
3952
0
  GET_CODE_POINT(n, data);
3953
0
  data++;
3954
0
  if (n <= 0) goto set_all;
3955
3956
0
  r = 0;
3957
0
  pre = MBCODE_START_POS(enc);
3958
0
  for (i = 0; i < n; i++) {
3959
0
    from = data[i*2];
3960
0
    to   = data[i*2+1];
3961
0
    if (pre <= from - 1) {
3962
0
      r = add_code_range_to_buf(pbuf, pre, from - 1);
3963
0
      if (r != 0) {
3964
0
        bbuf_free(*pbuf);
3965
0
        return r;
3966
0
      }
3967
0
    }
3968
0
    if (to == ~((OnigCodePoint )0)) break;
3969
0
    pre = to + 1;
3970
0
  }
3971
0
  if (to < ~((OnigCodePoint )0)) {
3972
0
    r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
3973
0
    if (r != 0) bbuf_free(*pbuf);
3974
0
  }
3975
0
  return r;
3976
0
}
3977
3978
0
/* Exchange (bbuf1, not1) with (bbuf2, not2).  All four arguments must be
   assignable lvalues; each is evaluated more than once. */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
3979
0
  BBuf *tbuf; \
3980
0
  int  tnot; \
3981
0
  tnot = not1;  not1  = not2;  not2  = tnot; \
3982
0
  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
3983
0
} while (0)
3984
3985
static int
3986
or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
3987
                  BBuf* bbuf2, int not2, BBuf** pbuf)
3988
0
{
3989
0
  int r;
3990
0
  OnigCodePoint i, n1, *data1;
3991
0
  OnigCodePoint from, to;
3992
3993
0
  *pbuf = (BBuf* )NULL;
3994
0
  if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
3995
0
    if (not1 != 0 || not2 != 0)
3996
0
      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3997
0
    return 0;
3998
0
  }
3999
4000
0
  r = 0;
4001
0
  if (IS_NULL(bbuf2))
4002
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4003
4004
0
  if (IS_NULL(bbuf1)) {
4005
0
    if (not1 != 0) {
4006
0
      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
4007
0
    }
4008
0
    else {
4009
0
      if (not2 == 0) {
4010
0
        return bbuf_clone(pbuf, bbuf2);
4011
0
      }
4012
0
      else {
4013
0
        return not_code_range_buf(enc, bbuf2, pbuf);
4014
0
      }
4015
0
    }
4016
0
  }
4017
4018
0
  if (not1 != 0)
4019
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4020
4021
0
  data1 = (OnigCodePoint* )(bbuf1->p);
4022
0
  GET_CODE_POINT(n1, data1);
4023
0
  data1++;
4024
4025
0
  if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
4026
0
    r = bbuf_clone(pbuf, bbuf2);
4027
0
  }
4028
0
  else if (not1 == 0) { /* 1 OR (not 2) */
4029
0
    r = not_code_range_buf(enc, bbuf2, pbuf);
4030
0
  }
4031
0
  if (r != 0) return r;
4032
4033
0
  for (i = 0; i < n1; i++) {
4034
0
    from = data1[i*2];
4035
0
    to   = data1[i*2+1];
4036
0
    r = add_code_range_to_buf(pbuf, from, to);
4037
0
    if (r != 0) return r;
4038
0
  }
4039
0
  return 0;
4040
0
}
4041
4042
static int
4043
and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
4044
                OnigCodePoint* data, int n)
4045
0
{
4046
0
  int i, r;
4047
0
  OnigCodePoint from2, to2;
4048
4049
0
  for (i = 0; i < n; i++) {
4050
0
    from2 = data[i*2];
4051
0
    to2   = data[i*2+1];
4052
0
    if (from2 < from1) {
4053
0
      if (to2 < from1) continue;
4054
0
      else {
4055
0
        from1 = to2 + 1;
4056
0
      }
4057
0
    }
4058
0
    else if (from2 <= to1) {
4059
0
      if (to2 < to1) {
4060
0
        if (from1 <= from2 - 1) {
4061
0
          r = add_code_range_to_buf(pbuf, from1, from2-1);
4062
0
          if (r != 0) return r;
4063
0
        }
4064
0
        from1 = to2 + 1;
4065
0
      }
4066
0
      else {
4067
0
        to1 = from2 - 1;
4068
0
      }
4069
0
    }
4070
0
    else {
4071
0
      from1 = from2;
4072
0
    }
4073
0
    if (from1 > to1) break;
4074
0
  }
4075
0
  if (from1 <= to1) {
4076
0
    r = add_code_range_to_buf(pbuf, from1, to1);
4077
0
    if (r != 0) return r;
4078
0
  }
4079
0
  return 0;
4080
0
}
4081
4082
static int
4083
and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
4084
0
{
4085
0
  int r;
4086
0
  OnigCodePoint i, j, n1, n2, *data1, *data2;
4087
0
  OnigCodePoint from, to, from1, to1, from2, to2;
4088
4089
0
  *pbuf = (BBuf* )NULL;
4090
0
  if (IS_NULL(bbuf1)) {
4091
0
    if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
4092
0
      return bbuf_clone(pbuf, bbuf2);
4093
0
    return 0;
4094
0
  }
4095
0
  else if (IS_NULL(bbuf2)) {
4096
0
    if (not2 != 0)
4097
0
      return bbuf_clone(pbuf, bbuf1);
4098
0
    return 0;
4099
0
  }
4100
4101
0
  if (not1 != 0)
4102
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4103
4104
0
  data1 = (OnigCodePoint* )(bbuf1->p);
4105
0
  data2 = (OnigCodePoint* )(bbuf2->p);
4106
0
  GET_CODE_POINT(n1, data1);
4107
0
  GET_CODE_POINT(n2, data2);
4108
0
  data1++;
4109
0
  data2++;
4110
4111
0
  if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
4112
0
    for (i = 0; i < n1; i++) {
4113
0
      from1 = data1[i*2];
4114
0
      to1   = data1[i*2+1];
4115
0
      for (j = 0; j < n2; j++) {
4116
0
        from2 = data2[j*2];
4117
0
        to2   = data2[j*2+1];
4118
0
        if (from2 > to1) break;
4119
0
        if (to2 < from1) continue;
4120
0
        from = MAX(from1, from2);
4121
0
        to   = MIN(to1, to2);
4122
0
        r = add_code_range_to_buf(pbuf, from, to);
4123
0
        if (r != 0) return r;
4124
0
      }
4125
0
    }
4126
0
  }
4127
0
  else if (not1 == 0) { /* 1 AND (not 2) */
4128
0
    for (i = 0; i < n1; i++) {
4129
0
      from1 = data1[i*2];
4130
0
      to1   = data1[i*2+1];
4131
0
      r = and_code_range1(pbuf, from1, to1, data2, n2);
4132
0
      if (r != 0) return r;
4133
0
    }
4134
0
  }
4135
4136
0
  return 0;
4137
0
}
4138
4139
static int
4140
and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4141
0
{
4142
0
  int r, not1, not2;
4143
0
  BBuf *buf1, *buf2, *pbuf;
4144
0
  BitSetRef bsr1, bsr2;
4145
0
  BitSet bs1, bs2;
4146
4147
0
  not1 = IS_NCCLASS_NOT(dest);
4148
0
  bsr1 = dest->bs;
4149
0
  buf1 = dest->mbuf;
4150
0
  not2 = IS_NCCLASS_NOT(cc);
4151
0
  bsr2 = cc->bs;
4152
0
  buf2 = cc->mbuf;
4153
4154
0
  if (not1 != 0) {
4155
0
    bitset_invert_to(bsr1, bs1);
4156
0
    bsr1 = bs1;
4157
0
  }
4158
0
  if (not2 != 0) {
4159
0
    bitset_invert_to(bsr2, bs2);
4160
0
    bsr2 = bs2;
4161
0
  }
4162
0
  bitset_and(bsr1, bsr2);
4163
0
  if (bsr1 != dest->bs) {
4164
0
    bitset_copy(dest->bs, bsr1);
4165
0
  }
4166
0
  if (not1 != 0) {
4167
0
    bitset_invert(dest->bs);
4168
0
  }
4169
4170
0
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4171
0
    if (not1 != 0 && not2 != 0) {
4172
0
      r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
4173
0
    }
4174
0
    else {
4175
0
      r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
4176
0
      if (r == 0 && not1 != 0) {
4177
0
        BBuf *tbuf;
4178
0
        r = not_code_range_buf(enc, pbuf, &tbuf);
4179
0
        if (r != 0) {
4180
0
          bbuf_free(pbuf);
4181
0
          return r;
4182
0
        }
4183
0
        bbuf_free(pbuf);
4184
0
        pbuf = tbuf;
4185
0
      }
4186
0
    }
4187
0
    if (r != 0) return r;
4188
4189
0
    dest->mbuf = pbuf;
4190
0
    bbuf_free(buf1);
4191
0
    return r;
4192
0
  }
4193
0
  return 0;
4194
0
}
4195
4196
static int
4197
or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4198
0
{
4199
0
  int r, not1, not2;
4200
0
  BBuf *buf1, *buf2, *pbuf;
4201
0
  BitSetRef bsr1, bsr2;
4202
0
  BitSet bs1, bs2;
4203
4204
0
  not1 = IS_NCCLASS_NOT(dest);
4205
0
  bsr1 = dest->bs;
4206
0
  buf1 = dest->mbuf;
4207
0
  not2 = IS_NCCLASS_NOT(cc);
4208
0
  bsr2 = cc->bs;
4209
0
  buf2 = cc->mbuf;
4210
4211
0
  if (not1 != 0) {
4212
0
    bitset_invert_to(bsr1, bs1);
4213
0
    bsr1 = bs1;
4214
0
  }
4215
0
  if (not2 != 0) {
4216
0
    bitset_invert_to(bsr2, bs2);
4217
0
    bsr2 = bs2;
4218
0
  }
4219
0
  bitset_or(bsr1, bsr2);
4220
0
  if (bsr1 != dest->bs) {
4221
0
    bitset_copy(dest->bs, bsr1);
4222
0
  }
4223
0
  if (not1 != 0) {
4224
0
    bitset_invert(dest->bs);
4225
0
  }
4226
4227
0
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4228
0
    if (not1 != 0 && not2 != 0) {
4229
0
      r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
4230
0
    }
4231
0
    else {
4232
0
      r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
4233
0
      if (r == 0 && not1 != 0) {
4234
0
        BBuf *tbuf;
4235
0
        r = not_code_range_buf(enc, pbuf, &tbuf);
4236
0
        if (r != 0) {
4237
0
          bbuf_free(pbuf);
4238
0
          return r;
4239
0
        }
4240
0
        bbuf_free(pbuf);
4241
0
        pbuf = tbuf;
4242
0
      }
4243
0
    }
4244
0
    if (r != 0) return r;
4245
4246
0
    dest->mbuf = pbuf;
4247
0
    bbuf_free(buf1);
4248
0
    return r;
4249
0
  }
4250
0
  else
4251
0
    return 0;
4252
0
}
4253
4254
static OnigCodePoint
4255
conv_backslash_value(OnigCodePoint c, ParseEnv* env)
4256
0
{
4257
0
  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
4258
0
    switch (c) {
4259
0
    case 'n': return '\n';
4260
0
    case 't': return '\t';
4261
0
    case 'r': return '\r';
4262
0
    case 'f': return '\f';
4263
0
    case 'a': return '\007';
4264
0
    case 'b': return '\010';
4265
0
    case 'e': return '\033';
4266
0
    case 'v':
4267
0
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
4268
0
        return '\v';
4269
0
      break;
4270
4271
0
    default:
4272
0
      break;
4273
0
    }
4274
0
  }
4275
0
  return c;
4276
0
}
4277
4278
static int
4279
is_invalid_quantifier_target(Node* node)
4280
0
{
4281
0
  switch (ND_TYPE(node)) {
4282
0
  case ND_ANCHOR:
4283
0
  case ND_GIMMICK:
4284
0
    return 1;
4285
0
    break;
4286
4287
0
  case ND_BAG:
4288
    /* allow enclosed elements */
4289
    /* return is_invalid_quantifier_target(ND_BODY(node)); */
4290
0
    break;
4291
4292
0
  case ND_LIST:
4293
0
    do {
4294
0
      if (! is_invalid_quantifier_target(ND_CAR(node))) return 0;
4295
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4296
0
    return 0;
4297
0
    break;
4298
4299
0
  case ND_ALT:
4300
0
    do {
4301
0
      if (is_invalid_quantifier_target(ND_CAR(node))) return 1;
4302
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4303
0
    break;
4304
4305
0
  default:
4306
0
    break;
4307
0
  }
4308
0
  return 0;
4309
0
}
4310
4311
/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
4312
static int
4313
quantifier_type_num(QuantNode* q)
4314
0
{
4315
0
  if (q->greedy) {
4316
0
    if (q->lower == 0) {
4317
0
      if (q->upper == 1) return 0;
4318
0
      else if (IS_INFINITE_REPEAT(q->upper)) return 1;
4319
0
    }
4320
0
    else if (q->lower == 1) {
4321
0
      if (IS_INFINITE_REPEAT(q->upper)) return 2;
4322
0
    }
4323
0
  }
4324
0
  else {
4325
0
    if (q->lower == 0) {
4326
0
      if (q->upper == 1) return 3;
4327
0
      else if (IS_INFINITE_REPEAT(q->upper)) return 4;
4328
0
    }
4329
0
    else if (q->lower == 1) {
4330
0
      if (IS_INFINITE_REPEAT(q->upper)) return 5;
4331
0
    }
4332
0
  }
4333
0
  return -1;
4334
0
}
4335
4336
4337
/* Actions that onig_reduce_nested_quantifier() can take for a quantifier
   applied directly to another quantifier (entries of ReduceTypeTable). */
enum ReduceType {
4338
  RQ_ASIS = 0, /* as is */
4339
  RQ_DEL,      /* delete parent */
4340
  RQ_A,        /* to '*'    */
4341
  RQ_P,        /* to '+'    */
4342
  RQ_AQ,       /* to '*?'   */
4343
  RQ_QQ,       /* to '??'   */
4344
  RQ_P_QQ,     /* to '+)??' */
4345
};
4346
4347
/* Reduction to apply to a pair of directly nested quantifiers.
   Indexed as [child type][parent type], both being values returned by
   quantifier_type_num(); the trailing comment of each row names the
   child quantifier. */
static enum ReduceType ReduceTypeTable[6][6] = {
4348
  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
4349
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
4350
  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
4351
  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
4352
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
4353
  {RQ_ASIS, RQ_A,    RQ_P,   RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
4354
};
4355
4356
extern int
4357
onig_reduce_nested_quantifier(Node* pnode)
4358
0
{
4359
0
  int pnum, cnum;
4360
0
  QuantNode *p, *c;
4361
0
  Node* cnode;
4362
4363
0
  cnode = ND_BODY(pnode);
4364
4365
0
  p = QUANT_(pnode);
4366
0
  c = QUANT_(cnode);
4367
0
  pnum = quantifier_type_num(p);
4368
0
  cnum = quantifier_type_num(c);
4369
0
  if (pnum < 0 || cnum < 0) {
4370
0
    if (p->lower == p->upper && c->lower == c->upper) {
4371
0
      int n = onig_positive_int_multiply(p->lower, c->lower);
4372
0
      if (n < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4373
4374
0
      p->lower = p->upper = n;
4375
0
      ND_BODY(pnode) = ND_BODY(cnode);
4376
0
      goto remove_cnode;
4377
0
    }
4378
4379
0
    return 0;
4380
0
  }
4381
4382
0
  switch(ReduceTypeTable[cnum][pnum]) {
4383
0
  case RQ_DEL:
4384
0
    *pnode = *cnode;
4385
0
    goto remove_cnode;
4386
0
    break;
4387
0
  case RQ_A:
4388
0
    ND_BODY(pnode) = ND_BODY(cnode);
4389
0
    p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 1;
4390
0
    goto remove_cnode;
4391
0
    break;
4392
0
  case RQ_P:
4393
0
    ND_BODY(pnode) = ND_BODY(cnode);
4394
0
    p->lower  = 1;  p->upper = INFINITE_REPEAT;  p->greedy = 1;
4395
0
    goto remove_cnode;
4396
0
    break;
4397
0
  case RQ_AQ:
4398
0
    ND_BODY(pnode) = ND_BODY(cnode);
4399
0
    p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 0;
4400
0
    goto remove_cnode;
4401
0
    break;
4402
0
  case RQ_QQ:
4403
0
    ND_BODY(pnode) = ND_BODY(cnode);
4404
0
    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
4405
0
    goto remove_cnode;
4406
0
    break;
4407
0
  case RQ_P_QQ:
4408
0
    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
4409
0
    c->lower  = 1;  c->upper = INFINITE_REPEAT;  c->greedy = 1;
4410
0
    break;
4411
0
  case RQ_ASIS:
4412
0
    break;
4413
0
  }
4414
4415
0
  return 0;
4416
4417
0
 remove_cnode:
4418
0
  ND_BODY(cnode) = NULL_NODE;
4419
0
  onig_node_free(cnode);
4420
0
  return 0;
4421
0
}
4422
4423
static int
4424
node_new_general_newline(Node** node, ParseEnv* env)
4425
0
{
4426
0
  int r;
4427
0
  int dlen, alen;
4428
0
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
4429
0
  Node* crnl;
4430
0
  Node* ncc;
4431
0
  Node* x;
4432
0
  CClassNode* cc;
4433
4434
0
  dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
4435
0
  if (dlen < 0) return dlen;
4436
0
  alen = ONIGENC_CODE_TO_MBC(env->enc, NEWLINE_CODE, buf + dlen);
4437
0
  if (alen < 0) return alen;
4438
4439
0
  crnl = node_new_str_crude(buf, buf + dlen + alen, ONIG_OPTION_NONE);
4440
0
  CHECK_NULL_RETURN_MEMERR(crnl);
4441
4442
0
  ncc = node_new_cclass();
4443
0
  if (IS_NULL(ncc)) goto err2;
4444
4445
0
  cc = CCLASS_(ncc);
4446
0
  if (dlen == 1) {
4447
0
    bitset_set_range(cc->bs, NEWLINE_CODE, 0x0d);
4448
0
  }
4449
0
  else {
4450
0
    r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, 0x0d);
4451
0
    if (r != 0) {
4452
0
    err1:
4453
0
      onig_node_free(ncc);
4454
0
    err2:
4455
0
      onig_node_free(crnl);
4456
0
      return ONIGERR_MEMORY;
4457
0
    }
4458
0
  }
4459
4460
0
  if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {
4461
0
    r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
4462
0
    if (r != 0) goto err1;
4463
0
    r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
4464
0
    if (r != 0) goto err1;
4465
0
  }
4466
4467
0
  x = node_new_bag_if_else(crnl, NULL_NODE, ncc);
4468
0
  if (IS_NULL(x)) goto err1;
4469
4470
0
  *node = x;
4471
0
  return 0;
4472
0
}
4473
4474
/* Token kinds; a PToken stores one of these in its `type` member.
   The entries after the "in cc" marker are grouped by the original
   author as character-class tokens. */
enum TokenSyms {
4475
  TK_EOT      = 0,   /* end of token */
4476
  TK_CRUDE_BYTE,
4477
  TK_CHAR,
4478
  TK_STRING,
4479
  TK_CODE_POINT,
4480
  TK_ANYCHAR,
4481
  TK_CHAR_TYPE,
4482
  TK_BACKREF,
4483
  TK_CALL,
4484
  TK_ANCHOR,
4485
  TK_REPEAT,
4486
  TK_INTERVAL,
4487
  TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */
4488
  TK_ALT,
4489
  TK_SUBEXP_OPEN,
4490
  TK_SUBEXP_CLOSE,
4491
  TK_OPEN_CC,
4492
  TK_QUOTE_OPEN,
4493
  TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */
4494
  TK_KEEP,             /* \K */
4495
  TK_GENERAL_NEWLINE,  /* \R */
4496
  TK_NO_NEWLINE,       /* \N */
4497
  TK_TRUE_ANYCHAR,     /* \O */
4498
  TK_TEXT_SEGMENT,     /* \X */
4499
4500
  /* in cc */
4501
  TK_CC_CLOSE,
4502
  TK_CC_RANGE,
4503
  TK_CC_POSIX_BRACKET_OPEN,
4504
  TK_CC_AND,           /* && */
4505
  TK_CC_OPEN_CC        /* [ */
4506
};
4507
4508
/* A parsed token: its kind (`type`), a few scalar attributes, and a
   union `u` holding the kind-specific payload.
   NOTE(review): which member of `u` is valid for which TokenSyms value is
   not visible in this part of the file -- confirm against the code that
   fills in tokens. */
typedef struct {
4509
  enum TokenSyms type;
4510
  int code_point_continue;  /* reset to 0 by ptoken_init() */
4511
  int escaped;
4512
  int base_num;   /* is number: 8, 16 (used in [....]) */
4513
  UChar* backp;
4514
  union {
4515
    UChar* s;
4516
    UChar byte;
4517
    OnigCodePoint code;
4518
    int   anchor;
4519
    int   subtype;
4520
    struct {
4521
      int lower;
4522
      int upper;
4523
      int greedy;
4524
      int possessive;
4525
    } repeat;
4526
    struct {
4527
      int  num;
4528
      int  ref1;
4529
      int* refs;
4530
      int  by_name;
4531
#ifdef USE_BACKREF_WITH_LEVEL
4532
      int  exist_level;
4533
      int  level;   /* \k<name+n> */
4534
#endif
4535
    } backref;
4536
    struct {
4537
      UChar* name;
4538
      UChar* name_end;
4539
      int    gnum;
4540
      int    by_number;
4541
    } call;
4542
    struct {
4543
      int ctype;
4544
      int not;
4545
    } prop;
4546
  } u;
4547
} PToken;
4548
4549
static void
4550
ptoken_init(PToken* tok)
4551
0
{
4552
0
  tok->code_point_continue = 0;
4553
0
}
4554
4555
static int
4556
fetch_interval(UChar** src, UChar* end, PToken* tok, ParseEnv* env)
4557
0
{
4558
0
  int low, up, syn_allow, non_low;
4559
0
  int r;
4560
0
  OnigCodePoint c;
4561
0
  OnigEncoding enc;
4562
0
  UChar* p;
4563
0
  PFETCH_READY;
4564
4565
0
  p = *src;
4566
0
  r = 0;
4567
0
  non_low = 0;
4568
0
  enc = env->enc;
4569
0
  syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
4570
4571
0
  if (PEND) {
4572
0
    if (syn_allow)
4573
0
      return 1;  /* "....{" : OK! */
4574
0
    else
4575
0
      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
4576
0
  }
4577
4578
0
  if (! syn_allow) {
4579
0
    c = PPEEK;
4580
0
    if (c == ')' || c == '(' || c == '|') {
4581
0
      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
4582
0
    }
4583
0
  }
4584
4585
0
  low = scan_number(&p, end, env->enc);
4586
0
  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4587
0
  if (low > ONIG_MAX_REPEAT_NUM)
4588
0
    return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4589
4590
0
  if (p == *src) { /* can't read low */
4591
0
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
4592
      /* allow {,n} as {0,n} */
4593
0
      low = 0;
4594
0
      non_low = 1;
4595
0
    }
4596
0
    else
4597
0
      goto invalid;
4598
0
  }
4599
4600
0
  if (PEND) goto invalid;
4601
0
  PFETCH(c);
4602
0
  if (c == ',') {
4603
0
    UChar* prev = p;
4604
0
    up = scan_number(&p, end, env->enc);
4605
0
    if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4606
0
    if (up > ONIG_MAX_REPEAT_NUM)
4607
0
      return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4608
4609
0
    if (p == prev) {
4610
0
      if (non_low != 0)
4611
0
        goto invalid;
4612
0
      up = INFINITE_REPEAT;  /* {n,} : {n,infinite} */
4613
0
    }
4614
0
  }
4615
0
  else {
4616
0
    if (non_low != 0)
4617
0
      goto invalid;
4618
4619
0
    PUNFETCH;
4620
0
    up = low;  /* {n} : exact n times */
4621
0
    r = 2;     /* fixed */
4622
0
  }
4623
4624
0
  if (PEND) goto invalid;
4625
0
  PFETCH(c);
4626
0
  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
4627
0
    if (c != MC_ESC(env->syntax) || PEND) goto invalid;
4628
0
    PFETCH(c);
4629
0
  }
4630
0
  if (c != '}') goto invalid;
4631
4632
0
  if (!IS_INFINITE_REPEAT(up) && low > up) {
4633
    /* {n,m}+ supported case */
4634
0
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
4635
0
      return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
4636
4637
0
    tok->u.repeat.possessive = 1;
4638
0
    {
4639
0
      int tmp;
4640
0
      tmp = low; low = up; up = tmp;
4641
0
    }
4642
0
  }
4643
0
  else
4644
0
    tok->u.repeat.possessive = 0;
4645
4646
0
  tok->type = TK_INTERVAL;
4647
0
  tok->u.repeat.lower = low;
4648
0
  tok->u.repeat.upper = up;
4649
0
  *src = p;
4650
0
  return r; /* 0: normal {n,m}, 2: fixed {n} */
4651
4652
0
 invalid:
4653
0
  if (syn_allow) {
4654
    /* *src = p; */ /* !!! Don't do this line !!! */
4655
0
    return 1;  /* OK */
4656
0
  }
4657
0
  else
4658
0
    return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
4659
0
}
4660
4661
/* \M-, \C-, \c, or \... */
4662
static int
4663
fetch_escaped_value_raw(UChar** src, UChar* end, ParseEnv* env,
4664
                        OnigCodePoint* val)
4665
0
{
4666
0
  int v;
4667
0
  OnigCodePoint c;
4668
0
  OnigEncoding enc = env->enc;
4669
0
  UChar* p = *src;
4670
4671
0
  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4672
4673
0
  PFETCH_S(c);
4674
0
  switch (c) {
4675
0
  case 'M':
4676
0
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
4677
0
      if (PEND) return ONIGERR_END_PATTERN_AT_META;
4678
0
      PFETCH_S(c);
4679
0
      if (c != '-') return ONIGERR_META_CODE_SYNTAX;
4680
0
      if (PEND) return ONIGERR_END_PATTERN_AT_META;
4681
0
      PFETCH_S(c);
4682
0
      if (c == MC_ESC(env->syntax)) {
4683
0
        v = fetch_escaped_value_raw(&p, end, env, &c);
4684
0
        if (v < 0) return v;
4685
0
      }
4686
0
      c = ((c & 0xff) | 0x80);
4687
0
    }
4688
0
    else
4689
0
      goto backslash;
4690
0
    break;
4691
4692
0
  case 'C':
4693
0
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
4694
0
      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4695
0
      PFETCH_S(c);
4696
0
      if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
4697
0
      goto control;
4698
0
    }
4699
0
    else
4700
0
      goto backslash;
4701
4702
0
  case 'c':
4703
0
    if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
4704
0
    control:
4705
0
      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4706
0
      PFETCH_S(c);
4707
0
      if (c == '?') {
4708
0
        c = 0177;
4709
0
      }
4710
0
      else {
4711
0
        if (c == MC_ESC(env->syntax)) {
4712
0
          v = fetch_escaped_value_raw(&p, end, env, &c);
4713
0
          if (v < 0) return v;
4714
0
        }
4715
0
        c &= 0x9f;
4716
0
      }
4717
0
      break;
4718
0
    }
4719
    /* fall through */
4720
4721
0
  default:
4722
0
    {
4723
0
    backslash:
4724
0
      c = conv_backslash_value(c, env);
4725
0
    }
4726
0
    break;
4727
0
  }
4728
4729
0
  *src = p;
4730
0
  *val = c;
4731
0
  return 0;
4732
0
}
4733
4734
static int
4735
fetch_escaped_value(UChar** src, UChar* end, ParseEnv* env, OnigCodePoint* val)
4736
0
{
4737
0
  int r;
4738
0
  int len;
4739
4740
0
  r = fetch_escaped_value_raw(src, end, env, val);
4741
0
  if (r != 0) return r;
4742
4743
0
  len = ONIGENC_CODE_TO_MBCLEN(env->enc, *val);
4744
0
  if (len < 0) return len;
4745
4746
0
  return 0;
4747
0
}
4748
4749
/* Forward declaration of the tokenizer entry point (file-local). */
static int fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env);
4750
4751
static OnigCodePoint
4752
get_name_end_code_point(OnigCodePoint start)
4753
0
{
4754
0
  switch (start) {
4755
0
  case '<':  return (OnigCodePoint )'>';  break;
4756
0
  case '\'': return (OnigCodePoint )'\''; break;
4757
0
  case '(':  return (OnigCodePoint )')';  break;
4758
0
  default:
4759
0
    break;
4760
0
  }
4761
4762
0
  return (OnigCodePoint )0;
4763
0
}
4764
4765
/* How a group reference is written. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* not numeric (does not start with a digit or sign) */
  IS_ABS_NUM = 1,  /* starts with a digit: absolute number */
  IS_REL_NUM = 2   /* starts with '+' or '-': relative number */
};
4770
4771
#ifdef USE_BACKREF_WITH_LEVEL
4772
/*
4773
   \k<name+n>, \k<name-n>
4774
   \k<num+n>,  \k<num-n>
4775
   \k<-num+n>, \k<-num-n>
4776
   \k<+num+n>, \k<+num-n>
4777
*/
4778
static int
4779
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
4780
                      UChar** rname_end, ParseEnv* env,
4781
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
4782
0
{
4783
0
  int r, sign, exist_level;
4784
0
  int digit_count;
4785
0
  OnigCodePoint end_code;
4786
0
  OnigCodePoint c;
4787
0
  OnigEncoding enc;
4788
0
  UChar *name_end;
4789
0
  UChar *pnum_head;
4790
0
  UChar *p;
4791
0
  PFETCH_READY;
4792
4793
0
  p = *src;
4794
0
  c = 0;
4795
0
  enc = env->enc;
4796
0
  *rback_num = 0;
4797
0
  exist_level = 0;
4798
0
  *num_type = IS_NOT_NUM;
4799
0
  sign = 1;
4800
0
  pnum_head = *src;
4801
4802
0
  end_code = get_name_end_code_point(start_code);
4803
4804
0
  digit_count = 0;
4805
0
  name_end = end;
4806
0
  r = 0;
4807
0
  if (PEND) {
4808
0
    return ONIGERR_EMPTY_GROUP_NAME;
4809
0
  }
4810
0
  else {
4811
0
    PFETCH(c);
4812
0
    if (c == end_code)
4813
0
      return ONIGERR_EMPTY_GROUP_NAME;
4814
4815
0
    if (IS_CODE_DIGIT_ASCII(enc, c)) {
4816
0
      *num_type = IS_ABS_NUM;
4817
0
      digit_count++;
4818
0
    }
4819
0
    else if (c == '-') {
4820
0
      *num_type = IS_REL_NUM;
4821
0
      sign = -1;
4822
0
      pnum_head = p;
4823
0
    }
4824
0
    else if (c == '+') {
4825
0
      *num_type = IS_REL_NUM;
4826
0
      sign = 1;
4827
0
      pnum_head = p;
4828
0
    }
4829
0
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4830
0
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4831
0
    }
4832
0
  }
4833
4834
0
  while (!PEND) {
4835
0
    name_end = p;
4836
0
    PFETCH(c);
4837
0
    if (c == end_code || c == ')' || c == '+' || c == '-') {
4838
0
      if (*num_type != IS_NOT_NUM && digit_count == 0)
4839
0
        r = ONIGERR_INVALID_GROUP_NAME;
4840
0
      break;
4841
0
    }
4842
4843
0
    if (*num_type != IS_NOT_NUM) {
4844
0
      if (IS_CODE_DIGIT_ASCII(enc, c)) {
4845
0
        digit_count++;
4846