Coverage Report

Created: 2024-05-20 06:21

/src/jq/modules/oniguruma/src/regparse.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regparse.c -  Oniguruma (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2023  K.Kosako
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#ifdef DEBUG_ND_FREE
31
#ifndef NEED_TO_INCLUDE_STDIO
32
#define NEED_TO_INCLUDE_STDIO
33
#endif
34
#endif
35
36
#include "regparse.h"
37
#include "st.h"
38
39
0
#define INIT_TAG_NAMES_ALLOC_NUM   5
40
41
0
#define WARN_BUFSIZE    256
42
43
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
44
45
/* True when code c may appear in a callout name: ASCII letters, digits, '_'.
   The argument is evaluated several times; pass a side-effect-free
   expression.  Every use of c is parenthesized so that compound arguments
   (e.g. a conditional expression) are classified correctly. */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
/* True when code c may appear in a callout tag name (same character set). */
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_')
49
50
255k
#define OPTON_SINGLELINE(option)     ((option) & ONIG_OPTION_SINGLELINE)
51
2.35M
#define OPTON_MULTILINE(option)      ((option) & ONIG_OPTION_MULTILINE)
52
6.04M
#define OPTON_IGNORECASE(option)     ((option) & ONIG_OPTION_IGNORECASE)
53
737k
#define OPTON_EXTEND(option)         ((option) & ONIG_OPTION_EXTEND)
54
#define OPTON_WORD_ASCII(option) \
55
541k
  ((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
56
#define OPTON_DIGIT_ASCII(option) \
57
8.56k
  ((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
58
#define OPTON_SPACE_ASCII(option) \
59
2.09k
  ((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
60
14.1k
#define OPTON_POSIX_ASCII(option)    ((option) & ONIG_OPTION_POSIX_IS_ASCII)
61
8.78k
#define OPTON_TEXT_SEGMENT_WORD(option)  ((option) & ONIG_OPTION_TEXT_SEGMENT_WORD)
62
63
#define OPTON_IS_ASCII_MODE_CTYPE(ctype, options) \
64
2.36M
  ((ctype) >= 0 && \
65
2.36M
  (((ctype) < ONIGENC_CTYPE_ASCII  && OPTON_POSIX_ASCII(options)) ||\
66
14.1k
   ((ctype) == ONIGENC_CTYPE_WORD  && OPTON_WORD_ASCII(options))  ||\
67
14.1k
   ((ctype) == ONIGENC_CTYPE_DIGIT && OPTON_DIGIT_ASCII(options)) ||\
68
14.1k
   ((ctype) == ONIGENC_CTYPE_SPACE && OPTON_SPACE_ASCII(options))))
69
70
71
OnigSyntaxType OnigSyntaxOniguruma = {
72
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
73
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
74
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
75
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
76
     ONIG_SYN_OP_ESC_C_CONTROL )
77
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
78
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
79
      ONIG_SYN_OP2_OPTION_ONIGURUMA |
80
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
81
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
82
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
83
      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
84
      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME    |
85
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
86
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
87
      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
88
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
89
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
90
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
91
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
92
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
93
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
94
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
95
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
96
  , ( SYN_GNU_REGEX_BV |
97
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
98
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
99
      ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND |
100
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
101
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
102
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
103
      ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
104
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
105
#ifdef USE_WHOLE_OPTIONS
106
      ONIG_SYN_WHOLE_OPTIONS |
107
#endif
108
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
109
    )
110
  , ONIG_OPTION_NONE
111
  ,
112
  {
113
      (OnigCodePoint )'\\'                       /* esc */
114
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
115
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
116
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
117
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
118
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
119
  }
120
};
121
122
OnigSyntaxType OnigSyntaxRuby = {
123
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
124
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
125
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
126
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
127
     ONIG_SYN_OP_ESC_C_CONTROL )
128
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
129
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
130
      ONIG_SYN_OP2_OPTION_RUBY |
131
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
132
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
133
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
134
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
135
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
136
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
137
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
138
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
139
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
140
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
141
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
142
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
143
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
144
  , ( SYN_GNU_REGEX_BV |
145
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
146
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
147
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
148
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
149
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
150
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
151
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
152
  , ONIG_OPTION_NONE
153
  ,
154
  {
155
      (OnigCodePoint )'\\'                       /* esc */
156
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
157
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
158
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
159
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
160
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
161
  }
162
};
163
164
OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;
165
166
167
1.21M
#define BB_INIT(buf,size)    bbuf_init((BBuf* )(buf), (size))
168
169
43.3k
/* Grow buf so that its capacity is at least low bytes, doubling the capacity
   at least once.  On allocation failure the enclosing function returns
   ONIGERR_MEMORY; buf is left untouched in that case (the old storage is
   still owned by buf and can be released normally).
   A zero capacity is bumped to 1 first so the doubling loop terminates. */
#define BB_EXPAND(buf,low) do{\
  unsigned int bb_new_alloc_ = (buf)->alloc;\
  UChar* bb_new_p_;\
  if (bb_new_alloc_ == 0) bb_new_alloc_ = 1;\
  do { bb_new_alloc_ *= 2; } while (bb_new_alloc_ < (unsigned int )(low));\
  bb_new_p_ = (UChar* )xrealloc((buf)->p, bb_new_alloc_);\
  if (IS_NULL(bb_new_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bb_new_p_;\
  (buf)->alloc = bb_new_alloc_;\
} while (0)
174
175
23.2M
/* Make sure buf can hold at least size bytes, doubling the capacity as often
   as needed (no reallocation when it is already large enough).
   On allocation failure the enclosing function returns ONIGERR_MEMORY and
   buf keeps its previous storage and capacity.
   A zero capacity restarts from 1 so the growth loop terminates. */
#define BB_ENSURE_SIZE(buf,size) do{\
  unsigned int new_alloc = (buf)->alloc;\
  while (new_alloc < (unsigned int )(size)) {\
    new_alloc = (new_alloc == 0) ? 1 : new_alloc * 2;\
  }\
  if ((buf)->alloc != new_alloc) {\
    UChar* new_p = (UChar* )xrealloc((buf)->p, new_alloc);\
    if (IS_NULL(new_p)) return(ONIGERR_MEMORY);\
    (buf)->p     = new_p;\
    (buf)->alloc = new_alloc;\
  }\
} while (0)
184
185
71.0M
#define BB_WRITE(buf,pos,bytes,n) do{\
186
71.0M
  int used = (pos) + (n);\
187
71.0M
  if ((buf)->alloc < (unsigned int )used) BB_EXPAND((buf),used);\
188
71.0M
  xmemcpy((buf)->p + (pos), (bytes), (n));\
189
71.0M
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
190
71.0M
} while (0)
191
192
#define BB_WRITE1(buf,pos,byte) do{\
193
  int used = (pos) + 1;\
194
  if ((buf)->alloc < (unsigned int )used) BB_EXPAND((buf),used);\
195
  (buf)->p[(pos)] = (byte);\
196
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
197
} while (0)
198
199
#define BB_ADD(buf,bytes,n)       BB_WRITE((buf),(buf)->used,(bytes),(n))
200
#define BB_ADD1(buf,byte)         BB_WRITE1((buf),(buf)->used,(byte))
201
#define BB_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
202
#define BB_GET_OFFSET_POS(buf)    ((buf)->used)
203
204
/* from < to */
205
952k
#define BB_MOVE_RIGHT(buf,from,to,n) do {\
206
952k
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BB_EXPAND((buf),(to) + (n));\
207
952k
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
208
952k
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
209
952k
} while (0)
210
211
/* from > to */
212
#define BB_MOVE_LEFT(buf,from,to,n) do {\
213
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
214
} while (0)
215
216
/* from > to */
217
1.67k
/* Move the bytes from offset 'from' up to the end of the used area down to
   offset 'to', and shrink the used size by (from - to).
   Arguments are parenthesized so compound expressions are handled correctly. */
#define BB_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)
221
222
/* Insert n bytes at offset pos of buf.
   When pos is at or beyond the used area the bytes are simply written there;
   otherwise the tail starting at pos is shifted right by n first.
   May make the enclosing function return ONIGERR_MEMORY (through BB_WRITE /
   BB_MOVE_RIGHT).  All arguments are parenthesized before use. */
#define BB_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BB_WRITE((buf),(pos),(bytes),(n));\
  }\
  else {\
    BB_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)
231
232
#define BB_GET_BYTE(buf, pos) (buf)->p[(pos)]
233
234
235
typedef enum {
236
  CS_VALUE,
237
  CS_RANGE,
238
  CS_COMPLETE,
239
  CS_START
240
} CSTATE;
241
242
typedef enum {
243
  CV_UNDEF,
244
  CV_SB,
245
  CV_MB,
246
  CV_CPROP
247
} CVAL;
248
249
0
extern void onig_null_warn(const char* s ARG_UNUSED) { }
250
251
#ifdef DEFAULT_WARN_FUNCTION
252
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
253
#else
254
static OnigWarnFunc onig_warn = onig_null_warn;
255
#endif
256
257
#ifdef DEFAULT_VERB_WARN_FUNCTION
258
static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
259
#else
260
static OnigWarnFunc onig_verb_warn = onig_null_warn;
261
#endif
262
263
/* Install f as the warning handler (stored in onig_warn). */
extern void
onig_set_warn_func(OnigWarnFunc f)
{
  onig_warn = f;
}
267
268
/* Install f as the verbose-warning handler (stored in onig_verb_warn). */
extern void
onig_set_verb_warn_func(OnigWarnFunc f)
{
  onig_verb_warn = f;
}
272
273
extern void
274
onig_warning(const char* s)
275
2
{
276
2
  if (onig_warn == onig_null_warn) return ;
277
278
0
  (*onig_warn)(s);
279
0
}
280
281
#define DEFAULT_MAX_CAPTURE_NUM   32767
282
283
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;
284
285
extern int
286
onig_set_capture_num_limit(int num)
287
0
{
288
0
  if (num < 0) return -1;
289
290
0
  MaxCaptureNum = num;
291
0
  return 0;
292
0
}
293
294
static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
295
296
extern unsigned int
297
onig_get_parse_depth_limit(void)
298
0
{
299
0
  return ParseDepthLimit;
300
0
}
301
302
extern int
303
onig_set_parse_depth_limit(unsigned int depth)
304
0
{
305
0
  if (depth == 0)
306
0
    ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
307
0
  else
308
0
    ParseDepthLimit = depth;
309
0
  return 0;
310
0
}
311
312
#ifdef ONIG_DEBUG_PARSE
313
#define INC_PARSE_DEPTH(d) do {\
314
  (d)++;\
315
  if (env->max_parse_depth < (d)) env->max_parse_depth = d;\
316
  if ((d) > ParseDepthLimit) \
317
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
318
} while (0)
319
#else
320
9.42M
#define INC_PARSE_DEPTH(d) do {\
321
9.42M
  (d)++;\
322
9.42M
  if ((d) > ParseDepthLimit) \
323
9.42M
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
324
9.42M
} while (0)
325
#endif
326
327
3.85M
#define DEC_PARSE_DEPTH(d)  (d)--
328
329
330
static int
331
bbuf_init(BBuf* buf, int size)
332
1.21M
{
333
1.21M
  if (size <= 0) {
334
0
    size   = 0;
335
0
    buf->p = NULL;
336
0
  }
337
1.21M
  else {
338
1.21M
    buf->p = (UChar* )xmalloc(size);
339
1.21M
    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
340
1.21M
  }
341
342
1.21M
  buf->alloc = size;
343
1.21M
  buf->used  = 0;
344
1.21M
  return 0;
345
1.21M
}
346
347
static void
348
bbuf_free(BBuf* bbuf)
349
1.21M
{
350
1.21M
  if (IS_NOT_NULL(bbuf)) {
351
1.21M
    if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
352
1.21M
    xfree(bbuf);
353
1.21M
  }
354
1.21M
}
355
356
static int
357
bbuf_clone(BBuf** rto, BBuf* from)
358
0
{
359
0
  int r;
360
0
  BBuf *to;
361
362
0
  *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
363
0
  CHECK_NULL_RETURN_MEMERR(to);
364
0
  r = BB_INIT(to, from->alloc);
365
0
  if (r != 0) {
366
0
    bbuf_free(to);
367
0
    *rto = 0;
368
0
    return r;
369
0
  }
370
0
  to->used = from->used;
371
0
  xmemcpy(to->p, from->p, from->used);
372
0
  return 0;
373
0
}
374
375
static int
376
backref_rel_to_abs(int rel_no, ParseEnv* env)
377
392
{
378
392
  if (rel_no > 0) {
379
43
    if (rel_no > ONIG_INT_MAX - env->num_mem)
380
0
      return ONIGERR_INVALID_BACKREF;
381
43
    return env->num_mem + rel_no;
382
43
  }
383
349
  else {
384
349
    return env->num_mem + 1 + rel_no;
385
349
  }
386
392
}
387
388
/* Set the flag bits f in option variable v. */
#define OPTION_ON(v,f)     ((v) |= (f))
/* Clear the flag bits f in option variable v. */
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/* Clear f in v when 'negative' is non-zero, set it otherwise.
   The whole expansion is parenthesized so it can be used safely inside a
   larger expression. */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
392
393
#define MBCODE_START_POS(enc) \
394
0
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
395
396
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
397
0
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
398
399
0
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
400
0
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
401
0
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
402
0
    if (r != 0) return r;\
403
0
  }\
404
0
} while (0)
405
406
407
0
#define BITSET_IS_EMPTY(bs,empty) do {\
408
0
  int i;\
409
0
  empty = 1;\
410
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {\
411
0
    if ((bs)[i] != 0) {\
412
0
      empty = 0; break;\
413
0
    }\
414
0
  }\
415
0
} while (0)
416
417
static void
418
bitset_set_range(BitSetRef bs, int from, int to)
419
40.4k
{
420
40.4k
  int i;
421
3.01M
  for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
422
2.97M
    BITSET_SET_BIT(bs, i);
423
2.97M
  }
424
40.4k
}
425
426
static void
427
bitset_invert(BitSetRef bs)
428
0
{
429
0
  int i;
430
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { bs[i] = ~(bs[i]); }
431
0
}
432
433
static void
434
bitset_invert_to(BitSetRef from, BitSetRef to)
435
0
{
436
0
  int i;
437
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { to[i] = ~(from[i]); }
438
0
}
439
440
static void
441
bitset_and(BitSetRef dest, BitSetRef bs)
442
0
{
443
0
  int i;
444
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] &= bs[i]; }
445
0
}
446
447
static void
448
bitset_or(BitSetRef dest, BitSetRef bs)
449
0
{
450
0
  int i;
451
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] |= bs[i]; }
452
0
}
453
454
static void
455
bitset_copy(BitSetRef dest, BitSetRef bs)
456
0
{
457
0
  int i;
458
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] = bs[i]; }
459
0
}
460
461
extern int
462
onig_strncmp(const UChar* s1, const UChar* s2, int n)
463
0
{
464
0
  int x;
465
466
0
  while (n-- > 0) {
467
0
    x = *s2++ - *s1++;
468
0
    if (x) return x;
469
0
  }
470
0
  return 0;
471
0
}
472
473
extern void
474
onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
475
80.4M
{
476
80.4M
  int len = (int )(end - src);
477
80.4M
  if (len > 0) {
478
80.4M
    xmemcpy(dest, src, len);
479
80.4M
    dest[len] = (UChar )0;
480
80.4M
  }
481
80.4M
}
482
483
/* scan pattern methods */
484
6.28k
#define PEND_VALUE   0
485
486
92.7M
#define PFETCH_READY  UChar* pfetch_prev
487
113M
#define PEND         (p < end ?  0 : 1)
488
1.82M
#define PUNFETCH     p = pfetch_prev
489
0
#define PPREV        pfetch_prev
490
275k
#define PINC       do { \
491
275k
  pfetch_prev = p; \
492
275k
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
493
275k
} while (0)
494
92.9M
#define PFETCH(c)  do { \
495
92.9M
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
496
92.9M
  pfetch_prev = p; \
497
92.9M
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
498
92.9M
} while (0)
499
500
389
#define PINC_S     do { \
501
389
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
502
389
} while (0)
503
3.19M
#define PFETCH_S(c) do { \
504
3.19M
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
505
3.19M
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
506
3.19M
} while (0)
507
508
8.74M
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
509
13.9M
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )c)
510
511
/* Resize the heap string [dest, dest_end) to capa + 1 bytes and append
   [src, src_end) after its current content.
   A NULL dest is allocated fresh.  Returns the (possibly moved) buffer, or
   NULL when the allocation fails. */
static UChar*
strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
            int capa)
{
  UChar* r;
  /* taken before the reallocation, because dest may move */
  ptrdiff_t dest_delta = dest_end - dest;

  r = (UChar* )(dest ? xrealloc(dest, capa + 1) : xmalloc(capa + 1));
  CHECK_NULL_RETURN(r);

  onig_strcpy(r + dest_delta, src, src_end);
  return r;
}
527
528
/* dest on static area */
529
/* Build a new heap string of capa + 1 bytes holding [dest, dest_end)
   followed by [src, src_end).  dest itself is only read, never freed.
   Returns the new buffer, or NULL when the allocation fails. */
static UChar*
strcat_capa_from_static(UChar* dest, UChar* dest_end,
                        const UChar* src, const UChar* src_end, int capa)
{
  UChar* r = (UChar* )xmalloc(capa + 1);

  CHECK_NULL_RETURN(r);

  onig_strcpy(r, dest, dest_end);
  onig_strcpy(r + (dest_end - dest), src, src_end);
  return r;
}
541
542
543
#ifdef USE_ST_LIBRARY
544
545
typedef struct {
546
  UChar* s;
547
  UChar* end;
548
} st_str_end_key;
549
550
static int
551
str_end_cmp(st_str_end_key* x, st_str_end_key* y)
552
1.93k
{
553
1.93k
  UChar *p, *q;
554
1.93k
  int c;
555
556
1.93k
  if ((x->end - x->s) != (y->end - y->s))
557
0
    return 1;
558
559
1.93k
  p = x->s;
560
1.93k
  q = y->s;
561
1.26M
  while (p < x->end) {
562
1.26M
    c = (int )*p - (int )*q;
563
1.26M
    if (c != 0) return c;
564
565
1.26M
    p++; q++;
566
1.26M
  }
567
568
1.93k
  return 0;
569
1.93k
}
570
571
static int
572
str_end_hash(st_str_end_key* x)
573
3.49k
{
574
3.49k
  UChar *p;
575
3.49k
  unsigned val = 0;
576
577
3.49k
  p = x->s;
578
3.24M
  while (p < x->end) {
579
3.24M
    val = val * 997 + (unsigned )*p++;
580
3.24M
  }
581
582
3.49k
  return (int) (val + (val >> 5));
583
3.49k
}
584
585
extern hash_table_type
586
onig_st_init_strend_table_with_size(int size)
587
145
{
588
145
  static struct st_hash_type hashType = {
589
145
    str_end_cmp,
590
145
    str_end_hash,
591
145
  };
592
593
145
  return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
594
145
}
595
596
extern int
597
onig_st_lookup_strend(hash_table_type table, const UChar* str_key,
598
                      const UChar* end_key, hash_data_type *value)
599
2.64k
{
600
2.64k
  st_str_end_key key;
601
602
2.64k
  key.s   = (UChar* )str_key;
603
2.64k
  key.end = (UChar* )end_key;
604
605
2.64k
  return onig_st_lookup(table, (st_data_t )(&key), value);
606
2.64k
}
607
608
extern int
609
onig_st_insert_strend(hash_table_type table, const UChar* str_key,
610
                      const UChar* end_key, hash_data_type value)
611
857
{
612
857
  st_str_end_key* key;
613
857
  int result;
614
615
857
  key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
616
857
  CHECK_NULL_RETURN_MEMERR(key);
617
618
857
  key->s   = (UChar* )str_key;
619
857
  key->end = (UChar* )end_key;
620
857
  result = onig_st_insert(table, (st_data_t )key, value);
621
857
  if (result) {
622
0
    xfree(key);
623
0
  }
624
857
  return result;
625
857
}
626
627
628
#ifdef USE_CALLOUT
629
630
typedef struct {
631
  OnigEncoding enc;
632
  int    type; /* callout type: single or not */
633
  UChar* s;
634
  UChar* end;
635
} st_callout_name_key;
636
637
static int
638
callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
639
0
{
640
0
  UChar *p, *q;
641
0
  int c;
642
643
0
  if (x->enc  != y->enc)  return 1;
644
0
  if (x->type != y->type) return 1;
645
0
  if ((x->end - x->s) != (y->end - y->s))
646
0
    return 1;
647
648
0
  p = x->s;
649
0
  q = y->s;
650
0
  while (p < x->end) {
651
0
    c = (int )*p - (int )*q;
652
0
    if (c != 0) return c;
653
654
0
    p++; q++;
655
0
  }
656
657
0
  return 0;
658
0
}
659
660
static int
661
callout_name_table_hash(st_callout_name_key* x)
662
108
{
663
108
  UChar *p;
664
108
  unsigned int val = 0;
665
666
108
  p = x->s;
667
3.64k
  while (p < x->end) {
668
3.54k
    val = val * 997 + (unsigned int )*p++;
669
3.54k
  }
670
671
  /* use intptr_t for escape warning in Windows */
672
108
  return (int )(val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type);
673
108
}
674
675
extern hash_table_type
676
onig_st_init_callout_name_table_with_size(int size)
677
2
{
678
2
  static struct st_hash_type hashType = {
679
2
    callout_name_table_cmp,
680
2
    callout_name_table_hash,
681
2
  };
682
683
2
  return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
684
2
}
685
686
extern int
687
onig_st_lookup_callout_name_table(hash_table_type table,
688
                                  OnigEncoding enc,
689
                                  int type,
690
                                  const UChar* str_key,
691
                                  const UChar* end_key,
692
                                  hash_data_type *value)
693
94
{
694
94
  st_callout_name_key key;
695
696
94
  key.enc  = enc;
697
94
  key.type = type;
698
94
  key.s    = (UChar* )str_key;
699
94
  key.end  = (UChar* )end_key;
700
701
94
  return onig_st_lookup(table, (st_data_t )(&key), value);
702
94
}
703
704
static int
705
st_insert_callout_name_table(hash_table_type table,
706
                             OnigEncoding enc, int type,
707
                             UChar* str_key, UChar* end_key,
708
                             hash_data_type value)
709
14
{
710
14
  st_callout_name_key* key;
711
14
  int result;
712
713
14
  key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
714
14
  CHECK_NULL_RETURN_MEMERR(key);
715
716
  /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
717
14
  key->enc  = enc;
718
14
  key->type = type;
719
14
  key->s    = str_key;
720
14
  key->end  = end_key;
721
14
  result = onig_st_insert(table, (st_data_t )key, value);
722
14
  if (result) {
723
0
    xfree(key);
724
0
  }
725
14
  return result;
726
14
}
727
#endif
728
729
#endif /* USE_ST_LIBRARY */
730
731
732
315
#define INIT_NAME_BACKREFS_ALLOC_NUM   8
733
734
typedef struct {
735
  UChar* name;
736
  int    name_len;   /* byte length */
737
  int    back_num;   /* number of backrefs */
738
  int    back_alloc;
739
  int    back_ref1;
740
  int*   back_refs;
741
} NameEntry;
742
743
#ifdef USE_ST_LIBRARY
744
745
147
#define INIT_NAMES_ALLOC_NUM    5
746
747
typedef st_table  NameTable;
748
typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
749
750
#define NAMEBUF_SIZE    24
751
#define NAMEBUF_SIZE_1  25
752
753
#ifdef ONIG_DEBUG
754
static int
755
i_print_name_entry(UChar* key, NameEntry* e, void* arg)
756
{
757
  int i;
758
  FILE* fp = (FILE* )arg;
759
760
  fprintf(fp, "%s: ", e->name);
761
  if (e->back_num == 0)
762
    fputs("-", fp);
763
  else if (e->back_num == 1)
764
    fprintf(fp, "%d", e->back_ref1);
765
  else {
766
    for (i = 0; i < e->back_num; i++) {
767
      if (i > 0) fprintf(fp, ", ");
768
      fprintf(fp, "%d", e->back_refs[i]);
769
    }
770
  }
771
  fputs("\n", fp);
772
  return ST_CONTINUE;
773
}
774
775
extern int
776
onig_print_names(FILE* fp, regex_t* reg)
777
{
778
  NameTable* t = (NameTable* )reg->name_table;
779
780
  if (IS_NOT_NULL(t)) {
781
    fprintf(fp, "name table\n");
782
    onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
783
    fputs("\n", fp);
784
  }
785
  return 0;
786
}
787
#endif /* ONIG_DEBUG */
788
789
static int
790
i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
791
857
{
792
857
  xfree(e->name);
793
857
  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
794
857
  xfree(key);
795
857
  xfree(e);
796
857
  return ST_DELETE;
797
857
}
798
799
static int
800
names_clear(regex_t* reg)
801
2.74M
{
802
2.74M
  NameTable* t = (NameTable* )reg->name_table;
803
804
2.74M
  if (IS_NOT_NULL(t)) {
805
145
    onig_st_foreach(t, i_free_name_entry, 0);
806
145
  }
807
2.74M
  return 0;
808
2.74M
}
809
810
extern int
811
onig_names_free(regex_t* reg)
812
1.37M
{
813
1.37M
  int r;
814
1.37M
  NameTable* t;
815
816
1.37M
  r = names_clear(reg);
817
1.37M
  if (r != 0) return r;
818
819
1.37M
  t = (NameTable* )reg->name_table;
820
1.37M
  if (IS_NOT_NULL(t)) onig_st_free_table(t);
821
1.37M
  reg->name_table = (void* )NULL;
822
1.37M
  return 0;
823
1.37M
}
824
825
static NameEntry*
826
name_find(regex_t* reg, const UChar* name, const UChar* name_end)
827
2.79k
{
828
2.79k
  NameEntry* e;
829
2.79k
  NameTable* t = (NameTable* )reg->name_table;
830
831
2.79k
  e = (NameEntry* )NULL;
832
2.79k
  if (IS_NOT_NULL(t)) {
833
2.64k
    onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
834
2.64k
  }
835
2.79k
  return e;
836
2.79k
}
837
838
typedef struct {
839
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
840
  regex_t* reg;
841
  void* arg;
842
  int ret;
843
  OnigEncoding enc;
844
} INamesArg;
845
846
static int
847
i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
848
8.28k
{
849
8.28k
  int r = (*(arg->func))(e->name,
850
8.28k
                         e->name + e->name_len,
851
8.28k
                         e->back_num,
852
8.28k
                         (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
853
8.28k
                         arg->reg, arg->arg);
854
8.28k
  if (r != 0) {
855
0
    arg->ret = r;
856
0
    return ST_STOP;
857
0
  }
858
8.28k
  return ST_CONTINUE;
859
8.28k
}
860
861
extern int
862
onig_foreach_name(regex_t* reg,
863
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
864
1.97M
{
865
1.97M
  INamesArg narg;
866
1.97M
  NameTable* t = (NameTable* )reg->name_table;
867
868
1.97M
  narg.ret = 0;
869
1.97M
  if (IS_NOT_NULL(t)) {
870
5.14k
    narg.func = func;
871
5.14k
    narg.reg  = reg;
872
5.14k
    narg.arg  = arg;
873
5.14k
    narg.enc  = reg->enc; /* should be pattern encoding. */
874
5.14k
    onig_st_foreach(t, i_names, (HashDataType )&narg);
875
5.14k
  }
876
1.97M
  return narg.ret;
877
1.97M
}
878
879
static int
880
i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumMap* map)
881
0
{
882
0
  int i;
883
884
0
  if (e->back_num > 1) {
885
0
    for (i = 0; i < e->back_num; i++) {
886
0
      e->back_refs[i] = map[e->back_refs[i]].new_val;
887
0
    }
888
0
  }
889
0
  else if (e->back_num == 1) {
890
0
    e->back_ref1 = map[e->back_ref1].new_val;
891
0
  }
892
893
0
  return ST_CONTINUE;
894
0
}
895
896
extern int
897
onig_renumber_name_table(regex_t* reg, GroupNumMap* map)
898
0
{
899
0
  NameTable* t = (NameTable* )reg->name_table;
900
901
0
  if (IS_NOT_NULL(t)) {
902
0
    onig_st_foreach(t, i_renumber_name, (HashDataType )map);
903
0
  }
904
0
  return 0;
905
0
}
906
907
908
extern int
909
onig_number_of_names(regex_t* reg)
910
0
{
911
0
  NameTable* t = (NameTable* )reg->name_table;
912
913
0
  if (IS_NOT_NULL(t))
914
0
    return t->num_entries;
915
0
  else
916
0
    return 0;
917
0
}
918
919
#else  /* USE_ST_LIBRARY */
920
921
#define INIT_NAMES_ALLOC_NUM    8
922
923
typedef struct {
924
  NameEntry* e;
925
  int        num;
926
  int        alloc;
927
} NameTable;
928
929
#ifdef ONIG_DEBUG
930
extern int
931
onig_print_names(FILE* fp, regex_t* reg)
932
{
933
  int i, j;
934
  NameEntry* e;
935
  NameTable* t = (NameTable* )reg->name_table;
936
937
  if (IS_NOT_NULL(t) && t->num > 0) {
938
    fprintf(fp, "name table\n");
939
    for (i = 0; i < t->num; i++) {
940
      e = &(t->e[i]);
941
      fprintf(fp, "%s: ", e->name);
942
      if (e->back_num == 0) {
943
        fputs("-", fp);
944
      }
945
      else if (e->back_num == 1) {
946
        fprintf(fp, "%d", e->back_ref1);
947
      }
948
      else {
949
        for (j = 0; j < e->back_num; j++) {
950
          if (j > 0) fprintf(fp, ", ");
951
          fprintf(fp, "%d", e->back_refs[j]);
952
        }
953
      }
954
      fputs("\n", fp);
955
    }
956
    fputs("\n", fp);
957
  }
958
  return 0;
959
}
960
#endif
961
962
static int
963
names_clear(regex_t* reg)
964
{
965
  int i;
966
  NameEntry* e;
967
  NameTable* t = (NameTable* )reg->name_table;
968
969
  if (IS_NOT_NULL(t)) {
970
    for (i = 0; i < t->num; i++) {
971
      e = &(t->e[i]);
972
      if (IS_NOT_NULL(e->name)) {
973
        xfree(e->name);
974
        e->name       = NULL;
975
        e->name_len   = 0;
976
        e->back_num   = 0;
977
        e->back_alloc = 0;
978
        if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
979
        e->back_refs = (int* )NULL;
980
      }
981
    }
982
    if (IS_NOT_NULL(t->e)) {
983
      xfree(t->e);
984
      t->e = NULL;
985
    }
986
    t->num = 0;
987
  }
988
  return 0;
989
}
990
991
extern int
992
onig_names_free(regex_t* reg)
993
{
994
  int r;
995
  NameTable* t;
996
997
  r = names_clear(reg);
998
  if (r != 0) return r;
999
1000
  t = (NameTable* )reg->name_table;
1001
  if (IS_NOT_NULL(t)) xfree(t);
1002
  reg->name_table = NULL;
1003
  return 0;
1004
}
1005
1006
static NameEntry*
1007
name_find(regex_t* reg, UChar* name, UChar* name_end)
1008
{
1009
  int i, len;
1010
  NameEntry* e;
1011
  NameTable* t = (NameTable* )reg->name_table;
1012
1013
  if (IS_NOT_NULL(t)) {
1014
    len = name_end - name;
1015
    for (i = 0; i < t->num; i++) {
1016
      e = &(t->e[i]);
1017
      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1018
        return e;
1019
    }
1020
  }
1021
  return (NameEntry* )NULL;
1022
}
1023
1024
extern int
1025
onig_foreach_name(regex_t* reg,
1026
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
1027
{
1028
  int i, r;
1029
  NameEntry* e;
1030
  NameTable* t = (NameTable* )reg->name_table;
1031
1032
  if (IS_NOT_NULL(t)) {
1033
    for (i = 0; i < t->num; i++) {
1034
      e = &(t->e[i]);
1035
      r = (*func)(e->name, e->name + e->name_len, e->back_num,
1036
                  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
1037
                  reg, arg);
1038
      if (r != 0) return r;
1039
    }
1040
  }
1041
  return 0;
1042
}
1043
1044
extern int
1045
onig_number_of_names(regex_t* reg)
1046
{
1047
  NameTable* t = (NameTable* )reg->name_table;
1048
1049
  if (IS_NOT_NULL(t))
1050
    return t->num;
1051
  else
1052
    return 0;
1053
}
1054
1055
#endif /* else USE_ST_LIBRARY */
1056
1057
static int
1058
name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ParseEnv* env)
1059
2.78k
{
1060
2.78k
  int r;
1061
2.78k
  int alloc;
1062
2.78k
  NameEntry* e;
1063
2.78k
  NameTable* t = (NameTable* )reg->name_table;
1064
1065
2.78k
  if (name_end - name <= 0)
1066
0
    return ONIGERR_EMPTY_GROUP_NAME;
1067
1068
2.78k
  e = name_find(reg, name, name_end);
1069
2.78k
  if (IS_NULL(e)) {
1070
857
#ifdef USE_ST_LIBRARY
1071
857
    if (IS_NULL(t)) {
1072
145
      t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
1073
145
      CHECK_NULL_RETURN_MEMERR(t);
1074
145
      reg->name_table = (void* )t;
1075
145
    }
1076
857
    e = (NameEntry* )xmalloc(sizeof(NameEntry));
1077
857
    CHECK_NULL_RETURN_MEMERR(e);
1078
1079
857
    e->name = onigenc_strdup(reg->enc, name, name_end);
1080
857
    if (IS_NULL(e->name)) {
1081
0
      xfree(e);  return ONIGERR_MEMORY;
1082
0
    }
1083
857
    r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
1084
857
                              (HashDataType )e);
1085
857
    if (r < 0) return r;
1086
1087
857
    e->name_len   = (int )(name_end - name);
1088
857
    e->back_num   = 0;
1089
857
    e->back_alloc = 0;
1090
857
    e->back_refs  = (int* )NULL;
1091
1092
#else
1093
1094
    if (IS_NULL(t)) {
1095
      alloc = INIT_NAMES_ALLOC_NUM;
1096
      t = (NameTable* )xmalloc(sizeof(NameTable));
1097
      CHECK_NULL_RETURN_MEMERR(t);
1098
      t->e     = NULL;
1099
      t->alloc = 0;
1100
      t->num   = 0;
1101
1102
      t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
1103
      if (IS_NULL(t->e)) {
1104
        xfree(t);
1105
        return ONIGERR_MEMORY;
1106
      }
1107
      t->alloc = alloc;
1108
      reg->name_table = t;
1109
      goto clear;
1110
    }
1111
    else if (t->num == t->alloc) {
1112
      int i;
1113
1114
      alloc = t->alloc * 2;
1115
      t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
1116
      CHECK_NULL_RETURN_MEMERR(t->e);
1117
      t->alloc = alloc;
1118
1119
    clear:
1120
      for (i = t->num; i < t->alloc; i++) {
1121
        t->e[i].name       = NULL;
1122
        t->e[i].name_len   = 0;
1123
        t->e[i].back_num   = 0;
1124
        t->e[i].back_alloc = 0;
1125
        t->e[i].back_refs  = (int* )NULL;
1126
      }
1127
    }
1128
    e = &(t->e[t->num]);
1129
    t->num++;
1130
    e->name = onigenc_strdup(reg->enc, name, name_end);
1131
    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1132
    e->name_len = name_end - name;
1133
#endif
1134
857
  }
1135
1136
2.78k
  if (e->back_num >= 1 &&
1137
2.78k
      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
1138
0
    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
1139
0
                                   name, name_end);
1140
0
    return ONIGERR_MULTIPLEX_DEFINED_NAME;
1141
0
  }
1142
1143
2.78k
  e->back_num++;
1144
2.78k
  if (e->back_num == 1) {
1145
857
    e->back_ref1 = backref;
1146
857
  }
1147
1.93k
  else {
1148
1.93k
    if (e->back_num == 2) {
1149
315
      alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
1150
315
      e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
1151
315
      CHECK_NULL_RETURN_MEMERR(e->back_refs);
1152
315
      e->back_alloc = alloc;
1153
315
      e->back_refs[0] = e->back_ref1;
1154
315
      e->back_refs[1] = backref;
1155
315
    }
1156
1.61k
    else {
1157
1.61k
      if (e->back_num > e->back_alloc) {
1158
92
        alloc = e->back_alloc * 2;
1159
92
        e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
1160
92
        CHECK_NULL_RETURN_MEMERR(e->back_refs);
1161
92
        e->back_alloc = alloc;
1162
92
      }
1163
1.61k
      e->back_refs[e->back_num - 1] = backref;
1164
1.61k
    }
1165
1.93k
  }
1166
1167
2.78k
  return 0;
1168
2.78k
}
1169
1170
extern int
1171
onig_name_to_group_numbers(regex_t* reg, const UChar* name,
1172
                           const UChar* name_end, int** nums)
1173
5
{
1174
5
  NameEntry* e = name_find(reg, name, name_end);
1175
1176
5
  if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
1177
1178
0
  switch (e->back_num) {
1179
0
  case 0:
1180
0
    break;
1181
0
  case 1:
1182
0
    *nums = &(e->back_ref1);
1183
0
    break;
1184
0
  default:
1185
0
    *nums = e->back_refs;
1186
0
    break;
1187
0
  }
1188
0
  return e->back_num;
1189
0
}
1190
1191
static int
1192
name_to_group_numbers(ParseEnv* env, const UChar* name, const UChar* name_end,
1193
                      int** nums)
1194
0
{
1195
0
  regex_t* reg;
1196
0
  NameEntry* e;
1197
1198
0
  reg = env->reg;
1199
0
  e = name_find(reg, name, name_end);
1200
1201
0
  if (IS_NULL(e)) {
1202
0
    onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
1203
0
                                   (UChar* )name, (UChar* )name_end);
1204
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
1205
0
  }
1206
1207
0
  switch (e->back_num) {
1208
0
  case 0:
1209
0
    break;
1210
0
  case 1:
1211
0
    *nums = &(e->back_ref1);
1212
0
    break;
1213
0
  default:
1214
0
    *nums = e->back_refs;
1215
0
    break;
1216
0
  }
1217
0
  return e->back_num;
1218
0
}
1219
1220
extern int
1221
onig_name_to_backref_number(regex_t* reg, const UChar* name,
1222
                            const UChar* name_end, OnigRegion *region)
1223
0
{
1224
0
  int i, n, *nums;
1225
1226
0
  n = onig_name_to_group_numbers(reg, name, name_end, &nums);
1227
0
  if (n < 0)
1228
0
    return n;
1229
0
  else if (n == 0)
1230
0
    return ONIGERR_PARSER_BUG;
1231
0
  else if (n == 1)
1232
0
    return nums[0];
1233
0
  else {
1234
0
    if (IS_NOT_NULL(region)) {
1235
0
      for (i = n - 1; i >= 0; i--) {
1236
0
        if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
1237
0
          return nums[i];
1238
0
      }
1239
0
    }
1240
0
    return nums[n - 1];
1241
0
  }
1242
0
}
1243
1244
extern int
1245
onig_noname_group_capture_is_active(regex_t* reg)
1246
0
{
1247
0
  if (OPTON_DONT_CAPTURE_GROUP(reg->options))
1248
0
    return 0;
1249
1250
0
  if (onig_number_of_names(reg) > 0 &&
1251
0
      IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
1252
0
      ! OPTON_CAPTURE_GROUP(reg->options)) {
1253
0
    return 0;
1254
0
  }
1255
1256
0
  return 1;
1257
0
}
1258
1259
#ifdef USE_CALLOUT
1260
1261
typedef struct {
1262
  OnigCalloutType type;
1263
  int             in;
1264
  OnigCalloutFunc start_func;
1265
  OnigCalloutFunc end_func;
1266
  int             arg_num;
1267
  int             opt_arg_num;
1268
  unsigned int    arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
1269
  OnigValue       opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
1270
  UChar*          name; /* reference to GlobalCalloutNameTable entry: e->name */
1271
} CalloutNameListEntry;
1272
1273
typedef struct {
1274
  int  n;
1275
  int  alloc;
1276
  CalloutNameListEntry* v;
1277
} CalloutNameListType;
1278
1279
static CalloutNameListType* GlobalCalloutNameList;
1280
1281
static int
1282
make_callout_func_list(CalloutNameListType** rs, int init_size)
1283
2
{
1284
2
  CalloutNameListType* s;
1285
2
  CalloutNameListEntry* v;
1286
1287
2
  *rs = 0;
1288
1289
2
  s = xmalloc(sizeof(*s));
1290
2
  if (IS_NULL(s)) return ONIGERR_MEMORY;
1291
1292
2
  v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
1293
2
  if (IS_NULL(v)) {
1294
0
    xfree(s);
1295
0
    return ONIGERR_MEMORY;
1296
0
  }
1297
1298
2
  s->n = 0;
1299
2
  s->alloc = init_size;
1300
2
  s->v = v;
1301
1302
2
  *rs = s;
1303
2
  return ONIG_NORMAL;
1304
2
}
1305
1306
static void
1307
free_callout_func_list(CalloutNameListType* s)
1308
0
{
1309
0
  if (IS_NOT_NULL(s)) {
1310
0
    if (IS_NOT_NULL(s->v)) {
1311
0
      int i, j;
1312
1313
0
      for (i = 0; i < s->n; i++) {
1314
0
        CalloutNameListEntry* e = s->v + i;
1315
0
        for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {
1316
0
          if (e->arg_types[j] == ONIG_TYPE_STRING) {
1317
0
            UChar* p = e->opt_defaults[j].s.start;
1318
0
            if (IS_NOT_NULL(p)) xfree(p);
1319
0
          }
1320
0
        }
1321
0
      }
1322
0
      xfree(s->v);
1323
0
    }
1324
0
    xfree(s);
1325
0
  }
1326
0
}
1327
1328
static int
1329
callout_func_list_add(CalloutNameListType* s, int* rid)
1330
16
{
1331
16
  if (s->n >= s->alloc) {
1332
0
    int new_size = s->alloc * 2;
1333
0
    CalloutNameListEntry* nv = (CalloutNameListEntry* )
1334
0
      xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size);
1335
0
    if (IS_NULL(nv)) return ONIGERR_MEMORY;
1336
1337
0
    s->alloc = new_size;
1338
0
    s->v = nv;
1339
0
  }
1340
1341
16
  *rid = s->n;
1342
1343
16
  xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
1344
16
  s->n++;
1345
16
  return ONIG_NORMAL;
1346
16
}
1347
1348
1349
typedef struct {
1350
  UChar* name;
1351
  int    name_len;   /* byte length */
1352
  int    id;
1353
} CalloutNameEntry;
1354
1355
#ifdef USE_ST_LIBRARY
1356
typedef st_table  CalloutNameTable;
1357
#else
1358
typedef struct {
1359
  CalloutNameEntry* e;
1360
  int               num;
1361
  int               alloc;
1362
} CalloutNameTable;
1363
#endif
1364
1365
static CalloutNameTable* GlobalCalloutNameTable;
1366
static int CalloutNameIDCounter;
1367
1368
#ifdef USE_ST_LIBRARY
1369
1370
static int
1371
i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
1372
                          void* arg ARG_UNUSED)
1373
0
{
1374
0
  if (IS_NOT_NULL(e)) {
1375
0
    xfree(e->name);
1376
0
  }
1377
  /*xfree(key->s); */ /* is same as e->name */
1378
0
  xfree(key);
1379
0
  xfree(e);
1380
0
  return ST_DELETE;
1381
0
}
1382
1383
static int
1384
callout_name_table_clear(CalloutNameTable* t)
1385
0
{
1386
0
  if (IS_NOT_NULL(t)) {
1387
0
    onig_st_foreach(t, i_free_callout_name_entry, 0);
1388
0
  }
1389
0
  return 0;
1390
0
}
1391
1392
static int
1393
global_callout_name_table_free(void)
1394
0
{
1395
0
  if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1396
0
    int r = callout_name_table_clear(GlobalCalloutNameTable);
1397
0
    if (r != 0) return r;
1398
1399
0
    onig_st_free_table(GlobalCalloutNameTable);
1400
0
    GlobalCalloutNameTable = 0;
1401
0
    CalloutNameIDCounter = 0;
1402
0
  }
1403
1404
0
  return 0;
1405
0
}
1406
1407
static CalloutNameEntry*
1408
callout_name_find(OnigEncoding enc, int is_not_single,
1409
                  const UChar* name, const UChar* name_end)
1410
55
{
1411
55
  int r;
1412
55
  CalloutNameEntry* e;
1413
55
  CalloutNameTable* t = GlobalCalloutNameTable;
1414
1415
55
  e = (CalloutNameEntry* )NULL;
1416
55
  if (IS_NOT_NULL(t)) {
1417
53
    r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1418
53
                                          (HashDataType* )((void* )(&e)));
1419
53
    if (r == 0) { /* not found */
1420
53
      if (enc != ONIG_ENCODING_ASCII &&
1421
53
          ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
1422
41
        enc = ONIG_ENCODING_ASCII;
1423
41
        onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1424
41
                                          (HashDataType* )((void* )(&e)));
1425
41
      }
1426
53
    }
1427
53
  }
1428
55
  return e;
1429
55
}
1430
1431
#else
1432
1433
static int
1434
callout_name_table_clear(CalloutNameTable* t)
1435
{
1436
  int i;
1437
  CalloutNameEntry* e;
1438
1439
  if (IS_NOT_NULL(t)) {
1440
    for (i = 0; i < t->num; i++) {
1441
      e = &(t->e[i]);
1442
      if (IS_NOT_NULL(e->name)) {
1443
        xfree(e->name);
1444
        e->name     = NULL;
1445
        e->name_len = 0;
1446
        e->id       = 0;
1447
        e->func     = 0;
1448
      }
1449
    }
1450
    if (IS_NOT_NULL(t->e)) {
1451
      xfree(t->e);
1452
      t->e = NULL;
1453
    }
1454
    t->num = 0;
1455
  }
1456
  return 0;
1457
}
1458
1459
static int
1460
global_callout_name_table_free(void)
1461
{
1462
  if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1463
    int r = callout_name_table_clear(GlobalCalloutNameTable);
1464
    if (r != 0) return r;
1465
1466
    xfree(GlobalCalloutNameTable);
1467
    GlobalCalloutNameTable = 0;
1468
    CalloutNameIDCounter = 0;
1469
  }
1470
  return 0;
1471
}
1472
1473
static CalloutNameEntry*
1474
callout_name_find(UChar* name, UChar* name_end)
1475
{
1476
  int i, len;
1477
  CalloutNameEntry* e;
1478
  CalloutNameTable* t = Calloutnames;
1479
1480
  if (IS_NOT_NULL(t)) {
1481
    len = name_end - name;
1482
    for (i = 0; i < t->num; i++) {
1483
      e = &(t->e[i]);
1484
      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1485
        return e;
1486
    }
1487
  }
1488
  return (CalloutNameEntry* )NULL;
1489
}
1490
1491
#endif
1492
1493
/* name string must be single byte char string. */
1494
static int
1495
callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
1496
                   int is_not_single, UChar* name, UChar* name_end)
1497
14
{
1498
14
  int r;
1499
14
  CalloutNameEntry* e;
1500
14
  CalloutNameTable* t = GlobalCalloutNameTable;
1501
1502
14
  *rentry = 0;
1503
14
  if (name_end - name <= 0)
1504
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1505
1506
14
  e = callout_name_find(enc, is_not_single, name, name_end);
1507
14
  if (IS_NULL(e)) {
1508
14
#ifdef USE_ST_LIBRARY
1509
14
    if (IS_NULL(t)) {
1510
2
      t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
1511
2
      CHECK_NULL_RETURN_MEMERR(t);
1512
2
      GlobalCalloutNameTable = t;
1513
2
    }
1514
14
    e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
1515
14
    CHECK_NULL_RETURN_MEMERR(e);
1516
1517
14
    e->name = onigenc_strdup(enc, name, name_end);
1518
14
    if (IS_NULL(e->name)) {
1519
0
      xfree(e);  return ONIGERR_MEMORY;
1520
0
    }
1521
1522
14
    r = st_insert_callout_name_table(t, enc, is_not_single,
1523
14
                                     e->name, (e->name + (name_end - name)),
1524
14
                                     (HashDataType )e);
1525
14
    if (r < 0) return r;
1526
1527
#else
1528
1529
    int alloc;
1530
1531
    if (IS_NULL(t)) {
1532
      alloc = INIT_NAMES_ALLOC_NUM;
1533
      t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
1534
      CHECK_NULL_RETURN_MEMERR(t);
1535
      t->e     = NULL;
1536
      t->alloc = 0;
1537
      t->num   = 0;
1538
1539
      t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
1540
      if (IS_NULL(t->e)) {
1541
        xfree(t);
1542
        return ONIGERR_MEMORY;
1543
      }
1544
      t->alloc = alloc;
1545
      GlobalCalloutNameTable = t;
1546
      goto clear;
1547
    }
1548
    else if (t->num == t->alloc) {
1549
      int i;
1550
1551
      alloc = t->alloc * 2;
1552
      t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc);
1553
      CHECK_NULL_RETURN_MEMERR(t->e);
1554
      t->alloc = alloc;
1555
1556
    clear:
1557
      for (i = t->num; i < t->alloc; i++) {
1558
        t->e[i].name       = NULL;
1559
        t->e[i].name_len   = 0;
1560
        t->e[i].id         = 0;
1561
      }
1562
    }
1563
    e = &(t->e[t->num]);
1564
    t->num++;
1565
    e->name = onigenc_strdup(enc, name, name_end);
1566
    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1567
#endif
1568
1569
14
    CalloutNameIDCounter++;
1570
14
    e->id = CalloutNameIDCounter;
1571
14
    e->name_len = (int )(name_end - name);
1572
14
  }
1573
1574
14
  *rentry = e;
1575
14
  return e->id;
1576
14
}
1577
1578
static int
1579
is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
1580
646
{
1581
646
  UChar* p;
1582
646
  OnigCodePoint c;
1583
1584
646
  if (name >= name_end) return 0;
1585
1586
591
  p = name;
1587
7.15k
  while (p < name_end) {
1588
6.60k
    c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1589
6.60k
    if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
1590
47
      return 0;
1591
1592
6.56k
    if (p == name) {
1593
578
      if (c >= '0' && c <= '9') return 0;
1594
578
    }
1595
1596
6.55k
    p += ONIGENC_MBC_ENC_LEN(enc, p);
1597
6.55k
  }
1598
1599
542
  return 1;
1600
591
}
1601
1602
static int
1603
is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
1604
27
{
1605
27
  UChar* p;
1606
27
  OnigCodePoint c;
1607
1608
27
  if (name >= name_end) return 0;
1609
1610
27
  p = name;
1611
1.21k
  while (p < name_end) {
1612
1.21k
    c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1613
1.21k
    if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))
1614
24
      return 0;
1615
1616
1.18k
    if (p == name) {
1617
26
      if (c >= '0' && c <= '9') return 0;
1618
26
    }
1619
1620
1.18k
    p += ONIGENC_MBC_ENC_LEN(enc, p);
1621
1.18k
  }
1622
1623
2
  return 1;
1624
27
}
1625
1626
extern int
1627
onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
1628
                         UChar* name, UChar* name_end, int in,
1629
                         OnigCalloutFunc start_func,
1630
                         OnigCalloutFunc end_func,
1631
                         int arg_num, unsigned int arg_types[],
1632
                         int opt_arg_num, OnigValue opt_defaults[])
1633
14
{
1634
14
  int r;
1635
14
  int i;
1636
14
  int j;
1637
14
  int id;
1638
14
  int is_not_single;
1639
14
  CalloutNameEntry* e;
1640
14
  CalloutNameListEntry* fe;
1641
1642
14
  if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
1643
0
    return ONIGERR_INVALID_ARGUMENT;
1644
1645
14
  if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
1646
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1647
1648
14
  if (opt_arg_num < 0 || opt_arg_num > arg_num)
1649
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1650
1651
14
  if (start_func == 0 && end_func == 0)
1652
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1653
1654
14
  if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
1655
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1656
1657
30
  for (i = 0; i < arg_num; i++) {
1658
16
    unsigned int t = arg_types[i];
1659
16
    if (t == ONIG_TYPE_VOID)
1660
0
      return ONIGERR_INVALID_CALLOUT_ARG;
1661
16
    else {
1662
16
      if (i >= arg_num - opt_arg_num) {
1663
8
        if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
1664
8
            t != ONIG_TYPE_TAG)
1665
0
          return ONIGERR_INVALID_CALLOUT_ARG;
1666
8
      }
1667
8
      else {
1668
8
        if (t != ONIG_TYPE_LONG) {
1669
8
          t = t & ~ONIG_TYPE_LONG;
1670
8
          if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
1671
0
            return ONIGERR_INVALID_CALLOUT_ARG;
1672
8
        }
1673
8
      }
1674
16
    }
1675
16
  }
1676
1677
14
  if (! is_allowed_callout_name(enc, name, name_end)) {
1678
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1679
0
  }
1680
1681
14
  is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
1682
14
  id = callout_name_entry(&e, enc, is_not_single, name, name_end);
1683
14
  if (id < 0) return id;
1684
1685
14
  r = ONIG_NORMAL;
1686
14
  if (IS_NULL(GlobalCalloutNameList)) {
1687
2
    r = make_callout_func_list(&GlobalCalloutNameList, 10);
1688
2
    if (r != ONIG_NORMAL) return r;
1689
2
  }
1690
1691
30
  while (id >= GlobalCalloutNameList->n) {
1692
16
    int rid;
1693
16
    r = callout_func_list_add(GlobalCalloutNameList, &rid);
1694
16
    if (r != ONIG_NORMAL) return r;
1695
16
  }
1696
1697
14
  fe = GlobalCalloutNameList->v + id;
1698
14
  fe->type         = callout_type;
1699
14
  fe->in           = in;
1700
14
  fe->start_func   = start_func;
1701
14
  fe->end_func     = end_func;
1702
14
  fe->arg_num      = arg_num;
1703
14
  fe->opt_arg_num  = opt_arg_num;
1704
14
  fe->name         = e->name;
1705
1706
30
  for (i = 0; i < arg_num; i++) {
1707
16
    fe->arg_types[i] = arg_types[i];
1708
16
  }
1709
22
  for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
1710
8
    if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
1711
8
    if (fe->arg_types[i] == ONIG_TYPE_STRING) {
1712
0
      OnigValue* val;
1713
0
      UChar* ds;
1714
1715
0
      val = opt_defaults + j;
1716
0
      ds = onigenc_strdup(enc, val->s.start, val->s.end);
1717
0
      CHECK_NULL_RETURN_MEMERR(ds);
1718
1719
0
      fe->opt_defaults[i].s.start = ds;
1720
0
      fe->opt_defaults[i].s.end   = ds + (val->s.end - val->s.start);
1721
0
    }
1722
8
    else {
1723
8
      fe->opt_defaults[i] = opt_defaults[j];
1724
8
    }
1725
8
  }
1726
1727
14
  r = id;
1728
14
  return r;
1729
14
}
1730
1731
static int
1732
get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
1733
                            UChar* name, UChar* name_end, int* rid)
1734
41
{
1735
41
  int r;
1736
41
  CalloutNameEntry* e;
1737
1738
41
  if (! is_allowed_callout_name(enc, name, name_end)) {
1739
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1740
0
  }
1741
1742
41
  e = callout_name_find(enc, is_not_single, name, name_end);
1743
41
  if (IS_NULL(e)) {
1744
41
    return ONIGERR_UNDEFINED_CALLOUT_NAME;
1745
41
  }
1746
1747
0
  r = ONIG_NORMAL;
1748
0
  *rid = e->id;
1749
1750
0
  return r;
1751
41
}
1752
1753
extern OnigCalloutFunc
1754
onig_get_callout_start_func(regex_t* reg, int callout_num)
1755
0
{
1756
  /* If used for callouts of contents, return 0. */
1757
0
  CalloutListEntry* e;
1758
1759
0
  e = onig_reg_callout_list_at(reg, callout_num);
1760
0
  CHECK_NULL_RETURN(e);
1761
0
  return e->start_func;
1762
0
}
1763
1764
extern const UChar*
1765
onig_get_callout_tag_start(regex_t* reg, int callout_num)
1766
0
{
1767
0
  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1768
0
  CHECK_NULL_RETURN(e);
1769
0
  return e->tag_start;
1770
0
}
1771
1772
extern const UChar*
1773
onig_get_callout_tag_end(regex_t* reg, int callout_num)
1774
0
{
1775
0
  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1776
0
  CHECK_NULL_RETURN(e);
1777
0
  return e->tag_end;
1778
0
}
1779
1780
1781
extern OnigCalloutType
1782
onig_get_callout_type_by_name_id(int name_id)
1783
0
{
1784
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1785
0
    return 0;
1786
1787
0
  return GlobalCalloutNameList->v[name_id].type;
1788
0
}
1789
1790
extern OnigCalloutFunc
1791
onig_get_callout_start_func_by_name_id(int name_id)
1792
0
{
1793
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1794
0
    return 0;
1795
1796
0
  return GlobalCalloutNameList->v[name_id].start_func;
1797
0
}
1798
1799
extern OnigCalloutFunc
1800
onig_get_callout_end_func_by_name_id(int name_id)
1801
0
{
1802
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1803
0
    return 0;
1804
1805
0
  return GlobalCalloutNameList->v[name_id].end_func;
1806
0
}
1807
1808
extern int
1809
onig_get_callout_in_by_name_id(int name_id)
1810
0
{
1811
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1812
0
    return 0;
1813
1814
0
  return GlobalCalloutNameList->v[name_id].in;
1815
0
}
1816
1817
static int
1818
get_callout_arg_num_by_name_id(int name_id)
1819
0
{
1820
0
  return GlobalCalloutNameList->v[name_id].arg_num;
1821
0
}
1822
1823
static int
1824
get_callout_opt_arg_num_by_name_id(int name_id)
1825
0
{
1826
0
  return GlobalCalloutNameList->v[name_id].opt_arg_num;
1827
0
}
1828
1829
static unsigned int
1830
get_callout_arg_type_by_name_id(int name_id, int index)
1831
0
{
1832
0
  return GlobalCalloutNameList->v[name_id].arg_types[index];
1833
0
}
1834
1835
static OnigValue
1836
get_callout_opt_default_by_name_id(int name_id, int index)
1837
0
{
1838
0
  return GlobalCalloutNameList->v[name_id].opt_defaults[index];
1839
0
}
1840
1841
extern UChar*
1842
onig_get_callout_name_by_name_id(int name_id)
1843
0
{
1844
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1845
0
    return 0;
1846
1847
0
  return GlobalCalloutNameList->v[name_id].name;
1848
0
}
1849
1850
extern int
1851
onig_global_callout_names_free(void)
1852
0
{
1853
0
  free_callout_func_list(GlobalCalloutNameList);
1854
0
  GlobalCalloutNameList = 0;
1855
1856
0
  global_callout_name_table_free();
1857
0
  return ONIG_NORMAL;
1858
0
}
1859
1860
1861
typedef st_table   CalloutTagTable;
1862
typedef intptr_t   CalloutTagVal;
1863
1864
0
#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST     (1<<0)
1865
1866
static int
1867
i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
1868
0
{
1869
0
  int num;
1870
0
  RegexExt* ext = (RegexExt* )arg;
1871
1872
0
  num = (int )e - 1;
1873
0
  ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
1874
0
  return ST_CONTINUE;
1875
0
}
1876
1877
static int
1878
setup_ext_callout_list_values(regex_t* reg)
1879
0
{
1880
0
  int i, j;
1881
0
  RegexExt* ext;
1882
1883
0
  ext = reg->extp;
1884
0
  if (IS_NOT_NULL(ext->tag_table)) {
1885
0
    onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
1886
0
                    (st_data_t )ext);
1887
0
  }
1888
1889
0
  for (i = 0; i < ext->callout_num; i++) {
1890
0
    CalloutListEntry* e = ext->callout_list + i;
1891
0
    if (e->of == ONIG_CALLOUT_OF_NAME) {
1892
0
      for (j = 0; j < e->u.arg.num; j++) {
1893
0
        if (e->u.arg.types[j] == ONIG_TYPE_TAG) {
1894
0
          UChar* start;
1895
0
          UChar* end;
1896
0
          int num;
1897
0
          start = e->u.arg.vals[j].s.start;
1898
0
          end   = e->u.arg.vals[j].s.end;
1899
0
          num = onig_get_callout_num_by_tag(reg, start, end);
1900
0
          if (num < 0) return num;
1901
0
          e->u.arg.vals[j].tag = num;
1902
0
        }
1903
0
      }
1904
0
    }
1905
0
  }
1906
1907
0
  return ONIG_NORMAL;
1908
0
}
1909
1910
extern int
1911
onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
1912
0
{
1913
0
  RegexExt* ext = reg->extp;
1914
1915
0
  if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
1916
0
  if (callout_num > ext->callout_num) return 0;
1917
1918
0
  return (ext->callout_list[callout_num].flag &
1919
0
          CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0;
1920
0
}
1921
1922
static int
1923
i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
1924
0
{
1925
0
  xfree(key);
1926
0
  return ST_DELETE;
1927
0
}
1928
1929
static int
1930
callout_tag_table_clear(CalloutTagTable* t)
1931
0
{
1932
0
  if (IS_NOT_NULL(t)) {
1933
0
    onig_st_foreach(t, i_free_callout_tag_entry, 0);
1934
0
  }
1935
0
  return 0;
1936
0
}
1937
1938
extern int
1939
onig_callout_tag_table_free(void* table)
1940
0
{
1941
0
  CalloutTagTable* t = (CalloutTagTable* )table;
1942
1943
0
  if (IS_NOT_NULL(t)) {
1944
0
    int r = callout_tag_table_clear(t);
1945
0
    if (r != 0) return r;
1946
1947
0
    onig_st_free_table(t);
1948
0
  }
1949
1950
0
  return 0;
1951
0
}
1952
1953
extern int
1954
onig_get_callout_num_by_tag(regex_t* reg,
1955
                            const UChar* tag, const UChar* tag_end)
1956
0
{
1957
0
  int r;
1958
0
  RegexExt* ext;
1959
0
  CalloutTagVal e;
1960
1961
0
  ext = reg->extp;
1962
0
  if (IS_NULL(ext) || IS_NULL(ext->tag_table))
1963
0
    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1964
1965
0
  r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
1966
0
                            (HashDataType* )((void* )(&e)));
1967
0
  if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1968
0
  return (int )e;
1969
0
}
1970
1971
static CalloutTagVal
1972
callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
1973
0
{
1974
0
  CalloutTagVal e;
1975
1976
0
  e = -1;
1977
0
  if (IS_NOT_NULL(t)) {
1978
0
    onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
1979
0
  }
1980
0
  return e;
1981
0
}
1982
1983
static int
1984
callout_tag_table_new(CalloutTagTable** rt)
1985
0
{
1986
0
  CalloutTagTable* t;
1987
1988
0
  *rt = 0;
1989
0
  t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
1990
0
  CHECK_NULL_RETURN_MEMERR(t);
1991
1992
0
  *rt = t;
1993
0
  return ONIG_NORMAL;
1994
0
}
1995
1996
static int
1997
callout_tag_entry_raw(ParseEnv* env, CalloutTagTable* t, UChar* name,
1998
                      UChar* name_end, CalloutTagVal entry_val)
1999
0
{
2000
0
  int r;
2001
0
  CalloutTagVal val;
2002
2003
0
  if (name_end - name <= 0)
2004
0
    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
2005
2006
0
  val = callout_tag_find(t, name, name_end);
2007
0
  if (val >= 0) {
2008
0
    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
2009
0
                                   name, name_end);
2010
0
    return ONIGERR_MULTIPLEX_DEFINED_NAME;
2011
0
  }
2012
2013
0
  r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
2014
0
  if (r < 0) return r;
2015
2016
0
  return ONIG_NORMAL;
2017
0
}
2018
2019
static int
2020
ext_ensure_tag_table(regex_t* reg)
2021
0
{
2022
0
  int r;
2023
0
  RegexExt* ext;
2024
0
  CalloutTagTable* t;
2025
2026
0
  ext = onig_get_regex_ext(reg);
2027
0
  CHECK_NULL_RETURN_MEMERR(ext);
2028
2029
0
  if (IS_NULL(ext->tag_table)) {
2030
0
    r = callout_tag_table_new(&t);
2031
0
    if (r != ONIG_NORMAL) return r;
2032
2033
0
    ext->tag_table = t;
2034
0
  }
2035
2036
0
  return ONIG_NORMAL;
2037
0
}
2038
2039
static int
2040
callout_tag_entry(ParseEnv* env, regex_t* reg, UChar* name, UChar* name_end,
2041
                  CalloutTagVal entry_val)
2042
0
{
2043
0
  int r;
2044
0
  RegexExt* ext;
2045
0
  CalloutListEntry* e;
2046
2047
0
  r = ext_ensure_tag_table(reg);
2048
0
  if (r != ONIG_NORMAL) return r;
2049
2050
0
  ext = onig_get_regex_ext(reg);
2051
0
  CHECK_NULL_RETURN_MEMERR(ext);
2052
0
  r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
2053
2054
0
  e = onig_reg_callout_list_at(reg, (int )entry_val);
2055
0
  CHECK_NULL_RETURN_MEMERR(e);
2056
0
  e->tag_start = name;
2057
0
  e->tag_end   = name_end;
2058
2059
0
  return r;
2060
0
}
2061
2062
#endif /* USE_CALLOUT */
2063
2064
2065
3.20k
#define INIT_PARSEENV_MEMENV_ALLOC_SIZE   16
2066
2067
static void
2068
scan_env_clear(ParseEnv* env)
2069
1.37M
{
2070
1.37M
  MEM_STATUS_CLEAR(env->cap_history);
2071
1.37M
  MEM_STATUS_CLEAR(env->backtrack_mem);
2072
1.37M
  MEM_STATUS_CLEAR(env->backrefed_mem);
2073
1.37M
  env->error      = (UChar* )NULL;
2074
1.37M
  env->error_end  = (UChar* )NULL;
2075
1.37M
  env->num_call   = 0;
2076
2077
1.37M
#ifdef USE_CALL
2078
1.37M
  env->unset_addr_list = NULL;
2079
1.37M
#endif
2080
2081
1.37M
  env->num_mem    = 0;
2082
1.37M
  env->num_named  = 0;
2083
1.37M
  env->mem_alloc  = 0;
2084
1.37M
  env->mem_env_dynamic = (MemEnv* )NULL;
2085
2086
1.37M
  xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
2087
2088
1.37M
  env->parse_depth      = 0;
2089
#ifdef ONIG_DEBUG_PARSE
2090
  env->max_parse_depth  = 0;
2091
#endif
2092
1.37M
  env->backref_num      = 0;
2093
1.37M
  env->keep_num         = 0;
2094
1.37M
  env->id_num           = 0;
2095
1.37M
  env->save_alloc_num   = 0;
2096
1.37M
  env->saves            = 0;
2097
1.37M
  env->flags            = 0;
2098
1.37M
}
2099
2100
static int
2101
scan_env_add_mem_entry(ParseEnv* env)
2102
1.70M
{
2103
1.70M
  int i, need, alloc;
2104
1.70M
  MemEnv* p;
2105
2106
1.70M
  need = env->num_mem + 1;
2107
1.70M
  if (need > MaxCaptureNum && MaxCaptureNum != 0)
2108
5
    return ONIGERR_TOO_MANY_CAPTURES;
2109
2110
1.70M
  if (need >= PARSEENV_MEMENV_SIZE) {
2111
1.56M
    if (env->mem_alloc <= need) {
2112
14.3k
      if (IS_NULL(env->mem_env_dynamic)) {
2113
3.20k
        alloc = INIT_PARSEENV_MEMENV_ALLOC_SIZE;
2114
3.20k
        p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
2115
3.20k
        CHECK_NULL_RETURN_MEMERR(p);
2116
3.20k
        xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
2117
3.20k
      }
2118
11.1k
      else {
2119
11.1k
        alloc = env->mem_alloc * 2;
2120
11.1k
        p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc);
2121
11.1k
        CHECK_NULL_RETURN_MEMERR(p);
2122
11.1k
      }
2123
2124
2.15M
      for (i = env->num_mem + 1; i < alloc; i++) {
2125
2.14M
        p[i].mem_node = NULL_NODE;
2126
2.14M
        p[i].empty_repeat_node = NULL_NODE;
2127
2.14M
      }
2128
2129
14.3k
      env->mem_env_dynamic = p;
2130
14.3k
      env->mem_alloc = alloc;
2131
14.3k
    }
2132
1.56M
  }
2133
2134
1.70M
  env->num_mem++;
2135
1.70M
  return env->num_mem;
2136
1.70M
}
2137
2138
static int
2139
scan_env_set_mem_node(ParseEnv* env, int num, Node* node)
2140
427k
{
2141
427k
  if (env->num_mem >= num)
2142
427k
    PARSEENV_MEMENV(env)[num].mem_node = node;
2143
0
  else
2144
0
    return ONIGERR_PARSER_BUG;
2145
427k
  return 0;
2146
427k
}
2147
2148
static void
2149
node_free_body(Node* node)
2150
20.5M
{
2151
20.5M
  if (IS_NULL(node)) return ;
2152
2153
20.5M
  switch (ND_TYPE(node)) {
2154
8.26M
  case ND_STRING:
2155
8.26M
    if (STR_(node)->capacity != 0 &&
2156
8.26M
        IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
2157
593k
      xfree(STR_(node)->s);
2158
593k
    }
2159
8.26M
    break;
2160
2161
2.22M
  case ND_LIST:
2162
2.35M
  case ND_ALT:
2163
2.35M
    onig_node_free(ND_CAR(node));
2164
2.35M
    node = ND_CDR(node);
2165
13.4M
    while (IS_NOT_NULL(node)) {
2166
11.1M
      Node* next = ND_CDR(node);
2167
11.1M
      onig_node_free(ND_CAR(node));
2168
11.1M
      xfree(node);
2169
11.1M
      node = next;
2170
11.1M
    }
2171
2.35M
    break;
2172
2173
2.12M
  case ND_CCLASS:
2174
2.12M
    {
2175
2.12M
      CClassNode* cc = CCLASS_(node);
2176
2177
2.12M
      if (cc->mbuf)
2178
1.21M
        bbuf_free(cc->mbuf);
2179
2.12M
    }
2180
2.12M
    break;
2181
2182
26.1k
  case ND_BACKREF:
2183
26.1k
    if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
2184
0
      xfree(BACKREF_(node)->back_dynamic);
2185
26.1k
    break;
2186
2187
2.51M
  case ND_BAG:
2188
2.51M
    if (ND_BODY(node))
2189
1.22M
      onig_node_free(ND_BODY(node));
2190
2191
2.51M
    {
2192
2.51M
      BagNode* en = BAG_(node);
2193
2.51M
      if (en->type == BAG_IF_ELSE) {
2194
3.12k
        onig_node_free(en->te.Then);
2195
3.12k
        onig_node_free(en->te.Else);
2196
3.12k
      }
2197
2.51M
    }
2198
2.51M
    break;
2199
2200
2.62M
  case ND_QUANT:
2201
2.62M
    if (ND_BODY(node))
2202
2.22M
      onig_node_free(ND_BODY(node));
2203
2.62M
    break;
2204
2205
271k
  case ND_ANCHOR:
2206
271k
    if (ND_BODY(node))
2207
1.97k
      onig_node_free(ND_BODY(node));
2208
271k
    if (IS_NOT_NULL(ANCHOR_(node)->lead_node))
2209
0
      onig_node_free(ANCHOR_(node)->lead_node);
2210
271k
    break;
2211
2212
2.35M
  case ND_CTYPE:
2213
2.37M
  case ND_CALL:
2214
2.39M
  case ND_GIMMICK:
2215
2.39M
    break;
2216
20.5M
  }
2217
20.5M
}
2218
2219
extern void
2220
onig_node_free(Node* node)
2221
23.2M
{
2222
23.2M
  if (IS_NULL(node)) return ;
2223
2224
#ifdef DEBUG_ND_FREE
2225
  fprintf(stderr, "onig_node_free: %p\n", node);
2226
#endif
2227
2228
20.5M
  node_free_body(node);
2229
20.5M
  xfree(node);
2230
20.5M
}
2231
2232
static void
2233
cons_node_free_alone(Node* node)
2234
1.42k
{
2235
1.42k
  ND_CAR(node) = 0;
2236
1.42k
  ND_CDR(node) = 0;
2237
1.42k
  onig_node_free(node);
2238
1.42k
}
2239
2240
static Node*
2241
node_new(void)
2242
31.7M
{
2243
31.7M
  Node* node;
2244
2245
31.7M
  node = (Node* )xmalloc(sizeof(Node));
2246
31.7M
  CHECK_NULL_RETURN(node);
2247
31.7M
  xmemset(node, 0, sizeof(*node));
2248
2249
#ifdef DEBUG_ND_FREE
2250
  fprintf(stderr, "node_new: %p\n", node);
2251
#endif
2252
31.7M
  return node;
2253
31.7M
}
2254
2255
extern int
2256
onig_node_copy(Node** rcopy, Node* from)
2257
0
{
2258
0
  int r;
2259
0
  Node* copy;
2260
2261
0
  *rcopy = NULL_NODE;
2262
2263
0
  switch (ND_TYPE(from)) {
2264
0
  case ND_LIST:
2265
0
  case ND_ALT:
2266
0
  case ND_ANCHOR:
2267
    /* These node's link to other nodes are processed by caller. */
2268
0
    break;
2269
0
  case ND_STRING:
2270
0
  case ND_CCLASS:
2271
0
  case ND_CTYPE:
2272
    /* Fixed contents after copy. */
2273
0
    break;
2274
0
  default:
2275
    /* Not supported yet. */
2276
0
    return ONIGERR_TYPE_BUG;
2277
0
    break;
2278
0
  }
2279
2280
0
  copy = node_new();
2281
0
  CHECK_NULL_RETURN_MEMERR(copy);
2282
0
  xmemcpy(copy, from, sizeof(*copy));
2283
2284
0
  switch (ND_TYPE(copy)) {
2285
0
  case ND_STRING:
2286
0
    r = onig_node_str_set(copy, STR_(from)->s, STR_(from)->end, FALSE);
2287
0
    if (r != 0) {
2288
0
    err:
2289
0
      onig_node_free(copy);
2290
0
      return r;
2291
0
    }
2292
0
    break;
2293
2294
0
  case ND_CCLASS:
2295
0
    {
2296
0
      CClassNode *fcc, *tcc;
2297
2298
0
      fcc = CCLASS_(from);
2299
0
      tcc = CCLASS_(copy);
2300
0
      if (IS_NOT_NULL(fcc->mbuf)) {
2301
0
        r = bbuf_clone(&(tcc->mbuf), fcc->mbuf);
2302
0
        if (r != 0) goto err;
2303
0
      }
2304
0
    }
2305
0
    break;
2306
2307
0
  default:
2308
0
    break;
2309
0
  }
2310
2311
0
  *rcopy = copy;
2312
0
  return ONIG_NORMAL;
2313
0
}
2314
2315
2316
static void
2317
initialize_cclass(CClassNode* cc)
2318
2.12M
{
2319
2.12M
  BITSET_CLEAR(cc->bs);
2320
2.12M
  cc->flags = 0;
2321
2.12M
  cc->mbuf  = NULL;
2322
2.12M
}
2323
2324
static Node*
2325
node_new_cclass(void)
2326
2.12M
{
2327
2.12M
  Node* node = node_new();
2328
2.12M
  CHECK_NULL_RETURN(node);
2329
2330
2.12M
  ND_SET_TYPE(node, ND_CCLASS);
2331
2.12M
  initialize_cclass(CCLASS_(node));
2332
2.12M
  return node;
2333
2.12M
}
2334
2335
static Node*
2336
node_new_ctype(int type, int not, OnigOptionType options)
2337
2.35M
{
2338
2.35M
  Node* node = node_new();
2339
2.35M
  CHECK_NULL_RETURN(node);
2340
2341
2.35M
  ND_SET_TYPE(node, ND_CTYPE);
2342
2.35M
  CTYPE_(node)->ctype   = type;
2343
2.35M
  CTYPE_(node)->not     = not;
2344
2.35M
  CTYPE_(node)->ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(type, options);
2345
2.35M
  return node;
2346
2.35M
}
2347
2348
static Node*
2349
node_new_anychar(OnigOptionType options)
2350
2.35M
{
2351
2.35M
  Node* node;
2352
2353
2.35M
  node = node_new_ctype(CTYPE_ANYCHAR, FALSE, options);
2354
2.35M
  CHECK_NULL_RETURN(node);
2355
2356
2.35M
  if (OPTON_MULTILINE(options))
2357
11.3k
    ND_STATUS_ADD(node, MULTILINE);
2358
2.35M
  return node;
2359
2.35M
}
2360
2361
static int
2362
node_new_no_newline(Node** node, ParseEnv* env)
2363
10
{
2364
10
  Node* n;
2365
2366
10
  n = node_new_anychar(ONIG_OPTION_NONE);
2367
10
  CHECK_NULL_RETURN_MEMERR(n);
2368
10
  *node = n;
2369
10
  return 0;
2370
10
}
2371
2372
static int
2373
node_new_true_anychar(Node** node)
2374
10.2k
{
2375
10.2k
  Node* n;
2376
2377
10.2k
  n = node_new_anychar(ONIG_OPTION_MULTILINE);
2378
10.2k
  CHECK_NULL_RETURN_MEMERR(n);
2379
10.2k
  *node = n;
2380
10.2k
  return 0;
2381
10.2k
}
2382
2383
/*
 * Create a LIST cell whose element is `left` and whose tail is `right`.
 * Returns NULL on allocation failure; the arguments are not freed.
 */
static Node*
node_new_list(Node* left, Node* right)
{
  Node* cell;

  cell = node_new();
  CHECK_NULL_RETURN(cell);

  ND_SET_TYPE(cell, ND_LIST);
  ND_CDR(cell) = right;
  ND_CAR(cell) = left;
  return cell;
}
2394
2395
/* Exported wrapper around node_new_list(). */
extern Node*
onig_node_new_list(Node* left, Node* right)
{
  Node* cell = node_new_list(left, right);

  return cell;
}
2400
2401
/*
 * Create an ALT cell whose element is `left` and whose tail is `right`.
 * Returns NULL on allocation failure; the arguments are not freed.
 */
extern Node*
onig_node_new_alt(Node* left, Node* right)
{
  Node* cell;

  cell = node_new();
  CHECK_NULL_RETURN(cell);

  ND_SET_TYPE(cell, ND_ALT);
  ND_CDR(cell) = right;
  ND_CAR(cell) = left;
  return cell;
}
2412
2413
static Node*
2414
make_list_or_alt(NodeType type, int n, Node* ns[])
2415
1.52M
{
2416
1.52M
  Node* r;
2417
2418
1.52M
  if (n <= 0) return NULL_NODE;
2419
2420
1.52M
  if (n == 1) {
2421
741k
    r = node_new();
2422
741k
    CHECK_NULL_RETURN(r);
2423
741k
    ND_SET_TYPE(r, type);
2424
741k
    ND_CAR(r) = ns[0];
2425
741k
    ND_CDR(r) = NULL_NODE;
2426
741k
  }
2427
784k
  else {
2428
784k
    Node* right;
2429
2430
784k
    r = node_new();
2431
784k
    CHECK_NULL_RETURN(r);
2432
2433
784k
    right = make_list_or_alt(type, n - 1, ns + 1);
2434
784k
    if (IS_NULL(right)) {
2435
0
      onig_node_free(r);
2436
0
      return NULL_NODE;
2437
0
    }
2438
2439
784k
    ND_SET_TYPE(r, type);
2440
784k
    ND_CAR(r) = ns[0];
2441
784k
    ND_CDR(r) = right;
2442
784k
  }
2443
2444
1.52M
  return r;
2445
1.52M
}
2446
2447
/* Chain `n` nodes into a LIST; see make_list_or_alt() for the contract. */
static Node*
make_list(int n, Node* ns[])
{
  Node* head = make_list_or_alt(ND_LIST, n, ns);

  return head;
}
2452
2453
/* Chain `n` nodes into an ALT; see make_list_or_alt() for the contract. */
static Node*
make_alt(int n, Node* ns[])
{
  Node* head = make_list_or_alt(ND_ALT, n, ns);

  return head;
}
2458
2459
static Node*
2460
node_new_anchor(int type)
2461
271k
{
2462
271k
  Node* node;
2463
2464
271k
  node = node_new();
2465
271k
  CHECK_NULL_RETURN(node);
2466
2467
271k
  ND_SET_TYPE(node, ND_ANCHOR);
2468
271k
  ANCHOR_(node)->type       = type;
2469
271k
  ANCHOR_(node)->char_min_len = 0;
2470
271k
  ANCHOR_(node)->char_max_len = INFINITE_LEN;
2471
271k
  ANCHOR_(node)->ascii_mode = 0;
2472
271k
  ANCHOR_(node)->lead_node  = NULL_NODE;
2473
271k
  return node;
2474
271k
}
2475
2476
static Node*
2477
node_new_anchor_with_options(int type, OnigOptionType options)
2478
269k
{
2479
269k
  int ascii_mode;
2480
269k
  Node* node;
2481
2482
269k
  node = node_new_anchor(type);
2483
269k
  CHECK_NULL_RETURN(node);
2484
2485
269k
  ascii_mode = OPTON_WORD_ASCII(options) && IS_WORD_ANCHOR_TYPE(type) ? 1 : 0;
2486
269k
  ANCHOR_(node)->ascii_mode = ascii_mode;
2487
2488
269k
  if (type == ANCR_TEXT_SEGMENT_BOUNDARY ||
2489
269k
      type == ANCR_NO_TEXT_SEGMENT_BOUNDARY) {
2490
8.78k
    if (OPTON_TEXT_SEGMENT_WORD(options))
2491
0
      ND_STATUS_ADD(node, TEXT_SEGMENT_WORD);
2492
8.78k
  }
2493
2494
269k
  return node;
2495
269k
}
2496
2497
static Node*
2498
node_new_backref(int back_num, int* backrefs, int by_name,
2499
#ifdef USE_BACKREF_WITH_LEVEL
2500
                 int exist_level, int nest_level,
2501
#endif
2502
                 ParseEnv* env)
2503
26.1k
{
2504
26.1k
  int i;
2505
26.1k
  Node* node;
2506
2507
26.1k
  node = node_new();
2508
26.1k
  CHECK_NULL_RETURN(node);
2509
2510
26.1k
  ND_SET_TYPE(node, ND_BACKREF);
2511
26.1k
  BACKREF_(node)->back_num = back_num;
2512
26.1k
  BACKREF_(node)->back_dynamic = (int* )NULL;
2513
26.1k
  if (by_name != 0)
2514
0
    ND_STATUS_ADD(node, BY_NAME);
2515
2516
26.1k
  if (OPTON_IGNORECASE(env->options))
2517
4.17k
    ND_STATUS_ADD(node, IGNORECASE);
2518
2519
26.1k
#ifdef USE_BACKREF_WITH_LEVEL
2520
26.1k
  if (exist_level != 0) {
2521
26
    ND_STATUS_ADD(node, NEST_LEVEL);
2522
26
    BACKREF_(node)->nest_level  = nest_level;
2523
26
  }
2524
26.1k
#endif
2525
2526
49.3k
  for (i = 0; i < back_num; i++) {
2527
26.1k
    if (backrefs[i] <= env->num_mem &&
2528
26.1k
        IS_NULL(PARSEENV_MEMENV(env)[backrefs[i]].mem_node)) {
2529
2.92k
      ND_STATUS_ADD(node, RECURSION);   /* /...(\1).../ */
2530
2.92k
      break;
2531
2.92k
    }
2532
26.1k
  }
2533
2534
26.1k
  if (back_num <= ND_BACKREFS_SIZE) {
2535
52.2k
    for (i = 0; i < back_num; i++)
2536
26.1k
      BACKREF_(node)->back_static[i] = backrefs[i];
2537
26.1k
  }
2538
0
  else {
2539
0
    int* p = (int* )xmalloc(sizeof(int) * back_num);
2540
0
    if (IS_NULL(p)) {
2541
0
      onig_node_free(node);
2542
0
      return NULL;
2543
0
    }
2544
0
    BACKREF_(node)->back_dynamic = p;
2545
0
    for (i = 0; i < back_num; i++)
2546
0
      p[i] = backrefs[i];
2547
0
  }
2548
2549
26.1k
  env->backref_num++;
2550
26.1k
  return node;
2551
26.1k
}
2552
2553
static Node*
2554
node_new_backref_checker(int back_num, int* backrefs, int by_name,
2555
#ifdef USE_BACKREF_WITH_LEVEL
2556
                         int exist_level, int nest_level,
2557
#endif
2558
                         ParseEnv* env)
2559
3.17k
{
2560
3.17k
  Node* node;
2561
2562
3.17k
  node = node_new_backref(back_num, backrefs, by_name,
2563
3.17k
#ifdef USE_BACKREF_WITH_LEVEL
2564
3.17k
                          exist_level, nest_level,
2565
3.17k
#endif
2566
3.17k
                          env);
2567
3.17k
  CHECK_NULL_RETURN(node);
2568
2569
3.17k
  ND_STATUS_ADD(node, CHECKER);
2570
3.17k
  return node;
2571
3.17k
}
2572
2573
#ifdef USE_CALL
2574
static Node*
2575
node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
2576
19.7k
{
2577
19.7k
  Node* node = node_new();
2578
19.7k
  CHECK_NULL_RETURN(node);
2579
2580
19.7k
  ND_SET_TYPE(node, ND_CALL);
2581
19.7k
  CALL_(node)->by_number   = by_number;
2582
19.7k
  CALL_(node)->name        = name;
2583
19.7k
  CALL_(node)->name_end    = name_end;
2584
19.7k
  CALL_(node)->called_gnum = gnum;
2585
19.7k
  CALL_(node)->entry_count = 1;
2586
19.7k
  return node;
2587
19.7k
}
2588
#endif
2589
2590
static Node*
2591
node_new_quantifier(int lower, int upper, int by_number)
2592
2.62M
{
2593
2.62M
  Node* node = node_new();
2594
2.62M
  CHECK_NULL_RETURN(node);
2595
2596
2.62M
  ND_SET_TYPE(node, ND_QUANT);
2597
2.62M
  QUANT_(node)->lower            = lower;
2598
2.62M
  QUANT_(node)->upper            = upper;
2599
2.62M
  QUANT_(node)->greedy           = 1;
2600
2.62M
  QUANT_(node)->emptiness        = BODY_IS_NOT_EMPTY;
2601
2.62M
  QUANT_(node)->head_exact       = NULL_NODE;
2602
2.62M
  QUANT_(node)->next_head_exact  = NULL_NODE;
2603
2.62M
  QUANT_(node)->include_referred = 0;
2604
2.62M
  QUANT_(node)->empty_status_mem = 0;
2605
2.62M
  if (by_number != 0)
2606
22.9k
    ND_STATUS_ADD(node, BY_NUMBER);
2607
2608
2.62M
  return node;
2609
2.62M
}
2610
2611
static Node*
2612
node_new_bag(enum BagType type)
2613
2.51M
{
2614
2.51M
  Node* node = node_new();
2615
2.51M
  CHECK_NULL_RETURN(node);
2616
2617
2.51M
  ND_SET_TYPE(node, ND_BAG);
2618
2.51M
  BAG_(node)->type = type;
2619
2620
2.51M
  switch (type) {
2621
1.70M
  case BAG_MEMORY:
2622
1.70M
    BAG_(node)->m.regnum       =  0;
2623
1.70M
    BAG_(node)->m.called_addr  = -1;
2624
1.70M
    BAG_(node)->m.entry_count  =  1;
2625
1.70M
    BAG_(node)->m.called_state =  0;
2626
1.70M
    break;
2627
2628
11.7k
  case BAG_OPTION:
2629
11.7k
    BAG_(node)->o.options =  0;
2630
11.7k
    break;
2631
2632
797k
  case BAG_STOP_BACKTRACK:
2633
797k
    break;
2634
2635
3.12k
  case BAG_IF_ELSE:
2636
3.12k
    BAG_(node)->te.Then = 0;
2637
3.12k
    BAG_(node)->te.Else = 0;
2638
3.12k
    break;
2639
2.51M
  }
2640
2641
2.51M
  BAG_(node)->opt_count = 0;
2642
2.51M
  return node;
2643
2.51M
}
2644
2645
extern Node*
2646
onig_node_new_bag(enum BagType type)
2647
192k
{
2648
192k
  return node_new_bag(type);
2649
192k
}
2650
2651
/*
 * Create an if-else bag: `cond` becomes the body, `Then` and `Else` the two
 * branches.  Returns NULL on allocation failure; the arguments are not
 * freed in that case.
 */
static Node*
node_new_bag_if_else(Node* cond, Node* Then, Node* Else)
{
  Node* bag = node_new_bag(BAG_IF_ELSE);

  CHECK_NULL_RETURN(bag);

  BAG_(bag)->te.Else = Else;
  BAG_(bag)->te.Then = Then;
  ND_BODY(bag) = cond;
  return bag;
}
2663
2664
static Node*
2665
node_new_memory(int is_named)
2666
1.70M
{
2667
1.70M
  Node* node = node_new_bag(BAG_MEMORY);
2668
1.70M
  CHECK_NULL_RETURN(node);
2669
1.70M
  if (is_named != 0)
2670
2.78k
    ND_STATUS_ADD(node, NAMED_GROUP);
2671
2672
1.70M
  return node;
2673
1.70M
}
2674
2675
static Node*
2676
node_new_option(OnigOptionType option)
2677
11.7k
{
2678
11.7k
  Node* node = node_new_bag(BAG_OPTION);
2679
11.7k
  CHECK_NULL_RETURN(node);
2680
11.7k
  BAG_(node)->o.options = option;
2681
11.7k
  return node;
2682
11.7k
}
2683
2684
/*
 * Wrap `content` in a single-element LIST cell.  Returns NULL on allocation
 * failure; `content` is not freed in that case.
 */
static Node*
node_new_group(Node* content)
{
  Node* group = node_new();

  CHECK_NULL_RETURN(group);

  ND_SET_TYPE(group, ND_LIST);
  ND_CDR(group) = NULL_NODE;
  ND_CAR(group) = content;

  return group;
}
2697
2698
/*
 * Detach and return the element of `group`, then free the group node.
 * The element is unlinked first so it survives the free.
 */
static Node*
node_drop_group(Node* group)
{
  Node* inner = ND_CAR(group);

  ND_CAR(group) = NULL_NODE;
  onig_node_free(group);
  return inner;
}
2708
2709
static int
2710
node_set_fail(Node* node)
2711
4.96k
{
2712
4.96k
  ND_SET_TYPE(node, ND_GIMMICK);
2713
4.96k
  GIMMICK_(node)->type = GIMMICK_FAIL;
2714
4.96k
  return ONIG_NORMAL;
2715
4.96k
}
2716
2717
static int
2718
node_new_fail(Node** node, ParseEnv* env)
2719
4.96k
{
2720
4.96k
  *node = node_new();
2721
4.96k
  CHECK_NULL_RETURN_MEMERR(*node);
2722
2723
4.96k
  return node_set_fail(*node);
2724
4.96k
}
2725
2726
extern int
2727
onig_node_reset_fail(Node* node)
2728
0
{
2729
0
  node_free_body(node);
2730
0
  return node_set_fail(node);
2731
0
}
2732
2733
static int
2734
node_new_save_gimmick(Node** node, enum SaveType save_type, ParseEnv* env)
2735
5.23k
{
2736
5.23k
  int id;
2737
2738
5.23k
  ID_ENTRY(env, id);
2739
2740
5.23k
  *node = node_new();
2741
5.23k
  CHECK_NULL_RETURN_MEMERR(*node);
2742
2743
5.23k
  ND_SET_TYPE(*node, ND_GIMMICK);
2744
5.23k
  GIMMICK_(*node)->id   = id;
2745
5.23k
  GIMMICK_(*node)->type = GIMMICK_SAVE;
2746
5.23k
  GIMMICK_(*node)->detail_type = (int )save_type;
2747
2748
5.23k
  return ONIG_NORMAL;
2749
5.23k
}
2750
2751
static int
2752
node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
2753
                            int id, ParseEnv* env)
2754
7.45k
{
2755
7.45k
  *node = node_new();
2756
7.45k
  CHECK_NULL_RETURN_MEMERR(*node);
2757
2758
7.45k
  ND_SET_TYPE(*node, ND_GIMMICK);
2759
7.45k
  GIMMICK_(*node)->id   = id;
2760
7.45k
  GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;
2761
7.45k
  GIMMICK_(*node)->detail_type = (int )update_var_type;
2762
2763
7.45k
  return ONIG_NORMAL;
2764
7.45k
}
2765
2766
static int
2767
node_new_keep(Node** node, ParseEnv* env)
2768
223
{
2769
223
  int r;
2770
2771
223
  r = node_new_save_gimmick(node, SAVE_KEEP, env);
2772
223
  if (r != 0) return r;
2773
2774
223
  env->keep_num++;
2775
223
  return ONIG_NORMAL;
2776
223
}
2777
2778
#ifdef USE_CALLOUT
2779
2780
extern void
2781
onig_free_reg_callout_list(int n, CalloutListEntry* list)
2782
0
{
2783
0
  int i;
2784
0
  int j;
2785
2786
0
  if (IS_NULL(list)) return ;
2787
2788
0
  for (i = 0; i < n; i++) {
2789
0
    if (list[i].of == ONIG_CALLOUT_OF_NAME) {
2790
0
      for (j = 0; j < list[i].u.arg.passed_num; j++) {
2791
0
        if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {
2792
0
          if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))
2793
0
            xfree(list[i].u.arg.vals[j].s.start);
2794
0
        }
2795
0
      }
2796
0
    }
2797
0
    else { /* ONIG_CALLOUT_OF_CONTENTS */
2798
0
      if (IS_NOT_NULL(list[i].u.content.start)) {
2799
0
        xfree((void* )list[i].u.content.start);
2800
0
      }
2801
0
    }
2802
0
  }
2803
2804
0
  xfree(list);
2805
0
}
2806
2807
extern CalloutListEntry*
2808
onig_reg_callout_list_at(regex_t* reg, int num)
2809
0
{
2810
0
  RegexExt* ext = reg->extp;
2811
0
  CHECK_NULL_RETURN(ext);
2812
2813
0
  if (num <= 0 || num > ext->callout_num)
2814
0
    return 0;
2815
2816
0
  num--;
2817
0
  return ext->callout_list + num;
2818
0
}
2819
2820
static int
2821
reg_callout_list_entry(ParseEnv* env, int* rnum)
2822
0
{
2823
0
#define INIT_CALLOUT_LIST_NUM  3
2824
2825
0
  int num;
2826
0
  CalloutListEntry* list;
2827
0
  CalloutListEntry* e;
2828
0
  RegexExt* ext;
2829
2830
0
  ext = onig_get_regex_ext(env->reg);
2831
0
  CHECK_NULL_RETURN_MEMERR(ext);
2832
2833
0
  if (IS_NULL(ext->callout_list)) {
2834
0
    list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);
2835
0
    CHECK_NULL_RETURN_MEMERR(list);
2836
2837
0
    ext->callout_list = list;
2838
0
    ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
2839
0
    ext->callout_num = 0;
2840
0
  }
2841
2842
0
  num = ext->callout_num + 1;
2843
0
  if (num > ext->callout_list_alloc) {
2844
0
    int alloc = ext->callout_list_alloc * 2;
2845
0
    list = (CalloutListEntry* )xrealloc(ext->callout_list,
2846
0
                                        sizeof(CalloutListEntry) * alloc);
2847
0
    CHECK_NULL_RETURN_MEMERR(list);
2848
2849
0
    ext->callout_list       = list;
2850
0
    ext->callout_list_alloc = alloc;
2851
0
  }
2852
2853
0
  e = ext->callout_list + (num - 1);
2854
2855
0
  e->flag             = 0;
2856
0
  e->of               = 0;
2857
0
  e->in               = ONIG_CALLOUT_OF_CONTENTS;
2858
0
  e->type             = 0;
2859
0
  e->tag_start        = 0;
2860
0
  e->tag_end          = 0;
2861
0
  e->start_func       = 0;
2862
0
  e->end_func         = 0;
2863
0
  e->u.arg.num        = 0;
2864
0
  e->u.arg.passed_num = 0;
2865
2866
0
  ext->callout_num = num;
2867
0
  *rnum = num;
2868
0
  return ONIG_NORMAL;
2869
0
}
2870
2871
static int
2872
node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
2873
                 ParseEnv* env)
2874
0
{
2875
0
  *node = node_new();
2876
0
  CHECK_NULL_RETURN_MEMERR(*node);
2877
2878
0
  ND_SET_TYPE(*node, ND_GIMMICK);
2879
0
  GIMMICK_(*node)->id          = id;
2880
0
  GIMMICK_(*node)->num         = num;
2881
0
  GIMMICK_(*node)->type        = GIMMICK_CALLOUT;
2882
0
  GIMMICK_(*node)->detail_type = (int )callout_of;
2883
2884
0
  return ONIG_NORMAL;
2885
0
}
2886
#endif
2887
2888
static int
2889
make_text_segment(Node** node, ParseEnv* env)
2890
2.74k
{
2891
2.74k
  int r;
2892
2.74k
  int i;
2893
2.74k
  Node* x;
2894
2.74k
  Node* ns[2];
2895
2896
  /* \X == (?>\O(?:\Y\O)*) */
2897
2898
2.74k
  ns[1] = NULL_NODE;
2899
2900
2.74k
  r = ONIGERR_MEMORY;
2901
2.74k
  ns[0] = node_new_anchor_with_options(ANCR_NO_TEXT_SEGMENT_BOUNDARY, env->options);
2902
2.74k
  if (IS_NULL(ns[0])) goto err;
2903
2904
2.74k
  r = node_new_true_anychar(&ns[1]);
2905
2.74k
  if (r != 0) goto err1;
2906
2907
2.74k
  x = make_list(2, ns);
2908
2.74k
  if (IS_NULL(x)) goto err;
2909
2.74k
  ns[0] = x;
2910
2.74k
  ns[1] = NULL_NODE;
2911
2912
2.74k
  x = node_new_quantifier(0, INFINITE_REPEAT, TRUE);
2913
2.74k
  if (IS_NULL(x)) goto err;
2914
2915
2.74k
  ND_BODY(x) = ns[0];
2916
2.74k
  ns[0] = NULL_NODE;
2917
2.74k
  ns[1] = x;
2918
2919
2.74k
  r = node_new_true_anychar(&ns[0]);
2920
2.74k
  if (r != 0) goto err1;
2921
2922
2.74k
  x = make_list(2, ns);
2923
2.74k
  if (IS_NULL(x)) goto err;
2924
2925
2.74k
  ns[0] = x;
2926
2.74k
  ns[1] = NULL_NODE;
2927
2928
2.74k
  x = node_new_bag(BAG_STOP_BACKTRACK);
2929
2.74k
  if (IS_NULL(x)) goto err;
2930
2931
2.74k
  ND_BODY(x) = ns[0];
2932
2933
2.74k
  *node = x;
2934
2.74k
  return ONIG_NORMAL;
2935
2936
0
 err:
2937
0
  r = ONIGERR_MEMORY;
2938
0
 err1:
2939
0
  for (i = 0; i < 2; i++) onig_node_free(ns[i]);
2940
0
  return r;
2941
0
}
2942
2943
static int
2944
make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
2945
                   Node* step_one, int lower, int upper, int possessive,
2946
                   int is_range_cutter, ParseEnv* env)
2947
2.48k
{
2948
2.48k
  int r;
2949
2.48k
  int i;
2950
2.48k
  int id;
2951
2.48k
  Node* x;
2952
2.48k
  Node* ns[4];
2953
2954
12.4k
  for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
2955
2956
2.48k
  ns[1] = absent;
2957
2.48k
  ns[3] = step_one; /* for err */
2958
2.48k
  r = node_new_save_gimmick(&ns[0], SAVE_S, env);
2959
2.48k
  if (r != 0) goto err;
2960
2961
2.48k
  id = GIMMICK_(ns[0])->id;
2962
2.48k
  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
2963
2.48k
                                  id, env);
2964
2.48k
  if (r != 0) goto err;
2965
2966
2.48k
  if (is_range_cutter != 0)
2967
39
    ND_STATUS_ADD(ns[2], ABSENT_WITH_SIDE_EFFECTS);
2968
2969
2.48k
  r = node_new_fail(&ns[3], env);
2970
2.48k
  if (r != 0) goto err;
2971
2972
2.48k
  x = make_list(4, ns);
2973
2.48k
  if (IS_NULL(x)) goto err0;
2974
2975
2.48k
  ns[0] = x;
2976
2.48k
  ns[1] = step_one;
2977
2.48k
  ns[2] = ns[3] = NULL_NODE;
2978
2979
2.48k
  x = make_alt(2, ns);
2980
2.48k
  if (IS_NULL(x)) goto err0;
2981
2982
2.48k
  ns[0] = x;
2983
2984
2.48k
  x = node_new_quantifier(lower, upper, FALSE);
2985
2.48k
  if (IS_NULL(x)) goto err0;
2986
2987
2.48k
  ND_BODY(x) = ns[0];
2988
2.48k
  ns[0] = x;
2989
2990
2.48k
  if (possessive != 0) {
2991
43
    x = node_new_bag(BAG_STOP_BACKTRACK);
2992
43
    if (IS_NULL(x)) goto err0;
2993
2994
43
    ND_BODY(x) = ns[0];
2995
43
    ns[0] = x;
2996
43
  }
2997
2998
2.48k
  r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2999
2.48k
                                  pre_save_right_id, env);
3000
2.48k
  if (r != 0) goto err;
3001
3002
2.48k
  r = node_new_fail(&ns[2], env);
3003
2.48k
  if (r != 0) goto err;
3004
3005
2.48k
  x = make_list(2, ns + 1);
3006
2.48k
  if (IS_NULL(x)) goto err0;
3007
3008
2.48k
  ns[1] = x; ns[2] = NULL_NODE;
3009
3010
2.48k
  x = make_alt(2, ns);
3011
2.48k
  if (IS_NULL(x)) goto err0;
3012
3013
2.48k
  if (is_range_cutter != FALSE)
3014
39
    ND_STATUS_ADD(x, SUPER);
3015
3016
2.48k
  *node = x;
3017
2.48k
  return ONIG_NORMAL;
3018
3019
0
 err0:
3020
0
  r = ONIGERR_MEMORY;
3021
0
 err:
3022
0
  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3023
0
  return r;
3024
0
}
3025
3026
static int
3027
make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
3028
                 ParseEnv* env)
3029
4
{
3030
4
  int r;
3031
4
  int id;
3032
4
  Node* save;
3033
4
  Node* x;
3034
4
  Node* ns[2];
3035
3036
4
  *node1 = *node2 = NULL_NODE;
3037
4
  save = ns[0] = ns[1] = NULL_NODE;
3038
3039
4
  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
3040
4
  if (r != 0) goto err;
3041
3042
4
  id = GIMMICK_(save)->id;
3043
4
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3044
4
                                  id, env);
3045
4
  if (r != 0) goto err;
3046
3047
4
  r = node_new_fail(&ns[1], env);
3048
4
  if (r != 0) goto err;
3049
3050
4
  x = make_list(2, ns);
3051
4
  if (IS_NULL(x)) goto err0;
3052
3053
4
  ns[0] = NULL_NODE; ns[1] = x;
3054
3055
4
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3056
4
                                  pre_save_right_id, env);
3057
4
  if (r != 0) goto err;
3058
3059
4
  x = make_alt(2, ns);
3060
4
  if (IS_NULL(x)) goto err0;
3061
3062
4
  *node1 = save;
3063
4
  *node2 = x;
3064
4
  return ONIG_NORMAL;
3065
3066
0
 err0:
3067
0
  r = ONIGERR_MEMORY;
3068
0
 err:
3069
0
  onig_node_free(save);
3070
0
  onig_node_free(ns[0]);
3071
0
  onig_node_free(ns[1]);
3072
0
  return r;
3073
0
}
3074
3075
static int
3076
make_range_clear(Node** node, ParseEnv* env)
3077
0
{
3078
0
  int r;
3079
0
  int id;
3080
0
  Node* save;
3081
0
  Node* x;
3082
0
  Node* ns[2];
3083
3084
0
  *node = NULL_NODE;
3085
0
  save = ns[0] = ns[1] = NULL_NODE;
3086
3087
0
  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
3088
0
  if (r != 0) goto err;
3089
3090
0
  id = GIMMICK_(save)->id;
3091
0
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3092
0
                                  id, env);
3093
0
  if (r != 0) goto err;
3094
3095
0
  r = node_new_fail(&ns[1], env);
3096
0
  if (r != 0) goto err;
3097
3098
0
  x = make_list(2, ns);
3099
0
  if (IS_NULL(x)) goto err0;
3100
3101
0
  ns[0] = NULL_NODE; ns[1] = x;
3102
3103
0
#define ID_NOT_USED_DONT_CARE_ME   0
3104
3105
0
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT,
3106
0
                                  ID_NOT_USED_DONT_CARE_ME, env);
3107
0
  if (r != 0) goto err;
3108
0
  ND_STATUS_ADD(ns[0], ABSENT_WITH_SIDE_EFFECTS);
3109
3110
0
  x = make_alt(2, ns);
3111
0
  if (IS_NULL(x)) goto err0;
3112
3113
0
  ND_STATUS_ADD(x, SUPER);
3114
3115
0
  ns[0] = save;
3116
0
  ns[1] = x;
3117
0
  save = NULL_NODE;
3118
0
  x = make_list(2, ns);
3119
0
  if (IS_NULL(x)) goto err0;
3120
3121
0
  *node = x;
3122
0
  return ONIG_NORMAL;
3123
3124
0
 err0:
3125
0
  r = ONIGERR_MEMORY;
3126
0
 err:
3127
0
  onig_node_free(save);
3128
0
  onig_node_free(ns[0]);
3129
0
  onig_node_free(ns[1]);
3130
0
  return r;
3131
0
}
3132
3133
static int
3134
is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
3135
                          int* is_possessive, ParseEnv* env)
3136
4
{
3137
4
  Node* quant;
3138
4
  Node* body;
3139
3140
4
  *rquant = *rbody = 0;
3141
4
  *is_possessive = 0;
3142
3143
4
  if (ND_TYPE(node) == ND_QUANT) {
3144
0
    quant = node;
3145
0
  }
3146
4
  else {
3147
4
    if (ND_TYPE(node) == ND_BAG) {
3148
0
      BagNode* en = BAG_(node);
3149
0
      if (en->type == BAG_STOP_BACKTRACK) {
3150
0
        *is_possessive = 1;
3151
0
        quant = ND_BAG_BODY(en);
3152
0
        if (ND_TYPE(quant) != ND_QUANT)
3153
0
          return 0;
3154
0
      }
3155
0
      else
3156
0
        return 0;
3157
0
    }
3158
4
    else
3159
4
      return 0;
3160
4
  }
3161
3162
0
  if (QUANT_(quant)->greedy == 0)
3163
0
    return 0;
3164
3165
0
  body = ND_BODY(quant);
3166
0
  switch (ND_TYPE(body)) {
3167
0
  case ND_STRING:
3168
0
    {
3169
0
      int len;
3170
0
      StrNode* sn = STR_(body);
3171
0
      UChar *s = sn->s;
3172
3173
0
      len = 0;
3174
0
      while (s < sn->end) {
3175
0
        s += enclen(env->enc, s);
3176
0
        len++;
3177
0
      }
3178
0
      if (len != 1)
3179
0
        return 0;
3180
0
    }
3181
3182
0
  case ND_CCLASS:
3183
0
    break;
3184
3185
0
  default:
3186
0
    return 0;
3187
0
    break;
3188
0
  }
3189
3190
0
  if (node != quant) {
3191
0
    ND_BODY(node) = 0;
3192
0
    onig_node_free(node);
3193
0
  }
3194
0
  ND_BODY(quant) = NULL_NODE;
3195
0
  *rquant = quant;
3196
0
  *rbody  = body;
3197
0
  return 1;
3198
0
}
3199
3200
static int
3201
make_absent_tree_for_simple_one_char_repeat(Node** node,
3202
  Node* absent, Node* quant, Node* body, int possessive, ParseEnv* env)
3203
2.43k
{
3204
2.43k
  int r;
3205
2.43k
  int i;
3206
2.43k
  int id1;
3207
2.43k
  int lower, upper;
3208
2.43k
  Node* x;
3209
2.43k
  Node* ns[4];
3210
3211
2.43k
  *node = NULL_NODE;
3212
2.43k
  r = ONIGERR_MEMORY;
3213
2.43k
  ns[0] = ns[1] = NULL_NODE;
3214
2.43k
  ns[2] = body, ns[3] = absent;
3215
3216
2.43k
  lower = QUANT_(quant)->lower;
3217
2.43k
  upper = QUANT_(quant)->upper;
3218
3219
2.43k
  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3220
2.43k
  if (r != 0) goto err;
3221
3222
2.43k
  id1 = GIMMICK_(ns[0])->id;
3223
3224
2.43k
  r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
3225
2.43k
                         FALSE, env);
3226
2.43k
  if (r != 0) goto err;
3227
3228
2.43k
  ns[2] = ns[3] = NULL_NODE;
3229
3230
2.43k
  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3231
2.43k
                                  id1, env);
3232
2.43k
  if (r != 0) goto err;
3233
3234
2.43k
  x = make_list(3, ns);
3235
2.43k
  if (IS_NULL(x)) goto err0;
3236
3237
2.43k
  *node = x;
3238
2.43k
  return ONIG_NORMAL;
3239
3240
0
 err0:
3241
0
  r = ONIGERR_MEMORY;
3242
0
 err:
3243
0
  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3244
0
  return r;
3245
0
}
3246
3247
static int
3248
make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
3249
                 ParseEnv* env)
3250
2.48k
{
3251
2.48k
  int r;
3252
2.48k
  int i;
3253
2.48k
  int id1, id2;
3254
2.48k
  int possessive;
3255
2.48k
  Node* x;
3256
2.48k
  Node* ns[7];
3257
3258
2.48k
  r = ONIGERR_MEMORY;
3259
19.8k
  for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
3260
2.48k
  ns[4] = expr; ns[5] = absent;
3261
3262
2.48k
  if (is_range_cutter == 0) {
3263
2.44k
    Node* quant;
3264
2.44k
    Node* body;
3265
3266
2.44k
    if (expr == NULL_NODE) {
3267
      /* default expr \O* */
3268
2.43k
      quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
3269
2.43k
      if (IS_NULL(quant)) goto err0;
3270
3271
2.43k
      r = node_new_true_anychar(&body);
3272
2.43k
      if (r != 0) {
3273
0
        onig_node_free(quant);
3274
0
        goto err;
3275
0
      }
3276
2.43k
      possessive = 0;
3277
2.43k
      goto simple;
3278
2.43k
    }
3279
4
    else {
3280
4
      if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
3281
2.43k
      simple:
3282
2.43k
        r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
3283
2.43k
                                                        body, possessive, env);
3284
2.43k
        onig_node_free(quant);
3285
2.43k
        if (r != 0) {
3286
0
          ns[4] = NULL_NODE;
3287
0
          onig_node_free(body);
3288
0
          goto err;
3289
0
        }
3290
3291
2.43k
        return ONIG_NORMAL;
3292
2.43k
      }
3293
4
    }
3294
2.44k
  }
3295
3296
43
  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3297
43
  if (r != 0) goto err;
3298
3299
43
  id1 = GIMMICK_(ns[0])->id;
3300
3301
43
  r = node_new_save_gimmick(&ns[1], SAVE_S, env);
3302
43
  if (r != 0) goto err;
3303
3304
43
  id2 = GIMMICK_(ns[1])->id;
3305
3306
43
  r = node_new_true_anychar(&ns[3]);
3307
43
  if (r != 0) goto err;
3308
3309
43
  possessive = 1;
3310
43
  r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
3311
43
                         possessive, is_range_cutter, env);
3312
43
  if (r != 0) goto err;
3313
3314
43
  ns[3] = NULL_NODE;
3315
43
  ns[5] = NULL_NODE;
3316
3317
43
  r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
3318
43
  if (r != 0) goto err;
3319
3320
43
  if (is_range_cutter != 0) {
3321
39
    x = make_list(4, ns);
3322
39
    if (IS_NULL(x)) goto err0;
3323
39
  }
3324
4
  else {
3325
4
    r = make_absent_tail(&ns[5], &ns[6], id1, env);
3326
4
    if (r != 0) goto err;
3327
3328
4
    x = make_list(7, ns);
3329
4
    if (IS_NULL(x)) goto err0;
3330
4
  }
3331
3332
43
  *node = x;
3333
43
  return ONIG_NORMAL;
3334
3335
0
 err0:
3336
0
  r = ONIGERR_MEMORY;
3337
0
 err:
3338
0
  for (i = 0; i < 7; i++) onig_node_free(ns[i]);
3339
0
  return r;
3340
0
}
3341
3342
extern int
3343
onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
3344
80.1M
{
3345
80.1M
  int addlen = (int )(end - s);
3346
3347
80.1M
  if (addlen > 0) {
3348
79.8M
    int len  = (int )(STR_(node)->end - STR_(node)->s);
3349
3350
79.8M
    if (STR_(node)->capacity > 0 || (len + addlen > ND_STRING_BUF_SIZE - 1)) {
3351
47.4M
      UChar* p;
3352
47.4M
      int capa = len + addlen + ND_STRING_MARGIN;
3353
3354
47.4M
      if (capa <= STR_(node)->capacity) {
3355
0
        onig_strcpy(STR_(node)->s + len, s, end);
3356
0
      }
3357
47.4M
      else {
3358
47.4M
        if (STR_(node)->s == STR_(node)->buf)
3359
593k
          p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
3360
593k
                                      s, end, capa);
3361
46.8M
        else
3362
46.8M
          p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa);
3363
3364
47.4M
        CHECK_NULL_RETURN_MEMERR(p);
3365
47.4M
        STR_(node)->s        = p;
3366
47.4M
        STR_(node)->capacity = capa;
3367
47.4M
      }
3368
47.4M
    }
3369
32.3M
    else {
3370
32.3M
      onig_strcpy(STR_(node)->s + len, s, end);
3371
32.3M
    }
3372
79.8M
    STR_(node)->end = STR_(node)->s + len + addlen;
3373
79.8M
  }
3374
3375
80.1M
  return 0;
3376
80.1M
}
3377
3378
extern int
3379
onig_node_str_set(Node* node, const UChar* s, const UChar* end, int need_free)
3380
0
{
3381
0
  onig_node_str_clear(node, need_free);
3382
0
  return onig_node_str_cat(node, s, end);
3383
0
}
3384
3385
static int
3386
node_str_cat_char(Node* node, UChar c)
3387
0
{
3388
0
  UChar s[1];
3389
3390
0
  s[0] = c;
3391
0
  return onig_node_str_cat(node, s, s + 1);
3392
0
}
3393
3394
extern void
3395
onig_node_str_clear(Node* node, int need_free)
3396
0
{
3397
0
  if (need_free != 0 &&
3398
0
      STR_(node)->capacity != 0 &&
3399
0
      IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
3400
0
    xfree(STR_(node)->s);
3401
0
  }
3402
3403
0
  STR_(node)->flag     = 0;
3404
0
  STR_(node)->s        = STR_(node)->buf;
3405
0
  STR_(node)->end      = STR_(node)->buf;
3406
0
  STR_(node)->capacity = 0;
3407
0
}
3408
3409
static int
3410
node_set_str(Node* node, const UChar* s, const UChar* end)
3411
8.26M
{
3412
8.26M
  int r;
3413
3414
8.26M
  ND_SET_TYPE(node, ND_STRING);
3415
8.26M
  STR_(node)->flag     = 0;
3416
8.26M
  STR_(node)->s        = STR_(node)->buf;
3417
8.26M
  STR_(node)->end      = STR_(node)->buf;
3418
8.26M
  STR_(node)->capacity = 0;
3419
3420
8.26M
  r = onig_node_str_cat(node, s, end);
3421
8.26M
  return r;
3422
8.26M
}
3423
3424
static Node*
3425
node_new_str(const UChar* s, const UChar* end)
3426
8.26M
{
3427
8.26M
  int r;
3428
8.26M
  Node* node = node_new();
3429
8.26M
  CHECK_NULL_RETURN(node);
3430
3431
8.26M
  r = node_set_str(node, s, end);
3432
8.26M
  if (r != 0) {
3433
0
    onig_node_free(node);
3434
0
    return NULL;
3435
0
  }
3436
3437
8.26M
  return node;
3438
8.26M
}
3439
3440
static int
3441
node_reset_str(Node* node, const UChar* s, const UChar* end)
3442
0
{
3443
0
  node_free_body(node);
3444
0
  return node_set_str(node, s, end);
3445
0
}
3446
3447
extern int
3448
onig_node_reset_empty(Node* node)
3449
0
{
3450
0
  return node_reset_str(node, NULL, NULL);
3451
0
}
3452
3453
extern Node*
3454
onig_node_new_str(const UChar* s, const UChar* end)
3455
622k
{
3456
622k
  return node_new_str(s, end);
3457
622k
}
3458
3459
static Node*
3460
node_new_str_with_options(const UChar* s, const UChar* end,
3461
                          OnigOptionType options)
3462
5.87M
{
3463
5.87M
  Node* node;
3464
5.87M
  node = node_new_str(s, end);
3465
3466
5.87M
  if (OPTON_IGNORECASE(options))
3467
967k
    ND_STATUS_ADD(node, IGNORECASE);
3468
3469
5.87M
  return node;
3470
5.87M
}
3471
3472
static Node*
3473
node_new_str_crude(UChar* s, UChar* end, OnigOptionType options)
3474
4.69k
{
3475
4.69k
  Node* node = node_new_str_with_options(s, end, options);
3476
4.69k
  CHECK_NULL_RETURN(node);
3477
4.69k
  ND_STRING_SET_CRUDE(node);
3478
4.69k
  return node;
3479
4.69k
}
3480
3481
static Node*
3482
node_new_empty(void)
3483
108k
{
3484
108k
  return node_new_str(NULL, NULL);
3485
108k
}
3486
3487
static Node*
3488
node_new_str_crude_char(UChar c, OnigOptionType options)
3489
4.04k
{
3490
4.04k
  int i;
3491
4.04k
  UChar p[1];
3492
4.04k
  Node* node;
3493
3494
4.04k
  p[0] = c;
3495
4.04k
  node = node_new_str_crude(p, p + 1, options);
3496
4.04k
  CHECK_NULL_RETURN(node);
3497
3498
  /* clear buf tail */
3499
97.1k
  for (i = 1; i < ND_STRING_BUF_SIZE; i++)
3500
93.1k
    STR_(node)->buf[i] = '\0';
3501
3502
4.04k
  return node;
3503
4.04k
}
3504
3505
static Node*
3506
str_node_split_last_char(Node* node, OnigEncoding enc)
3507
1.45M
{
3508
1.45M
  const UChar *p;
3509
1.45M
  Node* rn;
3510
1.45M
  StrNode* sn;
3511
3512
1.45M
  sn = STR_(node);
3513
1.45M
  rn = NULL_NODE;
3514
1.45M
  if (sn->end > sn->s) {
3515
1.45M
    p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
3516
1.45M
    if (p && p > sn->s) { /* can be split. */
3517
1.45M
      rn = node_new_str(p, sn->end);
3518
1.45M
      CHECK_NULL_RETURN(rn);
3519
3520
1.45M
      sn->end = (UChar* )p;
3521
1.45M
      STR_(rn)->flag = sn->flag;
3522
1.45M
      ND_STATUS(rn) = ND_STATUS(node);
3523
1.45M
    }
3524
1.45M
  }
3525
3526
1.45M
  return rn;
3527
1.45M
}
3528
3529
static int
3530
str_node_can_be_split(Node* node, OnigEncoding enc)
3531
1.57M
{
3532
1.57M
  StrNode* sn = STR_(node);
3533
1.57M
  if (sn->end > sn->s) {
3534
1.57M
    return ((enclen(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
3535
1.57M
  }
3536
0
  return 0;
3537
1.57M
}
3538
3539
static int
3540
scan_number(UChar** src, const UChar* end, OnigEncoding enc)
3541
980k
{
3542
980k
  int num, val;
3543
980k
  OnigCodePoint c;
3544
980k
  UChar* p;
3545
980k
  PFETCH_READY;
3546
3547
980k
  p = *src;
3548
980k
  num = 0;
3549
1.15M
  while (! PEND) {
3550
1.12M
    PFETCH(c);
3551
1.12M
    if (IS_CODE_DIGIT_ASCII(enc, c)) {
3552
176k
      val = (int )DIGITVAL(c);
3553
176k
      if ((ONIG_INT_MAX - val) / 10 < num)
3554
1.61k
        return -1;  /* overflow */
3555
3556
174k
      num = num * 10 + val;
3557
174k
    }
3558
951k
    else {
3559
951k
      PUNFETCH;
3560
951k
      break;
3561
951k
    }
3562
1.12M
  }
3563
978k
  *src = p;
3564
978k
  return num;
3565
980k
}
3566
3567
static int
3568
scan_hexadecimal_number(UChar** src, UChar* end, int minlen, int maxlen,
3569
                        OnigEncoding enc, OnigCodePoint* rcode)
3570
641
{
3571
641
  OnigCodePoint code;
3572
641
  OnigCodePoint c;
3573
641
  unsigned int val;
3574
641
  int n;
3575
641
  UChar* p;
3576
641
  PFETCH_READY;
3577
3578
641
  p = *src;
3579
641
  code = 0;
3580
641
  n = 0;
3581
1.13k
  while (! PEND && n < maxlen) {
3582
1.06k
    PFETCH(c);
3583
1.06k
    if (IS_CODE_XDIGIT_ASCII(enc, c)) {
3584
491
      n++;
3585
491
      val = (unsigned int )XDIGITVAL(enc, c);
3586
491
      if ((UINT_MAX - val) / 16UL < code)
3587
0
        return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3588
3589
491
      code = (code << 4) + val;
3590
491
    }
3591
573
    else {
3592
573
      PUNFETCH;
3593
573
      break;
3594
573
    }
3595
1.06k
  }
3596
3597
641
  if (n < minlen)
3598
0
    return ONIGERR_INVALID_CODE_POINT_VALUE;
3599
3600
641
  *rcode = code;
3601
641
  *src = p;
3602
641
  return ONIG_NORMAL;
3603
641
}
3604
3605
static int
3606
scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen,
3607
                  OnigEncoding enc, OnigCodePoint* rcode)
3608
5.02k
{
3609
5.02k
  OnigCodePoint code;
3610
5.02k
  OnigCodePoint c;
3611
5.02k
  unsigned int val;
3612
5.02k
  int n;
3613
5.02k
  UChar* p;
3614
5.02k
  PFETCH_READY;
3615
3616
5.02k
  p = *src;
3617
5.02k
  code = 0;
3618
5.02k
  n = 0;
3619
11.5k
  while (! PEND && n < maxlen) {
3620
10.1k
    PFETCH(c);
3621
10.1k
    if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
3622
6.55k
      n++;
3623
6.55k
      val = (unsigned int )ODIGITVAL(c);
3624
6.55k
      if ((UINT_MAX - val) / 8UL < code)
3625
0
        return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3626
3627
6.55k
      code = (code << 3) + val;
3628
6.55k
    }
3629
3.60k
    else {
3630
3.60k
      PUNFETCH;
3631
3.60k
      break;
3632
3.60k
    }
3633
10.1k
  }
3634
3635
5.02k
  if (n < minlen)
3636
0
    return ONIGERR_INVALID_CODE_POINT_VALUE;
3637
3638
5.02k
  *rcode = code;
3639
5.02k
  *src = p;
3640
5.02k
  return ONIG_NORMAL;
3641
5.02k
}
3642
3643
static int
3644
scan_number_of_base(UChar** src, UChar* end, int minlen,
3645
                    OnigEncoding enc, OnigCodePoint* rcode, int base)
3646
0
{
3647
0
  int r;
3648
3649
0
  if (base == 16)
3650
0
    r = scan_hexadecimal_number(src, end, minlen, 8, enc, rcode);
3651
0
  else if (base == 8)
3652
0
    r = scan_octal_number(src, end, minlen, 11, enc, rcode);
3653
0
  else
3654
0
    r = ONIGERR_INVALID_CODE_POINT_VALUE;
3655
3656
0
  return r;
3657
0
}
3658
3659
0
/* True when c is a separator between code points in a sequence: a space or a newline. */
#define IS_CODE_POINT_DIVIDE(c)  ((c) == ' ' || (c) == '\n')
3660
3661
/* Scanner state used while checking a code point sequence in a char class. */
enum CPS_STATE {
3662
  CPS_EMPTY = 0,  /* no pending range start ('-' is not accepted) */
3663
  CPS_START = 1,  /* a number was just scanned; a following '-' starts a range */
3664
  CPS_RANGE = 2   /* a '-' was seen; the range end number is still expected */
3665
};
3666
3667
static int
3668
check_code_point_sequence_cc(UChar* p, UChar* end, int base,
3669
                             OnigEncoding enc, int state)
3670
0
{
3671
0
  int r;
3672
0
  int n;
3673
0
  int end_digit;
3674
0
  OnigCodePoint code;
3675
0
  OnigCodePoint c;
3676
0
  PFETCH_READY;
3677
3678
0
  end_digit = FALSE;
3679
0
  n = 0;
3680
0
  while (! PEND) {
3681
0
  start:
3682
0
    PFETCH(c);
3683
0
    if (c == '}') {
3684
0
    end_char:
3685
0
      if (state == CPS_RANGE) return ONIGERR_INVALID_CODE_POINT_VALUE;
3686
0
      return n;
3687
0
    }
3688
3689
0
    if (IS_CODE_POINT_DIVIDE(c)) {
3690
0
      while (! PEND) {
3691
0
        PFETCH(c);
3692
0
        if (! IS_CODE_POINT_DIVIDE(c)) break;
3693
0
      }
3694
0
      if (IS_CODE_POINT_DIVIDE(c))
3695
0
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3696
0
    }
3697
0
    else if (c == '-') {
3698
0
    range:
3699
0
      if (state != CPS_START) return ONIGERR_INVALID_CODE_POINT_VALUE;
3700
0
      if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
3701
0
      end_digit = FALSE;
3702
0
      state = CPS_RANGE;
3703
0
      goto start;
3704
0
    }
3705
0
    else if (end_digit == TRUE) {
3706
0
      if (base == 16) {
3707
0
        if (IS_CODE_XDIGIT_ASCII(enc, c))
3708
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3709
0
      }
3710
0
      else if (base == 8) {
3711
0
        if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3712
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3713
0
      }
3714
3715
0
      return ONIGERR_INVALID_CODE_POINT_VALUE;
3716
0
    }
3717
3718
0
    if (c == '}') goto end_char;
3719
0
    if (c == '-') goto range;
3720
3721
0
    PUNFETCH;
3722
0
    r = scan_number_of_base(&p, end, 1, enc, &code, base);
3723
0
    if (r != 0) return r;
3724
0
    n++;
3725
0
    end_digit = TRUE;
3726
0
    state = (state == CPS_RANGE) ? CPS_EMPTY : CPS_START;
3727
0
  }
3728
3729
0
  return ONIGERR_INVALID_CODE_POINT_VALUE;
3730
0
}
3731
3732
static int
3733
check_code_point_sequence(UChar* p, UChar* end, int base, OnigEncoding enc)
3734
0
{
3735
0
  int r;
3736
0
  int n;
3737
0
  int end_digit;
3738
0
  OnigCodePoint code;
3739
0
  OnigCodePoint c;
3740
0
  PFETCH_READY;
3741
3742
0
  end_digit = FALSE;
3743
0
  n = 0;
3744
0
  while (! PEND) {
3745
0
    PFETCH(c);
3746
0
    if (c == '}') {
3747
0
    end_char:
3748
0
      return n;
3749
0
    }
3750
3751
0
    if (IS_CODE_POINT_DIVIDE(c)) {
3752
0
      while (! PEND) {
3753
0
        PFETCH(c);
3754
0
        if (! IS_CODE_POINT_DIVIDE(c)) break;
3755
0
      }
3756
0
      if (IS_CODE_POINT_DIVIDE(c))
3757
0
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3758
0
    }
3759
0
    else if (end_digit == TRUE) {
3760
0
      if (base == 16) {
3761
0
        if (IS_CODE_XDIGIT_ASCII(enc, c))
3762
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3763
0
      }
3764
0
      else if (base == 8) {
3765
0
        if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3766
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3767
0
      }
3768
3769
0
      return ONIGERR_INVALID_CODE_POINT_VALUE;
3770
0
    }
3771
3772
0
    if (c == '}') goto end_char;
3773
3774
0
    PUNFETCH;
3775
0
    r = scan_number_of_base(&p, end, 1, enc, &code, base);
3776
0
    if (r != 0) return r;
3777
0
    n++;
3778
0
    end_digit = TRUE;
3779
0
  }
3780
3781
0
  return ONIGERR_INVALID_CODE_POINT_VALUE;
3782
0
}
3783
3784
static int
3785
get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_cc, OnigCodePoint* rcode)
3786
0
{
3787
0
  int r;
3788
0
  OnigCodePoint c;
3789
0
  UChar* p;
3790
0
  PFETCH_READY;
3791
3792
0
  p = *src;
3793
0
  while (! PEND) {
3794
0
    PFETCH(c);
3795
0
    if (! IS_CODE_POINT_DIVIDE(c)) {
3796
0
      if (c == '}') {
3797
0
        *src = p;
3798
0
        return 1; /* end of sequence */
3799
0
      }
3800
0
      else if (c == '-' && in_cc == TRUE) {
3801
0
        *src = p;
3802
0
        return 2; /* range */
3803
0
      }
3804
0
      PUNFETCH;
3805
0
      break;
3806
0
    }
3807
0
    else {
3808
0
      if (PEND)
3809
0
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3810
0
    }
3811
0
  }
3812
3813
0
  r = scan_number_of_base(&p, end, 1, enc, rcode, base);
3814
0
  if (r != 0) return r;
3815
3816
0
  *src = p;
3817
0
  return ONIG_NORMAL;
3818
0
}
3819
3820
3821
/* Write one code point (SIZE_CODE_POINT bytes taken from the lvalue `code`) into bbuf at byte offset pos. */
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
3822
71.0M
    BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
3823
3824
/* data format:
3825
     [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
3826
     (all data size is OnigCodePoint)
3827
 */
3828
static int
3829
new_code_range(BBuf** pbuf)
3830
1.21M
{
3831
1.21M
#define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
3832
1.21M
  int r;
3833
1.21M
  OnigCodePoint n;
3834
1.21M
  BBuf* bbuf;
3835
3836
1.21M
  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
3837
1.21M
  CHECK_NULL_RETURN_MEMERR(bbuf);
3838
1.21M
  r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
3839
1.21M
  if (r != 0) {
3840
0
    xfree(bbuf);
3841
0
    *pbuf = 0;
3842
0
    return r;
3843
0
  }
3844
3845
1.21M
  n = 0;
3846
1.21M
  BB_WRITE_CODE_POINT(bbuf, 0, n);
3847
1.21M
  return 0;
3848
1.21M
}
3849
3850
static int
3851
add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
3852
23.2M
{
3853
23.2M
  int r, inc_n, pos;
3854
23.2M
  int low, high, bound, x;
3855
23.2M
  OnigCodePoint n, *data;
3856
23.2M
  BBuf* bbuf;
3857
3858
23.2M
  if (from > to) {
3859
0
    n = from; from = to; to = n;
3860
0
  }
3861
3862
23.2M
  if (IS_NULL(*pbuf)) {
3863
1.21M
    r = new_code_range(pbuf);
3864
1.21M
    if (r != 0) return r;
3865
1.21M
    bbuf = *pbuf;
3866
1.21M
    n = 0;
3867
1.21M
  }
3868
22.0M
  else {
3869
22.0M
    bbuf = *pbuf;
3870
22.0M
    GET_CODE_POINT(n, bbuf->p);
3871
22.0M
  }
3872
23.2M
  data = (OnigCodePoint* )(bbuf->p);
3873
23.2M
  data++;
3874
3875
57.7M
  for (low = 0, bound = n; low < bound; ) {
3876
34.4M
    x = (low + bound) >> 1;
3877
34.4M
    if (from > data[x*2 + 1])
3878
10.4M
      low = x + 1;
3879
24.0M
    else
3880
24.0M
      bound = x;
3881
34.4M
  }
3882
3883
23.2M
  high = (to == ~((OnigCodePoint )0)) ? n : low;
3884
50.3M
  for (bound = n; high < bound; ) {
3885
27.0M
    x = (high + bound) >> 1;
3886
27.0M
    if (to + 1 >= data[x*2])
3887
19.3M
      high = x + 1;
3888
7.77M
    else
3889
7.77M
      bound = x;
3890
27.0M
  }
3891
3892
23.2M
  inc_n = low + 1 - high;
3893
23.2M
  if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
3894
0
    return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
3895
3896
23.2M
  if (inc_n != 1) {
3897
19.3M
    if (from > data[low*2])
3898
17.6M
      from = data[low*2];
3899
19.3M
    if (to < data[(high - 1)*2 + 1])
3900
17.0M
      to = data[(high - 1)*2 + 1];
3901
19.3M
  }
3902
3903
23.2M
  if (inc_n != 0 && (OnigCodePoint )high < n) {
3904
954k
    int from_pos = SIZE_CODE_POINT * (1 + high * 2);
3905
954k
    int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
3906
954k
    int size = (n - high) * 2 * SIZE_CODE_POINT;
3907
3908
954k
    if (inc_n > 0) {
3909
952k
      BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
3910
952k
    }
3911
1.67k
    else {
3912
1.67k
      BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
3913
1.67k
    }
3914
954k
  }
3915
3916
23.2M
  pos = SIZE_CODE_POINT * (1 + low * 2);
3917
23.2M
  BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
3918
23.2M
  BB_WRITE_CODE_POINT(bbuf, pos, from);
3919
23.2M
  BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
3920
23.2M
  n += inc_n;
3921
23.2M
  BB_WRITE_CODE_POINT(bbuf, 0, n);
3922
3923
23.2M
  return 0;
3924
23.2M
}
3925
3926
static int
3927
add_code_range(BBuf** pbuf, ParseEnv* env, OnigCodePoint from, OnigCodePoint to)
3928
1.92M
{
3929
1.92M
  if (from > to) {
3930
13
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
3931
0
      return 0;
3932
13
    else
3933
13
      return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
3934
13
  }
3935
3936
1.92M
  return add_code_range_to_buf(pbuf, from, to);
3937
1.92M
}
3938
3939
static int
3940
not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
3941
0
{
3942
0
  int r, i, n;
3943
0
  OnigCodePoint pre, from, *data, to = 0;
3944
3945
0
  *pbuf = (BBuf* )NULL;
3946
0
  if (IS_NULL(bbuf)) {
3947
0
  set_all:
3948
0
    return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3949
0
  }
3950
3951
0
  data = (OnigCodePoint* )(bbuf->p);
3952
0
  GET_CODE_POINT(n, data);
3953
0
  data++;
3954
0
  if (n <= 0) goto set_all;
3955
3956
0
  r = 0;
3957
0
  pre = MBCODE_START_POS(enc);
3958
0
  for (i = 0; i < n; i++) {
3959
0
    from = data[i*2];
3960
0
    to   = data[i*2+1];
3961
0
    if (pre <= from - 1) {
3962
0
      r = add_code_range_to_buf(pbuf, pre, from - 1);
3963
0
      if (r != 0) {
3964
0
        bbuf_free(*pbuf);
3965
0
        return r;
3966
0
      }
3967
0
    }
3968
0
    if (to == ~((OnigCodePoint )0)) break;
3969
0
    pre = to + 1;
3970
0
  }
3971
0
  if (to < ~((OnigCodePoint )0)) {
3972
0
    r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
3973
0
    if (r != 0) bbuf_free(*pbuf);
3974
0
  }
3975
0
  return r;
3976
0
}
3977
3978
0
/* Exchange two (range buffer, negation flag) pairs: bbuf1 <-> bbuf2 and not1 <-> not2. */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
3979
0
  BBuf *tbuf; \
3980
0
  int  tnot; \
3981
0
  tnot = not1;  not1  = not2;  not2  = tnot; \
3982
0
  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
3983
0
} while (0)
3984
3985
static int
3986
or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
3987
                  BBuf* bbuf2, int not2, BBuf** pbuf)
3988
0
{
3989
0
  int r;
3990
0
  OnigCodePoint i, n1, *data1;
3991
0
  OnigCodePoint from, to;
3992
3993
0
  *pbuf = (BBuf* )NULL;
3994
0
  if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
3995
0
    if (not1 != 0 || not2 != 0)
3996
0
      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3997
0
    return 0;
3998
0
  }
3999
4000
0
  r = 0;
4001
0
  if (IS_NULL(bbuf2))
4002
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4003
4004
0
  if (IS_NULL(bbuf1)) {
4005
0
    if (not1 != 0) {
4006
0
      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
4007
0
    }
4008
0
    else {
4009
0
      if (not2 == 0) {
4010
0
        return bbuf_clone(pbuf, bbuf2);
4011
0
      }
4012
0
      else {
4013
0
        return not_code_range_buf(enc, bbuf2, pbuf);
4014
0
      }
4015
0
    }
4016
0
  }
4017
4018
0
  if (not1 != 0)
4019
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4020
4021
0
  data1 = (OnigCodePoint* )(bbuf1->p);
4022
0
  GET_CODE_POINT(n1, data1);
4023
0
  data1++;
4024
4025
0
  if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
4026
0
    r = bbuf_clone(pbuf, bbuf2);
4027
0
  }
4028
0
  else if (not1 == 0) { /* 1 OR (not 2) */
4029
0
    r = not_code_range_buf(enc, bbuf2, pbuf);
4030
0
  }
4031
0
  if (r != 0) return r;
4032
4033
0
  for (i = 0; i < n1; i++) {
4034
0
    from = data1[i*2];
4035
0
    to   = data1[i*2+1];
4036
0
    r = add_code_range_to_buf(pbuf, from, to);
4037
0
    if (r != 0) return r;
4038
0
  }
4039
0
  return 0;
4040
0
}
4041
4042
static int
4043
and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
4044
                OnigCodePoint* data, int n)
4045
0
{
4046
0
  int i, r;
4047
0
  OnigCodePoint from2, to2;
4048
4049
0
  for (i = 0; i < n; i++) {
4050
0
    from2 = data[i*2];
4051
0
    to2   = data[i*2+1];
4052
0
    if (from2 < from1) {
4053
0
      if (to2 < from1) continue;
4054
0
      else {
4055
0
        from1 = to2 + 1;
4056
0
      }
4057
0
    }
4058
0
    else if (from2 <= to1) {
4059
0
      if (to2 < to1) {
4060
0
        if (from1 <= from2 - 1) {
4061
0
          r = add_code_range_to_buf(pbuf, from1, from2-1);
4062
0
          if (r != 0) return r;
4063
0
        }
4064
0
        from1 = to2 + 1;
4065
0
      }
4066
0
      else {
4067
0
        to1 = from2 - 1;
4068
0
      }
4069
0
    }
4070
0
    else {
4071
0
      from1 = from2;
4072
0
    }
4073
0
    if (from1 > to1) break;
4074
0
  }
4075
0
  if (from1 <= to1) {
4076
0
    r = add_code_range_to_buf(pbuf, from1, to1);
4077
0
    if (r != 0) return r;
4078
0
  }
4079
0
  return 0;
4080
0
}
4081
4082
static int
4083
and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
4084
0
{
4085
0
  int r;
4086
0
  OnigCodePoint i, j, n1, n2, *data1, *data2;
4087
0
  OnigCodePoint from, to, from1, to1, from2, to2;
4088
4089
0
  *pbuf = (BBuf* )NULL;
4090
0
  if (IS_NULL(bbuf1)) {
4091
0
    if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
4092
0
      return bbuf_clone(pbuf, bbuf2);
4093
0
    return 0;
4094
0
  }
4095
0
  else if (IS_NULL(bbuf2)) {
4096
0
    if (not2 != 0)
4097
0
      return bbuf_clone(pbuf, bbuf1);
4098
0
    return 0;
4099
0
  }
4100
4101
0
  if (not1 != 0)
4102
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4103
4104
0
  data1 = (OnigCodePoint* )(bbuf1->p);
4105
0
  data2 = (OnigCodePoint* )(bbuf2->p);
4106
0
  GET_CODE_POINT(n1, data1);
4107
0
  GET_CODE_POINT(n2, data2);
4108
0
  data1++;
4109
0
  data2++;
4110
4111
0
  if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
4112
0
    for (i = 0; i < n1; i++) {
4113
0
      from1 = data1[i*2];
4114
0
      to1   = data1[i*2+1];
4115
0
      for (j = 0; j < n2; j++) {
4116
0
        from2 = data2[j*2];
4117
0
        to2   = data2[j*2+1];
4118
0
        if (from2 > to1) break;
4119
0
        if (to2 < from1) continue;
4120
0
        from = MAX(from1, from2);
4121
0
        to   = MIN(to1, to2);
4122
0
        r = add_code_range_to_buf(pbuf, from, to);
4123
0
        if (r != 0) return r;
4124
0
      }
4125
0
    }
4126
0
  }
4127
0
  else if (not1 == 0) { /* 1 AND (not 2) */
4128
0
    for (i = 0; i < n1; i++) {
4129
0
      from1 = data1[i*2];
4130
0
      to1   = data1[i*2+1];
4131
0
      r = and_code_range1(pbuf, from1, to1, data2, n2);
4132
0
      if (r != 0) return r;
4133
0
    }
4134
0
  }
4135
4136
0
  return 0;
4137
0
}
4138
4139
static int
4140
and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4141
0
{
4142
0
  int r, not1, not2;
4143
0
  BBuf *buf1, *buf2, *pbuf;
4144
0
  BitSetRef bsr1, bsr2;
4145
0
  BitSet bs1, bs2;
4146
4147
0
  not1 = IS_NCCLASS_NOT(dest);
4148
0
  bsr1 = dest->bs;
4149
0
  buf1 = dest->mbuf;
4150
0
  not2 = IS_NCCLASS_NOT(cc);
4151
0
  bsr2 = cc->bs;
4152
0
  buf2 = cc->mbuf;
4153
4154
0
  if (not1 != 0) {
4155
0
    bitset_invert_to(bsr1, bs1);
4156
0
    bsr1 = bs1;
4157
0
  }
4158
0
  if (not2 != 0) {
4159
0
    bitset_invert_to(bsr2, bs2);
4160
0
    bsr2 = bs2;
4161
0
  }
4162
0
  bitset_and(bsr1, bsr2);
4163
0
  if (bsr1 != dest->bs) {
4164
0
    bitset_copy(dest->bs, bsr1);
4165
0
  }
4166
0
  if (not1 != 0) {
4167
0
    bitset_invert(dest->bs);
4168
0
  }
4169
4170
0
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4171
0
    if (not1 != 0 && not2 != 0) {
4172
0
      r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
4173
0
    }
4174
0
    else {
4175
0
      r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
4176
0
      if (r == 0 && not1 != 0) {
4177
0
        BBuf *tbuf;
4178
0
        r = not_code_range_buf(enc, pbuf, &tbuf);
4179
0
        if (r != 0) {
4180
0
          bbuf_free(pbuf);
4181
0
          return r;
4182
0
        }
4183
0
        bbuf_free(pbuf);
4184
0
        pbuf = tbuf;
4185
0
      }
4186
0
    }
4187
0
    if (r != 0) return r;
4188
4189
0
    dest->mbuf = pbuf;
4190
0
    bbuf_free(buf1);
4191
0
    return r;
4192
0
  }
4193
0
  return 0;
4194
0
}
4195
4196
static int
4197
or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4198
0
{
4199
0
  int r, not1, not2;
4200
0
  BBuf *buf1, *buf2, *pbuf;
4201
0
  BitSetRef bsr1, bsr2;
4202
0
  BitSet bs1, bs2;
4203
4204
0
  not1 = IS_NCCLASS_NOT(dest);
4205
0
  bsr1 = dest->bs;
4206
0
  buf1 = dest->mbuf;
4207
0
  not2 = IS_NCCLASS_NOT(cc);
4208
0
  bsr2 = cc->bs;
4209
0
  buf2 = cc->mbuf;
4210
4211
0
  if (not1 != 0) {
4212
0
    bitset_invert_to(bsr1, bs1);
4213
0
    bsr1 = bs1;
4214
0
  }
4215
0
  if (not2 != 0) {
4216
0
    bitset_invert_to(bsr2, bs2);
4217
0
    bsr2 = bs2;
4218
0
  }
4219
0
  bitset_or(bsr1, bsr2);
4220
0
  if (bsr1 != dest->bs) {
4221
0
    bitset_copy(dest->bs, bsr1);
4222
0
  }
4223
0
  if (not1 != 0) {
4224
0
    bitset_invert(dest->bs);
4225
0
  }
4226
4227
0
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4228
0
    if (not1 != 0 && not2 != 0) {
4229
0
      r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
4230
0
    }
4231
0
    else {
4232
0
      r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
4233
0
      if (r == 0 && not1 != 0) {
4234
0
        BBuf *tbuf;
4235
0
        r = not_code_range_buf(enc, pbuf, &tbuf);
4236
0
        if (r != 0) {
4237
0
          bbuf_free(pbuf);
4238
0
          return r;
4239
0
        }
4240
0
        bbuf_free(pbuf);
4241
0
        pbuf = tbuf;
4242
0
      }
4243
0
    }
4244
0
    if (r != 0) return r;
4245
4246
0
    dest->mbuf = pbuf;
4247
0
    bbuf_free(buf1);
4248
0
    return r;
4249
0
  }
4250
0
  else
4251
0
    return 0;
4252
0
}
4253
4254
static OnigCodePoint
4255
conv_backslash_value(OnigCodePoint c, ParseEnv* env)
4256
566k
{
4257
566k
  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
4258
566k
    switch (c) {
4259
8.68k
    case 'n': return '\n';
4260
4.09k
    case 't': return '\t';
4261
474
    case 'r': return '\r';
4262
872
    case 'f': return '\f';
4263
298
    case 'a': return '\007';
4264
101
    case 'b': return '\010';
4265
304
    case 'e': return '\033';
4266
81
    case 'v':
4267
81
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
4268
0
        return '\v';
4269
81
      break;
4270
4271
551k
    default:
4272
551k
      break;
4273
566k
    }
4274
566k
  }
4275
551k
  return c;
4276
566k
}
4277
4278
static int
4279
is_invalid_quantifier_target(Node* node)
4280
2.76M
{
4281
2.76M
  switch (ND_TYPE(node)) {
4282
79
  case ND_ANCHOR:
4283
1.31k
  case ND_GIMMICK:
4284
1.31k
    return 1;
4285
0
    break;
4286
4287
547k
  case ND_BAG:
4288
    /* allow enclosed elements */
4289
    /* return is_invalid_quantifier_target(ND_BODY(node)); */
4290
547k
    break;
4291
4292
69.8k
  case ND_LIST:
4293
70.6k
    do {
4294
70.6k
      if (! is_invalid_quantifier_target(ND_CAR(node))) return 0;
4295
70.6k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4296
413
    return 0;
4297
0
    break;
4298
4299
1.08k
  case ND_ALT:
4300
70.4k
    do {
4301
70.4k
      if (is_invalid_quantifier_target(ND_CAR(node))) return 1;
4302
70.4k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4303
1.08k
    break;
4304
4305
2.14M
  default:
4306
2.14M
    break;
4307
2.76M
  }
4308
2.69M
  return 0;
4309
2.76M
}
4310
4311
/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
4312
static int
4313
quantifier_type_num(QuantNode* q)
4314
1.64M
{
4315
1.64M
  if (q->greedy) {
4316
1.30M
    if (q->lower == 0) {
4317
1.28M
      if (q->upper == 1) return 0;
4318
1.09M
      else if (IS_INFINITE_REPEAT(q->upper)) return 1;
4319
1.28M
    }
4320
17.8k
    else if (q->lower == 1) {
4321
7.94k
      if (IS_INFINITE_REPEAT(q->upper)) return 2;
4322
7.94k
    }
4323
1.30M
  }
4324
337k
  else {
4325
337k
    if (q->lower == 0) {
4326
333k
      if (q->upper == 1) return 3;
4327
14.5k
      else if (IS_INFINITE_REPEAT(q->upper)) return 4;
4328
333k
    }
4329
4.32k
    else if (q->lower == 1) {
4330
1.71k
      if (IS_INFINITE_REPEAT(q->upper)) return 5;
4331
1.71k
    }
4332
337k
  }
4333
25.5k
  return -1;
4334
1.64M
}
4335
4336
4337
/* Action selected from ReduceTypeTable when reducing a quantifier nested directly inside another. */
enum ReduceType {
4338
  RQ_ASIS = 0, /* as is */
4339
  RQ_DEL,      /* delete parent */
4340
  RQ_A,        /* to '*'    */
4341
  RQ_P,        /* to '+'    */
4342
  RQ_AQ,       /* to '*?'   */
4343
  RQ_QQ,       /* to '??'   */
4344
  RQ_P_QQ,     /* to '(?:+)??' */
4345
};
4346
4347
static enum ReduceType ReduceTypeTable[6][6] = {
4348
  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
4349
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
4350
  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
4351
  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
4352
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
4353
  {RQ_ASIS, RQ_A,    RQ_P,   RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
4354
};
4355
4356
extern int
4357
onig_reduce_nested_quantifier(Node* pnode)
4358
407k
{
4359
407k
  int pnum, cnum;
4360
407k
  QuantNode *p, *c;
4361
407k
  Node* cnode;
4362
4363
407k
  cnode = ND_BODY(pnode);
4364
4365
407k
  p = QUANT_(pnode);
4366
407k
  c = QUANT_(cnode);
4367
407k
  pnum = quantifier_type_num(p);
4368
407k
  cnum = quantifier_type_num(c);
4369
407k
  if (pnum < 0 || cnum < 0) {
4370
7.19k
    if (p->lower == p->upper && c->lower == c->upper) {
4371
2.24k
      int n = onig_positive_int_multiply(p->lower, c->lower);
4372
2.24k
      if (n < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4373
4374
2.24k
      p->lower = p->upper = n;
4375
2.24k
      ND_BODY(pnode) = ND_BODY(cnode);
4376
2.24k
      goto remove_cnode;
4377
2.24k
    }
4378
4379
4.95k
    return 0;
4380
7.19k
  }
4381
4382
400k
  switch(ReduceTypeTable[cnum][pnum]) {
4383
305k
  case RQ_DEL:
4384
305k
    *pnode = *cnode;
4385
305k
    goto remove_cnode;
4386
0
    break;
4387
90.8k
  case RQ_A:
4388
90.8k
    ND_BODY(pnode) = ND_BODY(cnode);
4389
90.8k
    p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 1;
4390
90.8k
    goto remove_cnode;
4391
0
    break;
4392
441
  case RQ_P:
4393
441
    ND_BODY(pnode) = ND_BODY(cnode);
4394
441
    p->lower  = 1;  p->upper = INFINITE_REPEAT;  p->greedy = 1;
4395
441
    goto remove_cnode;
4396
0
    break;
4397
2.28k
  case RQ_AQ:
4398
2.28k
    ND_BODY(pnode) = ND_BODY(cnode);
4399
2.28k
    p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 0;
4400
2.28k
    goto remove_cnode;
4401
0
    break;
4402
3
  case RQ_QQ:
4403
3
    ND_BODY(pnode) = ND_BODY(cnode);
4404
3
    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
4405
3
    goto remove_cnode;
4406
0
    break;
4407
1.06k
  case RQ_P_QQ:
4408
1.06k
    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
4409
1.06k
    c->lower  = 1;  c->upper = INFINITE_REPEAT;  c->greedy = 1;
4410
1.06k
    break;
4411
53
  case RQ_ASIS:
4412
53
    break;
4413
400k
  }
4414
4415
1.12k
  return 0;
4416
4417
401k
 remove_cnode:
4418
401k
  ND_BODY(cnode) = NULL_NODE;
4419
401k
  onig_node_free(cnode);
4420
401k
  return 0;
4421
400k
}
4422
4423
static int
4424
node_new_general_newline(Node** node, ParseEnv* env)
4425
641
{
4426
641
  int r;
4427
641
  int dlen, alen;
4428
641
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
4429
641
  Node* crnl;
4430
641
  Node* ncc;
4431
641
  Node* x;
4432
641
  CClassNode* cc;
4433
4434
641
  dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
4435
641
  if (dlen < 0) return dlen;
4436
641
  alen = ONIGENC_CODE_TO_MBC(env->enc, NEWLINE_CODE, buf + dlen);
4437
641
  if (alen < 0) return alen;
4438
4439
641
  crnl = node_new_str_crude(buf, buf + dlen + alen, ONIG_OPTION_NONE);
4440
641
  CHECK_NULL_RETURN_MEMERR(crnl);
4441
4442
641
  ncc = node_new_cclass();
4443
641
  if (IS_NULL(ncc)) goto err2;
4444
4445
641
  cc = CCLASS_(ncc);
4446
641
  if (dlen == 1) {
4447
641
    bitset_set_range(cc->bs, NEWLINE_CODE, 0x0d);
4448
641
  }
4449
0
  else {
4450
0
    r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, 0x0d);
4451
0
    if (r != 0) {
4452
0
    err1:
4453
0
      onig_node_free(ncc);
4454
0
    err2:
4455
0
      onig_node_free(crnl);
4456
0
      return ONIGERR_MEMORY;
4457
0
    }
4458
0
  }
4459
4460
641
  if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {
4461
641
    r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
4462
641
    if (r != 0) goto err1;
4463
641
    r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
4464
641
    if (r != 0) goto err1;
4465
641
  }
4466
4467
641
  x = node_new_bag_if_else(crnl, NULL_NODE, ncc);
4468
641
  if (IS_NULL(x)) goto err1;
4469
4470
641
  *node = x;
4471
641
  return 0;
4472
641
}
4473
4474
/*
 * Token kinds stored in PToken.type.  The first group is used outside
 * character classes; the group after "in cc" is used inside one.
 * Values are consecutive, starting from TK_EOT == 0.
 */
enum TokenSyms {
  TK_EOT = 0,                 /* end of token */
  TK_CRUDE_BYTE,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,         /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_OPEN_CC,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,           /* \p{...}, \P{...} */
  TK_KEEP,                    /* \K */
  TK_GENERAL_NEWLINE,         /* \R */
  TK_NO_NEWLINE,              /* \N */
  TK_TRUE_ANYCHAR,            /* \O */
  TK_TEXT_SEGMENT,            /* \X */

  /* in cc */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_CC_POSIX_BRACKET_OPEN,
  TK_CC_AND,                  /* && */
  TK_CC_OPEN_CC               /* [ */
};
4507
4508
typedef struct {
4509
  enum TokenSyms type;
4510
  int code_point_continue;
4511
  int escaped;
4512
  int base_num;   /* is number: 8, 16 (used in [....]) */
4513
  UChar* backp;
4514
  union {
4515
    UChar* s;
4516
    UChar byte;
4517
    OnigCodePoint code;
4518
    int   anchor;
4519
    int   subtype;
4520
    struct {
4521
      int lower;
4522
      int upper;
4523
      int greedy;
4524
      int possessive;
4525
    } repeat;
4526
    struct {
4527
      int  num;
4528
      int  ref1;
4529
      int* refs;
4530
      int  by_name;
4531
#ifdef USE_BACKREF_WITH_LEVEL
4532
      int  exist_level;
4533
      int  level;   /* \k<name+n> */
4534
#endif
4535
    } backref;
4536
    struct {
4537
      UChar* name;
4538
      UChar* name_end;
4539
      int    gnum;
4540
      int    by_number;
4541
    } call;
4542
    struct {
4543
      int ctype;
4544
      int not;
4545
    } prop;
4546
  } u;
4547
} PToken;
4548
4549
static void
4550
ptoken_init(PToken* tok)
4551
1.37M
{
4552
1.37M
  tok->code_point_continue = 0;
4553
1.37M
}
4554
4555
static int
4556
fetch_interval(UChar** src, UChar* end, PToken* tok, ParseEnv* env)
4557
946k
{
4558
946k
  int low, up, syn_allow, non_low;
4559
946k
  int r;
4560
946k
  OnigCodePoint c;
4561
946k
  OnigEncoding enc;
4562
946k
  UChar* p;
4563
946k
  PFETCH_READY;
4564
4565
946k
  p = *src;
4566
946k
  r = 0;
4567
946k
  non_low = 0;
4568
946k
  enc = env->enc;
4569
946k
  syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
4570
4571
946k
  if (PEND) {
4572
25.9k
    if (syn_allow)
4573
25.9k
      return 1;  /* "....{" : OK! */
4574
0
    else
4575
0
      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
4576
25.9k
  }
4577
4578
921k
  if (! syn_allow) {
4579
0
    c = PPEEK;
4580
0
    if (c == ')' || c == '(' || c == '|') {
4581
0
      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
4582
0
    }
4583
0
  }
4584
4585
921k
  low = scan_number(&p, end, env->enc);
4586
921k
  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4587
921k
  if (low > ONIG_MAX_REPEAT_NUM)
4588
25
    return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4589
4590
921k
  if (p == *src) { /* can't read low */
4591
866k
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
4592
      /* allow {,n} as {0,n} */
4593
0
      low = 0;
4594
0
      non_low = 1;
4595
0
    }
4596
866k
    else
4597
866k
      goto invalid;
4598
866k
  }
4599
4600
54.5k
  if (PEND) goto invalid;
4601
52.2k
  PFETCH(c);
4602
52.2k
  if (c == ',') {
4603
9.43k
    UChar* prev = p;
4604
9.43k
    up = scan_number(&p, end, env->enc);
4605
9.43k
    if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4606
9.42k
    if (up > ONIG_MAX_REPEAT_NUM)
4607
20
      return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4608
4609
9.40k
    if (p == prev) {
4610
3.58k
      if (non_low != 0)
4611
0
        goto invalid;
4612
3.58k
      up = INFINITE_REPEAT;  /* {n,} : {n,infinite} */
4613
3.58k
    }
4614
9.40k
  }
4615
42.8k
  else {
4616
42.8k
    if (non_low != 0)
4617
0
      goto invalid;
4618
4619
42.8k
    PUNFETCH;
4620
42.8k
    up = low;  /* {n} : exact n times */
4621
42.8k
    r = 2;     /* fixed */
4622
42.8k
  }
4623
4624
52.2k
  if (PEND) goto invalid;
4625
51.1k
  PFETCH(c);
4626
51.1k
  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
4627
0
    if (c != MC_ESC(env->syntax) || PEND) goto invalid;
4628
0
    PFETCH(c);
4629
0
  }
4630
51.1k
  if (c != '}') goto invalid;
4631
4632
20.6k
  if (!IS_INFINITE_REPEAT(up) && low > up) {
4633
    /* {n,m}+ supported case */
4634
1
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
4635
1
      return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
4636
4637
0
    tok->u.repeat.possessive = 1;
4638
0
    {
4639
0
      int tmp;
4640
0
      tmp = low; low = up; up = tmp;
4641
0
    }
4642
0
  }
4643
20.6k
  else
4644
20.6k
    tok->u.repeat.possessive = 0;
4645
4646
20.6k
  tok->type = TK_INTERVAL;
4647
20.6k
  tok->u.repeat.lower = low;
4648
20.6k
  tok->u.repeat.upper = up;
4649
20.6k
  *src = p;
4650
20.6k
  return r; /* 0: normal {n,m}, 2: fixed {n} */
4651
4652
900k
 invalid:
4653
900k
  if (syn_allow) {
4654
    /* *src = p; */ /* !!! Don't do this line !!! */
4655
900k
    return 1;  /* OK */
4656
900k
  }
4657
0
  else
4658
0
    return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
4659
900k
}
4660
4661
/* \M-, \C-, \c, or \... */
4662
static int
4663
fetch_escaped_value_raw(UChar** src, UChar* end, ParseEnv* env,
4664
                        OnigCodePoint* val)
4665
570k
{
4666
570k
  int v;
4667
570k
  OnigCodePoint c;
4668
570k
  OnigEncoding enc = env->enc;
4669
570k
  UChar* p = *src;
4670
4671
570k
  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4672
4673
570k
  PFETCH_S(c);
4674
570k
  switch (c) {
4675
80
  case 'M':
4676
80
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
4677
0
      if (PEND) return ONIGERR_END_PATTERN_AT_META;
4678
0
      PFETCH_S(c);
4679
0
      if (c != '-') return ONIGERR_META_CODE_SYNTAX;
4680
0
      if (PEND) return ONIGERR_END_PATTERN_AT_META;
4681
0
      PFETCH_S(c);
4682
0
      if (c == MC_ESC(env->syntax)) {
4683
0
        v = fetch_escaped_value_raw(&p, end, env, &c);
4684
0
        if (v < 0) return v;
4685
0
      }
4686
0
      c = ((c & 0xff) | 0x80);
4687
0
    }
4688
80
    else
4689
80
      goto backslash;
4690
0
    break;
4691
4692
513
  case 'C':
4693
513
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
4694
0
      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4695
0
      PFETCH_S(c);
4696
0
      if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
4697
0
      goto control;
4698
0
    }
4699
513
    else
4700
513
      goto backslash;
4701
4702
4.03k
  case 'c':
4703
4.03k
    if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
4704
4.03k
    control:
4705
4.03k
      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4706
4.00k
      PFETCH_S(c);
4707
4.00k
      if (c == '?') {
4708
10
        c = 0177;
4709
10
      }
4710
3.99k
      else {
4711
3.99k
        if (c == MC_ESC(env->syntax)) {
4712
44
          v = fetch_escaped_value_raw(&p, end, env, &c);
4713
44
          if (v < 0) return v;
4714
44
        }
4715
3.99k
        c &= 0x9f;
4716
3.99k
      }
4717
4.00k
      break;
4718
4.00k
    }
4719
    /* fall through */
4720
4721
566k
  default:
4722
566k
    {
4723
566k
    backslash:
4724
566k
      c = conv_backslash_value(c, env);
4725
566k
    }
4726
566k
    break;
4727
570k
  }
4728
4729
570k
  *src = p;
4730
570k
  *val = c;
4731
570k
  return 0;
4732
570k
}
4733
4734
static int
4735
fetch_escaped_value(UChar** src, UChar* end, ParseEnv* env, OnigCodePoint* val)
4736
570k
{
4737
570k
  int r;
4738
570k
  int len;
4739
4740
570k
  r = fetch_escaped_value_raw(src, end, env, val);
4741
570k
  if (r != 0) return r;
4742
4743
570k
  len = ONIGENC_CODE_TO_MBCLEN(env->enc, *val);
4744
570k
  if (len < 0) return len;
4745
4746
570k
  return 0;
4747
570k
}
4748
4749
static int fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env);
4750
4751
static OnigCodePoint
4752
get_name_end_code_point(OnigCodePoint start)
4753
36.1k
{
4754
36.1k
  switch (start) {
4755
197
  case '<':  return (OnigCodePoint )'>';  break;
4756
3.01k
  case '\'': return (OnigCodePoint )'\''; break;
4757
32.9k
  case '(':  return (OnigCodePoint )')';  break;
4758
0
  default:
4759
0
    break;
4760
36.1k
  }
4761
4762
0
  return (OnigCodePoint )0;
4763
36.1k
}
4764
4765
/* How a group reference was written, as reported by fetch_name() and
   fetch_name_with_level(). */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* a name, not a number */
  IS_ABS_NUM = 1,  /* starts with a digit */
  IS_REL_NUM = 2   /* starts with '+' or '-' */
};
4770
4771
#ifdef USE_BACKREF_WITH_LEVEL
/*
   \k<name+n>, \k<name-n>
   \k<num+n>,  \k<num-n>
   \k<-num+n>, \k<-num-n>
   \k<+num+n>, \k<+num-n>

   Parse a group reference that may carry a trailing "+n" / "-n" level.
   start_code is the opening delimiter; the closing one comes from
   get_name_end_code_point().

   On success: *rname_end marks the end of the name/number part, *src is
   advanced, *num_type tells how the reference was written, *rback_num
   holds the signed number for numeric references, and *rlevel holds the
   signed level when one was given.
   Returns 1 if a level was present, 0 if not, otherwise an ONIGERR_*
   code.  Most error paths also record the offending text through
   onig_scan_env_set_error_string(); the early returns do not.
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ParseEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r, sign, exist_level;
  int digit_count;
  OnigCodePoint end_code;
  OnigCodePoint c;
  OnigEncoding enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p;
  PFETCH_READY;

  p = *src;
  enc = env->enc;
  end_code = get_name_end_code_point(start_code);

  c = 0;
  r = 0;
  sign = 1;
  exist_level = 0;
  digit_count = 0;
  pnum_head = *src;
  name_end = end;
  *rback_num = 0;
  *num_type = IS_NOT_NUM;

  if (PEND) return ONIGERR_EMPTY_GROUP_NAME;

  /* the first character decides between name, absolute and relative
     number */
  PFETCH(c);
  if (c == end_code) return ONIGERR_EMPTY_GROUP_NAME;

  if (IS_CODE_DIGIT_ASCII(enc, c)) {
    *num_type = IS_ABS_NUM;
    digit_count++;
  }
  else if (c == '-') {
    *num_type = IS_REL_NUM;
    sign = -1;
    pnum_head = p;
  }
  else if (c == '+') {
    *num_type = IS_REL_NUM;
    sign = 1;
    pnum_head = p;
  }
  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
    r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
  }

  /* scan up to the terminator or to the sign that starts the level */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type != IS_NOT_NUM) {
      if (IS_CODE_DIGIT_ASCII(enc, c)) {
        digit_count++;
      }
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
        *num_type = IS_NOT_NUM;
      }
    }
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  if (r != 0 || c == end_code) goto end;

  if (c == '+' || c == '-') {
    int level;
    int flag = (c == '-' ? -1 : 1);

    if (PEND) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
      goto end;
    }
    PFETCH(c);
    if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;
    PUNFETCH;
    level = scan_number(&p, end, enc);
    if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
    *rlevel = (level * flag);
    exist_level = 1;

    if (!PEND) {
      PFETCH(c);
      if (c == end_code)
        goto end;
    }
  }
  /* anything else after the name part is an error */

 err:
  name_end = end;
 err2:
  r = ONIGERR_INVALID_GROUP_NAME;

 end:
  if (r != 0) {
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }

  if (*num_type != IS_NOT_NUM) {
    *rback_num = scan_number(&pnum_head, name_end, enc);
    if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
    /* a relative reference of zero is not allowed */
    if (*rback_num == 0 && *num_type == IS_REL_NUM) goto err2;

    *rback_num *= sign;
  }

  *rname_end = name_end;
  *src = p;
  return (exist_level ? 1 : 0);
}
#endif /* USE_BACKREF_WITH_LEVEL */
4910
4911
/*
4912
  ref: 0 -> define name    (don't allow number name)
4913
       1 -> reference name (allow number name)
4914
*/
4915
static int
4916
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
4917
           UChar** rname_end, ParseEnv* env, int* rback_num,
4918
           enum REF_NUM* num_type, int is_ref)
4919
23.8k
{
4920
23.8k
  int r, sign;
4921
23.8k
  int digit_count;
4922
23.8k
  OnigCodePoint end_code;
4923
23.8k
  OnigCodePoint c = 0;
4924
23.8k
  OnigEncoding enc = env->enc;
4925
23.8k
  UChar *name_end;
4926
23.8k
  UChar *pnum_head;
4927
23.8k
  UChar *p = *src;
4928
4929
23.8k
  *rback_num = 0;
4930
4931
23.8k
  end_code = get_name_end_code_point(start_code);
4932
4933
23.8k
  digit_count = 0;
4934
23.8k
  name_end = end;
4935
23.8k
  pnum_head = *src;
4936
23.8k
  r = 0;
4937
23.8k
  *num_type = IS_NOT_NUM;
4938
23.8k
  sign = 1;
4939
23.8k
  if (PEND) {
4940
84
    return ONIGERR_EMPTY_GROUP_NAME;
4941
84
  }
4942
23.7k
  else {
4943
23.7k
    PFETCH_S(c);
4944
23.7k
    if (c == end_code)
4945
1
      return ONIGERR_EMPTY_GROUP_NAME;
4946
4947
23.7k
    if (IS_CODE_DIGIT_ASCII(enc, c)) {
4948
19.9k
      if (is_ref == TRUE)
4949
19.9k
        *num_type = IS_ABS_NUM;
4950
23
      else {
4951
23
        r = ONIGERR_INVALID_GROUP_NAME;
4952
23
      }
4953
19.9k
      digit_count++;
4954
19.9k
    }
4955
3.79k
    else if (c == '-') {
4956
358
      if (is_ref == TRUE) {
4957
357
        *num_type = IS_REL_NUM;
4958
357
        sign = -1;
4959
357
        pnum_head = p;
4960
357
      }
4961
1
      else {
4962
1
        r = ONIGERR_INVALID_GROUP_NAME;
4963
1
      }
4964
358
    }
4965
3.43k
    else if (c == '+') {
4966
328
      if (is_ref == TRUE) {
4967
314
        *num_type = IS_REL_NUM;
4968
314
        sign = 1;
4969
314
        pnum_head = p;
4970
314
      }
4971
14
      else {
4972
14
        r = ONIGERR_INVALID_GROUP_NAME;
4973
14
      }
4974
328
    }
4975
3.10k
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4976
108
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4977
108
    }
4978
23.7k
  }
4979
4980
23.7k
  if (r == 0) {
4981
1.30M
    while (!PEND) {
4982
1.30M
      name_end = p;
4983
1.30M
      PFETCH_S(c);
4984
1.30M
      if (c == end_code || c == ')') {
4985
22.8k
        if (*num_type != IS_NOT_NUM && digit_count == 0)
4986
0
          r = ONIGERR_INVALID_GROUP_NAME;
4987
22.8k
        break;
4988
22.8k
      }
4989
4990
1.27M
      if (*num_type != IS_NOT_NUM) {
4991
1.70k
        if (IS_CODE_DIGIT_ASCII(enc, c)) {
4992
1.51k
          digit_count++;
4993
1.51k
        }
4994
183
        else {
4995
183
          if (!ONIGENC_IS_CODE_WORD(enc, c))
4996
158
            r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4997
25
          else
4998
25
            r = ONIGERR_INVALID_GROUP_NAME;
4999
5000
183
          *num_type = IS_NOT_NUM;
5001
183
        }
5002
1.70k
      }
5003
1.27M
      else {
5004
1.27M
        if (!ONIGENC_IS_CODE_WORD(enc, c)) {
5005
735k
          r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
5006
735k
        }
5007
1.27M
      }
5008
1.27M
    }
5009
5010
23.5k
    if (c != end_code) {
5011
793
      r = ONIGERR_INVALID_GROUP_NAME;
5012
793
      goto err;
5013
793
    }
5014
5015
22.7k
    if (*num_type != IS_NOT_NUM) {
5016
19.9k
      *rback_num = scan_number(&pnum_head, name_end, enc);
5017
19.9k
      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
5018
19.9k
      else if (*rback_num == 0) {
5019
4.20k
        if (*num_type == IS_REL_NUM) {
5020
174
          r = ONIGERR_INVALID_GROUP_NAME;
5021
174
          goto err;
5022
174
        }
5023
4.20k
      }
5024
5025
19.7k
      *rback_num *= sign;
5026
19.7k
    }
5027
5028
22.6k
    *rname_end = name_end;
5029
22.6k
    *src = p;
5030
22.6k
    return 0;
5031
22.7k
  }
5032
146
  else {
5033
5.25k
    while (!PEND) {
5034
5.12k
      name_end = p;
5035
5.12k
      PFETCH_S(c);
5036
5.12k
      if (c == end_code || c == ')')
5037
15
        break;
5038
5.12k
    }
5039
146
    if (PEND)
5040
133
      name_end = end;
5041
5042
1.11k
  err:
5043
1.11k
    onig_scan_env_set_error_string(env, r, *src, name_end);
5044
1.11k
    return r;
5045
146
  }
5046
23.7k
}
5047
5048
static void
5049
CC_ESC_WARN(ParseEnv* env, UChar *c)
5050
343k
{
5051
343k
  if (onig_warn == onig_null_warn) return ;
5052
5053
0
  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
5054
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
5055
0
    UChar buf[WARN_BUFSIZE];
5056
0
    onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
5057
0
                               env->pattern, env->pattern_end,
5058
0
                               (UChar* )"character class has '%s' without escape",
5059
0
                               c);
5060
0
    (*onig_warn)((char* )buf);
5061
0
  }
5062
0
}
5063
5064
static void
5065
CLOSE_BRACKET_WITHOUT_ESC_WARN(ParseEnv* env, UChar* c)
5066
666k
{
5067
666k
  if (onig_warn == onig_null_warn) return ;
5068
5069
0
  if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
5070
0
    UChar buf[WARN_BUFSIZE];
5071
0
    onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
5072
0
                         (env)->pattern, (env)->pattern_end,
5073
0
                         (UChar* )"regular expression has '%s' without escape", c);
5074
0
    (*onig_warn)((char* )buf);
5075
0
  }
5076
0
}
5077
5078
static UChar*
5079
find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
5080
                  UChar **next, OnigEncoding enc)
5081
116
{
5082
116
  int i;
5083
116
  OnigCodePoint x;
5084
116
  UChar *q;
5085
116
  UChar *p = from;
5086
5087
1.07M
  while (p < to) {
5088
1.07M
    x = ONIGENC_MBC_TO_CODE(enc, p, to);
5089
1.07M
    q = p + enclen(enc, p);
5090
1.07M
    if (x == s[0]) {
5091
2.11k
      for (i = 1; i < n && q < to; i++) {
5092
2.06k
        x = ONIGENC_MBC_TO_CODE(enc, q, to);
5093
2.06k
        if (x != s[i]) break;
5094
0
        q += enclen(enc, q);
5095
0
      }
5096
2.11k
      if (i >= n) {
5097
0
        if (IS_NOT_NULL(next))
5098
0
          *next = q;
5099
0
        return p;
5100
0
      }
5101
2.11k
    }
5102
1.07M
    p = q;
5103
1.07M
  }
5104
116
  return NULL_UCHARP;
5105
116
}
5106
5107
static int
5108
is_head_of_bre_subexp(UChar* p, UChar* end, OnigEncoding enc, ParseEnv* env)
5109
0
{
5110
0
  UChar* start;
5111
0
  OnigCodePoint code;
5112
5113
0
  start = env->pattern;
5114
0
  if (p > start) {
5115
0
    p = onigenc_get_prev_char_head(enc, start, p);
5116
0
    if (p > start) {
5117
0
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
5118
0
      if (code == '(' ||
5119
0
          (code == '|' &&
5120
0
           IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_VBAR_ALT))) {
5121
0
        p = onigenc_get_prev_char_head(enc, start, p);
5122
0
        code = ONIGENC_MBC_TO_CODE(enc, p, end);
5123
0
        if (IS_MC_ESC_CODE(code, env->syntax)) {
5124
0
          int count = 0;
5125
0
          while (p > start) {
5126
0
            p = onigenc_get_prev_char_head(enc, start, p);
5127
0
            code = ONIGENC_MBC_TO_CODE(enc, p, end);
5128
0
            if (! IS_MC_ESC_CODE(code, env->syntax)) break;
5129
0
            count++;
5130
0
          }
5131
0
          return (count % 2 == 0);
5132
0
        }
5133
0
      }
5134
0
    }
5135
0
    return FALSE;
5136
0
  }
5137
0
  else {
5138
0
    return TRUE;
5139
0
  }
5140
0
}
5141
5142
static int
5143
is_end_of_bre_subexp(UChar* p, UChar* end, OnigEncoding enc, ParseEnv* env)
5144
0
{
5145
0
  OnigCodePoint code;
5146
5147
0
  if (p == end) return TRUE;
5148
5149
0
  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5150
0
  if (IS_MC_ESC_CODE(code, env->syntax)) {
5151
0
    p += ONIGENC_MBC_ENC_LEN(enc, p);
5152
0
    if (p < end) {
5153
0
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
5154
0
      if (code == ')' ||
5155
0
          (code == '|' &&
5156
0
           IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_VBAR_ALT)))
5157
0
        return TRUE;
5158
0
    }
5159
0
  }
5160
5161
0
  return FALSE;
5162
0
}
5163
5164
static int
5165
is_posix_bracket_start(UChar* from, UChar* to, OnigEncoding enc)
5166
125k
{
5167
125k
  int n;
5168
125k
  OnigCodePoint x;
5169
125k
  UChar *p;
5170
5171
125k
  n = 0;
5172
125k
  p = from;
5173
549k
  while (p < to) {
5174
549k
    x = ONIGENC_MBC_TO_CODE(enc, p, to);
5175
549k
    p += enclen(enc, p);
5176
549k
    if (x == ':') {
5177
9.78k
      if (p < to) {
5178
9.74k
        x = ONIGENC_MBC_TO_CODE(enc, p, to);
5179
9.74k
        if (x == ']') {
5180
94
          if (n == 0) return FALSE;
5181
1
          else        return TRUE;
5182
94
        }
5183
9.74k
      }
5184
5185
9.68k
      return FALSE;
5186
9.78k
    }
5187
539k
    else if (x == '^' && n == 0) {
5188
18
      ;
5189
18
    }
5190
539k
    else if (! ONIGENC_IS_CODE_ALPHA(enc, x)) {
5191
115k
      break;
5192
115k
    }
5193
5194
424k
    n += 1;
5195
424k
  }
5196
5197
115k
  return FALSE;
5198
125k
}
5199
5200
static int
5201
fetch_token_cc(PToken* tok, UChar** src, UChar* end, ParseEnv* env, int state)
5202
5.15M
{
5203
5.15M
  int r;
5204
5.15M
  OnigCodePoint code;
5205
5.15M
  OnigCodePoint c, c2;
5206
5.15M
  int mindigits, maxdigits;
5207
5.15M
  OnigSyntaxType* syn;
5208
5.15M
  OnigEncoding enc;
5209
5.15M
  UChar* prev;
5210
5.15M
  UChar* p;
5211
5.15M
  PFETCH_READY;
5212
5213
5.15M
  p = *src;
5214
5.15M
  enc = env->enc;
5215
5.15M
  syn = env->syntax;
5216
5.15M
  if (tok->code_point_continue != 0) {
5217
0
    r = get_next_code_point(&p, end, tok->base_num, enc, TRUE, &code);
5218
0
    if (r == 1) {
5219
0
      tok->code_point_continue = 0;
5220
0
    }
5221
0
    else if (r == 2) {
5222
0
      tok->type = TK_CC_RANGE;
5223
0
      goto end;
5224
0
    }
5225
0
    else if (r == 0) {
5226
0
      tok->type   = TK_CODE_POINT;
5227
0
      tok->u.code = code;
5228
0
      goto end;
5229
0
    }
5230
0
    else
5231
0
      return r; /* error */
5232
0
  }
5233
5234
5.15M
  if (PEND) {
5235
29.4k
    tok->type = TK_EOT;
5236
29.4k
    return tok->type;
5237
29.4k
  }
5238
5239
5.12M
  PFETCH(c);
5240
5.12M
  tok->type = TK_CHAR;
5241
5.12M
  tok->base_num = 0;
5242
5.12M
  tok->u.code   = c;
5243
5.12M
  tok->escaped  = 0;
5244
5245
5.12M
  if (c == ']') {
5246
162k
    tok->type = TK_CC_CLOSE;
5247
162k
  }
5248
4.96M
  else if (c == '-') {
5249
55.5k
    tok->type = TK_CC_RANGE;
5250
55.5k
  }
5251
4.90M
  else if (c == MC_ESC(syn)) {
5252
46.7k
    if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
5253
0
      goto end;
5254
5255
46.7k
    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
5256
5257
46.5k
    PFETCH(c);
5258
46.5k
    tok->escaped = 1;
5259
46.5k
    tok->u.code = c;
5260
46.5k
    switch (c) {
5261
752
    case 'w':
5262
752
      tok->type = TK_CHAR_TYPE;
5263
752
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5264
752
      tok->u.prop.not   = 0;
5265
752
      break;
5266
14
    case 'W':
5267
14
      tok->type = TK_CHAR_TYPE;
5268
14
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5269
14
      tok->u.prop.not   = 1;
5270
14
      break;
5271
71
    case 'd':
5272
71
      tok->type = TK_CHAR_TYPE;
5273
71
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5274
71
      tok->u.prop.not   = 0;
5275
71
      break;
5276
150
    case 'D':
5277
150
      tok->type = TK_CHAR_TYPE;
5278
150
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5279
150
      tok->u.prop.not   = 1;
5280
150
      break;
5281
1.27k
    case 's':
5282
1.27k
      tok->type = TK_CHAR_TYPE;
5283
1.27k
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5284
1.27k
      tok->u.prop.not   = 0;
5285
1.27k
      break;
5286
12
    case 'S':
5287
12
      tok->type = TK_CHAR_TYPE;
5288
12
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5289
12
      tok->u.prop.not   = 1;
5290
12
      break;
5291
22
    case 'h':
5292
22
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5293
0
      tok->type = TK_CHAR_TYPE;
5294
0
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5295
0
      tok->u.prop.not   = 0;
5296
0
      break;
5297
154
    case 'H':
5298
154
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5299
0
      tok->type = TK_CHAR_TYPE;
5300
0
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5301
0
      tok->u.prop.not   = 1;
5302
0
      break;
5303
5304
13
    case 'p':
5305
436
    case 'P':
5306
436
      if (PEND) break;
5307
5308
436
      c2 = PPEEK;
5309
436
      if (c2 == '{' &&
5310
436
          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
5311
0
        PINC;
5312
0
        tok->type = TK_CHAR_PROPERTY;
5313
0
        tok->u.prop.not = c == 'P';
5314
5315
0
        if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
5316
0
          PFETCH(c2);
5317
0
          if (c2 == '^') {
5318
0
            tok->u.prop.not = tok->u.prop.not == 0;
5319
0
          }
5320
0
          else
5321
0
            PUNFETCH;
5322
0
        }
5323
0
      }
5324
436
      break;
5325
5326
361
    case 'o':
5327
361
      if (PEND) break;
5328