Coverage Report

Created: 2023-09-23 07:09

/src/augeas/gnulib/lib/regex_internal.c
Line
Count
Source (jump to first uncovered line)
1
/* Extended regular expression matching and search library.
2
   Copyright (C) 2002-2022 Free Software Foundation, Inc.
3
   This file is part of the GNU C Library.
4
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6
   The GNU C Library is free software; you can redistribute it and/or
7
   modify it under the terms of the GNU Lesser General Public
8
   License as published by the Free Software Foundation; either
9
   version 2.1 of the License, or (at your option) any later version.
10
11
   The GNU C Library is distributed in the hope that it will be useful,
12
   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
   Lesser General Public License for more details.
15
16
   You should have received a copy of the GNU Lesser General Public
17
   License along with the GNU C Library; if not, see
18
   <https://www.gnu.org/licenses/>.  */
19
20
static void re_string_construct_common (const char *str, Idx len,
21
          re_string_t *pstr,
22
          RE_TRANSLATE_TYPE trans, bool icase,
23
          const re_dfa_t *dfa);
24
static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
25
            const re_node_set *nodes,
26
            re_hashval_t hash);
27
static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
28
            const re_node_set *nodes,
29
            unsigned int context,
30
            re_hashval_t hash);
31
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
32
            Idx new_buf_len);
33
static void build_wcs_buffer (re_string_t *pstr);
34
static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
35
static void build_upper_buffer (re_string_t *pstr);
36
static void re_string_translate_buffer (re_string_t *pstr);
37
static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
38
            int eflags) __attribute__ ((pure));
39

40
/* Functions for string operation.  */
41
42
/* This function allocate the buffers.  It is necessary to call
43
   re_string_reconstruct before using the object.  */
44
45
static reg_errcode_t
46
__attribute_warn_unused_result__
47
re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
48
        RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
49
1.97M
{
50
1.97M
  reg_errcode_t ret;
51
1.97M
  Idx init_buf_len;
52
53
  /* Ensure at least one character fits into the buffers.  */
54
1.97M
  if (init_len < dfa->mb_cur_max)
55
0
    init_len = dfa->mb_cur_max;
56
1.97M
  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
57
1.97M
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
58
59
1.97M
  ret = re_string_realloc_buffers (pstr, init_buf_len);
60
1.97M
  if (__glibc_unlikely (ret != REG_NOERROR))
61
0
    return ret;
62
63
1.97M
  pstr->word_char = dfa->word_char;
64
1.97M
  pstr->word_ops_used = dfa->word_ops_used;
65
1.97M
  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
66
1.97M
  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
67
1.97M
  pstr->valid_raw_len = pstr->valid_len;
68
1.97M
  return REG_NOERROR;
69
1.97M
}
70
71
/* This function allocate the buffers, and initialize them.  */
72
73
static reg_errcode_t
74
__attribute_warn_unused_result__
75
re_string_construct (re_string_t *pstr, const char *str, Idx len,
76
         RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
77
12.7k
{
78
12.7k
  reg_errcode_t ret;
79
12.7k
  memset (pstr, '\0', sizeof (re_string_t));
80
12.7k
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
81
82
12.7k
  if (len > 0)
83
12.7k
    {
84
12.7k
      ret = re_string_realloc_buffers (pstr, len + 1);
85
12.7k
      if (__glibc_unlikely (ret != REG_NOERROR))
86
0
  return ret;
87
12.7k
    }
88
12.7k
  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
89
90
12.7k
  if (icase)
91
9.62k
    {
92
9.62k
      if (dfa->mb_cur_max > 1)
93
0
  {
94
0
    while (1)
95
0
      {
96
0
        ret = build_wcs_upper_buffer (pstr);
97
0
        if (__glibc_unlikely (ret != REG_NOERROR))
98
0
    return ret;
99
0
        if (pstr->valid_raw_len >= len)
100
0
    break;
101
0
        if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
102
0
    break;
103
0
        ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
104
0
        if (__glibc_unlikely (ret != REG_NOERROR))
105
0
    return ret;
106
0
      }
107
0
  }
108
9.62k
      else
109
9.62k
  build_upper_buffer (pstr);
110
9.62k
    }
111
3.12k
  else
112
3.12k
    {
113
3.12k
      if (dfa->mb_cur_max > 1)
114
0
  build_wcs_buffer (pstr);
115
3.12k
      else
116
3.12k
  {
117
3.12k
    if (trans != NULL)
118
0
      re_string_translate_buffer (pstr);
119
3.12k
    else
120
3.12k
      {
121
3.12k
        pstr->valid_len = pstr->bufs_len;
122
3.12k
        pstr->valid_raw_len = pstr->bufs_len;
123
3.12k
      }
124
3.12k
  }
125
3.12k
    }
126
127
12.7k
  return REG_NOERROR;
128
12.7k
}
129
130
/* Helper functions for re_string_allocate, and re_string_construct.  */
131
132
static reg_errcode_t
133
__attribute_warn_unused_result__
134
re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
135
2.20M
{
136
2.20M
  if (pstr->mb_cur_max > 1)
137
0
    {
138
0
      wint_t *new_wcs;
139
140
      /* Avoid overflow in realloc.  */
141
0
      const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
142
0
      if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
143
0
          < new_buf_len))
144
0
  return REG_ESPACE;
145
146
0
      new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
147
0
      if (__glibc_unlikely (new_wcs == NULL))
148
0
  return REG_ESPACE;
149
0
      pstr->wcs = new_wcs;
150
0
      if (pstr->offsets != NULL)
151
0
  {
152
0
    Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
153
0
    if (__glibc_unlikely (new_offsets == NULL))
154
0
      return REG_ESPACE;
155
0
    pstr->offsets = new_offsets;
156
0
  }
157
0
    }
158
2.20M
  if (pstr->mbs_allocated)
159
19.2k
    {
160
19.2k
      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
161
19.2k
             new_buf_len);
162
19.2k
      if (__glibc_unlikely (new_mbs == NULL))
163
0
  return REG_ESPACE;
164
19.2k
      pstr->mbs = new_mbs;
165
19.2k
    }
166
2.20M
  pstr->bufs_len = new_buf_len;
167
2.20M
  return REG_NOERROR;
168
2.20M
}
169
170
171
static void
172
re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
173
          RE_TRANSLATE_TYPE trans, bool icase,
174
          const re_dfa_t *dfa)
175
1.98M
{
176
1.98M
  pstr->raw_mbs = (const unsigned char *) str;
177
1.98M
  pstr->len = len;
178
1.98M
  pstr->raw_len = len;
179
1.98M
  pstr->trans = trans;
180
1.98M
  pstr->icase = icase;
181
1.98M
  pstr->mbs_allocated = (trans != NULL || icase);
182
1.98M
  pstr->mb_cur_max = dfa->mb_cur_max;
183
1.98M
  pstr->is_utf8 = dfa->is_utf8;
184
1.98M
  pstr->map_notascii = dfa->map_notascii;
185
1.98M
  pstr->stop = pstr->len;
186
1.98M
  pstr->raw_stop = pstr->stop;
187
1.98M
}
188
189
190
/* Build wide character buffer PSTR->WCS.
191
   If the byte sequence of the string are:
192
     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
193
   Then wide character buffer will be:
194
     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
195
   We use WEOF for padding, they indicate that the position isn't
196
   a first byte of a multibyte character.
197
198
   Note that this function assumes PSTR->VALID_LEN elements are already
199
   built and starts from PSTR->VALID_LEN.  */
200
201
static void
202
build_wcs_buffer (re_string_t *pstr)
203
0
{
204
#ifdef _LIBC
205
  unsigned char buf[MB_LEN_MAX];
206
  DEBUG_ASSERT (MB_LEN_MAX >= pstr->mb_cur_max);
207
#else
208
0
  unsigned char buf[64];
209
0
#endif
210
0
  mbstate_t prev_st;
211
0
  Idx byte_idx, end_idx, remain_len;
212
0
  size_t mbclen;
213
214
  /* Build the buffers from pstr->valid_len to either pstr->len or
215
     pstr->bufs_len.  */
216
0
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
217
0
  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
218
0
    {
219
0
      wchar_t wc;
220
0
      const char *p;
221
222
0
      remain_len = end_idx - byte_idx;
223
0
      prev_st = pstr->cur_state;
224
      /* Apply the translation if we need.  */
225
0
      if (__glibc_unlikely (pstr->trans != NULL))
226
0
  {
227
0
    int i, ch;
228
229
0
    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
230
0
      {
231
0
        ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
232
0
        buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
233
0
      }
234
0
    p = (const char *) buf;
235
0
  }
236
0
      else
237
0
  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
238
0
      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
239
0
      if (__glibc_unlikely (mbclen == (size_t) -1 || mbclen == 0
240
0
          || (mbclen == (size_t) -2
241
0
        && pstr->bufs_len >= pstr->len)))
242
0
  {
243
    /* We treat these cases as a singlebyte character.  */
244
0
    mbclen = 1;
245
0
    wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
246
0
    if (__glibc_unlikely (pstr->trans != NULL))
247
0
      wc = pstr->trans[wc];
248
0
    pstr->cur_state = prev_st;
249
0
  }
250
0
      else if (__glibc_unlikely (mbclen == (size_t) -2))
251
0
  {
252
    /* The buffer doesn't have enough space, finish to build.  */
253
0
    pstr->cur_state = prev_st;
254
0
    break;
255
0
  }
256
257
      /* Write wide character and padding.  */
258
0
      pstr->wcs[byte_idx++] = wc;
259
      /* Write paddings.  */
260
0
      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
261
0
  pstr->wcs[byte_idx++] = WEOF;
262
0
    }
263
0
  pstr->valid_len = byte_idx;
264
0
  pstr->valid_raw_len = byte_idx;
265
0
}
266
267
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
268
   but for REG_ICASE.  */
269
270
static reg_errcode_t
271
__attribute_warn_unused_result__
272
build_wcs_upper_buffer (re_string_t *pstr)
273
0
{
274
0
  mbstate_t prev_st;
275
0
  Idx src_idx, byte_idx, end_idx, remain_len;
276
0
  size_t mbclen;
277
#ifdef _LIBC
278
  char buf[MB_LEN_MAX];
279
  DEBUG_ASSERT (pstr->mb_cur_max <= MB_LEN_MAX);
280
#else
281
0
  char buf[64];
282
0
#endif
283
284
0
  byte_idx = pstr->valid_len;
285
0
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
286
287
  /* The following optimization assumes that ASCII characters can be
288
     mapped to wide characters with a simple cast.  */
289
0
  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
290
0
    {
291
0
      while (byte_idx < end_idx)
292
0
  {
293
0
    wchar_t wc;
294
0
    unsigned char ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
295
296
0
    if (isascii (ch) && mbsinit (&pstr->cur_state))
297
0
      {
298
        /* The next step uses the assumption that wchar_t is encoded
299
     ASCII-safe: all ASCII values can be converted like this.  */
300
0
        wchar_t wcu = __towupper (ch);
301
0
        if (isascii (wcu))
302
0
    {
303
0
      pstr->mbs[byte_idx] = wcu;
304
0
      pstr->wcs[byte_idx] = wcu;
305
0
      byte_idx++;
306
0
      continue;
307
0
    }
308
0
      }
309
310
0
    remain_len = end_idx - byte_idx;
311
0
    prev_st = pstr->cur_state;
312
0
    mbclen = __mbrtowc (&wc,
313
0
            ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
314
0
             + byte_idx), remain_len, &pstr->cur_state);
315
0
    if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2))
316
0
      {
317
0
        wchar_t wcu = __towupper (wc);
318
0
        if (wcu != wc)
319
0
    {
320
0
      size_t mbcdlen;
321
322
0
      mbcdlen = __wcrtomb (buf, wcu, &prev_st);
323
0
      if (__glibc_likely (mbclen == mbcdlen))
324
0
        memcpy (pstr->mbs + byte_idx, buf, mbclen);
325
0
      else
326
0
        {
327
0
          src_idx = byte_idx;
328
0
          goto offsets_needed;
329
0
        }
330
0
    }
331
0
        else
332
0
    memcpy (pstr->mbs + byte_idx,
333
0
      pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
334
0
        pstr->wcs[byte_idx++] = wcu;
335
        /* Write paddings.  */
336
0
        for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
337
0
    pstr->wcs[byte_idx++] = WEOF;
338
0
      }
339
0
    else if (mbclen == (size_t) -1 || mbclen == 0
340
0
       || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
341
0
      {
342
        /* It is an invalid character, an incomplete character
343
     at the end of the string, or '\0'.  Just use the byte.  */
344
0
        pstr->mbs[byte_idx] = ch;
345
        /* And also cast it to wide char.  */
346
0
        pstr->wcs[byte_idx++] = (wchar_t) ch;
347
0
        if (__glibc_unlikely (mbclen == (size_t) -1))
348
0
    pstr->cur_state = prev_st;
349
0
      }
350
0
    else
351
0
      {
352
        /* The buffer doesn't have enough space, finish to build.  */
353
0
        pstr->cur_state = prev_st;
354
0
        break;
355
0
      }
356
0
  }
357
0
      pstr->valid_len = byte_idx;
358
0
      pstr->valid_raw_len = byte_idx;
359
0
      return REG_NOERROR;
360
0
    }
361
0
  else
362
0
    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
363
0
      {
364
0
  wchar_t wc;
365
0
  const char *p;
366
0
      offsets_needed:
367
0
  remain_len = end_idx - byte_idx;
368
0
  prev_st = pstr->cur_state;
369
0
  if (__glibc_unlikely (pstr->trans != NULL))
370
0
    {
371
0
      int i, ch;
372
373
0
      for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
374
0
        {
375
0
    ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
376
0
    buf[i] = pstr->trans[ch];
377
0
        }
378
0
      p = (const char *) buf;
379
0
    }
380
0
  else
381
0
    p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
382
0
  mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
383
0
  if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2))
384
0
    {
385
0
      wchar_t wcu = __towupper (wc);
386
0
      if (wcu != wc)
387
0
        {
388
0
    size_t mbcdlen;
389
390
0
    mbcdlen = __wcrtomb ((char *) buf, wcu, &prev_st);
391
0
    if (__glibc_likely (mbclen == mbcdlen))
392
0
      memcpy (pstr->mbs + byte_idx, buf, mbclen);
393
0
    else if (mbcdlen != (size_t) -1)
394
0
      {
395
0
        size_t i;
396
397
0
        if (byte_idx + mbcdlen > pstr->bufs_len)
398
0
          {
399
0
      pstr->cur_state = prev_st;
400
0
      break;
401
0
          }
402
403
0
        if (pstr->offsets == NULL)
404
0
          {
405
0
      pstr->offsets = re_malloc (Idx, pstr->bufs_len);
406
407
0
      if (pstr->offsets == NULL)
408
0
        return REG_ESPACE;
409
0
          }
410
0
        if (!pstr->offsets_needed)
411
0
          {
412
0
      for (i = 0; i < (size_t) byte_idx; ++i)
413
0
        pstr->offsets[i] = i;
414
0
      pstr->offsets_needed = 1;
415
0
          }
416
417
0
        memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
418
0
        pstr->wcs[byte_idx] = wcu;
419
0
        pstr->offsets[byte_idx] = src_idx;
420
0
        for (i = 1; i < mbcdlen; ++i)
421
0
          {
422
0
      pstr->offsets[byte_idx + i]
423
0
        = src_idx + (i < mbclen ? i : mbclen - 1);
424
0
      pstr->wcs[byte_idx + i] = WEOF;
425
0
          }
426
0
        pstr->len += mbcdlen - mbclen;
427
0
        if (pstr->raw_stop > src_idx)
428
0
          pstr->stop += mbcdlen - mbclen;
429
0
        end_idx = (pstr->bufs_len > pstr->len)
430
0
            ? pstr->len : pstr->bufs_len;
431
0
        byte_idx += mbcdlen;
432
0
        src_idx += mbclen;
433
0
        continue;
434
0
      }
435
0
    else
436
0
      memcpy (pstr->mbs + byte_idx, p, mbclen);
437
0
        }
438
0
      else
439
0
        memcpy (pstr->mbs + byte_idx, p, mbclen);
440
441
0
      if (__glibc_unlikely (pstr->offsets_needed != 0))
442
0
        {
443
0
    size_t i;
444
0
    for (i = 0; i < mbclen; ++i)
445
0
      pstr->offsets[byte_idx + i] = src_idx + i;
446
0
        }
447
0
      src_idx += mbclen;
448
449
0
      pstr->wcs[byte_idx++] = wcu;
450
      /* Write paddings.  */
451
0
      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
452
0
        pstr->wcs[byte_idx++] = WEOF;
453
0
    }
454
0
  else if (mbclen == (size_t) -1 || mbclen == 0
455
0
     || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
456
0
    {
457
      /* It is an invalid character or '\0'.  Just use the byte.  */
458
0
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
459
460
0
      if (__glibc_unlikely (pstr->trans != NULL))
461
0
        ch = pstr->trans [ch];
462
0
      pstr->mbs[byte_idx] = ch;
463
464
0
      if (__glibc_unlikely (pstr->offsets_needed != 0))
465
0
        pstr->offsets[byte_idx] = src_idx;
466
0
      ++src_idx;
467
468
      /* And also cast it to wide char.  */
469
0
      pstr->wcs[byte_idx++] = (wchar_t) ch;
470
0
      if (__glibc_unlikely (mbclen == (size_t) -1))
471
0
        pstr->cur_state = prev_st;
472
0
    }
473
0
  else
474
0
    {
475
      /* The buffer doesn't have enough space, finish to build.  */
476
0
      pstr->cur_state = prev_st;
477
0
      break;
478
0
    }
479
0
      }
480
0
  pstr->valid_len = byte_idx;
481
0
  pstr->valid_raw_len = src_idx;
482
0
  return REG_NOERROR;
483
0
}
484
485
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
486
   Return the index.  */
487
488
static Idx
489
re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
490
0
{
491
0
  mbstate_t prev_st;
492
0
  Idx rawbuf_idx;
493
0
  size_t mbclen;
494
0
  wint_t wc = WEOF;
495
496
  /* Skip the characters which are not necessary to check.  */
497
0
  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
498
0
       rawbuf_idx < new_raw_idx;)
499
0
    {
500
0
      wchar_t wc2;
501
0
      Idx remain_len = pstr->raw_len - rawbuf_idx;
502
0
      prev_st = pstr->cur_state;
503
0
      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
504
0
        remain_len, &pstr->cur_state);
505
0
      if (__glibc_unlikely (mbclen == (size_t) -2 || mbclen == (size_t) -1
506
0
          || mbclen == 0))
507
0
  {
508
    /* We treat these cases as a single byte character.  */
509
0
    if (mbclen == 0 || remain_len == 0)
510
0
      wc = L'\0';
511
0
    else
512
0
      wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
513
0
    mbclen = 1;
514
0
    pstr->cur_state = prev_st;
515
0
  }
516
0
      else
517
0
  wc = wc2;
518
      /* Then proceed the next character.  */
519
0
      rawbuf_idx += mbclen;
520
0
    }
521
0
  *last_wc = wc;
522
0
  return rawbuf_idx;
523
0
}
524
525
/* Build the buffer PSTR->MBS, and apply the translation if we need.
526
   This function is used in case of REG_ICASE.  */
527
528
static void
529
build_upper_buffer (re_string_t *pstr)
530
19.2k
{
531
19.2k
  Idx char_idx, end_idx;
532
19.2k
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
533
534
106k
  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
535
86.7k
    {
536
86.7k
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
537
86.7k
      if (__glibc_unlikely (pstr->trans != NULL))
538
0
  ch = pstr->trans[ch];
539
86.7k
      pstr->mbs[char_idx] = toupper (ch);
540
86.7k
    }
541
19.2k
  pstr->valid_len = char_idx;
542
19.2k
  pstr->valid_raw_len = char_idx;
543
19.2k
}
544
545
/* Apply TRANS to the buffer in PSTR.  */
546
547
static void
548
re_string_translate_buffer (re_string_t *pstr)
549
0
{
550
0
  Idx buf_idx, end_idx;
551
0
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
552
553
0
  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
554
0
    {
555
0
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
556
0
      pstr->mbs[buf_idx] = pstr->trans[ch];
557
0
    }
558
559
0
  pstr->valid_len = buf_idx;
560
0
  pstr->valid_raw_len = buf_idx;
561
0
}
562
563
/* This function re-construct the buffers.
564
   Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
565
   convert to upper case in case of REG_ICASE, apply translation.  */
566
567
static reg_errcode_t
568
__attribute_warn_unused_result__
569
re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
570
1.97M
{
571
1.97M
  Idx offset;
572
573
1.97M
  if (__glibc_unlikely (pstr->raw_mbs_idx <= idx))
574
1.97M
    offset = idx - pstr->raw_mbs_idx;
575
0
  else
576
0
    {
577
      /* Reset buffer.  */
578
0
      if (pstr->mb_cur_max > 1)
579
0
  memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
580
0
      pstr->len = pstr->raw_len;
581
0
      pstr->stop = pstr->raw_stop;
582
0
      pstr->valid_len = 0;
583
0
      pstr->raw_mbs_idx = 0;
584
0
      pstr->valid_raw_len = 0;
585
0
      pstr->offsets_needed = 0;
586
0
      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
587
0
         : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
588
0
      if (!pstr->mbs_allocated)
589
0
  pstr->mbs = (unsigned char *) pstr->raw_mbs;
590
0
      offset = idx;
591
0
    }
592
593
1.97M
  if (__glibc_likely (offset != 0))
594
0
    {
595
      /* Should the already checked characters be kept?  */
596
0
      if (__glibc_likely (offset < pstr->valid_raw_len))
597
0
  {
598
    /* Yes, move them to the front of the buffer.  */
599
0
    if (__glibc_unlikely (pstr->offsets_needed))
600
0
      {
601
0
        Idx low = 0, high = pstr->valid_len, mid;
602
0
        do
603
0
    {
604
0
      mid = (high + low) / 2;
605
0
      if (pstr->offsets[mid] > offset)
606
0
        high = mid;
607
0
      else if (pstr->offsets[mid] < offset)
608
0
        low = mid + 1;
609
0
      else
610
0
        break;
611
0
    }
612
0
        while (low < high);
613
0
        if (pstr->offsets[mid] < offset)
614
0
    ++mid;
615
0
        pstr->tip_context = re_string_context_at (pstr, mid - 1,
616
0
              eflags);
617
        /* This can be quite complicated, so handle specially
618
     only the common and easy case where the character with
619
     different length representation of lower and upper
620
     case is present at or after offset.  */
621
0
        if (pstr->valid_len > offset
622
0
      && mid == offset && pstr->offsets[mid] == offset)
623
0
    {
624
0
      memmove (pstr->wcs, pstr->wcs + offset,
625
0
         (pstr->valid_len - offset) * sizeof (wint_t));
626
0
      memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
627
0
      pstr->valid_len -= offset;
628
0
      pstr->valid_raw_len -= offset;
629
0
      for (low = 0; low < pstr->valid_len; low++)
630
0
        pstr->offsets[low] = pstr->offsets[low + offset] - offset;
631
0
    }
632
0
        else
633
0
    {
634
      /* Otherwise, just find out how long the partial multibyte
635
         character at offset is and fill it with WEOF/255.  */
636
0
      pstr->len = pstr->raw_len - idx + offset;
637
0
      pstr->stop = pstr->raw_stop - idx + offset;
638
0
      pstr->offsets_needed = 0;
639
0
      while (mid > 0 && pstr->offsets[mid - 1] == offset)
640
0
        --mid;
641
0
      while (mid < pstr->valid_len)
642
0
        if (pstr->wcs[mid] != WEOF)
643
0
          break;
644
0
        else
645
0
          ++mid;
646
0
      if (mid == pstr->valid_len)
647
0
        pstr->valid_len = 0;
648
0
      else
649
0
        {
650
0
          pstr->valid_len = pstr->offsets[mid] - offset;
651
0
          if (pstr->valid_len)
652
0
      {
653
0
        for (low = 0; low < pstr->valid_len; ++low)
654
0
          pstr->wcs[low] = WEOF;
655
0
        memset (pstr->mbs, 255, pstr->valid_len);
656
0
      }
657
0
        }
658
0
      pstr->valid_raw_len = pstr->valid_len;
659
0
    }
660
0
      }
661
0
    else
662
0
      {
663
0
        pstr->tip_context = re_string_context_at (pstr, offset - 1,
664
0
              eflags);
665
0
        if (pstr->mb_cur_max > 1)
666
0
    memmove (pstr->wcs, pstr->wcs + offset,
667
0
       (pstr->valid_len - offset) * sizeof (wint_t));
668
0
        if (__glibc_unlikely (pstr->mbs_allocated))
669
0
    memmove (pstr->mbs, pstr->mbs + offset,
670
0
       pstr->valid_len - offset);
671
0
        pstr->valid_len -= offset;
672
0
        pstr->valid_raw_len -= offset;
673
0
        DEBUG_ASSERT (pstr->valid_len > 0);
674
0
      }
675
0
  }
676
0
      else
677
0
  {
678
    /* No, skip all characters until IDX.  */
679
0
    Idx prev_valid_len = pstr->valid_len;
680
681
0
    if (__glibc_unlikely (pstr->offsets_needed))
682
0
      {
683
0
        pstr->len = pstr->raw_len - idx + offset;
684
0
        pstr->stop = pstr->raw_stop - idx + offset;
685
0
        pstr->offsets_needed = 0;
686
0
      }
687
0
    pstr->valid_len = 0;
688
0
    if (pstr->mb_cur_max > 1)
689
0
      {
690
0
        Idx wcs_idx;
691
0
        wint_t wc = WEOF;
692
693
0
        if (pstr->is_utf8)
694
0
    {
695
0
      const unsigned char *raw, *p, *end;
696
697
      /* Special case UTF-8.  Multi-byte chars start with any
698
         byte other than 0x80 - 0xbf.  */
699
0
      raw = pstr->raw_mbs + pstr->raw_mbs_idx;
700
0
      end = raw + (offset - pstr->mb_cur_max);
701
0
      if (end < pstr->raw_mbs)
702
0
        end = pstr->raw_mbs;
703
0
      p = raw + offset - 1;
704
#ifdef _LIBC
705
      /* We know the wchar_t encoding is UCS4, so for the simple
706
         case, ASCII characters, skip the conversion step.  */
707
      if (isascii (*p) && __glibc_likely (pstr->trans == NULL))
708
        {
709
          memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
710
          /* pstr->valid_len = 0; */
711
          wc = (wchar_t) *p;
712
        }
713
      else
714
#endif
715
0
        for (; p >= end; --p)
716
0
          if ((*p & 0xc0) != 0x80)
717
0
      {
718
0
        mbstate_t cur_state;
719
0
        wchar_t wc2;
720
0
        Idx mlen = raw + pstr->len - p;
721
0
        unsigned char buf[6];
722
0
        size_t mbclen;
723
724
0
        const unsigned char *pp = p;
725
0
        if (__glibc_unlikely (pstr->trans != NULL))
726
0
          {
727
0
            int i = mlen < 6 ? mlen : 6;
728
0
            while (--i >= 0)
729
0
        buf[i] = pstr->trans[p[i]];
730
0
            pp = buf;
731
0
          }
732
        /* XXX Don't use mbrtowc, we know which conversion
733
           to use (UTF-8 -> UCS4).  */
734
0
        memset (&cur_state, 0, sizeof (cur_state));
735
0
        mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
736
0
                &cur_state);
737
0
        if (raw + offset - p <= mbclen
738
0
            && mbclen < (size_t) -2)
739
0
          {
740
0
            memset (&pstr->cur_state, '\0',
741
0
              sizeof (mbstate_t));
742
0
            pstr->valid_len = mbclen - (raw + offset - p);
743
0
            wc = wc2;
744
0
          }
745
0
        break;
746
0
      }
747
0
    }
748
749
0
        if (wc == WEOF)
750
0
    pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
751
0
        if (wc == WEOF)
752
0
    pstr->tip_context
753
0
      = re_string_context_at (pstr, prev_valid_len - 1, eflags);
754
0
        else
755
0
    pstr->tip_context = ((__glibc_unlikely (pstr->word_ops_used != 0)
756
0
              && IS_WIDE_WORD_CHAR (wc))
757
0
             ? CONTEXT_WORD
758
0
             : ((IS_WIDE_NEWLINE (wc)
759
0
           && pstr->newline_anchor)
760
0
          ? CONTEXT_NEWLINE : 0));
761
0
        if (__glibc_unlikely (pstr->valid_len))
762
0
    {
763
0
      for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
764
0
        pstr->wcs[wcs_idx] = WEOF;
765
0
      if (pstr->mbs_allocated)
766
0
        memset (pstr->mbs, 255, pstr->valid_len);
767
0
    }
768
0
        pstr->valid_raw_len = pstr->valid_len;
769
0
      }
770
0
    else
771
0
      {
772
0
        int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
773
0
        pstr->valid_raw_len = 0;
774
0
        if (pstr->trans)
775
0
    c = pstr->trans[c];
776
0
        pstr->tip_context = (bitset_contain (pstr->word_char, c)
777
0
           ? CONTEXT_WORD
778
0
           : ((IS_NEWLINE (c) && pstr->newline_anchor)
779
0
              ? CONTEXT_NEWLINE : 0));
780
0
      }
781
0
  }
782
0
      if (!__glibc_unlikely (pstr->mbs_allocated))
783
0
  pstr->mbs += offset;
784
0
    }
785
0
  pstr->raw_mbs_idx = idx;
786
1.97M
  pstr->len -= offset;
787
1.97M
  pstr->stop -= offset;
788
789
  /* Then build the buffers.  */
790
1.97M
  if (pstr->mb_cur_max > 1)
791
0
    {
792
0
      if (pstr->icase)
793
0
  {
794
0
    reg_errcode_t ret = build_wcs_upper_buffer (pstr);
795
0
    if (__glibc_unlikely (ret != REG_NOERROR))
796
0
      return ret;
797
0
  }
798
0
      else
799
0
  build_wcs_buffer (pstr);
800
0
    }
801
1.97M
  else
802
1.97M
    if (__glibc_unlikely (pstr->mbs_allocated))
803
9.62k
      {
804
9.62k
  if (pstr->icase)
805
9.62k
    build_upper_buffer (pstr);
806
0
  else if (pstr->trans != NULL)
807
0
    re_string_translate_buffer (pstr);
808
9.62k
      }
809
1.96M
    else
810
1.96M
      pstr->valid_len = pstr->len;
811
812
1.97M
  pstr->cur_idx = 0;
813
1.97M
  return REG_NOERROR;
814
1.97M
}
815
816
static unsigned char
817
__attribute__ ((pure))
818
re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
819
1.65k
{
820
1.65k
  int ch;
821
1.65k
  Idx off;
822
823
  /* Handle the common (easiest) cases first.  */
824
1.65k
  if (__glibc_likely (!pstr->mbs_allocated))
825
1.65k
    return re_string_peek_byte (pstr, idx);
826
827
0
  if (pstr->mb_cur_max > 1
828
0
      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
829
0
    return re_string_peek_byte (pstr, idx);
830
831
0
  off = pstr->cur_idx + idx;
832
0
  if (pstr->offsets_needed)
833
0
    off = pstr->offsets[off];
834
835
0
  ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
836
837
  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
838
     this function returns CAPITAL LETTER I instead of first byte of
839
     DOTLESS SMALL LETTER I.  The latter would confuse the parser,
840
     since peek_byte_case doesn't advance cur_idx in any way.  */
841
0
  if (pstr->offsets_needed && !isascii (ch))
842
0
    return re_string_peek_byte (pstr, idx);
843
844
0
  return ch;
845
0
}
846
847
static unsigned char
848
re_string_fetch_byte_case (re_string_t *pstr)
849
0
{
850
0
  if (__glibc_likely (!pstr->mbs_allocated))
851
0
    return re_string_fetch_byte (pstr);
852
853
0
  if (pstr->offsets_needed)
854
0
    {
855
0
      Idx off;
856
0
      int ch;
857
858
      /* For tr_TR.UTF-8 [[:islower:]] there is
859
   [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
860
   in that case the whole multi-byte character and return
861
   the original letter.  On the other side, with
862
   [[: DOTLESS SMALL LETTER I return [[:I, as doing
863
   anything else would complicate things too much.  */
864
865
0
      if (!re_string_first_byte (pstr, pstr->cur_idx))
866
0
  return re_string_fetch_byte (pstr);
867
868
0
      off = pstr->offsets[pstr->cur_idx];
869
0
      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
870
871
0
      if (! isascii (ch))
872
0
  return re_string_fetch_byte (pstr);
873
874
0
      re_string_skip_bytes (pstr,
875
0
          re_string_char_size_at (pstr, pstr->cur_idx));
876
0
      return ch;
877
0
    }
878
879
0
  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
880
0
}
881
882
static void
883
re_string_destruct (re_string_t *pstr)
884
1.98M
{
885
1.98M
  re_free (pstr->wcs);
886
1.98M
  re_free (pstr->offsets);
887
1.98M
  if (pstr->mbs_allocated)
888
1.98M
    re_free (pstr->mbs);
889
1.98M
}
890
891
/* Return the context at IDX in INPUT.  */
892
893
static unsigned int
894
re_string_context_at (const re_string_t *input, Idx idx, int eflags)
895
20.2M
{
896
20.2M
  int c;
897
20.2M
  if (__glibc_unlikely (idx < 0))
898
    /* In this case, we use the value stored in input->tip_context,
899
       since we can't know the character in input->mbs[-1] here.  */
900
1.88M
    return input->tip_context;
901
18.3M
  if (__glibc_unlikely (idx == input->len))
902
1.81M
    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
903
1.81M
      : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
904
16.5M
  if (input->mb_cur_max > 1)
905
0
    {
906
0
      wint_t wc;
907
0
      Idx wc_idx = idx;
908
0
      while(input->wcs[wc_idx] == WEOF)
909
0
  {
910
0
    DEBUG_ASSERT (wc_idx >= 0);
911
0
    --wc_idx;
912
0
    if (wc_idx < 0)
913
0
      return input->tip_context;
914
0
  }
915
0
      wc = input->wcs[wc_idx];
916
0
      if (__glibc_unlikely (input->word_ops_used != 0)
917
0
    && IS_WIDE_WORD_CHAR (wc))
918
0
  return CONTEXT_WORD;
919
0
      return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
920
0
        ? CONTEXT_NEWLINE : 0);
921
0
    }
922
16.5M
  else
923
16.5M
    {
924
16.5M
      c = re_string_byte_at (input, idx);
925
16.5M
      if (bitset_contain (input->word_char, c))
926
0
  return CONTEXT_WORD;
927
16.5M
      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
928
16.5M
    }
929
16.5M
}
930

931
/* Functions for set operation.  */
932
933
static reg_errcode_t
934
__attribute_warn_unused_result__
935
re_node_set_alloc (re_node_set *set, Idx size)
936
5.32M
{
937
5.32M
  set->alloc = size;
938
5.32M
  set->nelem = 0;
939
5.32M
  set->elems = re_malloc (Idx, size);
940
5.32M
  if (__glibc_unlikely (set->elems == NULL)
941
5.32M
      && (MALLOC_0_IS_NONNULL || size != 0))
942
0
    return REG_ESPACE;
943
5.32M
  return REG_NOERROR;
944
5.32M
}
945
946
static reg_errcode_t
947
__attribute_warn_unused_result__
948
re_node_set_init_1 (re_node_set *set, Idx elem)
949
131k
{
950
131k
  set->alloc = 1;
951
131k
  set->nelem = 1;
952
131k
  set->elems = re_malloc (Idx, 1);
953
131k
  if (__glibc_unlikely (set->elems == NULL))
954
0
    {
955
0
      set->alloc = set->nelem = 0;
956
0
      return REG_ESPACE;
957
0
    }
958
131k
  set->elems[0] = elem;
959
131k
  return REG_NOERROR;
960
131k
}
961
962
static reg_errcode_t
963
__attribute_warn_unused_result__
964
re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
965
60.6k
{
966
60.6k
  set->alloc = 2;
967
60.6k
  set->elems = re_malloc (Idx, 2);
968
60.6k
  if (__glibc_unlikely (set->elems == NULL))
969
0
    return REG_ESPACE;
970
60.6k
  if (elem1 == elem2)
971
0
    {
972
0
      set->nelem = 1;
973
0
      set->elems[0] = elem1;
974
0
    }
975
60.6k
  else
976
60.6k
    {
977
60.6k
      set->nelem = 2;
978
60.6k
      if (elem1 < elem2)
979
60.5k
  {
980
60.5k
    set->elems[0] = elem1;
981
60.5k
    set->elems[1] = elem2;
982
60.5k
  }
983
109
      else
984
109
  {
985
109
    set->elems[0] = elem2;
986
109
    set->elems[1] = elem1;
987
109
  }
988
60.6k
    }
989
60.6k
  return REG_NOERROR;
990
60.6k
}
991
992
static reg_errcode_t
993
__attribute_warn_unused_result__
994
re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
995
391k
{
996
391k
  dest->nelem = src->nelem;
997
391k
  if (src->nelem > 0)
998
391k
    {
999
391k
      dest->alloc = dest->nelem;
1000
391k
      dest->elems = re_malloc (Idx, dest->alloc);
1001
391k
      if (__glibc_unlikely (dest->elems == NULL))
1002
0
  {
1003
0
    dest->alloc = dest->nelem = 0;
1004
0
    return REG_ESPACE;
1005
0
  }
1006
391k
      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1007
391k
    }
1008
0
  else
1009
0
    re_node_set_init_empty (dest);
1010
391k
  return REG_NOERROR;
1011
391k
}
1012
1013
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1014
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1015
   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
1016
1017
static reg_errcode_t
1018
__attribute_warn_unused_result__
1019
re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1020
         const re_node_set *src2)
1021
0
{
1022
0
  Idx i1, i2, is, id, delta, sbase;
1023
0
  if (src1->nelem == 0 || src2->nelem == 0)
1024
0
    return REG_NOERROR;
1025
1026
  /* We need dest->nelem + 2 * elems_in_intersection; this is a
1027
     conservative estimate.  */
1028
0
  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1029
0
    {
1030
0
      Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1031
0
      Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1032
0
      if (__glibc_unlikely (new_elems == NULL))
1033
0
  return REG_ESPACE;
1034
0
      dest->elems = new_elems;
1035
0
      dest->alloc = new_alloc;
1036
0
    }
1037
1038
  /* Find the items in the intersection of SRC1 and SRC2, and copy
1039
     into the top of DEST those that are not already in DEST itself.  */
1040
0
  sbase = dest->nelem + src1->nelem + src2->nelem;
1041
0
  i1 = src1->nelem - 1;
1042
0
  i2 = src2->nelem - 1;
1043
0
  id = dest->nelem - 1;
1044
0
  for (;;)
1045
0
    {
1046
0
      if (src1->elems[i1] == src2->elems[i2])
1047
0
  {
1048
    /* Try to find the item in DEST.  Maybe we could binary search?  */
1049
0
    while (id >= 0 && dest->elems[id] > src1->elems[i1])
1050
0
      --id;
1051
1052
0
    if (id < 0 || dest->elems[id] != src1->elems[i1])
1053
0
            dest->elems[--sbase] = src1->elems[i1];
1054
1055
0
    if (--i1 < 0 || --i2 < 0)
1056
0
      break;
1057
0
  }
1058
1059
      /* Lower the highest of the two items.  */
1060
0
      else if (src1->elems[i1] < src2->elems[i2])
1061
0
  {
1062
0
    if (--i2 < 0)
1063
0
      break;
1064
0
  }
1065
0
      else
1066
0
  {
1067
0
    if (--i1 < 0)
1068
0
      break;
1069
0
  }
1070
0
    }
1071
1072
0
  id = dest->nelem - 1;
1073
0
  is = dest->nelem + src1->nelem + src2->nelem - 1;
1074
0
  delta = is - sbase + 1;
1075
1076
  /* Now copy.  When DELTA becomes zero, the remaining
1077
     DEST elements are already in place; this is more or
1078
     less the same loop that is in re_node_set_merge.  */
1079
0
  dest->nelem += delta;
1080
0
  if (delta > 0 && id >= 0)
1081
0
    for (;;)
1082
0
      {
1083
0
  if (dest->elems[is] > dest->elems[id])
1084
0
    {
1085
      /* Copy from the top.  */
1086
0
      dest->elems[id + delta--] = dest->elems[is--];
1087
0
      if (delta == 0)
1088
0
        break;
1089
0
    }
1090
0
  else
1091
0
    {
1092
      /* Slide from the bottom.  */
1093
0
      dest->elems[id + delta] = dest->elems[id];
1094
0
      if (--id < 0)
1095
0
        break;
1096
0
    }
1097
0
      }
1098
1099
  /* Copy remaining SRC elements.  */
1100
0
  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1101
1102
0
  return REG_NOERROR;
1103
0
}
1104
1105
/* Calculate the union set of the sets SRC1 and SRC2. And store it to
1106
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
1107
1108
static reg_errcode_t
1109
__attribute_warn_unused_result__
1110
re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1111
      const re_node_set *src2)
1112
0
{
1113
0
  Idx i1, i2, id;
1114
0
  if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1115
0
    {
1116
0
      dest->alloc = src1->nelem + src2->nelem;
1117
0
      dest->elems = re_malloc (Idx, dest->alloc);
1118
0
      if (__glibc_unlikely (dest->elems == NULL))
1119
0
  return REG_ESPACE;
1120
0
    }
1121
0
  else
1122
0
    {
1123
0
      if (src1 != NULL && src1->nelem > 0)
1124
0
  return re_node_set_init_copy (dest, src1);
1125
0
      else if (src2 != NULL && src2->nelem > 0)
1126
0
  return re_node_set_init_copy (dest, src2);
1127
0
      else
1128
0
  re_node_set_init_empty (dest);
1129
0
      return REG_NOERROR;
1130
0
    }
1131
0
  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1132
0
    {
1133
0
      if (src1->elems[i1] > src2->elems[i2])
1134
0
  {
1135
0
    dest->elems[id++] = src2->elems[i2++];
1136
0
    continue;
1137
0
  }
1138
0
      if (src1->elems[i1] == src2->elems[i2])
1139
0
  ++i2;
1140
0
      dest->elems[id++] = src1->elems[i1++];
1141
0
    }
1142
0
  if (i1 < src1->nelem)
1143
0
    {
1144
0
      memcpy (dest->elems + id, src1->elems + i1,
1145
0
       (src1->nelem - i1) * sizeof (Idx));
1146
0
      id += src1->nelem - i1;
1147
0
    }
1148
0
  else if (i2 < src2->nelem)
1149
0
    {
1150
0
      memcpy (dest->elems + id, src2->elems + i2,
1151
0
       (src2->nelem - i2) * sizeof (Idx));
1152
0
      id += src2->nelem - i2;
1153
0
    }
1154
0
  dest->nelem = id;
1155
0
  return REG_NOERROR;
1156
0
}
1157
1158
/* Calculate the union set of the sets DEST and SRC. And store it to
1159
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
1160
1161
static reg_errcode_t
1162
__attribute_warn_unused_result__
1163
re_node_set_merge (re_node_set *dest, const re_node_set *src)
1164
2.92M
{
1165
2.92M
  Idx is, id, sbase, delta;
1166
2.92M
  if (src == NULL || src->nelem == 0)
1167
0
    return REG_NOERROR;
1168
2.92M
  if (dest->alloc < 2 * src->nelem + dest->nelem)
1169
192k
    {
1170
192k
      Idx new_alloc = 2 * (src->nelem + dest->alloc);
1171
192k
      Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1172
192k
      if (__glibc_unlikely (new_buffer == NULL))
1173
0
  return REG_ESPACE;
1174
192k
      dest->elems = new_buffer;
1175
192k
      dest->alloc = new_alloc;
1176
192k
    }
1177
1178
2.92M
  if (__glibc_unlikely (dest->nelem == 0))
1179
315k
    {
1180
      /* Although we already guaranteed above that dest->alloc != 0 and
1181
         therefore dest->elems != NULL, add a debug assertion to pacify
1182
         GCC 11.2.1's -fanalyzer.  */
1183
315k
      DEBUG_ASSERT (dest->elems);
1184
0
      dest->nelem = src->nelem;
1185
315k
      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1186
315k
      return REG_NOERROR;
1187
315k
    }
1188
1189
  /* Copy into the top of DEST the items of SRC that are not
1190
     found in DEST.  Maybe we could binary search in DEST?  */
1191
2.60M
  for (sbase = dest->nelem + 2 * src->nelem,
1192
25.3M
       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1193
22.7M
    {
1194
22.7M
      if (dest->elems[id] == src->elems[is])
1195
1.73M
  is--, id--;
1196
21.0M
      else if (dest->elems[id] < src->elems[is])
1197
7.88M
  dest->elems[--sbase] = src->elems[is--];
1198
13.1M
      else /* if (dest->elems[id] > src->elems[is]) */
1199
13.1M
  --id;
1200
22.7M
    }
1201
1202
2.60M
  if (is >= 0)
1203
83.1k
    {
1204
      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
1205
83.1k
      sbase -= is + 1;
1206
83.1k
      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1207
83.1k
    }
1208
1209
2.60M
  id = dest->nelem - 1;
1210
2.60M
  is = dest->nelem + 2 * src->nelem - 1;
1211
2.60M
  delta = is - sbase + 1;
1212
2.60M
  if (delta == 0)
1213
130k
    return REG_NOERROR;
1214
1215
  /* Now copy.  When DELTA becomes zero, the remaining
1216
     DEST elements are already in place.  */
1217
2.47M
  dest->nelem += delta;
1218
2.47M
  for (;;)
1219
19.1M
    {
1220
19.1M
      if (dest->elems[is] > dest->elems[id])
1221
7.88M
  {
1222
    /* Copy from the top.  */
1223
7.88M
    dest->elems[id + delta--] = dest->elems[is--];
1224
7.88M
    if (delta == 0)
1225
2.39M
      break;
1226
7.88M
  }
1227
11.2M
      else
1228
11.2M
  {
1229
    /* Slide from the bottom.  */
1230
11.2M
    dest->elems[id + delta] = dest->elems[id];
1231
11.2M
    if (--id < 0)
1232
83.1k
      {
1233
        /* Copy remaining SRC elements.  */
1234
83.1k
        memcpy (dest->elems, dest->elems + sbase,
1235
83.1k
          delta * sizeof (Idx));
1236
83.1k
        break;
1237
83.1k
      }
1238
11.2M
  }
1239
19.1M
    }
1240
1241
2.47M
  return REG_NOERROR;
1242
2.60M
}
1243
1244
/* Insert the new element ELEM to the re_node_set* SET.
1245
   SET should not already have ELEM.
1246
   Return true if successful.  */
1247
1248
static bool
1249
__attribute_warn_unused_result__
1250
re_node_set_insert (re_node_set *set, Idx elem)
1251
2.39M
{
1252
2.39M
  Idx idx;
1253
  /* In case the set is empty.  */
1254
2.39M
  if (set->alloc == 0)
1255
597
    return __glibc_likely (re_node_set_init_1 (set, elem) == REG_NOERROR);
1256
1257
2.39M
  if (__glibc_unlikely (set->nelem) == 0)
1258
439
    {
1259
      /* Although we already guaranteed above that set->alloc != 0 and
1260
         therefore set->elems != NULL, add a debug assertion to pacify
1261
         GCC 11.2 -fanalyzer.  */
1262
439
      DEBUG_ASSERT (set->elems);
1263
0
      set->elems[0] = elem;
1264
439
      ++set->nelem;
1265
439
      return true;
1266
439
    }
1267
1268
  /* Realloc if we need.  */
1269
2.39M
  if (set->alloc == set->nelem)
1270
289k
    {
1271
289k
      Idx *new_elems;
1272
289k
      set->alloc = set->alloc * 2;
1273
289k
      new_elems = re_realloc (set->elems, Idx, set->alloc);
1274
289k
      if (__glibc_unlikely (new_elems == NULL))
1275
0
  return false;
1276
289k
      set->elems = new_elems;
1277
289k
    }
1278
1279
  /* Move the elements which follows the new element.  Test the
1280
     first element separately to skip a check in the inner loop.  */
1281
2.39M
  if (elem < set->elems[0])
1282
0
    {
1283
0
      for (idx = set->nelem; idx > 0; idx--)
1284
0
  set->elems[idx] = set->elems[idx - 1];
1285
0
    }
1286
2.39M
  else
1287
2.39M
    {
1288
2.39M
      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1289
0
  set->elems[idx] = set->elems[idx - 1];
1290
2.39M
      DEBUG_ASSERT (set->elems[idx - 1] < elem);
1291
2.39M
    }
1292
1293
  /* Insert the new element.  */
1294
0
  set->elems[idx] = elem;
1295
2.39M
  ++set->nelem;
1296
2.39M
  return true;
1297
2.39M
}
1298
1299
/* Insert the new element ELEM to the re_node_set* SET.
1300
   SET should not already have any element greater than or equal to ELEM.
1301
   Return true if successful.  */
1302
1303
static bool
1304
__attribute_warn_unused_result__
1305
re_node_set_insert_last (re_node_set *set, Idx elem)
1306
19.8M
{
1307
  /* Realloc if we need.  */
1308
19.8M
  if (set->alloc == set->nelem)
1309
5.33M
    {
1310
5.33M
      Idx *new_elems;
1311
5.33M
      set->alloc = (set->alloc + 1) * 2;
1312
5.33M
      new_elems = re_realloc (set->elems, Idx, set->alloc);
1313
5.33M
      if (__glibc_unlikely (new_elems == NULL))
1314
0
  return false;
1315
5.33M
      set->elems = new_elems;
1316
5.33M
    }
1317
1318
  /* Insert the new element.  */
1319
19.8M
  set->elems[set->nelem++] = elem;
1320
19.8M
  return true;
1321
19.8M
}
1322
1323
/* Compare two node sets SET1 and SET2.
1324
   Return true if SET1 and SET2 are equivalent.  */
1325
1326
static bool
1327
__attribute__ ((pure))
1328
re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1329
239k
{
1330
239k
  Idx i;
1331
239k
  if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1332
17.7k
    return false;
1333
9.55M
  for (i = set1->nelem ; --i >= 0 ; )
1334
9.33M
    if (set1->elems[i] != set2->elems[i])
1335
7.96k
      return false;
1336
213k
  return true;
1337
221k
}
1338
1339
/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
1340
1341
static Idx
1342
__attribute__ ((pure))
1343
re_node_set_contains (const re_node_set *set, Idx elem)
1344
0
{
1345
0
  __re_size_t idx, right, mid;
1346
0
  if (set->nelem <= 0)
1347
0
    return 0;
1348
1349
  /* Binary search the element.  */
1350
0
  idx = 0;
1351
0
  right = set->nelem - 1;
1352
0
  while (idx < right)
1353
0
    {
1354
0
      mid = (idx + right) / 2;
1355
0
      if (set->elems[mid] < elem)
1356
0
  idx = mid + 1;
1357
0
      else
1358
0
  right = mid;
1359
0
    }
1360
0
  return set->elems[idx] == elem ? idx + 1 : 0;
1361
0
}
1362
1363
static void
1364
re_node_set_remove_at (re_node_set *set, Idx idx)
1365
942k
{
1366
942k
  if (idx < 0 || idx >= set->nelem)
1367
0
    return;
1368
942k
  --set->nelem;
1369
28.9M
  for (; idx < set->nelem; idx++)
1370
28.0M
    set->elems[idx] = set->elems[idx + 1];
1371
942k
}
1372

1373
1374
/* Add the token TOKEN to dfa->nodes, and return the index of the token.
1375
   Or return -1 if an error occurred.  */
1376
1377
static Idx
1378
re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1379
5.03M
{
1380
5.03M
  if (__glibc_unlikely (dfa->nodes_len >= dfa->nodes_alloc))
1381
209
    {
1382
209
      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1383
209
      Idx *new_nexts, *new_indices;
1384
209
      re_node_set *new_edests, *new_eclosures;
1385
209
      re_token_t *new_nodes;
1386
1387
      /* Avoid overflows in realloc.  */
1388
209
      const size_t max_object_size = MAX (sizeof (re_token_t),
1389
209
            MAX (sizeof (re_node_set),
1390
209
                 sizeof (Idx)));
1391
209
      if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
1392
209
          < new_nodes_alloc))
1393
0
  return -1;
1394
1395
209
      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1396
209
      if (__glibc_unlikely (new_nodes == NULL))
1397
0
  return -1;
1398
209
      dfa->nodes = new_nodes;
1399
209
      dfa->nodes_alloc = new_nodes_alloc;
1400
209
      new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1401
209
      if (new_nexts != NULL)
1402
209
  dfa->nexts = new_nexts;
1403
209
      new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1404
209
      if (new_indices != NULL)
1405
209
  dfa->org_indices = new_indices;
1406
209
      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1407
209
      if (new_edests != NULL)
1408
209
  dfa->edests = new_edests;
1409
209
      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1410
209
      if (new_eclosures != NULL)
1411
209
  dfa->eclosures = new_eclosures;
1412
209
      if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL
1413
209
          || new_edests == NULL || new_eclosures == NULL))
1414
0
  return -1;
1415
209
    }
1416
5.03M
  dfa->nodes[dfa->nodes_len] = token;
1417
5.03M
  dfa->nodes[dfa->nodes_len].constraint = 0;
1418
5.03M
  dfa->nodes[dfa->nodes_len].accept_mb =
1419
5.03M
    ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
1420
5.03M
     || token.type == COMPLEX_BRACKET);
1421
5.03M
  dfa->nexts[dfa->nodes_len] = -1;
1422
5.03M
  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1423
5.03M
  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1424
5.03M
  return dfa->nodes_len++;
1425
5.03M
}
1426
1427
static re_hashval_t
1428
calc_state_hash (const re_node_set *nodes, unsigned int context)
1429
375k
{
1430
375k
  re_hashval_t hash = nodes->nelem + context;
1431
375k
  Idx i;
1432
14.0M
  for (i = 0 ; i < nodes->nelem ; i++)
1433
13.6M
    hash += nodes->elems[i];
1434
375k
  return hash;
1435
375k
}
1436
1437
/* Search for the state whose node_set is equivalent to NODES.
1438
   Return the pointer to the state, if we found it in the DFA.
1439
   Otherwise create the new one and return it.  In case of an error
1440
   return NULL and set the error code in ERR.
1441
   Note: - We assume NULL as the invalid state, then it is possible that
1442
     return value is NULL and ERR is REG_NOERROR.
1443
   - We never return non-NULL value in case of any errors, it is for
1444
     optimization.  */
1445
1446
static re_dfastate_t *
1447
__attribute_warn_unused_result__
1448
re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1449
      const re_node_set *nodes)
1450
0
{
1451
0
  re_hashval_t hash;
1452
0
  re_dfastate_t *new_state;
1453
0
  struct re_state_table_entry *spot;
1454
0
  Idx i;
1455
#if defined GCC_LINT || defined lint
1456
  /* Suppress bogus uninitialized-variable warnings.  */
1457
  *err = REG_NOERROR;
1458
#endif
1459
0
  if (__glibc_unlikely (nodes->nelem == 0))
1460
0
    {
1461
0
      *err = REG_NOERROR;
1462
0
      return NULL;
1463
0
    }
1464
0
  hash = calc_state_hash (nodes, 0);
1465
0
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
1466
1467
0
  for (i = 0 ; i < spot->num ; i++)
1468
0
    {
1469
0
      re_dfastate_t *state = spot->array[i];
1470
0
      if (hash != state->hash)
1471
0
  continue;
1472
0
      if (re_node_set_compare (&state->nodes, nodes))
1473
0
  return state;
1474
0
    }
1475
1476
  /* There are no appropriate state in the dfa, create the new one.  */
1477
0
  new_state = create_ci_newstate (dfa, nodes, hash);
1478
0
  if (__glibc_unlikely (new_state == NULL))
1479
0
    *err = REG_ESPACE;
1480
1481
0
  return new_state;
1482
0
}
1483
1484
/* Search for the state whose node_set is equivalent to NODES and
1485
   whose context is equivalent to CONTEXT.
1486
   Return the pointer to the state, if we found it in the DFA.
1487
   Otherwise create the new one and return it.  In case of an error
1488
   return NULL and set the error code in ERR.
1489
   Note: - We assume NULL as the invalid state, then it is possible that
1490
     return value is NULL and ERR is REG_NOERROR.
1491
   - We never return non-NULL value in case of any errors, it is for
1492
     optimization.  */
1493
1494
static re_dfastate_t *
1495
__attribute_warn_unused_result__
1496
re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1497
        const re_node_set *nodes, unsigned int context)
1498
375k
{
1499
375k
  re_hashval_t hash;
1500
375k
  re_dfastate_t *new_state;
1501
375k
  struct re_state_table_entry *spot;
1502
375k
  Idx i;
1503
#if defined GCC_LINT || defined lint
1504
  /* Suppress bogus uninitialized-variable warnings.  */
1505
  *err = REG_NOERROR;
1506
#endif
1507
375k
  if (nodes->nelem == 0)
1508
0
    {
1509
0
      *err = REG_NOERROR;
1510
0
      return NULL;
1511
0
    }
1512
375k
  hash = calc_state_hash (nodes, context);
1513
375k
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
1514
1515
480k
  for (i = 0 ; i < spot->num ; i++)
1516
318k
    {
1517
318k
      re_dfastate_t *state = spot->array[i];
1518
318k
      if (state->hash == hash
1519
318k
    && state->context == context
1520
318k
    && re_node_set_compare (state->entrance_nodes, nodes))
1521
213k
  return state;
1522
318k
    }
1523
  /* There are no appropriate state in 'dfa', create the new one.  */
1524
161k
  new_state = create_cd_newstate (dfa, nodes, context, hash);
1525
161k
  if (__glibc_unlikely (new_state == NULL))
1526
0
    *err = REG_ESPACE;
1527
1528
161k
  return new_state;
1529
375k
}
1530
1531
/* Finish initialization of the new state NEWSTATE, and using its hash value
1532
   HASH put in the appropriate bucket of DFA's state table.  Return value
1533
   indicates the error code if failed.  */
1534
1535
static reg_errcode_t
1536
__attribute_warn_unused_result__
1537
register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1538
    re_hashval_t hash)
1539
161k
{
1540
161k
  struct re_state_table_entry *spot;
1541
161k
  reg_errcode_t err;
1542
161k
  Idx i;
1543
1544
161k
  newstate->hash = hash;
1545
161k
  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1546
161k
  if (__glibc_unlikely (err != REG_NOERROR))
1547
0
    return REG_ESPACE;
1548
4.30M
  for (i = 0; i < newstate->nodes.nelem; i++)
1549
4.14M
    {
1550
4.14M
      Idx elem = newstate->nodes.elems[i];
1551
4.14M
      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1552
2.57M
  if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
1553
0
    return REG_ESPACE;
1554
4.14M
    }
1555
1556
161k
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
1557
161k
  if (__glibc_unlikely (spot->alloc <= spot->num))
1558
146k
    {
1559
146k
      Idx new_alloc = 2 * spot->num + 2;
1560
146k
      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1561
146k
                new_alloc);
1562
146k
      if (__glibc_unlikely (new_array == NULL))
1563
0
  return REG_ESPACE;
1564
146k
      spot->array = new_array;
1565
146k
      spot->alloc = new_alloc;
1566
146k
    }
1567
161k
  spot->array[spot->num++] = newstate;
1568
161k
  return REG_NOERROR;
1569
161k
}
1570
1571
static void
1572
free_state (re_dfastate_t *state)
1573
161k
{
1574
161k
  re_node_set_free (&state->non_eps_nodes);
1575
161k
  re_node_set_free (&state->inveclosure);
1576
161k
  if (state->entrance_nodes != &state->nodes)
1577
27.8k
    {
1578
27.8k
      re_node_set_free (state->entrance_nodes);
1579
27.8k
      re_free (state->entrance_nodes);
1580
27.8k
    }
1581
161k
  re_node_set_free (&state->nodes);
1582
161k
  re_free (state->word_trtable);
1583
161k
  re_free (state->trtable);
1584
161k
  re_free (state);
1585
161k
}
1586
1587
/* Create the new state which is independent of contexts.
1588
   Return the new state if succeeded, otherwise return NULL.  */
1589
1590
static re_dfastate_t *
1591
__attribute_warn_unused_result__
1592
create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1593
        re_hashval_t hash)
1594
0
{
1595
0
  Idx i;
1596
0
  reg_errcode_t err;
1597
0
  re_dfastate_t *newstate;
1598
1599
0
  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1600
0
  if (__glibc_unlikely (newstate == NULL))
1601
0
    return NULL;
1602
0
  err = re_node_set_init_copy (&newstate->nodes, nodes);
1603
0
  if (__glibc_unlikely (err != REG_NOERROR))
1604
0
    {
1605
0
      re_free (newstate);
1606
0
      return NULL;
1607
0
    }
1608
1609
0
  newstate->entrance_nodes = &newstate->nodes;
1610
0
  for (i = 0 ; i < nodes->nelem ; i++)
1611
0
    {
1612
0
      re_token_t *node = dfa->nodes + nodes->elems[i];
1613
0
      re_token_type_t type = node->type;
1614
0
      if (type == CHARACTER && !node->constraint)
1615
0
  continue;
1616
0
      newstate->accept_mb |= node->accept_mb;
1617
1618
      /* If the state has the halt node, the state is a halt state.  */
1619
0
      if (type == END_OF_RE)
1620
0
  newstate->halt = 1;
1621
0
      else if (type == OP_BACK_REF)
1622
0
  newstate->has_backref = 1;
1623
0
      else if (type == ANCHOR || node->constraint)
1624
0
  newstate->has_constraint = 1;
1625
0
    }
1626
0
  err = register_state (dfa, newstate, hash);
1627
0
  if (__glibc_unlikely (err != REG_NOERROR))
1628
0
    {
1629
0
      free_state (newstate);
1630
0
      newstate = NULL;
1631
0
    }
1632
0
  return newstate;
1633
0
}
1634
1635
/* Create the new state which is depend on the context CONTEXT.
1636
   Return the new state if succeeded, otherwise return NULL.  */
1637
1638
static re_dfastate_t *
1639
__attribute_warn_unused_result__
1640
create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1641
        unsigned int context, re_hashval_t hash)
1642
161k
{
1643
161k
  Idx i, nctx_nodes = 0;
1644
161k
  reg_errcode_t err;
1645
161k
  re_dfastate_t *newstate;
1646
1647
161k
  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1648
161k
  if (__glibc_unlikely (newstate == NULL))
1649
0
    return NULL;
1650
161k
  err = re_node_set_init_copy (&newstate->nodes, nodes);
1651
161k
  if (__glibc_unlikely (err != REG_NOERROR))
1652
0
    {
1653
0
      re_free (newstate);
1654
0
      return NULL;
1655
0
    }
1656
1657
161k
  newstate->context = context;
1658
161k
  newstate->entrance_nodes = &newstate->nodes;
1659
1660
5.24M
  for (i = 0 ; i < nodes->nelem ; i++)
1661
5.08M
    {
1662
5.08M
      re_token_t *node = dfa->nodes + nodes->elems[i];
1663
5.08M
      re_token_type_t type = node->type;
1664
5.08M
      unsigned int constraint = node->constraint;
1665
1666
5.08M
      if (type == CHARACTER && !constraint)
1667
1.54M
  continue;
1668
3.54M
      newstate->accept_mb |= node->accept_mb;
1669
1670
      /* If the state has the halt node, the state is a halt state.  */
1671
3.54M
      if (type == END_OF_RE)
1672
9.57k
  newstate->halt = 1;
1673
3.53M
      else if (type == OP_BACK_REF)
1674
0
  newstate->has_backref = 1;
1675
1676
3.54M
      if (constraint)
1677
1.42M
  {
1678
1.42M
    if (newstate->entrance_nodes == &newstate->nodes)
1679
27.8k
      {
1680
27.8k
        re_node_set *entrance_nodes = re_malloc (re_node_set, 1);
1681
27.8k
        if (__glibc_unlikely (entrance_nodes == NULL))
1682
0
    {
1683
0
      free_state (newstate);
1684
0
      return NULL;
1685
0
    }
1686
27.8k
        newstate->entrance_nodes = entrance_nodes;
1687
27.8k
        if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1688
27.8k
      != REG_NOERROR)
1689
0
    {
1690
0
      free_state (newstate);
1691
0
      return NULL;
1692
0
    }
1693
27.8k
        nctx_nodes = 0;
1694
27.8k
        newstate->has_constraint = 1;
1695
27.8k
      }
1696
1697
1.42M
    if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1698
942k
      {
1699
942k
        re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1700
942k
        ++nctx_nodes;
1701
942k
      }
1702
1.42M
  }
1703
3.54M
    }
1704
161k
  err = register_state (dfa, newstate, hash);
1705
161k
  if (__glibc_unlikely (err != REG_NOERROR))
1706
0
    {
1707
0
      free_state (newstate);
1708
0
      newstate = NULL;
1709
0
    }
1710
161k
  return  newstate;
1711
161k
}