Coverage Report

Created: 2025-11-16 07:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/glib-2.80.0/subprojects/pcre2-10.42/src/pcre2_substring.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2016-2018 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
49
50
/*************************************************
51
*   Copy named captured string to given buffer   *
52
*************************************************/
53
54
/* This function copies a single captured substring into a given buffer,
55
identifying it by name. If the regex permits duplicate names, the first
56
substring that is set is chosen.
57
58
Arguments:
59
  match_data     points to the match data
60
  stringname     the name of the required substring
61
  buffer         where to put the substring
62
  sizeptr        the size of the buffer, updated to the size of the substring
63
64
Returns:         if successful: zero
65
                 if not successful, a negative error code:
66
                   (1) an error from nametable_scan()
67
                   (2) an error from copy_bynumber()
68
                   (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
69
                   (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
70
*/
71
72
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
73
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
74
  PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
75
0
{
76
0
PCRE2_SPTR first, last, entry;
77
0
int failrc, entrysize;
78
0
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
79
0
  return PCRE2_ERROR_DFA_UFUNC;
80
0
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
81
0
  &first, &last);
82
0
if (entrysize < 0) return entrysize;
83
0
failrc = PCRE2_ERROR_UNAVAILABLE;
84
0
for (entry = first; entry <= last; entry += entrysize)
85
0
  {
86
0
  uint32_t n = GET2(entry, 0);
87
0
  if (n < match_data->oveccount)
88
0
    {
89
0
    if (match_data->ovector[n*2] != PCRE2_UNSET)
90
0
      return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
91
0
    failrc = PCRE2_ERROR_UNSET;
92
0
    }
93
0
  }
94
0
return failrc;
95
0
}
96
97
98
99
/*************************************************
100
*  Copy numbered captured string to given buffer *
101
*************************************************/
102
103
/* This function copies a single captured substring into a given buffer,
104
identifying it by number.
105
106
Arguments:
107
  match_data     points to the match data
108
  stringnumber   the number of the required substring
109
  buffer         where to put the substring
110
  sizeptr        the size of the buffer, updated to the size of the substring
111
112
Returns:         if successful: 0
113
                 if not successful, a negative error code:
114
                   PCRE2_ERROR_NOMEMORY: buffer too small
115
                   PCRE2_ERROR_NOSUBSTRING: no such substring
116
                   PCRE2_ERROR_UNAVAILABLE: ovector too small
117
                   PCRE2_ERROR_UNSET: substring is not set
118
*/
119
120
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
121
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
122
  uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
123
0
{
124
0
int rc;
125
0
PCRE2_SIZE size;
126
0
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
127
0
if (rc < 0) return rc;
128
0
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
129
0
memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
130
0
  CU2BYTES(size));
131
0
buffer[size] = 0;
132
0
*sizeptr = size;
133
0
return 0;
134
0
}
135
136
137
138
/*************************************************
139
*          Extract named captured string         *
140
*************************************************/
141
142
/* This function copies a single captured substring, identified by name, into
143
new memory. If the regex permits duplicate names, the first substring that is
144
set is chosen.
145
146
Arguments:
147
  match_data     pointer to match_data
148
  stringname     the name of the required substring
149
  stringptr      where to put the pointer to the new memory
150
  sizeptr        where to put the length of the substring
151
152
Returns:         if successful: zero
153
                 if not successful, a negative value:
154
                   (1) an error from nametable_scan()
155
                   (2) an error from get_bynumber()
156
                   (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
157
                   (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
158
*/
159
160
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
161
pcre2_substring_get_byname(pcre2_match_data *match_data,
162
  PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
163
0
{
164
0
PCRE2_SPTR first, last, entry;
165
0
int failrc, entrysize;
166
0
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
167
0
  return PCRE2_ERROR_DFA_UFUNC;
168
0
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
169
0
  &first, &last);
170
0
if (entrysize < 0) return entrysize;
171
0
failrc = PCRE2_ERROR_UNAVAILABLE;
172
0
for (entry = first; entry <= last; entry += entrysize)
173
0
  {
174
0
  uint32_t n = GET2(entry, 0);
175
0
  if (n < match_data->oveccount)
176
0
    {
177
0
    if (match_data->ovector[n*2] != PCRE2_UNSET)
178
0
      return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
179
0
    failrc = PCRE2_ERROR_UNSET;
180
0
    }
181
0
  }
182
0
return failrc;
183
0
}
184
185
186
187
/*************************************************
188
*      Extract captured string to new memory     *
189
*************************************************/
190
191
/* This function copies a single captured substring into a piece of new
192
memory.
193
194
Arguments:
195
  match_data     points to match data
196
  stringnumber   the number of the required substring
197
  stringptr      where to put a pointer to the new memory
198
  sizeptr        where to put the size of the substring
199
200
Returns:         if successful: 0
201
                 if not successful, a negative error code:
202
                   PCRE2_ERROR_NOMEMORY: failed to get memory
203
                   PCRE2_ERROR_NOSUBSTRING: no such substring
204
                   PCRE2_ERROR_UNAVAILABLE: ovector too small
205
                   PCRE2_ERROR_UNSET: substring is not set
206
*/
207
208
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
209
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
210
  uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
211
0
{
212
0
int rc;
213
0
PCRE2_SIZE size;
214
0
PCRE2_UCHAR *yield;
215
0
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
216
0
if (rc < 0) return rc;
217
0
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
218
0
  (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
219
0
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
220
0
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
221
0
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
222
0
  CU2BYTES(size));
223
0
yield[size] = 0;
224
0
*stringptr = yield;
225
0
*sizeptr = size;
226
0
return 0;
227
0
}
228
229
230
231
/*************************************************
232
*       Free memory obtained by get_substring    *
233
*************************************************/
234
235
/*
236
Argument:     the result of a previous pcre2_substring_get_byxxx()
237
Returns:      nothing
238
*/
239
240
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
241
pcre2_substring_free(PCRE2_UCHAR *string)
242
0
{
243
0
if (string != NULL)
244
0
  {
245
0
  pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
246
0
  memctl->free(memctl, memctl->memory_data);
247
0
  }
248
0
}
249
250
251
252
/*************************************************
253
*         Get length of a named substring        *
254
*************************************************/
255
256
/* This function returns the length of a named captured substring. If the regex
257
permits duplicate names, the first substring that is set is chosen.
258
259
Arguments:
260
  match_data      pointer to match data
261
  stringname      the name of the required substring
262
  sizeptr         where to put the length
263
264
Returns:          0 if successful, else a negative error number
265
*/
266
267
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
268
pcre2_substring_length_byname(pcre2_match_data *match_data,
269
  PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
270
0
{
271
0
PCRE2_SPTR first, last, entry;
272
0
int failrc, entrysize;
273
0
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
274
0
  return PCRE2_ERROR_DFA_UFUNC;
275
0
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
276
0
  &first, &last);
277
0
if (entrysize < 0) return entrysize;
278
0
failrc = PCRE2_ERROR_UNAVAILABLE;
279
0
for (entry = first; entry <= last; entry += entrysize)
280
0
  {
281
0
  uint32_t n = GET2(entry, 0);
282
0
  if (n < match_data->oveccount)
283
0
    {
284
0
    if (match_data->ovector[n*2] != PCRE2_UNSET)
285
0
      return pcre2_substring_length_bynumber(match_data, n, sizeptr);
286
0
    failrc = PCRE2_ERROR_UNSET;
287
0
    }
288
0
  }
289
0
return failrc;
290
0
}
291
292
293
294
/*************************************************
295
*        Get length of a numbered substring      *
296
*************************************************/
297
298
/* This function returns the length of a captured substring. If the start is
299
beyond the end (which can happen when \K is used in an assertion), it sets the
300
length to zero.
301
302
Arguments:
303
  match_data      pointer to match data
304
  stringnumber    the number of the required substring
305
  sizeptr         where to put the length, if not NULL
306
307
Returns:         if successful: 0
308
                 if not successful, a negative error code:
309
                   PCRE2_ERROR_NOSUBSTRING: no such substring
310
                   PCRE2_ERROR_UNAVAILABLE: ovector is too small
311
                   PCRE2_ERROR_UNSET: substring is not set
312
*/
313
314
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
315
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
316
  uint32_t stringnumber, PCRE2_SIZE *sizeptr)
317
0
{
318
0
PCRE2_SIZE left, right;
319
0
int count = match_data->rc;
320
0
if (count == PCRE2_ERROR_PARTIAL)
321
0
  {
322
0
  if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
323
0
  count = 0;
324
0
  }
325
0
else if (count < 0) return count;            /* Match failed */
326
327
0
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
328
0
  {
329
0
  if (stringnumber > match_data->code->top_bracket)
330
0
    return PCRE2_ERROR_NOSUBSTRING;
331
0
  if (stringnumber >= match_data->oveccount)
332
0
    return PCRE2_ERROR_UNAVAILABLE;
333
0
  if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
334
0
    return PCRE2_ERROR_UNSET;
335
0
  }
336
0
else  /* Matched using pcre2_dfa_match() */
337
0
  {
338
0
  if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
339
0
  if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
340
0
  }
341
342
0
left = match_data->ovector[stringnumber*2];
343
0
right = match_data->ovector[stringnumber*2+1];
344
0
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
345
0
return 0;
346
0
}
347
348
349
350
/*************************************************
351
*    Extract all captured strings to new memory  *
352
*************************************************/
353
354
/* This function gets one chunk of memory and builds a list of pointers and all
355
the captured substrings in it. A NULL pointer is put on the end of the list.
356
The substrings are zero-terminated, but also, if the final argument is
357
non-NULL, a list of lengths is also returned. This allows binary data to be
358
handled.
359
360
Arguments:
361
  match_data     points to the match data
362
  listptr        set to point to the list of pointers
363
  lengthsptr     set to point to the list of lengths (may be NULL)
364
365
Returns:         if successful: 0
366
                 if not successful, a negative error code:
367
                   PCRE2_ERROR_NOMEMORY: failed to get memory,
368
                   or a match failure code
369
*/
370
371
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
372
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
373
  PCRE2_SIZE **lengthsptr)
374
0
{
375
0
int i, count, count2;
376
0
PCRE2_SIZE size;
377
0
PCRE2_SIZE *lensp;
378
0
pcre2_memctl *memp;
379
0
PCRE2_UCHAR **listp;
380
0
PCRE2_UCHAR *sp;
381
0
PCRE2_SIZE *ovector;
382
383
0
if ((count = match_data->rc) < 0) return count;   /* Match failed */
384
0
if (count == 0) count = match_data->oveccount;    /* Ovector too small */
385
386
0
count2 = 2*count;
387
0
ovector = match_data->ovector;
388
0
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *);      /* For final NULL */
389
0
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count;  /* For lengths */
390
391
0
for (i = 0; i < count2; i += 2)
392
0
  {
393
0
  size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
394
0
  if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
395
0
  }
396
397
0
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
398
0
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
399
400
0
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
401
0
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
402
403
0
if (lengthsptr == NULL)
404
0
  {
405
0
  sp = (PCRE2_UCHAR *)lensp;
406
0
  lensp = NULL;
407
0
  }
408
0
else
409
0
  {
410
0
  *lengthsptr = lensp;
411
0
  sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
412
0
  }
413
414
0
for (i = 0; i < count2; i += 2)
415
0
  {
416
0
  size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
417
418
  /* Size == 0 includes the case when the capture is unset. Avoid adding
419
  PCRE2_UNSET to match_data->subject because it overflows, even though with
420
  zero size calling memcpy() is harmless. */
421
422
0
  if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
423
0
  *listp++ = sp;
424
0
  if (lensp != NULL) *lensp++ = size;
425
0
  sp += size;
426
0
  *sp++ = 0;
427
0
  }
428
429
0
*listp = NULL;
430
0
return 0;
431
0
}
432
433
434
435
/*************************************************
436
*   Free memory obtained by substring_list_get   *
437
*************************************************/
438
439
/*
440
Argument:     the result of a previous pcre2_substring_list_get()
441
Returns:      nothing
442
*/
443
444
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
445
pcre2_substring_list_free(PCRE2_SPTR *list)
446
0
{
447
0
if (list != NULL)
448
0
  {
449
0
  pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
450
0
  memctl->free(memctl, memctl->memory_data);
451
0
  }
452
0
}
453
454
455
456
/*************************************************
457
*     Find (multiple) entries for named string   *
458
*************************************************/
459
460
/* This function scans the nametable for a given name, using binary chop. It
461
returns either two pointers to the entries in the table, or, if no pointers are
462
given, the number of a unique group with the given name. If duplicate names are
463
permitted, and the name is not unique, an error is generated.
464
465
Arguments:
466
  code        the compiled regex
467
  stringname  the name whose entries required
468
  firstptr    where to put the pointer to the first entry
469
  lastptr     where to put the pointer to the last entry
470
471
Returns:      PCRE2_ERROR_NOSUBSTRING if the name is not found
472
              otherwise, if firstptr and lastptr are NULL:
473
                a group number for a unique substring
474
                else PCRE2_ERROR_NOUNIQUESUBSTRING
475
              otherwise:
476
                the length of each entry, having set firstptr and lastptr
477
*/
478
479
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
480
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
481
  PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
482
0
{
483
0
uint16_t bot = 0;
484
0
uint16_t top = code->name_count;
485
0
uint16_t entrysize = code->name_entry_size;
486
0
PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
487
488
0
while (top > bot)
489
0
  {
490
0
  uint16_t mid = (top + bot) / 2;
491
0
  PCRE2_SPTR entry = nametable + entrysize*mid;
492
0
  int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
493
0
  if (c == 0)
494
0
    {
495
0
    PCRE2_SPTR first;
496
0
    PCRE2_SPTR last;
497
0
    PCRE2_SPTR lastentry;
498
0
    lastentry = nametable + entrysize * (code->name_count - 1);
499
0
    first = last = entry;
500
0
    while (first > nametable)
501
0
      {
502
0
      if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
503
0
      first -= entrysize;
504
0
      }
505
0
    while (last < lastentry)
506
0
      {
507
0
      if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
508
0
      last += entrysize;
509
0
      }
510
0
    if (firstptr == NULL) return (first == last)?
511
0
      (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
512
0
    *firstptr = first;
513
0
    *lastptr = last;
514
0
    return entrysize;
515
0
    }
516
0
  if (c > 0) bot = mid + 1; else top = mid;
517
0
  }
518
519
0
return PCRE2_ERROR_NOSUBSTRING;
520
0
}
521
522
523
/*************************************************
524
*           Find number for named string         *
525
*************************************************/
526
527
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
528
when it is known that names are unique. If there are duplicate names, it is not
529
defined which number is returned.
530
531
Arguments:
532
  code        the compiled regex
533
  stringname  the name whose number is required
534
535
Returns:      the number of the named parenthesis, or a negative number
536
                PCRE2_ERROR_NOSUBSTRING if not found
537
                PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
538
*/
539
540
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
541
pcre2_substring_number_from_name(const pcre2_code *code,
542
  PCRE2_SPTR stringname)
543
0
{
544
0
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
545
0
}
546
547
/* End of pcre2_substring.c */