Coverage Report

Created: 2026-04-12 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre.c
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * partial history:
7
 * 1999-10-24 fl   created (based on existing template matcher code)
8
 * 2000-03-06 fl   first alpha, sort of
9
 * 2000-08-01 fl   fixes for 1.6b1
10
 * 2000-08-07 fl   use PyOS_CheckStack() if available
11
 * 2000-09-20 fl   added expand method
12
 * 2001-03-20 fl   lots of fixes for 2.1b2
13
 * 2001-04-15 fl   export copyright as Python attribute, not global
14
 * 2001-04-28 fl   added __copy__ methods (work in progress)
15
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
16
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
17
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
18
 * 2001-10-20 fl   added split primitive; re-enable unicode for 1.6/2.0/2.1
19
 * 2001-10-21 fl   added sub/subn primitive
20
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
21
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
22
 * 2002-11-09 fl   fixed empty sub/subn return type
23
 * 2003-04-18 mvl  fully support 4-byte codes
24
 * 2003-10-17 gn   implemented non recursive scheme
25
 * 2013-02-04 mrab added fullmatch primitive
26
 *
27
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
28
 *
29
 * This version of the SRE library can be redistributed under CNRI's
30
 * Python 1.6 license.  For any other use, please contact Secret Labs
31
 * AB (info@pythonware.com).
32
 *
33
 * Portions of this engine have been developed in cooperation with
34
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
35
 * other compatibility work.
36
 */
37
38
static const char copyright[] =
39
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41
#include "Python.h"
42
#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION
43
#include "pycore_dict.h"             // _PyDict_Next()
44
#include "pycore_long.h"             // _PyLong_GetZero()
45
#include "pycore_moduleobject.h"     // _PyModule_GetState()
46
#include "pycore_tuple.h"            // _PyTuple_FromPairSteal
47
#include "pycore_unicodeobject.h"    // _PyUnicode_Copy
48
#include "pycore_weakref.h"          // FT_CLEAR_WEAKREFS()
49
50
#include "sre.h"                     // SRE_CODE
51
52
#include <ctype.h>                   // tolower(), toupper(), isalnum()
53
54
1.29G
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
55
56
// On macOS, use the wide character ctype API using btowc()
57
#if defined(__APPLE__)
58
#  define USE_CTYPE_WINT_T
59
#endif
60
61
0
/* Locale-dependent alphanumeric test for one character code.
 * Returns the classifier's raw result: non-zero means alphanumeric.
 * When USE_CTYPE_WINT_T is defined the wide-character API is used. */
static int sre_isalnum(unsigned int ch) {
    const int code = (int)ch;
#ifdef USE_CTYPE_WINT_T
    const unsigned int verdict = (unsigned int)iswalnum(btowc(code));
#else
    const unsigned int verdict = (unsigned int)isalnum(code);
#endif
    return verdict;
}
68
69
0
/* Locale-dependent lower-casing of one single-byte character code.
 *
 * When USE_CTYPE_WINT_T is defined, the byte is first widened with btowc().
 * btowc() reports WEOF for a byte that is not a valid single-byte character
 * in the current locale; in that case the input is returned unchanged
 * instead of feeding WEOF to towlower() and returning it as a character. */
static unsigned int sre_tolower(unsigned int ch) {
#ifdef USE_CTYPE_WINT_T
    wint_t wide = btowc((int)ch);
    if (wide == WEOF) {
        /* no wide equivalent: nothing to convert */
        return ch;
    }
    return (unsigned int)towlower(wide);
#else
    return (unsigned int)tolower((int)ch);
#endif
}
76
77
0
/* Locale-dependent upper-casing of one single-byte character code.
 *
 * When USE_CTYPE_WINT_T is defined, the byte is first widened with btowc().
 * btowc() reports WEOF for a byte that is not a valid single-byte character
 * in the current locale; in that case the input is returned unchanged
 * instead of feeding WEOF to towupper() and returning it as a character. */
static unsigned int sre_toupper(unsigned int ch) {
#ifdef USE_CTYPE_WINT_T
    wint_t wide = btowc((int)ch);
    if (wide == WEOF) {
        /* no wide equivalent: nothing to convert */
        return ch;
    }
    return (unsigned int)towupper(wide);
#else
    return (unsigned int)toupper((int)ch);
#endif
}
84
85
/* Defining this one controls tracing:
86
 * 0 -- disabled
87
 * 1 -- only if the DEBUG flag set
88
 * 2 -- always
89
 */
90
#ifndef VERBOSE
91
#  define VERBOSE 0
92
#endif
93
94
/* -------------------------------------------------------------------- */
95
96
#if defined(_MSC_VER) && !defined(__clang__)
97
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
98
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
99
/* fastest possible local call under MSVC */
100
#define LOCAL(type) static __inline type __fastcall
101
#else
102
#define LOCAL(type) static inline type
103
#endif
104
105
/* error codes */
106
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
107
#define SRE_ERROR_STATE -2 /* illegal state */
108
0
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
109
0
#define SRE_ERROR_MEMORY -9 /* out of memory */
110
0
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
111
112
#if VERBOSE == 0
113
#  define INIT_TRACE(state)
114
#  define DO_TRACE 0
115
#  define TRACE(v)
116
#elif VERBOSE == 1
117
#  define INIT_TRACE(state) int _debug = (state)->debug
118
#  define DO_TRACE (_debug)
119
#  define TRACE(v) do {     \
120
        if (_debug) { \
121
            printf v;       \
122
        }                   \
123
    } while (0)
124
#elif VERBOSE == 2
125
#  define INIT_TRACE(state)
126
#  define DO_TRACE 1
127
#  define TRACE(v) printf v
128
#else
129
#  error VERBOSE must be 0, 1 or 2
130
#endif
131
132
/* -------------------------------------------------------------------- */
133
/* search engine state */
134
135
#define SRE_IS_DIGIT(ch)\
136
462
    ((ch) <= '9' && Py_ISDIGIT(ch))
137
#define SRE_IS_SPACE(ch)\
138
32
    ((ch) <= ' ' && Py_ISSPACE(ch))
139
#define SRE_IS_LINEBREAK(ch)\
140
36.0M
    ((ch) == '\n')
141
#define SRE_IS_WORD(ch)\
142
11.5M
    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
143
144
/* ASCII-only lower-casing: codes 128 and above pass through untouched,
 * everything below goes through Py_TOLOWER(). */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
148
149
/* locale-specific character predicates */
150
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
151
 * warnings when c's type supports only numbers < N+1 */
152
0
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? sre_isalnum((ch)) : 0)
153
0
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
154
155
/* Locale lower-casing restricted to codes below 256; larger codes are
 * returned as-is. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)sre_tolower(ch);
}
159
160
/* Locale upper-casing restricted to codes below 256; larger codes are
 * returned as-is. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)sre_toupper(ch);
}
164
165
/* unicode-specific character predicates */
166
167
16
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
168
75.7M
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
169
0
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
170
11.2k
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
171
5.62k
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
172
173
/* Lower-case a code point via Py_UNICODE_TOLOWER(). */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
177
178
/* Upper-case a code point via Py_UNICODE_TOUPPER(). */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
182
183
LOCAL(int)
184
sre_category(SRE_CODE category, unsigned int ch)
185
87.2M
{
186
87.2M
    switch (category) {
187
188
462
    case SRE_CATEGORY_DIGIT:
189
462
        return SRE_IS_DIGIT(ch);
190
0
    case SRE_CATEGORY_NOT_DIGIT:
191
0
        return !SRE_IS_DIGIT(ch);
192
32
    case SRE_CATEGORY_SPACE:
193
32
        return SRE_IS_SPACE(ch);
194
0
    case SRE_CATEGORY_NOT_SPACE:
195
0
        return !SRE_IS_SPACE(ch);
196
11.5M
    case SRE_CATEGORY_WORD:
197
11.5M
        return SRE_IS_WORD(ch);
198
0
    case SRE_CATEGORY_NOT_WORD:
199
0
        return !SRE_IS_WORD(ch);
200
0
    case SRE_CATEGORY_LINEBREAK:
201
0
        return SRE_IS_LINEBREAK(ch);
202
0
    case SRE_CATEGORY_NOT_LINEBREAK:
203
0
        return !SRE_IS_LINEBREAK(ch);
204
205
0
    case SRE_CATEGORY_LOC_WORD:
206
0
        return SRE_LOC_IS_WORD(ch);
207
0
    case SRE_CATEGORY_LOC_NOT_WORD:
208
0
        return !SRE_LOC_IS_WORD(ch);
209
210
16
    case SRE_CATEGORY_UNI_DIGIT:
211
16
        return SRE_UNI_IS_DIGIT(ch);
212
0
    case SRE_CATEGORY_UNI_NOT_DIGIT:
213
0
        return !SRE_UNI_IS_DIGIT(ch);
214
5.63M
    case SRE_CATEGORY_UNI_SPACE:
215
5.63M
        return SRE_UNI_IS_SPACE(ch);
216
70.1M
    case SRE_CATEGORY_UNI_NOT_SPACE:
217
70.1M
        return !SRE_UNI_IS_SPACE(ch);
218
5.62k
    case SRE_CATEGORY_UNI_WORD:
219
5.62k
        return SRE_UNI_IS_WORD(ch);
220
0
    case SRE_CATEGORY_UNI_NOT_WORD:
221
0
        return !SRE_UNI_IS_WORD(ch);
222
0
    case SRE_CATEGORY_UNI_LINEBREAK:
223
0
        return SRE_UNI_IS_LINEBREAK(ch);
224
0
    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
225
0
        return !SRE_UNI_IS_LINEBREAK(ch);
226
87.2M
    }
227
0
    return 0;
228
87.2M
}
229
230
LOCAL(int)
231
char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
232
0
{
233
0
    return ch == pattern
234
0
        || (SRE_CODE) sre_lower_locale(ch) == pattern
235
0
        || (SRE_CODE) sre_upper_locale(ch) == pattern;
236
0
}
237
238
239
/* helpers */
240
241
static void
242
data_stack_dealloc(SRE_STATE* state)
243
193M
{
244
193M
    if (state->data_stack) {
245
169M
        PyMem_Free(state->data_stack);
246
169M
        state->data_stack = NULL;
247
169M
    }
248
193M
    state->data_stack_size = state->data_stack_base = 0;
249
193M
}
250
251
static int
252
data_stack_grow(SRE_STATE* state, Py_ssize_t size)
253
172M
{
254
172M
    INIT_TRACE(state);
255
172M
    Py_ssize_t minsize, cursize;
256
172M
    minsize = state->data_stack_base+size;
257
172M
    cursize = state->data_stack_size;
258
172M
    if (cursize < minsize) {
259
172M
        void* stack;
260
172M
        cursize = minsize+minsize/4+1024;
261
172M
        TRACE(("allocate/grow stack %zd\n", cursize));
262
172M
        stack = PyMem_Realloc(state->data_stack, cursize);
263
172M
        if (!stack) {
264
0
            data_stack_dealloc(state);
265
0
            return SRE_ERROR_MEMORY;
266
0
        }
267
172M
        state->data_stack = (char *)stack;
268
172M
        state->data_stack_size = cursize;
269
172M
    }
270
172M
    return 0;
271
172M
}
272
273
/* memory pool functions for SRE_REPEAT, this can avoid memory
274
   leak when SRE(match) function terminates abruptly.
275
   state->repeat_pool_used is a doubly-linked list, so that we
276
   can remove a SRE_REPEAT node from it.
277
   state->repeat_pool_unused is a singly-linked list, we put/get
278
   node at the head. */
279
static SRE_REPEAT *
280
repeat_pool_malloc(SRE_STATE *state)
281
48.1M
{
282
48.1M
    SRE_REPEAT *repeat;
283
284
48.1M
    if (state->repeat_pool_unused) {
285
        /* remove from unused pool (singly-linked list) */
286
16.2k
        repeat = state->repeat_pool_unused;
287
16.2k
        state->repeat_pool_unused = repeat->pool_next;
288
16.2k
    }
289
48.0M
    else {
290
48.0M
        repeat = PyMem_Malloc(sizeof(SRE_REPEAT));
291
48.0M
        if (!repeat) {
292
0
            return NULL;
293
0
        }
294
48.0M
    }
295
296
    /* add to used pool (doubly-linked list) */
297
48.1M
    SRE_REPEAT *temp = state->repeat_pool_used;
298
48.1M
    if (temp) {
299
28.4M
        temp->pool_prev = repeat;
300
28.4M
    }
301
48.1M
    repeat->pool_prev = NULL;
302
48.1M
    repeat->pool_next = temp;
303
48.1M
    state->repeat_pool_used = repeat;
304
305
48.1M
    return repeat;
306
48.1M
}
307
308
static void
309
repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
310
48.1M
{
311
48.1M
    SRE_REPEAT *prev = repeat->pool_prev;
312
48.1M
    SRE_REPEAT *next = repeat->pool_next;
313
314
    /* remove from used pool (doubly-linked list) */
315
48.1M
    if (prev) {
316
0
        prev->pool_next = next;
317
0
    }
318
48.1M
    else {
319
48.1M
        state->repeat_pool_used = next;
320
48.1M
    }
321
48.1M
    if (next) {
322
28.4M
        next->pool_prev = prev;
323
28.4M
    }
324
325
    /* add to unused pool (singly-linked list) */
326
48.1M
    repeat->pool_next = state->repeat_pool_unused;
327
48.1M
    state->repeat_pool_unused = repeat;
328
48.1M
}
329
330
static void
331
repeat_pool_clear(SRE_STATE *state)
332
81.6M
{
333
    /* clear used pool */
334
81.6M
    SRE_REPEAT *next = state->repeat_pool_used;
335
81.6M
    state->repeat_pool_used = NULL;
336
81.6M
    while (next) {
337
0
        SRE_REPEAT *temp = next;
338
0
        next = temp->pool_next;
339
0
        PyMem_Free(temp);
340
0
    }
341
342
    /* clear unused pool */
343
81.6M
    next = state->repeat_pool_unused;
344
81.6M
    state->repeat_pool_unused = NULL;
345
129M
    while (next) {
346
48.0M
        SRE_REPEAT *temp = next;
347
48.0M
        next = temp->pool_next;
348
48.0M
        PyMem_Free(temp);
349
48.0M
    }
350
81.6M
}
351
352
/* generate 8-bit version */
353
354
293M
#define SRE_CHAR Py_UCS1
355
#define SIZEOF_SRE_CHAR 1
356
1.28G
#define SRE(F) sre_ucs1_##F
357
#include "sre_lib.h"
358
359
/* generate 16-bit unicode version */
360
361
314M
#define SRE_CHAR Py_UCS2
362
#define SIZEOF_SRE_CHAR 2
363
1.56G
#define SRE(F) sre_ucs2_##F
364
#include "sre_lib.h"
365
366
/* generate 32-bit unicode version */
367
368
109M
#define SRE_CHAR Py_UCS4
369
#define SIZEOF_SRE_CHAR 4
370
595M
#define SRE(F) sre_ucs4_##F
371
#include "sre_lib.h"
372
373
/* -------------------------------------------------------------------- */
374
/* factories and destructors */
375
376
/* module state */
377
typedef struct {
378
    PyTypeObject *Pattern_Type;
379
    PyTypeObject *Match_Type;
380
    PyTypeObject *Scanner_Type;
381
    PyTypeObject *Template_Type;
382
    PyObject *compile_template;  // reference to re._compile_template
383
} _sremodulestate;
384
385
static _sremodulestate *
386
get_sre_module_state(PyObject *m)
387
79.6M
{
388
79.6M
    _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
389
79.6M
    assert(state);
390
79.6M
    return state;
391
79.6M
}
392
393
static struct PyModuleDef sremodule;
394
#define get_sre_module_state_by_class(cls) \
395
79.6M
    (get_sre_module_state(PyType_GetModule(cls)))
396
397
/* see sre.h for object declarations */
398
static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
399
static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
400
401
18.6k
#define _PatternObject_CAST(op)     ((PatternObject *)(op))
402
87.6M
#define _MatchObject_CAST(op)       ((MatchObject *)(op))
403
0
#define _TemplateObject_CAST(op)    ((TemplateObject *)(op))
404
765k
#define _ScannerObject_CAST(op)     ((ScannerObject *)(op))
405
406
/*[clinic input]
407
module _sre
408
class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
409
class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
410
class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
411
[clinic start generated code]*/
412
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
413
414
/*[clinic input]
415
_sre.getcodesize -> int
416
[clinic start generated code]*/
417
418
static int
419
_sre_getcodesize_impl(PyObject *module)
420
/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
421
0
{
422
0
    return sizeof(SRE_CODE);
423
0
}
424
425
/*[clinic input]
426
_sre.ascii_iscased -> bool
427
428
    character: int
429
    /
430
431
[clinic start generated code]*/
432
433
static int
434
_sre_ascii_iscased_impl(PyObject *module, int character)
435
/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
436
7.05k
{
437
7.05k
    unsigned int ch = (unsigned int)character;
438
7.05k
    return ch < 128 && Py_ISALPHA(ch);
439
7.05k
}
440
441
/*[clinic input]
442
_sre.unicode_iscased -> bool
443
444
    character: int
445
    /
446
447
[clinic start generated code]*/
448
449
static int
450
_sre_unicode_iscased_impl(PyObject *module, int character)
451
/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
452
32.1M
{
453
32.1M
    unsigned int ch = (unsigned int)character;
454
32.1M
    return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
455
32.1M
}
456
457
/*[clinic input]
458
_sre.ascii_tolower -> int
459
460
    character: int
461
    /
462
463
[clinic start generated code]*/
464
465
static int
466
_sre_ascii_tolower_impl(PyObject *module, int character)
467
/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
468
1.37M
{
469
1.37M
    return sre_lower_ascii(character);
470
1.37M
}
471
472
/*[clinic input]
473
_sre.unicode_tolower -> int
474
475
    character: int
476
    /
477
478
[clinic start generated code]*/
479
480
static int
481
_sre_unicode_tolower_impl(PyObject *module, int character)
482
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
483
93.2M
{
484
93.2M
    return sre_lower_unicode(character);
485
93.2M
}
486
487
LOCAL(void)
488
state_reset(SRE_STATE* state)
489
112M
{
490
    /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
491
    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
492
493
112M
    state->lastmark = -1;
494
112M
    state->lastindex = -1;
495
496
112M
    state->repeat = NULL;
497
498
112M
    data_stack_dealloc(state);
499
112M
}
500
501
static const void*
502
getstring(PyObject* string, Py_ssize_t* p_length,
503
          int* p_isbytes, int* p_charsize,
504
          Py_buffer *view)
505
130M
{
506
    /* given a python object, return a data pointer, a length (in
507
       characters), and a character size.  return NULL if the object
508
       is not a string (or not compatible) */
509
510
    /* Unicode objects do not support the buffer API. So, get the data
511
       directly instead. */
512
130M
    if (PyUnicode_Check(string)) {
513
129M
        *p_length = PyUnicode_GET_LENGTH(string);
514
129M
        *p_charsize = PyUnicode_KIND(string);
515
129M
        *p_isbytes = 0;
516
129M
        return PyUnicode_DATA(string);
517
129M
    }
518
519
    /* get pointer to byte string buffer */
520
1.11M
    if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
521
0
        PyErr_Format(PyExc_TypeError, "expected string or bytes-like "
522
0
                     "object, got '%.200s'", Py_TYPE(string)->tp_name);
523
0
        return NULL;
524
0
    }
525
526
1.11M
    *p_length = view->len;
527
1.11M
    *p_charsize = 1;
528
1.11M
    *p_isbytes = 1;
529
530
1.11M
    if (view->buf == NULL) {
531
0
        PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
532
0
        PyBuffer_Release(view);
533
0
        view->buf = NULL;
534
0
        return NULL;
535
0
    }
536
1.11M
    return view->buf;
537
1.11M
}
538
539
/* Prepare `state` for matching `pattern` against `string`.
 *
 * The state is zeroed, the mark array is allocated (two slots per group),
 * the string data is obtained through getstring(), and `start`/`end` are
 * clamped into [0, length].  On success a new reference to `string` is
 * stored in state->string and `string` itself is returned.
 *
 * Returns NULL with an exception set on allocation failure, on an
 * unusable `string`, or on a str/bytes mismatch with the pattern; the
 * mark array and any acquired buffer are released in that case. */
LOCAL(PyObject*)
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
           Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t nchars;
    int is_bytes, width;
    const void *data;

    memset(state, 0, sizeof(SRE_STATE));

    state->mark = PyMem_New(const void *, pattern->groups * 2);
    if (state->mark == NULL) {
        PyErr_NoMemory();
        goto err;
    }
    state->lastmark = -1;
    state->lastindex = -1;

    state->buffer.buf = NULL;
    data = getstring(string, &nchars, &is_bytes, &width, &state->buffer);
    if (data == NULL) {
        goto err;
    }

    /* pattern and subject must both be str or both be bytes-like */
    if (is_bytes) {
        if (pattern->isbytes == 0) {
            PyErr_SetString(PyExc_TypeError,
                            "cannot use a string pattern on a bytes-like object");
            goto err;
        }
    }
    else if (pattern->isbytes > 0) {
        PyErr_SetString(PyExc_TypeError,
                        "cannot use a bytes pattern on a string-like object");
        goto err;
    }

    /* clamp the boundaries into [0, nchars] */
    start = (start < 0) ? 0 : ((start > nchars) ? nchars : start);
    end = (end < 0) ? 0 : ((end > nchars) ? nchars : end);

    state->isbytes = is_bytes;
    state->charsize = width;
    state->match_all = 0;
    state->must_advance = 0;
    state->debug = ((pattern->flags & SRE_FLAG_DEBUG) != 0);

    state->beginning = data;
    state->start = (void*) ((char*) data + start * state->charsize);
    state->end = (void*) ((char*) data + end * state->charsize);

    state->string = Py_NewRef(string);
    state->pos = start;
    state->endpos = end;

#ifdef Py_DEBUG
    state->fail_after_count = pattern->fail_after_count;
    state->fail_after_exc = pattern->fail_after_exc; // borrowed ref
#endif

    return string;

  err:
    /* The explicit cast works around an MSVC bug that rejects the implicit
       `const void**` to `void*` conversion in C code; see bpo-39943. */
    PyMem_Free((void*) state->mark);
    state->mark = NULL;
    if (state->buffer.buf != NULL) {
        PyBuffer_Release(&state->buffer);
    }
    return NULL;
}
617
618
LOCAL(void)
619
state_fini(SRE_STATE* state)
620
81.6M
{
621
81.6M
    if (state->buffer.buf)
622
568k
        PyBuffer_Release(&state->buffer);
623
81.6M
    Py_XDECREF(state->string);
624
81.6M
    data_stack_dealloc(state);
625
    /* See above PyMem_Free() for why we explicitly cast here. */
626
81.6M
    PyMem_Free((void*) state->mark);
627
81.6M
    state->mark = NULL;
628
    /* SRE_REPEAT pool */
629
81.6M
    repeat_pool_clear(state);
630
81.6M
}
631
632
/* calculate offset from start of string */
633
#define STATE_OFFSET(state, member)\
634
194M
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
635
636
/* Build a new object holding characters [start, end) of the subject.
 *
 * For str subjects this is PyUnicode_Substring().  For bytes-like
 * subjects a bytes object is built from `ptr`, except that an exact
 * bytes object sliced over its full length is returned itself (with a
 * new reference). */
LOCAL(PyObject*)
getslice(int isbytes, const void *ptr,
         PyObject* string, Py_ssize_t start, Py_ssize_t end)
{
    if (!isbytes) {
        return PyUnicode_Substring(string, start, end);
    }

    if (PyBytes_CheckExact(string) &&
        start == 0 && end == PyBytes_GET_SIZE(string)) {
        /* whole exact-bytes object requested: share it */
        return Py_NewRef(string);
    }

    const char *base = (const char *)ptr;
    return PyBytes_FromStringAndSize(base + start, end - start);
}
652
653
/* Return the text captured by group `index` as a slice of `string`.
 *
 * The group counts as unset when `string` is None, when its mark slot is
 * not below state->lastmark, or when either of its two marks is NULL.
 * For an unset group the result is an empty slice if `empty` is true,
 * otherwise None.  A group whose start lies after its end raises
 * SystemError and returns NULL. */
LOCAL(PyObject*)
state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
{
    Py_ssize_t lo = 0, hi = 0;

    /* each group owns two consecutive mark slots */
    index = (index - 1) * 2;

    const int is_set = string != Py_None
        && index < state->lastmark
        && state->mark[index]
        && state->mark[index+1];

    if (is_set) {
        lo = STATE_OFFSET(state, state->mark[index]);
        hi = STATE_OFFSET(state, state->mark[index+1]);

        /* check wrong span */
        if (lo > hi) {
            PyErr_SetString(PyExc_SystemError,
                            "The span of capturing group is wrong,"
                            " please report a bug for the re module.");
            return NULL;
        }
    }
    else if (!empty) {
        Py_RETURN_NONE;
    }
    /* else: caller wants an empty string, lo == hi == 0 */

    return getslice(state->isbytes, state->beginning, string, lo, hi);
}
682
683
static void
684
pattern_error(Py_ssize_t status)
685
0
{
686
0
    switch (status) {
687
0
    case SRE_ERROR_RECURSION_LIMIT:
688
        /* This error code seems to be unused. */
689
0
        PyErr_SetString(
690
0
            PyExc_RecursionError,
691
0
            "maximum recursion limit exceeded"
692
0
            );
693
0
        break;
694
0
    case SRE_ERROR_MEMORY:
695
0
        PyErr_NoMemory();
696
0
        break;
697
0
    case SRE_ERROR_INTERRUPTED:
698
    /* An exception has already been raised, so let it fly */
699
0
        break;
700
0
    default:
701
        /* other error codes indicate compiler/engine bugs */
702
0
        PyErr_SetString(
703
0
            PyExc_RuntimeError,
704
0
            "internal error in regular expression engine"
705
0
            );
706
0
    }
707
0
}
708
709
static int
710
pattern_traverse(PyObject *op, visitproc visit, void *arg)
711
15.3k
{
712
15.3k
    PatternObject *self = _PatternObject_CAST(op);
713
15.3k
    Py_VISIT(Py_TYPE(self));
714
15.3k
    Py_VISIT(self->groupindex);
715
15.3k
    Py_VISIT(self->indexgroup);
716
15.3k
    Py_VISIT(self->pattern);
717
#ifdef Py_DEBUG
718
    Py_VISIT(self->fail_after_exc);
719
#endif
720
15.3k
    return 0;
721
15.3k
}
722
723
static int
724
pattern_clear(PyObject *op)
725
3.31k
{
726
3.31k
    PatternObject *self = _PatternObject_CAST(op);
727
3.31k
    Py_CLEAR(self->groupindex);
728
3.31k
    Py_CLEAR(self->indexgroup);
729
3.31k
    Py_CLEAR(self->pattern);
730
#ifdef Py_DEBUG
731
    Py_CLEAR(self->fail_after_exc);
732
#endif
733
3.31k
    return 0;
734
3.31k
}
735
736
static void
737
pattern_dealloc(PyObject *self)
738
3.31k
{
739
3.31k
    PyTypeObject *tp = Py_TYPE(self);
740
3.31k
    PyObject_GC_UnTrack(self);
741
3.31k
    FT_CLEAR_WEAKREFS(self, _PatternObject_CAST(self)->weakreflist);
742
3.31k
    (void)pattern_clear(self);
743
3.31k
    tp->tp_free(self);
744
3.31k
    Py_DECREF(tp);
745
3.31k
}
746
747
LOCAL(Py_ssize_t)
748
sre_match(SRE_STATE* state, SRE_CODE* pattern)
749
61.5M
{
750
61.5M
    if (state->charsize == 1)
751
36.1M
        return sre_ucs1_match(state, pattern, 1);
752
25.4M
    if (state->charsize == 2)
753
15.7M
        return sre_ucs2_match(state, pattern, 1);
754
25.4M
    assert(state->charsize == 4);
755
9.66M
    return sre_ucs4_match(state, pattern, 1);
756
25.4M
}
757
758
LOCAL(Py_ssize_t)
759
sre_search(SRE_STATE* state, SRE_CODE* pattern)
760
117M
{
761
117M
    if (state->charsize == 1)
762
54.6M
        return sre_ucs1_search(state, pattern);
763
62.7M
    if (state->charsize == 2)
764
55.5M
        return sre_ucs2_search(state, pattern);
765
62.7M
    assert(state->charsize == 4);
766
7.14M
    return sre_ucs4_search(state, pattern);
767
62.7M
}
768
769
/*[clinic input]
770
_sre.SRE_Pattern.prefixmatch
771
772
    cls: defining_class
773
    /
774
    string: object
775
    pos: Py_ssize_t = 0
776
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
777
778
Matches zero or more characters at the beginning of the string.
779
[clinic start generated code]*/
780
781
static PyObject *
782
_sre_SRE_Pattern_prefixmatch_impl(PatternObject *self, PyTypeObject *cls,
783
                                  PyObject *string, Py_ssize_t pos,
784
                                  Py_ssize_t endpos)
785
/*[clinic end generated code: output=a0e079fb4f875240 input=e2a7e68ea47d048c]*/
786
61.5M
{
787
61.5M
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
788
61.5M
    SRE_STATE state;
789
61.5M
    Py_ssize_t status;
790
61.5M
    PyObject *match;
791
792
61.5M
    if (!state_init(&state, self, string, pos, endpos))
793
0
        return NULL;
794
795
61.5M
    INIT_TRACE(&state);
796
61.5M
    state.ptr = state.start;
797
798
61.5M
    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
799
800
61.5M
    status = sre_match(&state, PatternObject_GetCode(self));
801
802
61.5M
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
803
61.5M
    if (PyErr_Occurred()) {
804
0
        state_fini(&state);
805
0
        return NULL;
806
0
    }
807
808
61.5M
    match = pattern_new_match(module_state, self, &state, status);
809
61.5M
    state_fini(&state);
810
61.5M
    return match;
811
61.5M
}
812
813
814
/*[clinic input]
815
_sre.SRE_Pattern.fullmatch
816
817
    cls: defining_class
818
    /
819
    string: object
820
    pos: Py_ssize_t = 0
821
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
822
823
Matches against all of the string.
824
[clinic start generated code]*/
825
826
static PyObject *
827
_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
828
                                PyObject *string, Py_ssize_t pos,
829
                                Py_ssize_t endpos)
830
/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
831
0
{
832
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
833
0
    SRE_STATE state;
834
0
    Py_ssize_t status;
835
0
    PyObject *match;
836
837
0
    if (!state_init(&state, self, string, pos, endpos))
838
0
        return NULL;
839
840
0
    INIT_TRACE(&state);
841
0
    state.ptr = state.start;
842
843
0
    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
844
845
0
    state.match_all = 1;
846
0
    status = sre_match(&state, PatternObject_GetCode(self));
847
848
0
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
849
0
    if (PyErr_Occurred()) {
850
0
        state_fini(&state);
851
0
        return NULL;
852
0
    }
853
854
0
    match = pattern_new_match(module_state, self, &state, status);
855
0
    state_fini(&state);
856
0
    return match;
857
0
}
858
859
/*[clinic input]
860
@permit_long_summary
861
_sre.SRE_Pattern.search
862
863
    cls: defining_class
864
    /
865
    string: object
866
    pos: Py_ssize_t = 0
867
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
868
869
Scan through string looking for a match, and return a corresponding match object instance.
870
871
Return None if no position in the string matches.
872
[clinic start generated code]*/
873
874
static PyObject *
875
_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
876
                             PyObject *string, Py_ssize_t pos,
877
                             Py_ssize_t endpos)
878
/*[clinic end generated code: output=bd7f2d9d583e1463 input=05e9feee0334c156]*/
879
5.10M
{
880
5.10M
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
881
5.10M
    SRE_STATE state;
882
5.10M
    Py_ssize_t status;
883
5.10M
    PyObject *match;
884
885
5.10M
    if (!state_init(&state, self, string, pos, endpos))
886
0
        return NULL;
887
888
5.10M
    INIT_TRACE(&state);
889
5.10M
    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
890
891
5.10M
    status = sre_search(&state, PatternObject_GetCode(self));
892
893
5.10M
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
894
895
5.10M
    if (PyErr_Occurred()) {
896
0
        state_fini(&state);
897
0
        return NULL;
898
0
    }
899
900
5.10M
    match = pattern_new_match(module_state, self, &state, status);
901
5.10M
    state_fini(&state);
902
5.10M
    return match;
903
5.10M
}
904
905
/*[clinic input]
906
_sre.SRE_Pattern.findall
907
908
    string: object
909
    pos: Py_ssize_t = 0
910
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
911
912
Return a list of all non-overlapping matches of pattern in string.
913
[clinic start generated code]*/
914
915
static PyObject *
916
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
917
                              Py_ssize_t pos, Py_ssize_t endpos)
918
/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
919
3.76M
{
920
3.76M
    SRE_STATE state;
921
3.76M
    PyObject* list;
922
3.76M
    Py_ssize_t status;
923
3.76M
    Py_ssize_t i, b, e;
924
925
3.76M
    if (!state_init(&state, self, string, pos, endpos))
926
0
        return NULL;
927
928
3.76M
    list = PyList_New(0);
929
3.76M
    if (!list) {
930
0
        state_fini(&state);
931
0
        return NULL;
932
0
    }
933
934
91.0M
    while (state.start <= state.end) {
935
936
91.0M
        PyObject* item;
937
938
91.0M
        state_reset(&state);
939
940
91.0M
        state.ptr = state.start;
941
942
91.0M
        status = sre_search(&state, PatternObject_GetCode(self));
943
91.0M
        if (PyErr_Occurred())
944
0
            goto error;
945
946
91.0M
        if (status <= 0) {
947
3.76M
            if (status == 0)
948
3.76M
                break;
949
0
            pattern_error(status);
950
0
            goto error;
951
3.76M
        }
952
953
        /* don't bother to build a match object */
954
87.2M
        switch (self->groups) {
955
87.2M
        case 0:
956
87.2M
            b = STATE_OFFSET(&state, state.start);
957
87.2M
            e = STATE_OFFSET(&state, state.ptr);
958
87.2M
            item = getslice(state.isbytes, state.beginning,
959
87.2M
                            string, b, e);
960
87.2M
            if (!item)
961
0
                goto error;
962
87.2M
            break;
963
87.2M
        case 1:
964
0
            item = state_getslice(&state, 1, string, 1);
965
0
            if (!item)
966
0
                goto error;
967
0
            break;
968
0
        default:
969
0
            item = PyTuple_New(self->groups);
970
0
            if (!item)
971
0
                goto error;
972
0
            for (i = 0; i < self->groups; i++) {
973
0
                PyObject* o = state_getslice(&state, i+1, string, 1);
974
0
                if (!o) {
975
0
                    Py_DECREF(item);
976
0
                    goto error;
977
0
                }
978
0
                PyTuple_SET_ITEM(item, i, o);
979
0
            }
980
0
            break;
981
87.2M
        }
982
983
87.2M
        status = PyList_Append(list, item);
984
87.2M
        Py_DECREF(item);
985
87.2M
        if (status < 0)
986
0
            goto error;
987
988
87.2M
        state.must_advance = (state.ptr == state.start);
989
87.2M
        state.start = state.ptr;
990
87.2M
    }
991
992
3.76M
    state_fini(&state);
993
3.76M
    return list;
994
995
0
error:
996
0
    Py_DECREF(list);
997
0
    state_fini(&state);
998
0
    return NULL;
999
1000
3.76M
}
1001
1002
/*[clinic input]
1003
@permit_long_summary
1004
_sre.SRE_Pattern.finditer
1005
1006
    cls: defining_class
1007
    /
1008
    string: object
1009
    pos: Py_ssize_t = 0
1010
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1011
1012
Return an iterator over all non-overlapping matches for the RE pattern in string.
1013
1014
For each match, the iterator returns a match object.
1015
[clinic start generated code]*/
1016
1017
static PyObject *
1018
_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
1019
                               PyObject *string, Py_ssize_t pos,
1020
                               Py_ssize_t endpos)
1021
/*[clinic end generated code: output=1791dbf3618ade56 input=ee28865796048023]*/
1022
382k
{
1023
382k
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1024
382k
    PyObject* scanner;
1025
382k
    PyObject* search;
1026
382k
    PyObject* iterator;
1027
1028
382k
    scanner = pattern_scanner(module_state, self, string, pos, endpos);
1029
382k
    if (!scanner)
1030
0
        return NULL;
1031
1032
382k
    search = PyObject_GetAttrString(scanner, "search");
1033
382k
    Py_DECREF(scanner);
1034
382k
    if (!search)
1035
0
        return NULL;
1036
1037
382k
    iterator = PyCallIter_New(search, Py_None);
1038
382k
    Py_DECREF(search);
1039
1040
382k
    return iterator;
1041
382k
}
1042
1043
/*[clinic input]
1044
_sre.SRE_Pattern.scanner
1045
1046
    cls: defining_class
1047
    /
1048
    string: object
1049
    pos: Py_ssize_t = 0
1050
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1051
1052
[clinic start generated code]*/
1053
1054
static PyObject *
1055
_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
1056
                              PyObject *string, Py_ssize_t pos,
1057
                              Py_ssize_t endpos)
1058
/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
1059
0
{
1060
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1061
1062
0
    return pattern_scanner(module_state, self, string, pos, endpos);
1063
0
}
1064
1065
/*[clinic input]
1066
_sre.SRE_Pattern.split
1067
1068
    string: object
1069
    maxsplit: Py_ssize_t = 0
1070
1071
Split string by the occurrences of pattern.
1072
[clinic start generated code]*/
1073
1074
static PyObject *
1075
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
1076
                            Py_ssize_t maxsplit)
1077
/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
1078
1.55M
{
1079
1.55M
    SRE_STATE state;
1080
1.55M
    PyObject* list;
1081
1.55M
    PyObject* item;
1082
1.55M
    Py_ssize_t status;
1083
1.55M
    Py_ssize_t n;
1084
1.55M
    Py_ssize_t i;
1085
1.55M
    const void* last;
1086
1087
1.55M
    assert(self->codesize != 0);
1088
1089
1.55M
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
1090
0
        return NULL;
1091
1092
1.55M
    list = PyList_New(0);
1093
1.55M
    if (!list) {
1094
0
        state_fini(&state);
1095
0
        return NULL;
1096
0
    }
1097
1098
1.55M
    n = 0;
1099
1.55M
    last = state.start;
1100
1101
2.67M
    while (!maxsplit || n < maxsplit) {
1102
1103
1.62M
        state_reset(&state);
1104
1105
1.62M
        state.ptr = state.start;
1106
1107
1.62M
        status = sre_search(&state, PatternObject_GetCode(self));
1108
1.62M
        if (PyErr_Occurred())
1109
0
            goto error;
1110
1111
1.62M
        if (status <= 0) {
1112
507k
            if (status == 0)
1113
507k
                break;
1114
0
            pattern_error(status);
1115
0
            goto error;
1116
507k
        }
1117
1118
        /* get segment before this match */
1119
1.11M
        item = getslice(state.isbytes, state.beginning,
1120
1.11M
            string, STATE_OFFSET(&state, last),
1121
1.11M
            STATE_OFFSET(&state, state.start)
1122
1.11M
            );
1123
1.11M
        if (!item)
1124
0
            goto error;
1125
1.11M
        status = PyList_Append(list, item);
1126
1.11M
        Py_DECREF(item);
1127
1.11M
        if (status < 0)
1128
0
            goto error;
1129
1130
        /* add groups (if any) */
1131
2.16M
        for (i = 0; i < self->groups; i++) {
1132
1.05M
            item = state_getslice(&state, i+1, string, 0);
1133
1.05M
            if (!item)
1134
0
                goto error;
1135
1.05M
            status = PyList_Append(list, item);
1136
1.05M
            Py_DECREF(item);
1137
1.05M
            if (status < 0)
1138
0
                goto error;
1139
1.05M
        }
1140
1141
1.11M
        n = n + 1;
1142
1.11M
        state.must_advance = (state.ptr == state.start);
1143
1.11M
        last = state.start = state.ptr;
1144
1145
1.11M
    }
1146
1147
    /* get segment following last match (even if empty) */
1148
1.55M
    item = getslice(state.isbytes, state.beginning,
1149
1.55M
        string, STATE_OFFSET(&state, last), state.endpos
1150
1.55M
        );
1151
1.55M
    if (!item)
1152
0
        goto error;
1153
1.55M
    status = PyList_Append(list, item);
1154
1.55M
    Py_DECREF(item);
1155
1.55M
    if (status < 0)
1156
0
        goto error;
1157
1158
1.55M
    state_fini(&state);
1159
1.55M
    return list;
1160
1161
0
error:
1162
0
    Py_DECREF(list);
1163
0
    state_fini(&state);
1164
0
    return NULL;
1165
1166
1.55M
}
1167
1168
static PyObject *
1169
compile_template(_sremodulestate *module_state,
1170
                 PatternObject *pattern, PyObject *template)
1171
0
{
1172
    /* delegate to Python code */
1173
0
    PyObject *func = FT_ATOMIC_LOAD_PTR(module_state->compile_template);
1174
0
    if (func == NULL) {
1175
0
        func = PyImport_ImportModuleAttrString("re", "_compile_template");
1176
0
        if (func == NULL) {
1177
0
            return NULL;
1178
0
        }
1179
#ifdef Py_GIL_DISABLED
1180
        PyObject *other_func = NULL;
1181
        if (!_Py_atomic_compare_exchange_ptr(&module_state->compile_template, &other_func, func))  {
1182
            Py_DECREF(func);
1183
            func = other_func;
1184
        }
1185
#else
1186
0
        Py_XSETREF(module_state->compile_template, func);
1187
0
#endif
1188
0
    }
1189
1190
0
    PyObject *args[] = {(PyObject *)pattern, template};
1191
0
    PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);
1192
1193
0
    if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
1194
        /* If the replacement string is unhashable (e.g. bytearray),
1195
         * convert it to the basic type (str or bytes) and repeat. */
1196
0
        if (PyUnicode_Check(template) && !PyUnicode_CheckExact(template)) {
1197
0
            PyErr_Clear();
1198
0
            template = _PyUnicode_Copy(template);
1199
0
        }
1200
0
        else if (PyObject_CheckBuffer(template) && !PyBytes_CheckExact(template)) {
1201
0
            PyErr_Clear();
1202
0
            template = PyBytes_FromObject(template);
1203
0
        }
1204
0
        else {
1205
0
            return NULL;
1206
0
        }
1207
0
        if (template == NULL) {
1208
0
            return NULL;
1209
0
        }
1210
0
        args[1] = template;
1211
0
        result = PyObject_Vectorcall(func, args, 2, NULL);
1212
0
        Py_DECREF(template);
1213
0
    }
1214
1215
0
    if (result != NULL && Py_TYPE(result) != module_state->Template_Type) {
1216
0
        PyErr_Format(PyExc_RuntimeError,
1217
0
                    "the result of compiling a replacement string is %.200s",
1218
0
                    Py_TYPE(result)->tp_name);
1219
0
        Py_DECREF(result);
1220
0
        return NULL;
1221
0
    }
1222
0
    return result;
1223
0
}
1224
1225
static PyObject *expand_template(TemplateObject *, MatchObject *); /* Forward */
1226
1227
static PyObject*
1228
pattern_subx(_sremodulestate* module_state,
1229
             PatternObject* self,
1230
             PyObject* ptemplate,
1231
             PyObject* string,
1232
             Py_ssize_t count,
1233
             Py_ssize_t subn)
1234
9.27M
{
1235
9.27M
    SRE_STATE state;
1236
9.27M
    PyObject* list;
1237
9.27M
    PyObject* joiner;
1238
9.27M
    PyObject* item;
1239
9.27M
    PyObject* filter;
1240
9.27M
    PyObject* match;
1241
9.27M
    const void* ptr;
1242
9.27M
    Py_ssize_t status;
1243
9.27M
    Py_ssize_t n;
1244
9.27M
    Py_ssize_t i, b, e;
1245
9.27M
    int isbytes, charsize;
1246
9.27M
    enum {LITERAL, TEMPLATE, CALLABLE} filter_type;
1247
9.27M
    Py_buffer view;
1248
1249
9.27M
    if (PyCallable_Check(ptemplate)) {
1250
        /* sub/subn takes either a function or a template */
1251
3.30M
        filter = Py_NewRef(ptemplate);
1252
3.30M
        filter_type = CALLABLE;
1253
5.97M
    } else {
1254
        /* if not callable, check if it's a literal string */
1255
5.97M
        int literal;
1256
5.97M
        view.buf = NULL;
1257
5.97M
        ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1258
5.97M
        if (ptr) {
1259
5.97M
            if (charsize == 1)
1260
5.97M
                literal = memchr(ptr, '\\', n) == NULL;
1261
0
            else
1262
0
                literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1263
5.97M
        } else {
1264
0
            PyErr_Clear();
1265
0
            literal = 0;
1266
0
        }
1267
5.97M
        if (view.buf)
1268
0
            PyBuffer_Release(&view);
1269
5.97M
        if (literal) {
1270
5.97M
            filter = Py_NewRef(ptemplate);
1271
5.97M
            filter_type = LITERAL;
1272
5.97M
        } else {
1273
            /* not a literal; hand it over to the template compiler */
1274
0
            filter = compile_template(module_state, self, ptemplate);
1275
0
            if (!filter)
1276
0
                return NULL;
1277
1278
0
            assert(Py_TYPE(filter) == module_state->Template_Type);
1279
0
            if (Py_SIZE(filter) == 0) {
1280
0
                Py_SETREF(filter,
1281
0
                          Py_NewRef(((TemplateObject *)filter)->literal));
1282
0
                filter_type = LITERAL;
1283
0
            }
1284
0
            else {
1285
0
                filter_type = TEMPLATE;
1286
0
            }
1287
0
        }
1288
5.97M
    }
1289
1290
9.27M
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1291
0
        Py_DECREF(filter);
1292
0
        return NULL;
1293
0
    }
1294
1295
9.27M
    list = PyList_New(0);
1296
9.27M
    if (!list) {
1297
0
        Py_DECREF(filter);
1298
0
        state_fini(&state);
1299
0
        return NULL;
1300
0
    }
1301
1302
9.27M
    n = i = 0;
1303
1304
16.3M
    while (!count || n < count) {
1305
1306
16.3M
        state_reset(&state);
1307
1308
16.3M
        state.ptr = state.start;
1309
1310
16.3M
        status = sre_search(&state, PatternObject_GetCode(self));
1311
16.3M
        if (PyErr_Occurred())
1312
0
            goto error;
1313
1314
16.3M
        if (status <= 0) {
1315
9.27M
            if (status == 0)
1316
9.27M
                break;
1317
0
            pattern_error(status);
1318
0
            goto error;
1319
9.27M
        }
1320
1321
7.07M
        b = STATE_OFFSET(&state, state.start);
1322
7.07M
        e = STATE_OFFSET(&state, state.ptr);
1323
1324
7.07M
        if (i < b) {
1325
            /* get segment before this match */
1326
3.99M
            item = getslice(state.isbytes, state.beginning,
1327
3.99M
                string, i, b);
1328
3.99M
            if (!item)
1329
0
                goto error;
1330
3.99M
            status = PyList_Append(list, item);
1331
3.99M
            Py_DECREF(item);
1332
3.99M
            if (status < 0)
1333
0
                goto error;
1334
1335
3.99M
        }
1336
1337
7.07M
        if (filter_type != LITERAL) {
1338
            /* pass match object through filter */
1339
7.07M
            match = pattern_new_match(module_state, self, &state, 1);
1340
7.07M
            if (!match)
1341
0
                goto error;
1342
7.07M
            if (filter_type == TEMPLATE) {
1343
0
                item = expand_template((TemplateObject *)filter,
1344
0
                                       (MatchObject *)match);
1345
0
            }
1346
7.07M
            else {
1347
7.07M
                assert(filter_type == CALLABLE);
1348
7.07M
                item = PyObject_CallOneArg(filter, match);
1349
7.07M
            }
1350
7.07M
            Py_DECREF(match);
1351
7.07M
            if (!item)
1352
52
                goto error;
1353
7.07M
        } else {
1354
            /* filter is literal string */
1355
2.59k
            item = Py_NewRef(filter);
1356
2.59k
        }
1357
1358
        /* add to list */
1359
7.07M
        if (item != Py_None) {
1360
7.07M
            status = PyList_Append(list, item);
1361
7.07M
            Py_DECREF(item);
1362
7.07M
            if (status < 0)
1363
0
                goto error;
1364
7.07M
        }
1365
1366
7.07M
        i = e;
1367
7.07M
        n = n + 1;
1368
7.07M
        state.must_advance = (state.ptr == state.start);
1369
7.07M
        state.start = state.ptr;
1370
7.07M
    }
1371
1372
    /* get segment following last match */
1373
9.27M
    if (i < state.endpos) {
1374
6.59M
        item = getslice(state.isbytes, state.beginning,
1375
6.59M
                        string, i, state.endpos);
1376
6.59M
        if (!item)
1377
0
            goto error;
1378
6.59M
        status = PyList_Append(list, item);
1379
6.59M
        Py_DECREF(item);
1380
6.59M
        if (status < 0)
1381
0
            goto error;
1382
6.59M
    }
1383
1384
9.27M
    state_fini(&state);
1385
1386
9.27M
    Py_DECREF(filter);
1387
1388
    /* convert list to single string (also removes list) */
1389
9.27M
    joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1390
9.27M
    if (!joiner) {
1391
0
        Py_DECREF(list);
1392
0
        return NULL;
1393
0
    }
1394
9.27M
    if (PyList_GET_SIZE(list) == 0) {
1395
1.91M
        Py_DECREF(list);
1396
1.91M
        item = joiner;
1397
1.91M
    }
1398
7.36M
    else {
1399
7.36M
        if (state.isbytes)
1400
32.1k
            item = PyBytes_Join(joiner, list);
1401
7.32M
        else
1402
7.32M
            item = PyUnicode_Join(joiner, list);
1403
7.36M
        Py_DECREF(joiner);
1404
7.36M
        Py_DECREF(list);
1405
7.36M
        if (!item)
1406
0
            return NULL;
1407
7.36M
    }
1408
1409
9.27M
    if (subn)
1410
0
        return Py_BuildValue("Nn", item, n);
1411
1412
9.27M
    return item;
1413
1414
52
error:
1415
52
    Py_DECREF(list);
1416
52
    state_fini(&state);
1417
52
    Py_DECREF(filter);
1418
52
    return NULL;
1419
1420
9.27M
}
1421
1422
/*[clinic input]
1423
@permit_long_summary
1424
_sre.SRE_Pattern.sub
1425
1426
    cls: defining_class
1427
    /
1428
    repl: object
1429
    string: object
1430
    count: Py_ssize_t = 0
1431
1432
Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1433
[clinic start generated code]*/
1434
1435
static PyObject *
1436
_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
1437
                          PyObject *repl, PyObject *string, Py_ssize_t count)
1438
/*[clinic end generated code: output=4be141ab04bca60d input=eba511fd1c4908b7]*/
1439
9.27M
{
1440
9.27M
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1441
1442
9.27M
    return pattern_subx(module_state, self, repl, string, count, 0);
1443
9.27M
}
1444
1445
/*[clinic input]
1446
@permit_long_summary
1447
_sre.SRE_Pattern.subn
1448
1449
    cls: defining_class
1450
    /
1451
    repl: object
1452
    string: object
1453
    count: Py_ssize_t = 0
1454
1455
Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1456
[clinic start generated code]*/
1457
1458
static PyObject *
1459
_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
1460
                           PyObject *repl, PyObject *string,
1461
                           Py_ssize_t count)
1462
/*[clinic end generated code: output=da02fd85258b1e1f input=6a5bb5b61717abf0]*/
1463
0
{
1464
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1465
1466
0
    return pattern_subx(module_state, self, repl, string, count, 1);
1467
0
}
1468
1469
/*[clinic input]
1470
_sre.SRE_Pattern.__copy__
1471
1472
[clinic start generated code]*/
1473
1474
static PyObject *
1475
_sre_SRE_Pattern___copy___impl(PatternObject *self)
1476
/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1477
0
{
1478
0
    return Py_NewRef(self);
1479
0
}
1480
1481
/*[clinic input]
1482
_sre.SRE_Pattern.__deepcopy__
1483
1484
    memo: object
1485
    /
1486
1487
[clinic start generated code]*/
1488
1489
static PyObject *
1490
_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1491
/*[clinic end generated code: output=75efe69bd12c5d7d input=a465b1602f997bed]*/
1492
0
{
1493
0
    return Py_NewRef(self);
1494
0
}
1495
1496
#ifdef Py_DEBUG
1497
/*[clinic input]
1498
_sre.SRE_Pattern._fail_after
1499
1500
    count: int
1501
    exception: object
1502
    /
1503
1504
For debugging.
1505
[clinic start generated code]*/
1506
1507
static PyObject *
1508
_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
1509
                                  PyObject *exception)
1510
/*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/
1511
{
1512
    self->fail_after_count = count;
1513
    Py_INCREF(exception);
1514
    Py_XSETREF(self->fail_after_exc, exception);
1515
    Py_RETURN_NONE;
1516
}
1517
#endif /* Py_DEBUG */
1518
1519
static PyObject *
pattern_repr(PyObject *self)
{
    /* Flag bits paired with the names used to display them, in the order
     * they appear in the output. */
    static const struct {
        const char *name;
        int value;
    } flag_names[] = {
        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
        {"re.LOCALE", SRE_FLAG_LOCALE},
        {"re.MULTILINE", SRE_FLAG_MULTILINE},
        {"re.DOTALL", SRE_FLAG_DOTALL},
        {"re.UNICODE", SRE_FLAG_UNICODE},
        {"re.VERBOSE", SRE_FLAG_VERBOSE},
        {"re.DEBUG", SRE_FLAG_DEBUG},
        {"re.ASCII", SRE_FLAG_ASCII},
    };

    PatternObject *pat = _PatternObject_CAST(self);
    PyObject *repr = NULL;
    PyObject *names;
    int remaining = pat->flags;     /* bits not yet rendered */

    /* Omit re.UNICODE for valid string patterns. */
    if (pat->isbytes == 0) {
        const int charset_flags =
            SRE_FLAG_LOCALE | SRE_FLAG_UNICODE | SRE_FLAG_ASCII;
        if ((remaining & charset_flags) == SRE_FLAG_UNICODE) {
            remaining &= ~SRE_FLAG_UNICODE;
        }
    }

    names = PyList_New(0);
    if (names == NULL) {
        return NULL;
    }

    /* One list entry per known flag that is set. */
    for (size_t k = 0; k < Py_ARRAY_LENGTH(flag_names); k++) {
        PyObject *name;
        int rc;

        if (!(remaining & flag_names[k].value)) {
            continue;
        }
        name = PyUnicode_FromString(flag_names[k].name);
        if (name == NULL) {
            goto done;
        }
        rc = PyList_Append(names, name);
        Py_DECREF(name);
        if (rc < 0) {
            goto done;
        }
        remaining &= ~flag_names[k].value;
    }

    /* Whatever is left has no symbolic name: show it in hex. */
    if (remaining) {
        PyObject *extra = PyUnicode_FromFormat("0x%x", remaining);
        int rc;

        if (extra == NULL) {
            goto done;
        }
        rc = PyList_Append(names, extra);
        Py_DECREF(extra);
        if (rc < 0) {
            goto done;
        }
    }

    if (PyList_Size(names) > 0) {
        PyObject *joined;
        PyObject *bar = PyUnicode_FromString("|");

        if (bar == NULL) {
            goto done;
        }
        joined = PyUnicode_Join(bar, names);
        Py_DECREF(bar);
        if (joined == NULL) {
            goto done;
        }
        repr = PyUnicode_FromFormat("re.compile(%.200R, %S)",
                                    pat->pattern, joined);
        Py_DECREF(joined);
    }
    else {
        repr = PyUnicode_FromFormat("re.compile(%.200R)", pat->pattern);
    }

done:
    /* `repr` is still NULL on every failure path that lands here. */
    Py_DECREF(names);
    return repr;
}
1599
1600
PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1601
1602
/* PatternObject's 'groupindex' method. */
1603
static PyObject *
1604
pattern_groupindex(PyObject *op, void *Py_UNUSED(ignored))
1605
0
{
1606
0
    PatternObject *self = _PatternObject_CAST(op);
1607
0
    if (self->groupindex == NULL)
1608
0
        return PyDict_New();
1609
0
    return PyDictProxy_New(self->groupindex);
1610
0
}
1611
1612
static int _validate(PatternObject *self); /* Forward */
1613
1614
/*[clinic input]
1615
_sre.compile
1616
1617
    pattern: object
1618
    flags: int
1619
    code: object(subclass_of='&PyList_Type')
1620
    groups: Py_ssize_t
1621
    groupindex: object(subclass_of='&PyDict_Type')
1622
    indexgroup: object(subclass_of='&PyTuple_Type')
1623
1624
[clinic start generated code]*/
1625
1626
static PyObject *
1627
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1628
                  PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1629
                  PyObject *indexgroup)
1630
/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1631
3.72k
{
1632
    /* "compile" pattern descriptor to pattern object */
1633
1634
3.72k
    _sremodulestate *module_state = get_sre_module_state(module);
1635
3.72k
    PatternObject* self;
1636
3.72k
    Py_ssize_t i, n;
1637
1638
3.72k
    n = PyList_GET_SIZE(code);
1639
    /* coverity[ampersand_in_size] */
1640
3.72k
    self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
1641
3.72k
    if (!self)
1642
0
        return NULL;
1643
3.72k
    self->weakreflist = NULL;
1644
3.72k
    self->pattern = NULL;
1645
3.72k
    self->groupindex = NULL;
1646
3.72k
    self->indexgroup = NULL;
1647
#ifdef Py_DEBUG
1648
    self->fail_after_count = -1;
1649
    self->fail_after_exc = NULL;
1650
#endif
1651
1652
3.72k
    self->codesize = n;
1653
1654
100M
    for (i = 0; i < n; i++) {
1655
100M
        PyObject *o = PyList_GET_ITEM(code, i);
1656
100M
        unsigned long value = PyLong_AsUnsignedLong(o);
1657
100M
        if (value == (unsigned long)-1 && PyErr_Occurred()) {
1658
0
            break;
1659
0
        }
1660
100M
        self->code[i] = (SRE_CODE) value;
1661
100M
        if ((unsigned long) self->code[i] != value) {
1662
0
            PyErr_SetString(PyExc_OverflowError,
1663
0
                            "regular expression code size limit exceeded");
1664
0
            break;
1665
0
        }
1666
100M
    }
1667
3.72k
    PyObject_GC_Track(self);
1668
1669
3.72k
    if (PyErr_Occurred()) {
1670
0
        Py_DECREF(self);
1671
0
        return NULL;
1672
0
    }
1673
1674
3.72k
    if (pattern == Py_None) {
1675
0
        self->isbytes = -1;
1676
0
    }
1677
3.72k
    else {
1678
3.72k
        Py_ssize_t p_length;
1679
3.72k
        int charsize;
1680
3.72k
        Py_buffer view;
1681
3.72k
        view.buf = NULL;
1682
3.72k
        if (!getstring(pattern, &p_length, &self->isbytes,
1683
3.72k
                       &charsize, &view)) {
1684
0
            Py_DECREF(self);
1685
0
            return NULL;
1686
0
        }
1687
3.72k
        if (view.buf)
1688
50
            PyBuffer_Release(&view);
1689
3.72k
    }
1690
1691
3.72k
    self->pattern = Py_NewRef(pattern);
1692
1693
3.72k
    self->flags = flags;
1694
1695
3.72k
    self->groups = groups;
1696
1697
3.72k
    if (PyDict_GET_SIZE(groupindex) > 0) {
1698
60
        self->groupindex = Py_NewRef(groupindex);
1699
60
        if (PyTuple_GET_SIZE(indexgroup) > 0) {
1700
60
            self->indexgroup = Py_NewRef(indexgroup);
1701
60
        }
1702
60
    }
1703
1704
3.72k
    if (!_validate(self)) {
1705
0
        Py_DECREF(self);
1706
0
        return NULL;
1707
0
    }
1708
1709
3.72k
    return (PyObject*) self;
1710
3.72k
}
1711
1712
/*[clinic input]
1713
_sre.template
1714
1715
    pattern: object
1716
    template: object(subclass_of="&PyList_Type")
1717
        A list containing interleaved literal strings (str or bytes) and group
1718
        indices (int), as returned by re._parser.parse_template():
1719
            [literal1, group1, ..., literalN, groupN]
1720
    /
1721
1722
[clinic start generated code]*/
1723
1724
static PyObject *
_sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
/*[clinic end generated code: output=d51290e596ebca86 input=af55380b27f02942]*/
{
    /* template is a list of odd length: a leading literal (str or bytes)
     * followed by (group index, literal) pairs, i.e.
     * [literal, group1, literal1, ..., groupN, literalN].
     * Anything else raises TypeError("invalid template").
     */
    _sremodulestate *module_state = get_sre_module_state(module);
    TemplateObject *self = NULL;
    Py_ssize_t length = PyList_GET_SIZE(template);
    Py_ssize_t npairs;

    if (length < 1 || (length & 1) == 0) {
        goto bad_template;
    }
    npairs = length / 2;

    self = PyObject_GC_NewVar(TemplateObject, module_state->Template_Type, npairs);
    if (self == NULL) {
        return NULL;
    }
    /* Starts at the full item count; each empty literal skipped below
     * lowers it by one. */
    self->chunks = 1 + 2*npairs;
    self->literal = Py_NewRef(PyList_GET_ITEM(template, 0));

    for (Py_ssize_t k = 0; k < npairs; k++) {
        PyObject *chunk;
        int chunk_is_empty;
        Py_ssize_t group = PyLong_AsSsize_t(PyList_GET_ITEM(template, 2*k+1));

        if (group == -1 && PyErr_Occurred()) {
            /* shrink to the items filled in so far before releasing */
            Py_SET_SIZE(self, k);
            Py_DECREF(self);
            return NULL;
        }
        if (group < 0) {
            Py_SET_SIZE(self, k);
            goto bad_template;
        }
        self->items[k].index = group;

        chunk = PyList_GET_ITEM(template, 2*k+2);
        chunk_is_empty =
            (PyUnicode_Check(chunk) && PyUnicode_GET_LENGTH(chunk) == 0) ||
            (PyBytes_Check(chunk) && PyBytes_GET_SIZE(chunk) == 0);

        if (chunk_is_empty) {
            /* Skip empty literals. */
            self->chunks--;
            self->items[k].literal = NULL;
        }
        else {
            self->items[k].literal = Py_NewRef(chunk);
        }
    }
    PyObject_GC_Track(self);
    return (PyObject*) self;

bad_template:
    PyErr_SetString(PyExc_TypeError, "invalid template");
    Py_XDECREF(self);
    return NULL;
}
1775
1776
/* -------------------------------------------------------------------- */
1777
/* Code validation */
1778
1779
/* To learn more about this code, have a look at the _compile() function in
1780
   Lib/sre_compile.py.  The validation functions below check the code array
1781
   for conformance with the code patterns generated there.
1782
1783
   The nice thing about the generated code is that it is position-independent:
1784
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
1785
   the target of a later jump is always earlier than the target of an earlier
1786
   jump.  IOW, this is okay:
1787
1788
   J---------J-------T--------T
1789
    \         \_____/        /
1790
     \______________________/
1791
1792
   but this is not:
1793
1794
   J---------J-------T--------T
1795
    \_________\_____/        /
1796
               \____________/
1797
1798
   It also helps that SRE_CODE is always an unsigned type.
1799
*/
1800
1801
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesised
   printf argument list, e.g. VTRACE(("%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing function return -1 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros read and update variables of the enclosing function:
   `code` (cursor) and `end` (one past the last code word), plus `op`,
   `arg` or `skip` as the destination.  Each one FAILs rather than read at
   or beyond `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL if, after subtracting `adj`, it exceeds the
   number of code words from the skip word to `end`.  `adj` is parenthesised
   so that an expression argument is subtracted as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1841
1842
static int
1843
_validate_charset(SRE_CODE *code, SRE_CODE *end)
1844
4.07M
{
1845
    /* Some variables are manipulated by the macros above */
1846
4.07M
    SRE_CODE op;
1847
4.07M
    SRE_CODE arg;
1848
4.07M
    SRE_CODE offset;
1849
4.07M
    int i;
1850
1851
12.1M
    while (code < end) {
1852
8.06M
        GET_OP;
1853
8.06M
        switch (op) {
1854
1855
1.32k
        case SRE_OP_NEGATE:
1856
1.32k
            break;
1857
1858
7.96M
        case SRE_OP_LITERAL:
1859
7.96M
            GET_ARG;
1860
7.96M
            break;
1861
1862
7.96M
        case SRE_OP_RANGE:
1863
10.7k
        case SRE_OP_RANGE_UNI_IGNORE:
1864
10.7k
            GET_ARG;
1865
10.7k
            GET_ARG;
1866
10.7k
            break;
1867
1868
10.7k
        case SRE_OP_CHARSET:
1869
1.04k
            offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1870
1.04k
            if (offset > (uintptr_t)(end - code))
1871
0
                FAIL;
1872
1.04k
            code += offset;
1873
1.04k
            break;
1874
1875
86.5k
        case SRE_OP_BIGCHARSET:
1876
86.5k
            GET_ARG; /* Number of blocks */
1877
86.5k
            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1878
86.5k
            if (offset > (uintptr_t)(end - code))
1879
0
                FAIL;
1880
            /* Make sure that each byte points to a valid block */
1881
22.2M
            for (i = 0; i < 256; i++) {
1882
22.1M
                if (((unsigned char *)code)[i] >= arg)
1883
0
                    FAIL;
1884
22.1M
            }
1885
86.5k
            code += offset;
1886
86.5k
            offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1887
86.5k
            if (offset > (uintptr_t)(end - code))
1888
0
                FAIL;
1889
86.5k
            code += offset;
1890
86.5k
            break;
1891
1892
1.74k
        case SRE_OP_CATEGORY:
1893
1.74k
            GET_ARG;
1894
1.74k
            switch (arg) {
1895
34
            case SRE_CATEGORY_DIGIT:
1896
34
            case SRE_CATEGORY_NOT_DIGIT:
1897
66
            case SRE_CATEGORY_SPACE:
1898
66
            case SRE_CATEGORY_NOT_SPACE:
1899
92
            case SRE_CATEGORY_WORD:
1900
92
            case SRE_CATEGORY_NOT_WORD:
1901
92
            case SRE_CATEGORY_LINEBREAK:
1902
92
            case SRE_CATEGORY_NOT_LINEBREAK:
1903
92
            case SRE_CATEGORY_LOC_WORD:
1904
92
            case SRE_CATEGORY_LOC_NOT_WORD:
1905
221
            case SRE_CATEGORY_UNI_DIGIT:
1906
828
            case SRE_CATEGORY_UNI_NOT_DIGIT:
1907
1.55k
            case SRE_CATEGORY_UNI_SPACE:
1908
1.57k
            case SRE_CATEGORY_UNI_NOT_SPACE:
1909
1.68k
            case SRE_CATEGORY_UNI_WORD:
1910
1.74k
            case SRE_CATEGORY_UNI_NOT_WORD:
1911
1.74k
            case SRE_CATEGORY_UNI_LINEBREAK:
1912
1.74k
            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1913
1.74k
                break;
1914
0
            default:
1915
0
                FAIL;
1916
1.74k
            }
1917
1.74k
            break;
1918
1919
1.74k
        default:
1920
0
            FAIL;
1921
1922
8.06M
        }
1923
8.06M
    }
1924
1925
4.07M
    return 0;
1926
4.07M
}
1927
1928
/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
1929
static int
1930
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1931
2.00M
{
1932
    /* Some variables are manipulated by the macros above */
1933
2.00M
    SRE_CODE op;
1934
2.00M
    SRE_CODE arg;
1935
2.00M
    SRE_CODE skip;
1936
1937
2.00M
    VTRACE(("code=%p, end=%p\n", code, end));
1938
1939
2.00M
    if (code > end)
1940
0
        FAIL;
1941
1942
28.8M
    while (code < end) {
1943
26.8M
        GET_OP;
1944
26.8M
        switch (op) {
1945
1946
175k
        case SRE_OP_MARK:
1947
            /* We don't check whether marks are properly nested; the
1948
               sre_match() code is robust even if they don't, and the worst
1949
               you can get is nonsensical match results. */
1950
175k
            GET_ARG;
1951
175k
            if (arg >= 2 * (size_t)groups) {
1952
0
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1953
0
                FAIL;
1954
0
            }
1955
175k
            break;
1956
1957
17.4M
        case SRE_OP_LITERAL:
1958
17.4M
        case SRE_OP_NOT_LITERAL:
1959
17.4M
        case SRE_OP_LITERAL_IGNORE:
1960
17.4M
        case SRE_OP_NOT_LITERAL_IGNORE:
1961
21.5M
        case SRE_OP_LITERAL_UNI_IGNORE:
1962
21.5M
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1963
21.5M
        case SRE_OP_LITERAL_LOC_IGNORE:
1964
21.5M
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1965
21.5M
            GET_ARG;
1966
            /* The arg is just a character, nothing to check */
1967
21.5M
            break;
1968
1969
21.5M
        case SRE_OP_SUCCESS:
1970
51
        case SRE_OP_FAILURE:
1971
            /* Nothing to check; these normally end the matching process */
1972
51
            break;
1973
1974
23.8k
        case SRE_OP_AT:
1975
23.8k
            GET_ARG;
1976
23.8k
            switch (arg) {
1977
45
            case SRE_AT_BEGINNING:
1978
53
            case SRE_AT_BEGINNING_STRING:
1979
3.46k
            case SRE_AT_BEGINNING_LINE:
1980
3.53k
            case SRE_AT_END:
1981
19.8k
            case SRE_AT_END_LINE:
1982
19.9k
            case SRE_AT_END_STRING:
1983
19.9k
            case SRE_AT_BOUNDARY:
1984
19.9k
            case SRE_AT_NON_BOUNDARY:
1985
19.9k
            case SRE_AT_LOC_BOUNDARY:
1986
19.9k
            case SRE_AT_LOC_NON_BOUNDARY:
1987
23.8k
            case SRE_AT_UNI_BOUNDARY:
1988
23.8k
            case SRE_AT_UNI_NON_BOUNDARY:
1989
23.8k
                break;
1990
0
            default:
1991
0
                FAIL;
1992
23.8k
            }
1993
23.8k
            break;
1994
1995
26.8k
        case SRE_OP_ANY:
1996
26.8k
        case SRE_OP_ANY_ALL:
1997
            /* These have no operands */
1998
26.8k
            break;
1999
2000
36.0k
        case SRE_OP_IN:
2001
36.7k
        case SRE_OP_IN_IGNORE:
2002
4.07M
        case SRE_OP_IN_UNI_IGNORE:
2003
4.07M
        case SRE_OP_IN_LOC_IGNORE:
2004
4.07M
            GET_SKIP;
2005
            /* Stop 1 before the end; we check the FAILURE below */
2006
4.07M
            if (_validate_charset(code, code+skip-2))
2007
0
                FAIL;
2008
4.07M
            if (code[skip-2] != SRE_OP_FAILURE)
2009
0
                FAIL;
2010
4.07M
            code += skip-1;
2011
4.07M
            break;
2012
2013
3.72k
        case SRE_OP_INFO:
2014
3.72k
            {
2015
                /* A minimal info field is
2016
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
2017
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
2018
                   more follows. */
2019
3.72k
                SRE_CODE flags, i;
2020
3.72k
                SRE_CODE *newcode;
2021
3.72k
                GET_SKIP;
2022
3.72k
                newcode = code+skip-1;
2023
3.72k
                GET_ARG; flags = arg;
2024
3.72k
                GET_ARG;
2025
3.72k
                GET_ARG;
2026
                /* Check that only valid flags are present */
2027
3.72k
                if ((flags & ~(SRE_INFO_PREFIX |
2028
3.72k
                               SRE_INFO_LITERAL |
2029
3.72k
                               SRE_INFO_CHARSET)) != 0)
2030
0
                    FAIL;
2031
                /* PREFIX and CHARSET are mutually exclusive */
2032
3.72k
                if ((flags & SRE_INFO_PREFIX) &&
2033
1.57k
                    (flags & SRE_INFO_CHARSET))
2034
0
                    FAIL;
2035
                /* LITERAL implies PREFIX */
2036
3.72k
                if ((flags & SRE_INFO_LITERAL) &&
2037
676
                    !(flags & SRE_INFO_PREFIX))
2038
0
                    FAIL;
2039
                /* Validate the prefix */
2040
3.72k
                if (flags & SRE_INFO_PREFIX) {
2041
1.57k
                    SRE_CODE prefix_len;
2042
1.57k
                    GET_ARG; prefix_len = arg;
2043
1.57k
                    GET_ARG;
2044
                    /* Here comes the prefix string */
2045
1.57k
                    if (prefix_len > (uintptr_t)(newcode - code))
2046
0
                        FAIL;
2047
1.57k
                    code += prefix_len;
2048
                    /* And here comes the overlap table */
2049
1.57k
                    if (prefix_len > (uintptr_t)(newcode - code))
2050
0
                        FAIL;
2051
                    /* Each overlap value should be < prefix_len */
2052
6.34M
                    for (i = 0; i < prefix_len; i++) {
2053
6.34M
                        if (code[i] >= prefix_len)
2054
0
                            FAIL;
2055
6.34M
                    }
2056
1.57k
                    code += prefix_len;
2057
1.57k
                }
2058
                /* Validate the charset */
2059
3.72k
                if (flags & SRE_INFO_CHARSET) {
2060
430
                    if (_validate_charset(code, newcode-1))
2061
0
                        FAIL;
2062
430
                    if (newcode[-1] != SRE_OP_FAILURE)
2063
0
                        FAIL;
2064
430
                    code = newcode;
2065
430
                }
2066
3.29k
                else if (code != newcode) {
2067
0
                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
2068
0
                    FAIL;
2069
0
                }
2070
3.72k
            }
2071
3.72k
            break;
2072
2073
28.4k
        case SRE_OP_BRANCH:
2074
28.4k
            {
2075
28.4k
                SRE_CODE *target = NULL;
2076
1.07M
                for (;;) {
2077
1.07M
                    GET_SKIP;
2078
1.07M
                    if (skip == 0)
2079
28.4k
                        break;
2080
                    /* Stop 2 before the end; we check the JUMP below */
2081
1.04M
                    if (_validate_inner(code, code+skip-3, groups))
2082
0
                        FAIL;
2083
1.04M
                    code += skip-3;
2084
                    /* Check that it ends with a JUMP, and that each JUMP
2085
                       has the same target */
2086
1.04M
                    GET_OP;
2087
1.04M
                    if (op != SRE_OP_JUMP)
2088
0
                        FAIL;
2089
1.04M
                    GET_SKIP;
2090
1.04M
                    if (target == NULL)
2091
28.4k
                        target = code+skip-1;
2092
1.01M
                    else if (code+skip-1 != target)
2093
0
                        FAIL;
2094
1.04M
                }
2095
28.4k
                if (code != target)
2096
0
                    FAIL;
2097
28.4k
            }
2098
28.4k
            break;
2099
2100
927k
        case SRE_OP_REPEAT_ONE:
2101
927k
        case SRE_OP_MIN_REPEAT_ONE:
2102
928k
        case SRE_OP_POSSESSIVE_REPEAT_ONE:
2103
928k
            {
2104
928k
                SRE_CODE min, max;
2105
928k
                GET_SKIP;
2106
928k
                GET_ARG; min = arg;
2107
928k
                GET_ARG; max = arg;
2108
928k
                if (min > max)
2109
0
                    FAIL;
2110
928k
                if (max > SRE_MAXREPEAT)
2111
0
                    FAIL;
2112
928k
                if (_validate_inner(code, code+skip-4, groups))
2113
0
                    FAIL;
2114
928k
                code += skip-4;
2115
928k
                GET_OP;
2116
928k
                if (op != SRE_OP_SUCCESS)
2117
0
                    FAIL;
2118
928k
            }
2119
928k
            break;
2120
2121
928k
        case SRE_OP_REPEAT:
2122
34.9k
        case SRE_OP_POSSESSIVE_REPEAT:
2123
34.9k
            {
2124
34.9k
                SRE_CODE op1 = op, min, max;
2125
34.9k
                GET_SKIP;
2126
34.9k
                GET_ARG; min = arg;
2127
34.9k
                GET_ARG; max = arg;
2128
34.9k
                if (min > max)
2129
0
                    FAIL;
2130
34.9k
                if (max > SRE_MAXREPEAT)
2131
0
                    FAIL;
2132
34.9k
                if (_validate_inner(code, code+skip-3, groups))
2133
0
                    FAIL;
2134
34.9k
                code += skip-3;
2135
34.9k
                GET_OP;
2136
34.9k
                if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
2137
44
                    if (op != SRE_OP_SUCCESS)
2138
0
                        FAIL;
2139
44
                }
2140
34.9k
                else {
2141
34.9k
                    if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
2142
0
                        FAIL;
2143
34.9k
                }
2144
34.9k
            }
2145
34.9k
            break;
2146
2147
34.9k
        case SRE_OP_ATOMIC_GROUP:
2148
28
            {
2149
28
                GET_SKIP;
2150
28
                if (_validate_inner(code, code+skip-2, groups))
2151
0
                    FAIL;
2152
28
                code += skip-2;
2153
28
                GET_OP;
2154
28
                if (op != SRE_OP_SUCCESS)
2155
0
                    FAIL;
2156
28
            }
2157
28
            break;
2158
2159
28
        case SRE_OP_GROUPREF:
2160
120
        case SRE_OP_GROUPREF_IGNORE:
2161
1.16k
        case SRE_OP_GROUPREF_UNI_IGNORE:
2162
1.16k
        case SRE_OP_GROUPREF_LOC_IGNORE:
2163
1.16k
            GET_ARG;
2164
1.16k
            if (arg >= (size_t)groups)
2165
0
                FAIL;
2166
1.16k
            break;
2167
2168
1.16k
        case SRE_OP_GROUPREF_EXISTS:
2169
            /* The regex syntax for this is: '(?(group)then|else)', where
2170
               'group' is either an integer group number or a group name,
2171
               'then' and 'else' are sub-regexes, and 'else' is optional. */
2172
51
            GET_ARG;
2173
51
            if (arg >= (size_t)groups)
2174
0
                FAIL;
2175
51
            GET_SKIP_ADJ(1);
2176
51
            code--; /* The skip is relative to the first arg! */
2177
            /* There are two possibilities here: if there is both a 'then'
2178
               part and an 'else' part, the generated code looks like:
2179
2180
               GROUPREF_EXISTS
2181
               <group>
2182
               <skipyes>
2183
               ...then part...
2184
               JUMP
2185
               <skipno>
2186
               (<skipyes> jumps here)
2187
               ...else part...
2188
               (<skipno> jumps here)
2189
2190
               If there is only a 'then' part, it looks like:
2191
2192
               GROUPREF_EXISTS
2193
               <group>
2194
               <skip>
2195
               ...then part...
2196
               (<skip> jumps here)
2197
2198
               There is no direct way to decide which it is, and we don't want
2199
               to allow arbitrary jumps anywhere in the code; so we just look
2200
               for a JUMP opcode preceding our skip target.
2201
            */
2202
51
            VTRACE(("then part:\n"));
2203
51
            int rc = _validate_inner(code+1, code+skip-1, groups);
2204
51
            if (rc == 1) {
2205
37
                VTRACE(("else part:\n"));
2206
37
                code += skip-2; /* Position after JUMP, at <skipno> */
2207
37
                GET_SKIP;
2208
37
                rc = _validate_inner(code, code+skip-1, groups);
2209
37
            }
2210
51
            if (rc)
2211
0
                FAIL;
2212
51
            code += skip-1;
2213
51
            break;
2214
2215
102
        case SRE_OP_ASSERT:
2216
340
        case SRE_OP_ASSERT_NOT:
2217
340
            GET_SKIP;
2218
340
            GET_ARG; /* 0 for lookahead, width for lookbehind */
2219
340
            code--; /* Back up over arg to simplify math below */
2220
            /* Stop 1 before the end; we check the SUCCESS below */
2221
340
            if (_validate_inner(code+1, code+skip-2, groups))
2222
0
                FAIL;
2223
340
            code += skip-2;
2224
340
            GET_OP;
2225
340
            if (op != SRE_OP_SUCCESS)
2226
0
                FAIL;
2227
340
            break;
2228
2229
340
        case SRE_OP_JUMP:
2230
37
            if (code + 1 != end)
2231
0
                FAIL;
2232
37
            VTRACE(("JUMP: %d\n", __LINE__));
2233
37
            return 1;
2234
2235
0
        default:
2236
0
            FAIL;
2237
2238
26.8M
        }
2239
26.8M
    }
2240
2241
2.00M
    VTRACE(("okay\n"));
2242
2.00M
    return 0;
2243
2.00M
}
2244
2245
static int
2246
_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2247
3.72k
{
2248
3.72k
    if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
2249
3.72k
        code >= end || end[-1] != SRE_OP_SUCCESS)
2250
0
        FAIL;
2251
3.72k
    return _validate_inner(code, end-1, groups);
2252
3.72k
}
2253
2254
static int
2255
_validate(PatternObject *self)
2256
3.72k
{
2257
3.72k
    if (_validate_outer(self->code, self->code+self->codesize, self->groups))
2258
0
    {
2259
0
        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
2260
0
        return 0;
2261
0
    }
2262
3.72k
    else
2263
3.72k
        VTRACE(("Success!\n"));
2264
3.72k
    return 1;
2265
3.72k
}
2266
2267
/* -------------------------------------------------------------------- */
2268
/* match methods */
2269
2270
static int
2271
match_traverse(PyObject *op, visitproc visit, void *arg)
2272
6.04k
{
2273
6.04k
    MatchObject *self = _MatchObject_CAST(op);
2274
6.04k
    Py_VISIT(Py_TYPE(self));
2275
6.04k
    Py_VISIT(self->string);
2276
6.04k
    Py_VISIT(self->regs);
2277
6.04k
    Py_VISIT(self->pattern);
2278
6.04k
    return 0;
2279
6.04k
}
2280
2281
static int
2282
match_clear(PyObject *op)
2283
57.2M
{
2284
57.2M
    MatchObject *self = _MatchObject_CAST(op);
2285
57.2M
    Py_CLEAR(self->string);
2286
57.2M
    Py_CLEAR(self->regs);
2287
57.2M
    Py_CLEAR(self->pattern);
2288
57.2M
    return 0;
2289
57.2M
}
2290
2291
static void
2292
match_dealloc(PyObject *self)
2293
57.2M
{
2294
57.2M
    PyTypeObject *tp = Py_TYPE(self);
2295
57.2M
    PyObject_GC_UnTrack(self);
2296
57.2M
    (void)match_clear(self);
2297
57.2M
    tp->tp_free(self);
2298
57.2M
    Py_DECREF(tp);
2299
57.2M
}
2300
2301
static PyObject*
2302
match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
2303
52.9M
{
2304
52.9M
    Py_ssize_t length;
2305
52.9M
    int isbytes, charsize;
2306
52.9M
    Py_buffer view;
2307
52.9M
    PyObject *result;
2308
52.9M
    const void* ptr;
2309
52.9M
    Py_ssize_t i, j;
2310
2311
52.9M
    assert(0 <= index && index < self->groups);
2312
52.9M
    index *= 2;
2313
2314
52.9M
    if (self->string == Py_None || self->mark[index] < 0) {
2315
        /* return default value if the string or group is undefined */
2316
10.0M
        return Py_NewRef(def);
2317
10.0M
    }
2318
2319
42.8M
    ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
2320
42.8M
    if (ptr == NULL)
2321
0
        return NULL;
2322
2323
42.8M
    i = self->mark[index];
2324
42.8M
    j = self->mark[index+1];
2325
42.8M
    i = Py_MIN(i, length);
2326
42.8M
    j = Py_MIN(j, length);
2327
42.8M
    result = getslice(isbytes, ptr, self->string, i, j);
2328
42.8M
    if (isbytes && view.buf != NULL)
2329
546k
        PyBuffer_Release(&view);
2330
42.8M
    return result;
2331
42.8M
}
2332
2333
static Py_ssize_t
2334
match_getindex(MatchObject* self, PyObject* index)
2335
73.8M
{
2336
73.8M
    Py_ssize_t i;
2337
2338
73.8M
    if (index == NULL)
2339
        /* Default value */
2340
19.8M
        return 0;
2341
2342
53.9M
    if (PyIndex_Check(index)) {
2343
35.7M
        i = PyNumber_AsSsize_t(index, NULL);
2344
35.7M
    }
2345
18.1M
    else {
2346
18.1M
        i = -1;
2347
2348
18.1M
        if (self->pattern->groupindex) {
2349
18.1M
            index = PyDict_GetItemWithError(self->pattern->groupindex, index);
2350
18.1M
            if (index && PyLong_Check(index)) {
2351
18.1M
                i = PyLong_AsSsize_t(index);
2352
18.1M
            }
2353
18.1M
        }
2354
18.1M
    }
2355
53.9M
    if (i < 0 || i >= self->groups) {
2356
        /* raise IndexError if we were given a bad group number */
2357
0
        if (!PyErr_Occurred()) {
2358
0
            PyErr_SetString(PyExc_IndexError, "no such group");
2359
0
        }
2360
0
        return -1;
2361
0
    }
2362
2363
    // Check that i*2 cannot overflow to make static analyzers happy
2364
53.9M
    assert((size_t)i <= SRE_MAXGROUPS);
2365
53.9M
    return i;
2366
53.9M
}
2367
2368
static PyObject*
2369
match_getslice(MatchObject* self, PyObject* index, PyObject* def)
2370
52.9M
{
2371
52.9M
    Py_ssize_t i = match_getindex(self, index);
2372
2373
52.9M
    if (i < 0) {
2374
0
        return NULL;
2375
0
    }
2376
2377
52.9M
    return match_getslice_by_index(self, i, def);
2378
52.9M
}
2379
2380
/*[clinic input]
2381
@permit_long_summary
2382
_sre.SRE_Match.expand
2383
2384
    template: object
2385
2386
Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2387
[clinic start generated code]*/
2388
2389
static PyObject *
2390
_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2391
/*[clinic end generated code: output=931b58ccc323c3a1 input=dc74d81265376ac3]*/
2392
0
{
2393
0
    _sremodulestate *module_state = get_sre_module_state_by_class(Py_TYPE(self));
2394
0
    PyObject *filter = compile_template(module_state, self->pattern, template);
2395
0
    if (filter == NULL) {
2396
0
        return NULL;
2397
0
    }
2398
0
    PyObject *result = expand_template((TemplateObject *)filter, self);
2399
0
    Py_DECREF(filter);
2400
0
    return result;
2401
0
}
2402
2403
/* Implementation of group(): 'args' is the tuple of positional arguments.

   With no argument the slice for group 0 is returned, with one argument
   the slice for that group, and with several arguments a tuple holding one
   slice per argument.  Undefined groups yield None.  Returns NULL on
   error. */
static PyObject*
match_group(PyObject *op, PyObject* args)
{
    MatchObject *self = _MatchObject_CAST(op);
    const Py_ssize_t nargs = PyTuple_GET_SIZE(args);

    if (nargs == 0) {
        return match_getslice(self, _PyLong_GetZero(), Py_None);
    }
    if (nargs == 1) {
        return match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
    }

    /* fetch multiple items */
    PyObject *slices = PyTuple_New(nargs);
    if (slices == NULL) {
        return NULL;
    }
    for (Py_ssize_t pos = 0; pos < nargs; pos++) {
        PyObject *slice = match_getslice(self, PyTuple_GET_ITEM(args, pos),
                                         Py_None);
        if (slice == NULL) {
            Py_DECREF(slices);
            return NULL;
        }
        /* The tuple takes over the reference to the slice */
        PyTuple_SET_ITEM(slices, pos, slice);
    }
    return slices;
}
2438
2439
/* Return the slice for the group designated by 'name', with None as the
   value for an undefined group. */
static PyObject*
match_getitem(PyObject *op, PyObject* name)
{
    return match_getslice(_MatchObject_CAST(op), name, Py_None);
}
2445
2446
/*[clinic input]
2447
_sre.SRE_Match.groups
2448
2449
    default: object = None
2450
        Is used for groups that did not participate in the match.
2451
2452
Return a tuple containing all the subgroups of the match, from 1.
2453
[clinic start generated code]*/
2454
2455
static PyObject *
2456
_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2457
/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2458
323
{
2459
323
    PyObject* result;
2460
323
    Py_ssize_t index;
2461
2462
323
    result = PyTuple_New(self->groups-1);
2463
323
    if (!result)
2464
0
        return NULL;
2465
2466
2.74k
    for (index = 1; index < self->groups; index++) {
2467
2.42k
        PyObject* item;
2468
2.42k
        item = match_getslice_by_index(self, index, default_value);
2469
2.42k
        if (!item) {
2470
0
            Py_DECREF(result);
2471
0
            return NULL;
2472
0
        }
2473
2.42k
        PyTuple_SET_ITEM(result, index-1, item);
2474
2.42k
    }
2475
2476
323
    return result;
2477
323
}
2478
2479
/*[clinic input]
2480
@permit_long_summary
2481
_sre.SRE_Match.groupdict
2482
2483
    default: object = None
2484
        Is used for groups that did not participate in the match.
2485
2486
Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2487
[clinic start generated code]*/
2488
2489
static PyObject *
2490
_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2491
/*[clinic end generated code: output=29917c9073e41757 input=a8d3a1dc80336872]*/
2492
49
{
2493
49
    PyObject *result;
2494
49
    PyObject *key;
2495
49
    PyObject *value;
2496
49
    Py_ssize_t pos = 0;
2497
49
    Py_hash_t hash;
2498
2499
49
    result = PyDict_New();
2500
49
    if (!result || !self->pattern->groupindex)
2501
0
        return result;
2502
2503
49
    Py_BEGIN_CRITICAL_SECTION(self->pattern->groupindex);
2504
295
    while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2505
246
        int status;
2506
246
        Py_INCREF(key);
2507
246
        value = match_getslice(self, key, default_value);
2508
246
        if (!value) {
2509
0
            Py_DECREF(key);
2510
0
            Py_CLEAR(result);
2511
0
            goto exit;
2512
0
        }
2513
246
        status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2514
246
        Py_DECREF(value);
2515
246
        Py_DECREF(key);
2516
246
        if (status < 0) {
2517
0
            Py_CLEAR(result);
2518
0
            goto exit;
2519
0
        }
2520
246
    }
2521
49
exit:;
2522
49
    Py_END_CRITICAL_SECTION();
2523
2524
49
    return result;
2525
49
}
2526
2527
/*[clinic input]
2528
_sre.SRE_Match.start -> Py_ssize_t
2529
2530
    group: object(c_default="NULL") = 0
2531
    /
2532
2533
Return index of the start of the substring matched by group.
2534
[clinic start generated code]*/
2535
2536
static Py_ssize_t
2537
_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2538
/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2539
5.61M
{
2540
5.61M
    Py_ssize_t index = match_getindex(self, group);
2541
2542
5.61M
    if (index < 0) {
2543
0
        return -1;
2544
0
    }
2545
2546
    /* mark is -1 if group is undefined */
2547
5.61M
    return self->mark[index*2];
2548
5.61M
}
2549
2550
/*[clinic input]
2551
_sre.SRE_Match.end -> Py_ssize_t
2552
2553
    group: object(c_default="NULL") = 0
2554
    /
2555
2556
Return index of the end of the substring matched by group.
2557
[clinic start generated code]*/
2558
2559
static Py_ssize_t
2560
_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2561
/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2562
12.4M
{
2563
12.4M
    Py_ssize_t index = match_getindex(self, group);
2564
2565
12.4M
    if (index < 0) {
2566
0
        return -1;
2567
0
    }
2568
2569
    /* mark is -1 if group is undefined */
2570
12.4M
    return self->mark[index*2+1];
2571
12.4M
}
2572
2573
/* Build the tuple (i1, i2) from two Py_ssize_t values.  Returns NULL when
   either integer object cannot be created; otherwise returns the result
   of _PyTuple_FromPairSteal(), which is handed both references. */
LOCAL(PyObject*)
_pair(Py_ssize_t i1, Py_ssize_t i2)
{
    PyObject *first = PyLong_FromSsize_t(i1);
    if (first == NULL) {
        return NULL;
    }

    PyObject *second = PyLong_FromSsize_t(i2);
    if (second != NULL) {
        return _PyTuple_FromPairSteal(first, second);
    }

    Py_DECREF(first);
    return NULL;
}
2588
2589
/*[clinic input]
2590
_sre.SRE_Match.span
2591
2592
    group: object(c_default="NULL") = 0
2593
    /
2594
2595
For match object m, return the 2-tuple (m.start(group), m.end(group)).
2596
[clinic start generated code]*/
2597
2598
static PyObject *
2599
_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2600
/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2601
2.87M
{
2602
2.87M
    Py_ssize_t index = match_getindex(self, group);
2603
2604
2.87M
    if (index < 0) {
2605
0
        return NULL;
2606
0
    }
2607
2608
    /* marks are -1 if group is undefined */
2609
2.87M
    return _pair(self->mark[index*2], self->mark[index*2+1]);
2610
2.87M
}
2611
2612
static PyObject*
2613
match_regs(MatchObject* self)
2614
0
{
2615
0
    PyObject* regs;
2616
0
    PyObject* item;
2617
0
    Py_ssize_t index;
2618
2619
0
    regs = PyTuple_New(self->groups);
2620
0
    if (!regs)
2621
0
        return NULL;
2622
2623
0
    for (index = 0; index < self->groups; index++) {
2624
0
        item = _pair(self->mark[index*2], self->mark[index*2+1]);
2625
0
        if (!item) {
2626
0
            Py_DECREF(regs);
2627
0
            return NULL;
2628
0
        }
2629
0
        PyTuple_SET_ITEM(regs, index, item);
2630
0
    }
2631
2632
0
    self->regs = Py_NewRef(regs);
2633
2634
0
    return regs;
2635
0
}
2636
2637
/*[clinic input]
2638
_sre.SRE_Match.__copy__
2639
2640
[clinic start generated code]*/
2641
2642
static PyObject *
2643
_sre_SRE_Match___copy___impl(MatchObject *self)
2644
/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2645
0
{
2646
0
    return Py_NewRef(self);
2647
0
}
2648
2649
/*[clinic input]
2650
_sre.SRE_Match.__deepcopy__
2651
2652
    memo: object
2653
    /
2654
2655
[clinic start generated code]*/
2656
2657
static PyObject *
2658
_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2659
/*[clinic end generated code: output=2b657578eb03f4a3 input=779d12a31c2c325e]*/
2660
0
{
2661
0
    return Py_NewRef(self);
2662
0
}
2663
2664
PyDoc_STRVAR(match_doc,
2665
"The result of re.search(), re.prefixmatch(), and re.fullmatch().\n\
2666
Match objects always have a boolean value of True.");
2667
2668
PyDoc_STRVAR(match_group_doc,
2669
"group([group1, ...]) -> str or tuple.\n\
2670
    Return subgroup(s) of the match by indices or names.\n\
2671
    For 0 returns the entire match.");
2672
2673
/* Getter: return self->lastindex as an int object, or None when it is
   negative. */
static PyObject *
match_lastindex_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);

    if (match->lastindex < 0) {
        Py_RETURN_NONE;
    }
    return PyLong_FromSsize_t(match->lastindex);
}
2681
2682
/* Getter: return the item of the pattern's indexgroup tuple at position
   self->lastindex.  None is returned when the pattern has no indexgroup or
   when lastindex is negative or past the end of that tuple. */
static PyObject *
match_lastgroup_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);

    if (!match->pattern->indexgroup ||
        match->lastindex < 0 ||
        match->lastindex >= PyTuple_GET_SIZE(match->pattern->indexgroup))
    {
        Py_RETURN_NONE;
    }

    return Py_NewRef(PyTuple_GET_ITEM(match->pattern->indexgroup,
                                      match->lastindex));
}
2696
2697
/* Getter: return a new reference to self->regs, building it with
   match_regs() first when it has not been set yet. */
static PyObject *
match_regs_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);

    if (!match->regs) {
        return match_regs(match);
    }
    return Py_NewRef(match->regs);
}
2706
2707
/* Build the repr of a match object from its type name, the marks of
   group 0 and the repr of the group 0 slice (limited by the %.50R
   format).  Returns NULL when the slice or the string cannot be
   created. */
static PyObject *
match_repr(PyObject *op)
{
    MatchObject *match = _MatchObject_CAST(op);

    PyObject *whole = match_getslice_by_index(match, 0, Py_None);
    if (!whole) {
        return NULL;
    }

    PyObject *text = PyUnicode_FromFormat(
            "<%s object; span=(%zd, %zd), match=%.50R>",
            Py_TYPE(match)->tp_name,
            match->mark[0], match->mark[1], whole);
    Py_DECREF(whole);
    return text;
}
2722
2723
2724
static PyObject*
2725
pattern_new_match(_sremodulestate* module_state,
2726
                  PatternObject* pattern,
2727
                  SRE_STATE* state,
2728
                  Py_ssize_t status)
2729
77.0M
{
2730
    /* create match object (from state object) */
2731
2732
77.0M
    MatchObject* match;
2733
77.0M
    Py_ssize_t i, j;
2734
77.0M
    char* base;
2735
77.0M
    int n;
2736
2737
77.0M
    if (status > 0) {
2738
2739
        /* create match object (with room for extra group marks) */
2740
        /* coverity[ampersand_in_size] */
2741
57.2M
        match = PyObject_GC_NewVar(MatchObject,
2742
57.2M
                                   module_state->Match_Type,
2743
57.2M
                                   2*(pattern->groups+1));
2744
57.2M
        if (!match)
2745
0
            return NULL;
2746
2747
57.2M
        Py_INCREF(pattern);
2748
57.2M
        match->pattern = pattern;
2749
2750
57.2M
        match->string = Py_NewRef(state->string);
2751
2752
57.2M
        match->regs = NULL;
2753
57.2M
        match->groups = pattern->groups+1;
2754
2755
        /* fill in group slices */
2756
2757
57.2M
        base = (char*) state->beginning;
2758
57.2M
        n = state->charsize;
2759
2760
57.2M
        match->mark[0] = ((char*) state->start - base) / n;
2761
57.2M
        match->mark[1] = ((char*) state->ptr - base) / n;
2762
2763
111M
        for (i = j = 0; i < pattern->groups; i++, j+=2)
2764
54.1M
            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2765
43.5M
                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2766
43.5M
                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2767
2768
                /* check wrong span */
2769
43.5M
                if (match->mark[j+2] > match->mark[j+3]) {
2770
0
                    PyErr_SetString(PyExc_SystemError,
2771
0
                                    "The span of capturing group is wrong,"
2772
0
                                    " please report a bug for the re module.");
2773
0
                    Py_DECREF(match);
2774
0
                    return NULL;
2775
0
                }
2776
43.5M
            } else
2777
10.5M
                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2778
2779
57.2M
        match->pos = state->pos;
2780
57.2M
        match->endpos = state->endpos;
2781
2782
57.2M
        match->lastindex = state->lastindex;
2783
2784
57.2M
        PyObject_GC_Track(match);
2785
57.2M
        return (PyObject*) match;
2786
2787
57.2M
    } else if (status == 0) {
2788
2789
        /* no match */
2790
19.7M
        Py_RETURN_NONE;
2791
2792
19.7M
    }
2793
2794
    /* internal error */
2795
0
    pattern_error(status);
2796
0
    return NULL;
2797
77.0M
}
2798
2799
2800
/* -------------------------------------------------------------------- */
2801
/* scanner methods (experimental) */
2802
2803
static int
2804
scanner_traverse(PyObject *op, visitproc visit, void *arg)
2805
267
{
2806
267
    ScannerObject *self = _ScannerObject_CAST(op);
2807
267
    Py_VISIT(Py_TYPE(self));
2808
267
    Py_VISIT(self->pattern);
2809
267
    return 0;
2810
267
}
2811
2812
static int
2813
scanner_clear(PyObject *op)
2814
382k
{
2815
382k
    ScannerObject *self = _ScannerObject_CAST(op);
2816
382k
    Py_CLEAR(self->pattern);
2817
382k
    return 0;
2818
382k
}
2819
2820
static void
2821
scanner_dealloc(PyObject *self)
2822
382k
{
2823
382k
    PyTypeObject *tp = Py_TYPE(self);
2824
382k
    PyObject_GC_UnTrack(self);
2825
382k
    ScannerObject *scanner = _ScannerObject_CAST(self);
2826
382k
    state_fini(&scanner->state);
2827
382k
    (void)scanner_clear(self);
2828
382k
    tp->tp_free(self);
2829
382k
    Py_DECREF(tp);
2830
382k
}
2831
2832
static int
2833
scanner_begin(ScannerObject* self)
2834
3.23M
{
2835
#ifdef Py_GIL_DISABLED
2836
    int was_executing = _Py_atomic_exchange_int(&self->executing, 1);
2837
#else
2838
3.23M
    int was_executing = self->executing;
2839
3.23M
    self->executing = 1;
2840
3.23M
#endif
2841
3.23M
    if (was_executing) {
2842
0
        PyErr_SetString(PyExc_ValueError,
2843
0
                        "regular expression scanner already executing");
2844
0
        return 0;
2845
0
    }
2846
3.23M
    return 1;
2847
3.23M
}
2848
2849
static void
2850
scanner_end(ScannerObject* self)
2851
3.23M
{
2852
3.23M
    assert(FT_ATOMIC_LOAD_INT_RELAXED(self->executing));
2853
3.23M
    FT_ATOMIC_STORE_INT(self->executing, 0);
2854
3.23M
}
2855
2856
/*[clinic input]
2857
_sre.SRE_Scanner.prefixmatch
2858
2859
    cls: defining_class
2860
    /
2861
2862
[clinic start generated code]*/
2863
2864
static PyObject *
2865
_sre_SRE_Scanner_prefixmatch_impl(ScannerObject *self, PyTypeObject *cls)
2866
/*[clinic end generated code: output=02b3b9d2954a2157 input=3049b20466c56a8e]*/
2867
0
{
2868
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2869
0
    SRE_STATE* state = &self->state;
2870
0
    PyObject* match;
2871
0
    Py_ssize_t status;
2872
2873
0
    if (!scanner_begin(self)) {
2874
0
        return NULL;
2875
0
    }
2876
0
    if (state->start == NULL) {
2877
0
        scanner_end(self);
2878
0
        Py_RETURN_NONE;
2879
0
    }
2880
2881
0
    state_reset(state);
2882
2883
0
    state->ptr = state->start;
2884
2885
0
    status = sre_match(state, PatternObject_GetCode(self->pattern));
2886
0
    if (PyErr_Occurred()) {
2887
0
        scanner_end(self);
2888
0
        return NULL;
2889
0
    }
2890
2891
0
    match = pattern_new_match(module_state, self->pattern,
2892
0
                              state, status);
2893
2894
0
    if (status == 0)
2895
0
        state->start = NULL;
2896
0
    else {
2897
0
        state->must_advance = (state->ptr == state->start);
2898
0
        state->start = state->ptr;
2899
0
    }
2900
2901
0
    scanner_end(self);
2902
0
    return match;
2903
0
}
2904
2905
2906
/*[clinic input]
2907
_sre.SRE_Scanner.search
2908
2909
    cls: defining_class
2910
    /
2911
2912
[clinic start generated code]*/
2913
2914
static PyObject *
2915
_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
2916
/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
2917
3.23M
{
2918
3.23M
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2919
3.23M
    SRE_STATE* state = &self->state;
2920
3.23M
    PyObject* match;
2921
3.23M
    Py_ssize_t status;
2922
2923
3.23M
    if (!scanner_begin(self)) {
2924
0
        return NULL;
2925
0
    }
2926
3.23M
    if (state->start == NULL) {
2927
0
        scanner_end(self);
2928
0
        Py_RETURN_NONE;
2929
0
    }
2930
2931
3.23M
    state_reset(state);
2932
2933
3.23M
    state->ptr = state->start;
2934
2935
3.23M
    status = sre_search(state, PatternObject_GetCode(self->pattern));
2936
3.23M
    if (PyErr_Occurred()) {
2937
0
        scanner_end(self);
2938
0
        return NULL;
2939
0
    }
2940
2941
3.23M
    match = pattern_new_match(module_state, self->pattern,
2942
3.23M
                              state, status);
2943
2944
3.23M
    if (status == 0)
2945
382k
        state->start = NULL;
2946
2.85M
    else {
2947
2.85M
        state->must_advance = (state->ptr == state->start);
2948
2.85M
        state->start = state->ptr;
2949
2.85M
    }
2950
2951
3.23M
    scanner_end(self);
2952
3.23M
    return match;
2953
3.23M
}
2954
2955
static PyObject *
2956
pattern_scanner(_sremodulestate *module_state,
2957
                PatternObject *self,
2958
                PyObject *string,
2959
                Py_ssize_t pos,
2960
                Py_ssize_t endpos)
2961
382k
{
2962
382k
    ScannerObject* scanner;
2963
2964
    /* create scanner object */
2965
382k
    scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
2966
382k
    if (!scanner)
2967
0
        return NULL;
2968
382k
    scanner->pattern = NULL;
2969
382k
    scanner->executing = 0;
2970
2971
    /* create search state object */
2972
382k
    if (!state_init(&scanner->state, self, string, pos, endpos)) {
2973
0
        Py_DECREF(scanner);
2974
0
        return NULL;
2975
0
    }
2976
2977
382k
    Py_INCREF(self);
2978
382k
    scanner->pattern = self;
2979
2980
382k
    PyObject_GC_Track(scanner);
2981
382k
    return (PyObject*) scanner;
2982
382k
}
2983
2984
/* -------------------------------------------------------------------- */
2985
/* template methods */
2986
2987
static int
2988
template_traverse(PyObject *op, visitproc visit, void *arg)
2989
0
{
2990
0
    TemplateObject *self = _TemplateObject_CAST(op);
2991
0
    Py_VISIT(Py_TYPE(self));
2992
0
    Py_VISIT(self->literal);
2993
0
    for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
2994
0
        Py_VISIT(self->items[i].literal);
2995
0
    }
2996
0
    return 0;
2997
0
}
2998
2999
static int
3000
template_clear(PyObject *op)
3001
0
{
3002
0
    TemplateObject *self = _TemplateObject_CAST(op);
3003
0
    Py_CLEAR(self->literal);
3004
0
    for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
3005
0
        Py_CLEAR(self->items[i].literal);
3006
0
    }
3007
0
    return 0;
3008
0
}
3009
3010
static void
3011
template_dealloc(PyObject *self)
3012
0
{
3013
0
    PyTypeObject *tp = Py_TYPE(self);
3014
0
    PyObject_GC_UnTrack(self);
3015
0
    (void)template_clear(self);
3016
0
    tp->tp_free(self);
3017
0
    Py_DECREF(tp);
3018
0
}
3019
3020
static PyObject *
3021
expand_template(TemplateObject *self, MatchObject *match)
3022
0
{
3023
0
    if (Py_SIZE(self) == 0) {
3024
0
        return Py_NewRef(self->literal);
3025
0
    }
3026
3027
0
    PyObject *result = NULL;
3028
0
    Py_ssize_t count = 0;  // the number of non-empty chunks
3029
    /* For small number of strings use a buffer allocated on the stack,
3030
     * otherwise use a list object. */
3031
0
    PyObject *buffer[10];
3032
0
    PyObject **out = buffer;
3033
0
    PyObject *list = NULL;
3034
0
    if (self->chunks > (int)Py_ARRAY_LENGTH(buffer) ||
3035
0
        !PyUnicode_Check(self->literal))
3036
0
    {
3037
0
        list = PyList_New(self->chunks);
3038
0
        if (!list) {
3039
0
            return NULL;
3040
0
        }
3041
0
        out = &PyList_GET_ITEM(list, 0);
3042
0
    }
3043
3044
0
    out[count++] = Py_NewRef(self->literal);
3045
0
    for (Py_ssize_t i = 0; i < Py_SIZE(self); i++) {
3046
0
        Py_ssize_t index = self->items[i].index;
3047
0
        if (index >= match->groups) {
3048
0
            PyErr_SetString(PyExc_IndexError, "no such group");
3049
0
            goto cleanup;
3050
0
        }
3051
0
        PyObject *item = match_getslice_by_index(match, index, Py_None);
3052
0
        if (item == NULL) {
3053
0
            goto cleanup;
3054
0
        }
3055
0
        if (item != Py_None) {
3056
0
            out[count++] = Py_NewRef(item);
3057
0
        }
3058
0
        Py_DECREF(item);
3059
3060
0
        PyObject *literal = self->items[i].literal;
3061
0
        if (literal != NULL) {
3062
0
            out[count++] = Py_NewRef(literal);
3063
0
        }
3064
0
    }
3065
3066
0
    if (PyUnicode_Check(self->literal)) {
3067
0
        result = _PyUnicode_JoinArray(&_Py_STR(empty), out, count);
3068
0
    }
3069
0
    else {
3070
0
        Py_SET_SIZE(list, count);
3071
0
        result = PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list);
3072
0
    }
3073
3074
0
cleanup:
3075
0
    if (list) {
3076
0
        Py_DECREF(list);
3077
0
    }
3078
0
    else {
3079
0
        for (Py_ssize_t i = 0; i < count; i++) {
3080
0
            Py_DECREF(out[i]);
3081
0
        }
3082
0
    }
3083
0
    return result;
3084
0
}
3085
3086
3087
static Py_hash_t
3088
pattern_hash(PyObject *op)
3089
0
{
3090
0
    PatternObject *self = _PatternObject_CAST(op);
3091
3092
0
    Py_hash_t hash, hash2;
3093
3094
0
    hash = PyObject_Hash(self->pattern);
3095
0
    if (hash == -1) {
3096
0
        return -1;
3097
0
    }
3098
3099
0
    hash2 = Py_HashBuffer(self->code, sizeof(self->code[0]) * self->codesize);
3100
0
    hash ^= hash2;
3101
3102
0
    hash ^= self->flags;
3103
0
    hash ^= self->isbytes;
3104
0
    hash ^= self->codesize;
3105
3106
0
    if (hash == -1) {
3107
0
        hash = -2;
3108
0
    }
3109
0
    return hash;
3110
0
}
3111
3112
/* tp_richcompare for pattern objects.
 *
 * Only == and != are supported, and only against another object whose
 * type is exactly Pattern_Type; everything else yields NotImplemented.
 * Two patterns are equal when flags, isbytes, code size, the compiled
 * code and the pattern source all compare equal.
 */
static PyObject*
pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
{
    PyTypeObject *tp = Py_TYPE(lefto);
    _sremodulestate *module_state = get_sre_module_state_by_class(tp);

    if (op != Py_EQ && op != Py_NE) {
        Py_RETURN_NOTIMPLEMENTED;
    }
    if (!Py_IS_TYPE(righto, module_state->Pattern_Type)) {
        Py_RETURN_NOTIMPLEMENTED;
    }
    if (lefto == righto) {
        /* a pattern is equal to itself */
        return PyBool_FromLong(op == Py_EQ);
    }

    PatternObject *lhs = (PatternObject *)lefto;
    PatternObject *rhs = (PatternObject *)righto;

    /* Compare the code and the pattern because the same pattern can
       produce different codes depending on the locale used to compile the
       pattern when the re.LOCALE flag is used. Don't compare groups,
       indexgroup nor groupindex: they are derived from the pattern. */
    int equal = (lhs->flags == rhs->flags
                 && lhs->isbytes == rhs->isbytes
                 && lhs->codesize == rhs->codesize
                 && memcmp(lhs->code, rhs->code,
                           sizeof(lhs->code[0]) * lhs->codesize) == 0);
    if (equal) {
        equal = PyObject_RichCompareBool(lhs->pattern, rhs->pattern,
                                         Py_EQ);
        if (equal < 0) {
            return NULL;
        }
    }

    return PyBool_FromLong(op == Py_NE ? !equal : equal);
}
3160
3161
#include "clinic/sre.c.h"
3162
3163
static PyMethodDef pattern_methods[] = {
3164
    _SRE_SRE_PATTERN_PREFIXMATCH_METHODDEF
3165
    /* "match" reuses the prefixmatch Clinic-generated parser and impl
3166
     * to avoid duplicating the argument parsing boilerplate code. */
3167
    {"match", _PyCFunction_CAST(_sre_SRE_Pattern_prefixmatch),
3168
     METH_METHOD|METH_FASTCALL|METH_KEYWORDS,
3169
     _sre_SRE_Pattern_prefixmatch__doc__},
3170
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
3171
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
3172
    _SRE_SRE_PATTERN_SUB_METHODDEF
3173
    _SRE_SRE_PATTERN_SUBN_METHODDEF
3174
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
3175
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
3176
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
3177
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
3178
    _SRE_SRE_PATTERN___COPY___METHODDEF
3179
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
3180
    _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
3181
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3182
     PyDoc_STR("See PEP 585")},
3183
    {NULL, NULL}
3184
};
3185
3186
static PyGetSetDef pattern_getset[] = {
3187
    {"groupindex", pattern_groupindex, NULL,
3188
      "A dictionary mapping group names to group numbers."},
3189
    {NULL}  /* Sentinel */
3190
};
3191
3192
#define PAT_OFF(x) offsetof(PatternObject, x)
3193
static PyMemberDef pattern_members[] = {
3194
    {"pattern",    _Py_T_OBJECT,    PAT_OFF(pattern),       Py_READONLY,
3195
     "The pattern string from which the RE object was compiled."},
3196
    {"flags",      Py_T_INT,       PAT_OFF(flags),         Py_READONLY,
3197
     "The regex matching flags."},
3198
    {"groups",     Py_T_PYSSIZET,  PAT_OFF(groups),        Py_READONLY,
3199
     "The number of capturing groups in the pattern."},
3200
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PatternObject, weakreflist), Py_READONLY},
3201
    {NULL}  /* Sentinel */
3202
};
3203
3204
static PyType_Slot pattern_slots[] = {
3205
    {Py_tp_dealloc, pattern_dealloc},
3206
    {Py_tp_repr, pattern_repr},
3207
    {Py_tp_hash, pattern_hash},
3208
    {Py_tp_doc, (void *)pattern_doc},
3209
    {Py_tp_richcompare, pattern_richcompare},
3210
    {Py_tp_methods, pattern_methods},
3211
    {Py_tp_members, pattern_members},
3212
    {Py_tp_getset, pattern_getset},
3213
    {Py_tp_traverse, pattern_traverse},
3214
    {Py_tp_clear, pattern_clear},
3215
    {0, NULL},
3216
};
3217
3218
static PyType_Spec pattern_spec = {
3219
    .name = "re.Pattern",
3220
    .basicsize = sizeof(PatternObject),
3221
    .itemsize = sizeof(SRE_CODE),
3222
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3223
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3224
    .slots = pattern_slots,
3225
};
3226
3227
static PyMethodDef match_methods[] = {
3228
    {"group", match_group, METH_VARARGS, match_group_doc},
3229
    _SRE_SRE_MATCH_START_METHODDEF
3230
    _SRE_SRE_MATCH_END_METHODDEF
3231
    _SRE_SRE_MATCH_SPAN_METHODDEF
3232
    _SRE_SRE_MATCH_GROUPS_METHODDEF
3233
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
3234
    _SRE_SRE_MATCH_EXPAND_METHODDEF
3235
    _SRE_SRE_MATCH___COPY___METHODDEF
3236
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
3237
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3238
     PyDoc_STR("See PEP 585")},
3239
    {NULL, NULL}
3240
};
3241
3242
static PyGetSetDef match_getset[] = {
3243
    {"lastindex", match_lastindex_get, NULL,
3244
     "The integer index of the last matched capturing group."},
3245
    {"lastgroup", match_lastgroup_get, NULL,
3246
     "The name of the last matched capturing group."},
3247
    {"regs", match_regs_get, NULL, NULL},
3248
    {NULL}
3249
};
3250
3251
#define MATCH_OFF(x) offsetof(MatchObject, x)
3252
static PyMemberDef match_members[] = {
3253
    {"string",  _Py_T_OBJECT,   MATCH_OFF(string),  Py_READONLY,
3254
     "The string passed to match() or search()."},
3255
    {"re",      _Py_T_OBJECT,   MATCH_OFF(pattern), Py_READONLY,
3256
     "The regular expression object."},
3257
    {"pos",     Py_T_PYSSIZET, MATCH_OFF(pos),     Py_READONLY,
3258
     "The index into the string at which the RE engine started looking for a match."},
3259
    {"endpos",  Py_T_PYSSIZET, MATCH_OFF(endpos),  Py_READONLY,
3260
     "The index into the string beyond which the RE engine will not go."},
3261
    {NULL}
3262
};
3263
3264
/* FIXME: implement setattr("string", None) as a special case (to
3265
   detach the associated string, if any) */
3266
static PyType_Slot match_slots[] = {
3267
    {Py_tp_dealloc, match_dealloc},
3268
    {Py_tp_repr, match_repr},
3269
    {Py_tp_doc, (void *)match_doc},
3270
    {Py_tp_methods, match_methods},
3271
    {Py_tp_members, match_members},
3272
    {Py_tp_getset, match_getset},
3273
    {Py_tp_traverse, match_traverse},
3274
    {Py_tp_clear, match_clear},
3275
3276
    /* As mapping.
3277
     *
3278
     * Match objects do not support length or assignment, but do support
3279
     * __getitem__.
3280
     */
3281
    {Py_mp_subscript, match_getitem},
3282
3283
    {0, NULL},
3284
};
3285
3286
static PyType_Spec match_spec = {
3287
    .name = "re.Match",
3288
    .basicsize = sizeof(MatchObject),
3289
    .itemsize = sizeof(Py_ssize_t),
3290
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3291
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3292
    .slots = match_slots,
3293
};
3294
3295
static PyMethodDef scanner_methods[] = {
3296
    _SRE_SRE_SCANNER_PREFIXMATCH_METHODDEF
3297
    /* "match" reuses the prefixmatch Clinic-generated parser and impl
3298
     * to avoid duplicating the argument parsing boilerplate code. */
3299
    {"match", _PyCFunction_CAST(_sre_SRE_Scanner_prefixmatch),
3300
     METH_METHOD|METH_FASTCALL|METH_KEYWORDS,
3301
     _sre_SRE_Scanner_prefixmatch__doc__},
3302
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
3303
    {NULL, NULL}
3304
};
3305
3306
#define SCAN_OFF(x) offsetof(ScannerObject, x)
3307
static PyMemberDef scanner_members[] = {
3308
    {"pattern", _Py_T_OBJECT, SCAN_OFF(pattern), Py_READONLY},
3309
    {NULL}  /* Sentinel */
3310
};
3311
3312
static PyType_Slot scanner_slots[] = {
3313
    {Py_tp_dealloc, scanner_dealloc},
3314
    {Py_tp_methods, scanner_methods},
3315
    {Py_tp_members, scanner_members},
3316
    {Py_tp_traverse, scanner_traverse},
3317
    {Py_tp_clear, scanner_clear},
3318
    {0, NULL},
3319
};
3320
3321
static PyType_Spec scanner_spec = {
3322
    .name = "_sre.SRE_Scanner",
3323
    .basicsize = sizeof(ScannerObject),
3324
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3325
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3326
    .slots = scanner_slots,
3327
};
3328
3329
static PyType_Slot template_slots[] = {
3330
    {Py_tp_dealloc, template_dealloc},
3331
    {Py_tp_traverse, template_traverse},
3332
    {Py_tp_clear, template_clear},
3333
    {0, NULL},
3334
};
3335
3336
static PyType_Spec template_spec = {
3337
    .name = "_sre.SRE_Template",
3338
    .basicsize = sizeof(TemplateObject),
3339
    .itemsize = sizeof(((TemplateObject *)0)->items[0]),
3340
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3341
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3342
    .slots = template_slots,
3343
};
3344
3345
static PyMethodDef _functions[] = {
3346
    _SRE_COMPILE_METHODDEF
3347
    _SRE_TEMPLATE_METHODDEF
3348
    _SRE_GETCODESIZE_METHODDEF
3349
    _SRE_ASCII_ISCASED_METHODDEF
3350
    _SRE_UNICODE_ISCASED_METHODDEF
3351
    _SRE_ASCII_TOLOWER_METHODDEF
3352
    _SRE_UNICODE_TOLOWER_METHODDEF
3353
    {NULL, NULL}
3354
};
3355
3356
static int
3357
sre_traverse(PyObject *module, visitproc visit, void *arg)
3358
1.28k
{
3359
1.28k
    _sremodulestate *state = get_sre_module_state(module);
3360
3361
1.28k
    Py_VISIT(state->Pattern_Type);
3362
1.28k
    Py_VISIT(state->Match_Type);
3363
1.28k
    Py_VISIT(state->Scanner_Type);
3364
1.28k
    Py_VISIT(state->Template_Type);
3365
1.28k
    Py_VISIT(state->compile_template);
3366
3367
1.28k
    return 0;
3368
1.28k
}
3369
3370
static int
3371
sre_clear(PyObject *module)
3372
0
{
3373
0
    _sremodulestate *state = get_sre_module_state(module);
3374
3375
0
    Py_CLEAR(state->Pattern_Type);
3376
0
    Py_CLEAR(state->Match_Type);
3377
0
    Py_CLEAR(state->Scanner_Type);
3378
0
    Py_CLEAR(state->Template_Type);
3379
0
    Py_CLEAR(state->compile_template);
3380
3381
0
    return 0;
3382
0
}
3383
3384
static void
3385
sre_free(void *module)
3386
0
{
3387
0
    sre_clear((PyObject *)module);
3388
0
}
3389
3390
120
/* Create a heap type from `spec` for module `m` and store it in `type`.
   On failure, jumps to the `error` label of the enclosing function. */
#define CREATE_TYPE(m, type, spec)                                      \
    do {                                                                \
        type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \
        if (type == NULL) {                                             \
            goto error;                                                 \
        }                                                               \
    } while (0)
3397
3398
/* Add `value` to `module` under `name` as an int built with
   PyLong_FromUnsignedLong().  On failure, jumps to the `error` label of
   the enclosing function. */
#define ADD_ULONG_CONSTANT(module, name, value)                             \
    do {                                                                    \
        if (PyModule_Add(module, name, PyLong_FromUnsignedLong(value)) < 0) { \
            goto error;                                                     \
        }                                                                   \
    } while (0)
3404
3405
3406
#ifdef Py_DEBUG
/* Debug-build sanity check for a method table: entry 0 must be
   "prefixmatch", entry 1 must be "match", and the two must share the same
   function pointer, flags and docstring. */
static void
_assert_match_aliases_prefixmatch(PyMethodDef *methods)
{
    const PyMethodDef *target = &methods[0];
    const PyMethodDef *alias = &methods[1];

    assert(strcmp(target->ml_name, "prefixmatch") == 0);
    assert(strcmp(alias->ml_name, "match") == 0);
    assert(alias->ml_meth == target->ml_meth);
    assert(alias->ml_flags == target->ml_flags);
    assert(alias->ml_doc == target->ml_doc);
}
#endif
3419
3420
static int
3421
sre_exec(PyObject *m)
3422
30
{
3423
30
    _sremodulestate *state;
3424
3425
#ifdef Py_DEBUG
3426
    _assert_match_aliases_prefixmatch(pattern_methods);
3427
    _assert_match_aliases_prefixmatch(scanner_methods);
3428
#endif
3429
3430
    /* Create heap types */
3431
30
    state = get_sre_module_state(m);
3432
30
    CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
3433
30
    CREATE_TYPE(m, state->Match_Type, &match_spec);
3434
30
    CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
3435
30
    CREATE_TYPE(m, state->Template_Type, &template_spec);
3436
3437
30
    if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
3438
0
        goto error;
3439
0
    }
3440
3441
30
    if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
3442
0
        goto error;
3443
0
    }
3444
3445
30
    ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
3446
30
    ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
3447
3448
30
    if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
3449
0
        goto error;
3450
0
    }
3451
3452
30
    return 0;
3453
3454
0
error:
3455
0
    return -1;
3456
30
}
3457
3458
static PyModuleDef_Slot sre_slots[] = {
3459
    _Py_ABI_SLOT,
3460
    {Py_mod_exec, sre_exec},
3461
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
3462
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
3463
    {0, NULL},
3464
};
3465
3466
static struct PyModuleDef sremodule = {
3467
    .m_base = PyModuleDef_HEAD_INIT,
3468
    .m_name = "_sre",
3469
    .m_size = sizeof(_sremodulestate),
3470
    .m_methods = _functions,
3471
    .m_slots = sre_slots,
3472
    .m_traverse = sre_traverse,
3473
    .m_free = sre_free,
3474
    .m_clear = sre_clear,
3475
};
3476
3477
PyMODINIT_FUNC
3478
PyInit__sre(void)
3479
30
{
3480
30
    return PyModuleDef_Init(&sremodule);
3481
30
}
3482
3483
/* vim:ts=4:sw=4:et
3484
*/