Coverage Report

Created: 2026-06-01 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Modules/_sre/sre.c
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * partial history:
7
 * 1999-10-24 fl   created (based on existing template matcher code)
8
 * 2000-03-06 fl   first alpha, sort of
9
 * 2000-08-01 fl   fixes for 1.6b1
10
 * 2000-08-07 fl   use PyOS_CheckStack() if available
11
 * 2000-09-20 fl   added expand method
12
 * 2001-03-20 fl   lots of fixes for 2.1b2
13
 * 2001-04-15 fl   export copyright as Python attribute, not global
14
 * 2001-04-28 fl   added __copy__ methods (work in progress)
15
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
16
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
17
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
18
 * 2001-10-20 fl   added split primitive; re-enable unicode for 1.6/2.0/2.1
19
 * 2001-10-21 fl   added sub/subn primitive
20
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
21
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
22
 * 2002-11-09 fl   fixed empty sub/subn return type
23
 * 2003-04-18 mvl  fully support 4-byte codes
24
 * 2003-10-17 gn   implemented non recursive scheme
25
 * 2013-02-04 mrab added fullmatch primitive
26
 *
27
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
28
 *
29
 * This version of the SRE library can be redistributed under CNRI's
30
 * Python 1.6 license.  For any other use, please contact Secret Labs
31
 * AB (info@pythonware.com).
32
 *
33
 * Portions of this engine have been developed in cooperation with
34
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
35
 * other compatibility work.
36
 */
37
38
static const char copyright[] =
39
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41
#include "Python.h"
42
#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION
43
#include "pycore_dict.h"             // _PyDict_Next()
44
#include "pycore_long.h"             // _PyLong_GetZero()
45
#include "pycore_moduleobject.h"     // _PyModule_GetState()
46
#include "pycore_tuple.h"            // _PyTuple_FromPairSteal
47
#include "pycore_unicodeobject.h"    // _PyUnicode_Copy
48
#include "pycore_weakref.h"          // FT_CLEAR_WEAKREFS()
49
50
#include "sre.h"                     // SRE_CODE
51
52
#include <ctype.h>                   // tolower(), toupper(), isalnum()
53
54
22.0M
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
55
56
// On macOS, use the wide character ctype API using btowc()
57
#if defined(__APPLE__)
58
#  define USE_CTYPE_WINT_T
59
#endif
60
61
0
/* Report whether `ch` is alphanumeric according to the C library.
 *
 * When USE_CTYPE_WINT_T is defined (see the __APPLE__ check above) the value
 * is widened with btowc() and classified with iswalnum(); otherwise plain
 * isalnum() is used.  Returns non-zero for alphanumeric characters. */
static int sre_isalnum(unsigned int ch)
{
    const int c = (int)ch;
#ifdef USE_CTYPE_WINT_T
    return (unsigned int)iswalnum(btowc(c));
#else
    return (unsigned int)isalnum(c);
#endif
}
68
69
0
/* Lowercase `ch` using the C library.
 *
 * With USE_CTYPE_WINT_T the value goes through btowc()/towlower();
 * otherwise tolower() is called directly. */
static unsigned int sre_tolower(unsigned int ch)
{
    const int c = (int)ch;
#ifdef USE_CTYPE_WINT_T
    return (unsigned int)towlower(btowc(c));
#else
    return (unsigned int)tolower(c);
#endif
}
76
77
0
/* Uppercase `ch` using the C library.
 *
 * With USE_CTYPE_WINT_T the value goes through btowc()/towupper();
 * otherwise toupper() is called directly. */
static unsigned int sre_toupper(unsigned int ch)
{
    const int c = (int)ch;
#ifdef USE_CTYPE_WINT_T
    return (unsigned int)towupper(btowc(c));
#else
    return (unsigned int)toupper(c);
#endif
}
84
85
/* Defining this one controls tracing:
86
 * 0 -- disabled
87
 * 1 -- only if the DEBUG flag set
88
 * 2 -- always
89
 */
90
#ifndef VERBOSE
91
#  define VERBOSE 0
92
#endif
93
94
/* -------------------------------------------------------------------- */
95
96
#if defined(_MSC_VER) && !defined(__clang__)
97
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
98
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
99
/* fastest possible local call under MSVC */
100
#define LOCAL(type) static __inline type __fastcall
101
#else
102
#define LOCAL(type) static inline type
103
#endif
104
105
/* error codes */
106
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
107
#define SRE_ERROR_STATE -2 /* illegal state */
108
0
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
109
0
#define SRE_ERROR_MEMORY -9 /* out of memory */
110
0
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
111
112
#if VERBOSE == 0
113
#  define INIT_TRACE(state)
114
#  define DO_TRACE 0
115
#  define TRACE(v)
116
#elif VERBOSE == 1
117
#  define INIT_TRACE(state) int _debug = (state)->debug
118
#  define DO_TRACE (_debug)
119
#  define TRACE(v) do {     \
120
        if (_debug) { \
121
            printf v;       \
122
        }                   \
123
    } while (0)
124
#elif VERBOSE == 2
125
#  define INIT_TRACE(state)
126
#  define DO_TRACE 1
127
#  define TRACE(v) printf v
128
#else
129
#  error VERBOSE must be 0, 1 or 2
130
#endif
131
132
/* -------------------------------------------------------------------- */
133
/* search engine state */
134
135
/* ASCII-only character predicates.
 *
 * Each macro first bounds `ch` by the largest character that can belong to
 * the class, and only then consults the Py_IS* classifier.  Note that `ch`
 * is evaluated more than once, so it must not have side effects. */
#define SRE_IS_DIGIT(ch)     ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)     ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_WORD(ch)      ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
143
144
/* Lowercase `ch` with Py_TOLOWER() when it is in the ASCII range (< 128);
 * any other value is returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
148
149
/* locale-specific character predicates */
150
/* !(c & ~N) == (c < N+1) for any unsigned c; spelling the range test this
 * way avoids warnings when c's type supports only numbers < N+1.
 *
 * SRE_LOC_IS_ALNUM: values above 255 are never alphanumeric; values up to
 * 255 are classified by sre_isalnum().
 * SRE_LOC_IS_WORD: alphanumeric per the above, or an underscore.
 * Both macros evaluate `ch` more than once. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? sre_isalnum(ch) : 0)
#define SRE_LOC_IS_WORD(ch)  (SRE_LOC_IS_ALNUM(ch) || (ch) == '_')
154
155
/* Lowercase `ch` through sre_tolower() when it is below 256; larger values
 * are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)sre_tolower(ch);
}
159
160
/* Uppercase `ch` through sre_toupper() when it is below 256; larger values
 * are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)sre_toupper(ch);
}
164
165
/* unicode-specific character predicates */
166
167
9
/* Aliases that map the engine's Unicode predicates onto the Py_UNICODE_*
 * classification macros. */
#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
/* A word character is alphanumeric or an underscore (evaluates `ch` twice). */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
172
173
/* Lowercase `ch` using Py_UNICODE_TOLOWER(). */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
177
178
/* Uppercase `ch` using Py_UNICODE_TOUPPER(). */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
182
183
LOCAL(int)
184
sre_category(SRE_CODE category, unsigned int ch)
185
1.29M
{
186
1.29M
    switch (category) {
187
188
1.29M
    case SRE_CATEGORY_DIGIT:
189
1.29M
        return SRE_IS_DIGIT(ch);
190
0
    case SRE_CATEGORY_NOT_DIGIT:
191
0
        return !SRE_IS_DIGIT(ch);
192
0
    case SRE_CATEGORY_SPACE:
193
0
        return SRE_IS_SPACE(ch);
194
0
    case SRE_CATEGORY_NOT_SPACE:
195
0
        return !SRE_IS_SPACE(ch);
196
0
    case SRE_CATEGORY_WORD:
197
0
        return SRE_IS_WORD(ch);
198
0
    case SRE_CATEGORY_NOT_WORD:
199
0
        return !SRE_IS_WORD(ch);
200
0
    case SRE_CATEGORY_LINEBREAK:
201
0
        return SRE_IS_LINEBREAK(ch);
202
0
    case SRE_CATEGORY_NOT_LINEBREAK:
203
0
        return !SRE_IS_LINEBREAK(ch);
204
205
0
    case SRE_CATEGORY_LOC_WORD:
206
0
        return SRE_LOC_IS_WORD(ch);
207
0
    case SRE_CATEGORY_LOC_NOT_WORD:
208
0
        return !SRE_LOC_IS_WORD(ch);
209
210
9
    case SRE_CATEGORY_UNI_DIGIT:
211
9
        return SRE_UNI_IS_DIGIT(ch);
212
0
    case SRE_CATEGORY_UNI_NOT_DIGIT:
213
0
        return !SRE_UNI_IS_DIGIT(ch);
214
0
    case SRE_CATEGORY_UNI_SPACE:
215
0
        return SRE_UNI_IS_SPACE(ch);
216
0
    case SRE_CATEGORY_UNI_NOT_SPACE:
217
0
        return !SRE_UNI_IS_SPACE(ch);
218
0
    case SRE_CATEGORY_UNI_WORD:
219
0
        return SRE_UNI_IS_WORD(ch);
220
0
    case SRE_CATEGORY_UNI_NOT_WORD:
221
0
        return !SRE_UNI_IS_WORD(ch);
222
0
    case SRE_CATEGORY_UNI_LINEBREAK:
223
0
        return SRE_UNI_IS_LINEBREAK(ch);
224
0
    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
225
0
        return !SRE_UNI_IS_LINEBREAK(ch);
226
1.29M
    }
227
0
    return 0;
228
1.29M
}
229
230
LOCAL(int)
231
char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
232
0
{
233
0
    return ch == pattern
234
0
        || (SRE_CODE) sre_lower_locale(ch) == pattern
235
0
        || (SRE_CODE) sre_upper_locale(ch) == pattern;
236
0
}
237
238
239
/* helpers */
240
241
static void
242
data_stack_dealloc(SRE_STATE* state)
243
10.7k
{
244
10.7k
    if (state->data_stack) {
245
10.7k
        PyMem_Free(state->data_stack);
246
10.7k
        state->data_stack = NULL;
247
10.7k
    }
248
10.7k
    state->data_stack_size = state->data_stack_base = 0;
249
10.7k
}
250
251
static int
252
data_stack_grow(SRE_STATE* state, Py_ssize_t size)
253
13.4k
{
254
13.4k
    INIT_TRACE(state);
255
13.4k
    Py_ssize_t minsize, cursize;
256
13.4k
    minsize = state->data_stack_base+size;
257
13.4k
    cursize = state->data_stack_size;
258
13.4k
    if (cursize < minsize) {
259
13.4k
        void* stack;
260
13.4k
        cursize = minsize+minsize/4+1024;
261
13.4k
        TRACE(("allocate/grow stack %zd\n", cursize));
262
13.4k
        stack = PyMem_Realloc(state->data_stack, cursize);
263
13.4k
        if (!stack) {
264
0
            data_stack_dealloc(state);
265
0
            return SRE_ERROR_MEMORY;
266
0
        }
267
13.4k
        state->data_stack = (char *)stack;
268
13.4k
        state->data_stack_size = cursize;
269
13.4k
    }
270
13.4k
    return 0;
271
13.4k
}
272
273
/* memory pool functions for SRE_REPEAT, this can avoid memory
274
   leak when SRE(match) function terminates abruptly.
275
   state->repeat_pool_used is a doubly-linked list, so that we
276
   can remove a SRE_REPEAT node from it.
277
   state->repeat_pool_unused is a singly-linked list, we put/get
278
   node at the head. */
279
static SRE_REPEAT *
280
repeat_pool_malloc(SRE_STATE *state)
281
3.16k
{
282
3.16k
    SRE_REPEAT *repeat;
283
284
3.16k
    if (state->repeat_pool_unused) {
285
        /* remove from unused pool (singly-linked list) */
286
2.71k
        repeat = state->repeat_pool_unused;
287
2.71k
        state->repeat_pool_unused = repeat->pool_next;
288
2.71k
    }
289
448
    else {
290
448
        repeat = PyMem_Malloc(sizeof(SRE_REPEAT));
291
448
        if (!repeat) {
292
0
            return NULL;
293
0
        }
294
448
    }
295
296
    /* add to used pool (doubly-linked list) */
297
3.16k
    SRE_REPEAT *temp = state->repeat_pool_used;
298
3.16k
    if (temp) {
299
172
        temp->pool_prev = repeat;
300
172
    }
301
3.16k
    repeat->pool_prev = NULL;
302
3.16k
    repeat->pool_next = temp;
303
3.16k
    state->repeat_pool_used = repeat;
304
305
3.16k
    return repeat;
306
3.16k
}
307
308
static void
309
repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
310
3.16k
{
311
3.16k
    SRE_REPEAT *prev = repeat->pool_prev;
312
3.16k
    SRE_REPEAT *next = repeat->pool_next;
313
314
    /* remove from used pool (doubly-linked list) */
315
3.16k
    if (prev) {
316
0
        prev->pool_next = next;
317
0
    }
318
3.16k
    else {
319
3.16k
        state->repeat_pool_used = next;
320
3.16k
    }
321
3.16k
    if (next) {
322
172
        next->pool_prev = prev;
323
172
    }
324
325
    /* add to unused pool (singly-linked list) */
326
3.16k
    repeat->pool_next = state->repeat_pool_unused;
327
3.16k
    state->repeat_pool_unused = repeat;
328
3.16k
}
329
330
static void
331
repeat_pool_clear(SRE_STATE *state)
332
10.7k
{
333
    /* clear used pool */
334
10.7k
    SRE_REPEAT *next = state->repeat_pool_used;
335
10.7k
    state->repeat_pool_used = NULL;
336
10.7k
    while (next) {
337
0
        SRE_REPEAT *temp = next;
338
0
        next = temp->pool_next;
339
0
        PyMem_Free(temp);
340
0
    }
341
342
    /* clear unused pool */
343
10.7k
    next = state->repeat_pool_unused;
344
10.7k
    state->repeat_pool_unused = NULL;
345
11.2k
    while (next) {
346
448
        SRE_REPEAT *temp = next;
347
448
        next = temp->pool_next;
348
448
        PyMem_Free(temp);
349
448
    }
350
10.7k
}
351
352
/* generate 8-bit version */
353
354
1.30M
#define SRE_CHAR Py_UCS1
355
#define SIZEOF_SRE_CHAR 1
356
9.39M
#define SRE(F) sre_ucs1_##F
357
#include "sre_lib.h"
358
359
/* generate 16-bit unicode version */
360
361
1.84k
#define SRE_CHAR Py_UCS2
362
#define SIZEOF_SRE_CHAR 2
363
3.26M
#define SRE(F) sre_ucs2_##F
364
#include "sre_lib.h"
365
366
/* generate 32-bit unicode version */
367
368
2.21k
#define SRE_CHAR Py_UCS4
369
#define SIZEOF_SRE_CHAR 4
370
2.26M
#define SRE(F) sre_ucs4_##F
371
#include "sre_lib.h"
372
373
/* -------------------------------------------------------------------- */
374
/* factories and destructors */
375
376
/* module state */
377
typedef struct {
378
    PyTypeObject *Pattern_Type;
379
    PyTypeObject *Match_Type;
380
    PyTypeObject *Scanner_Type;
381
    PyTypeObject *Template_Type;
382
    PyObject *compile_template;  // reference to re._compile_template
383
} _sremodulestate;
384
385
static _sremodulestate *
386
get_sre_module_state(PyObject *m)
387
10.9k
{
388
10.9k
    _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
389
10.9k
    assert(state);
390
10.9k
    return state;
391
10.9k
}
392
393
static struct PyModuleDef sremodule;
394
/* Module state reached from a type object: resolve the type's module with
 * PyType_GetModule() and fetch that module's state. */
#define get_sre_module_state_by_class(cls) (get_sre_module_state(PyType_GetModule(cls)))
396
397
/* see sre.h for object declarations */
398
static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
399
static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
400
401
578
/* Unchecked pointer casts from a generic object pointer to the concrete
 * object structs used in this file. */
#define _PatternObject_CAST(op)   ((PatternObject *)(op))
#define _MatchObject_CAST(op)     ((MatchObject *)(op))
#define _TemplateObject_CAST(op)  ((TemplateObject *)(op))
#define _ScannerObject_CAST(op)   ((ScannerObject *)(op))
405
406
/*[clinic input]
407
module _sre
408
class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
409
class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
410
class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
411
[clinic start generated code]*/
412
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
413
414
/*[clinic input]
415
_sre.getcodesize -> int
416
[clinic start generated code]*/
417
418
static int
419
_sre_getcodesize_impl(PyObject *module)
420
/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
421
0
{
422
0
    return sizeof(SRE_CODE);
423
0
}
424
425
/*[clinic input]
426
_sre.ascii_iscased -> bool
427
428
    character: int
429
    /
430
431
[clinic start generated code]*/
432
433
static int
434
_sre_ascii_iscased_impl(PyObject *module, int character)
435
/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
436
0
{
437
0
    unsigned int ch = (unsigned int)character;
438
0
    return ch < 128 && Py_ISALPHA(ch);
439
0
}
440
441
/*[clinic input]
442
_sre.unicode_iscased -> bool
443
444
    character: int
445
    /
446
447
[clinic start generated code]*/
448
449
static int
450
_sre_unicode_iscased_impl(PyObject *module, int character)
451
/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
452
57
{
453
57
    unsigned int ch = (unsigned int)character;
454
57
    return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
455
57
}
456
457
/*[clinic input]
458
_sre.ascii_tolower -> int
459
460
    character: int
461
    /
462
463
[clinic start generated code]*/
464
465
static int
466
_sre_ascii_tolower_impl(PyObject *module, int character)
467
/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
468
0
{
469
0
    return sre_lower_ascii(character);
470
0
}
471
472
/*[clinic input]
473
_sre.unicode_tolower -> int
474
475
    character: int
476
    /
477
478
[clinic start generated code]*/
479
480
static int
481
_sre_unicode_tolower_impl(PyObject *module, int character)
482
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
483
51
{
484
51
    return sre_lower_unicode(character);
485
51
}
486
487
LOCAL(void)
488
state_reset(SRE_STATE* state)
489
0
{
490
    /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
491
    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
492
493
0
    state->lastmark = -1;
494
0
    state->lastindex = -1;
495
496
0
    state->repeat = NULL;
497
498
0
    data_stack_dealloc(state);
499
0
}
500
501
static const void*
502
getstring(PyObject* string, Py_ssize_t* p_length,
503
          int* p_isbytes, int* p_charsize,
504
          Py_buffer *view)
505
10.8k
{
506
    /* given a python object, return a data pointer, a length (in
507
       characters), and a character size.  return NULL if the object
508
       is not a string (or not compatible) */
509
510
    /* Unicode objects do not support the buffer API. So, get the data
511
       directly instead. */
512
10.8k
    if (PyUnicode_Check(string)) {
513
10.3k
        *p_length = PyUnicode_GET_LENGTH(string);
514
10.3k
        *p_charsize = PyUnicode_KIND(string);
515
0
        *p_isbytes = 0;
516
10.3k
        return PyUnicode_DATA(string);
517
10.3k
    }
518
519
    /* get pointer to byte string buffer */
520
465
    if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
521
0
        PyErr_Format(PyExc_TypeError, "expected string or bytes-like "
522
0
                     "object, got '%.200s'", Py_TYPE(string)->tp_name);
523
0
        return NULL;
524
0
    }
525
526
465
    *p_length = view->len;
527
465
    *p_charsize = 1;
528
465
    *p_isbytes = 1;
529
530
465
    if (view->buf == NULL) {
531
0
        PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
532
0
        PyBuffer_Release(view);
533
0
        view->buf = NULL;
534
0
        return NULL;
535
0
    }
536
465
    return view->buf;
537
465
}
538
539
/* Initialise `state` for matching `pattern` against `string` over the
 * character range [start, end).
 *
 * The state is zeroed, a mark array with two slots per group is allocated,
 * the string data is obtained through getstring(), and the pattern/string
 * kinds (str vs. bytes-like) are checked for compatibility.  `start` and
 * `end` are clamped to [0, length].  A new reference to `string` is stored
 * in state->string.
 *
 * Returns `string` on success.  On failure returns NULL with an exception
 * set, after freeing the mark array and releasing any acquired buffer. */
LOCAL(PyObject*)
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
           Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t length;
    int isbytes, charsize;
    const void* ptr;

    memset(state, 0, sizeof(SRE_STATE));

    state->mark = PyMem_New(const void *, pattern->groups * 2);
    if (state->mark == NULL) {
        PyErr_NoMemory();
        goto err;
    }
    state->lastmark = -1;
    state->lastindex = -1;

    state->buffer.buf = NULL;
    ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
    if (ptr == NULL) {
        goto err;
    }

    /* the pattern and the subject must be of the same kind */
    if (isbytes) {
        if (pattern->isbytes == 0) {
            PyErr_SetString(PyExc_TypeError,
                            "cannot use a string pattern on a bytes-like object");
            goto err;
        }
    }
    else if (pattern->isbytes > 0) {
        PyErr_SetString(PyExc_TypeError,
                        "cannot use a bytes pattern on a string-like object");
        goto err;
    }

    /* clamp both boundaries to [0, length] */
    if (start < 0) {
        start = 0;
    }
    else if (start > length) {
        start = length;
    }

    if (end < 0) {
        end = 0;
    }
    else if (end > length) {
        end = length;
    }

    state->isbytes = isbytes;
    state->charsize = charsize;
    state->match_all = 0;
    state->must_advance = 0;
    state->debug = ((pattern->flags & SRE_FLAG_DEBUG) != 0);

    /* byte addresses of the start of the data and of the clamped range */
    state->beginning = ptr;
    state->start = (void*) ((char*) ptr + start * state->charsize);
    state->end = (void*) ((char*) ptr + end * state->charsize);

    state->string = Py_NewRef(string);
    state->pos = start;
    state->endpos = end;

#ifdef Py_DEBUG
    state->fail_after_count = pattern->fail_after_count;
    state->fail_after_exc = pattern->fail_after_exc; // borrowed ref
#endif

    return string;

  err:
    /* We add an explicit cast here because MSVC has a bug when
       compiling C code where it believes that `const void**` cannot be
       safely casted to `void*`, see bpo-39943 for details. */
    PyMem_Free((void*) state->mark);
    state->mark = NULL;
    if (state->buffer.buf) {
        PyBuffer_Release(&state->buffer);
    }
    return NULL;
}
617
618
LOCAL(void)
619
state_fini(SRE_STATE* state)
620
10.7k
{
621
10.7k
    if (state->buffer.buf)
622
436
        PyBuffer_Release(&state->buffer);
623
10.7k
    Py_XDECREF(state->string);
624
10.7k
    data_stack_dealloc(state);
625
    /* See above PyMem_Free() for why we explicitly cast here. */
626
10.7k
    PyMem_Free((void*) state->mark);
627
10.7k
    state->mark = NULL;
628
    /* SRE_REPEAT pool */
629
10.7k
    repeat_pool_clear(state);
630
10.7k
}
631
632
/* calculate offset from start of string */
633
/* Character index of pointer `member`: its byte distance from
 * (state)->beginning divided by the character size. */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
635
636
/* Return string[start:end] as a new reference.
 *
 * For str subjects (`isbytes` == 0) this is PyUnicode_Substring().  For
 * bytes-like subjects a bytes object is built from the raw data at `ptr`,
 * except that an exact bytes object sliced over its full length is returned
 * as-is (with a new reference) instead of being copied. */
LOCAL(PyObject*)
getslice(int isbytes, const void *ptr,
         PyObject* string, Py_ssize_t start, Py_ssize_t end)
{
    if (!isbytes) {
        return PyUnicode_Substring(string, start, end);
    }

    if (PyBytes_CheckExact(string)) {
        if (start == 0 && end == PyBytes_GET_SIZE(string)) {
            return Py_NewRef(string);
        }
    }
    return PyBytes_FromStringAndSize(
            (const char *)ptr + start, end - start);
}
652
653
/* Return the text captured by group number `index` as a new reference.
 *
 * `index` is converted to the position of the group's start mark,
 * (index - 1) * 2; the end mark is the slot after it.  The group counts as
 * unset when `string` is None, the mark position is at or beyond
 * state->lastmark, or either mark is NULL.  For an unset group the result
 * is an empty slice if `empty` is true, otherwise None.
 *
 * Returns NULL with SystemError set if the recorded span is reversed. */
LOCAL(PyObject*)
state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
{
    index = (index - 1) * 2;

    const int unset = (string == Py_None
                       || index >= state->lastmark
                       || !state->mark[index]
                       || !state->mark[index+1]);

    if (unset) {
        if (!empty) {
            Py_RETURN_NONE;
        }
        /* want empty string */
        return getslice(state->isbytes, state->beginning, string, 0, 0);
    }

    const Py_ssize_t i = STATE_OFFSET(state, state->mark[index]);
    const Py_ssize_t j = STATE_OFFSET(state, state->mark[index+1]);

    /* check wrong span */
    if (i > j) {
        PyErr_SetString(PyExc_SystemError,
                        "The span of capturing group is wrong,"
                        " please report a bug for the re module.");
        return NULL;
    }

    return getslice(state->isbytes, state->beginning, string, i, j);
}
682
683
static void
684
pattern_error(Py_ssize_t status)
685
0
{
686
0
    switch (status) {
687
0
    case SRE_ERROR_RECURSION_LIMIT:
688
        /* This error code seems to be unused. */
689
0
        PyErr_SetString(
690
0
            PyExc_RecursionError,
691
0
            "maximum recursion limit exceeded"
692
0
            );
693
0
        break;
694
0
    case SRE_ERROR_MEMORY:
695
0
        PyErr_NoMemory();
696
0
        break;
697
0
    case SRE_ERROR_INTERRUPTED:
698
    /* An exception has already been raised, so let it fly */
699
0
        break;
700
0
    default:
701
        /* other error codes indicate compiler/engine bugs */
702
0
        PyErr_SetString(
703
0
            PyExc_RuntimeError,
704
0
            "internal error in regular expression engine"
705
0
            );
706
0
    }
707
0
}
708
709
static int
710
pattern_traverse(PyObject *op, visitproc visit, void *arg)
711
566
{
712
566
    PatternObject *self = _PatternObject_CAST(op);
713
566
    Py_VISIT(Py_TYPE(self));
714
566
    Py_VISIT(self->groupindex);
715
566
    Py_VISIT(self->indexgroup);
716
566
    Py_VISIT(self->pattern);
717
#ifdef Py_DEBUG
718
    Py_VISIT(self->fail_after_exc);
719
#endif
720
566
    return 0;
721
566
}
722
723
static int
724
pattern_clear(PyObject *op)
725
12
{
726
12
    PatternObject *self = _PatternObject_CAST(op);
727
12
    Py_CLEAR(self->groupindex);
728
12
    Py_CLEAR(self->indexgroup);
729
12
    Py_CLEAR(self->pattern);
730
#ifdef Py_DEBUG
731
    Py_CLEAR(self->fail_after_exc);
732
#endif
733
12
    return 0;
734
12
}
735
736
static void
737
pattern_dealloc(PyObject *self)
738
12
{
739
12
    PyTypeObject *tp = Py_TYPE(self);
740
12
    PyObject_GC_UnTrack(self);
741
12
    FT_CLEAR_WEAKREFS(self, _PatternObject_CAST(self)->weakreflist);
742
12
    (void)pattern_clear(self);
743
12
    tp->tp_free(self);
744
12
    Py_DECREF(tp);
745
12
}
746
747
LOCAL(Py_ssize_t)
748
sre_match(SRE_STATE* state, SRE_CODE* pattern)
749
10.7k
{
750
10.7k
    if (state->charsize == 1)
751
6.72k
        return sre_ucs1_match(state, pattern, 1);
752
4.05k
    if (state->charsize == 2)
753
1.84k
        return sre_ucs2_match(state, pattern, 1);
754
4.05k
    assert(state->charsize == 4);
755
2.21k
    return sre_ucs4_match(state, pattern, 1);
756
2.21k
}
757
758
LOCAL(Py_ssize_t)
759
sre_search(SRE_STATE* state, SRE_CODE* pattern)
760
0
{
761
0
    if (state->charsize == 1)
762
0
        return sre_ucs1_search(state, pattern);
763
0
    if (state->charsize == 2)
764
0
        return sre_ucs2_search(state, pattern);
765
0
    assert(state->charsize == 4);
766
0
    return sre_ucs4_search(state, pattern);
767
0
}
768
769
/*[clinic input]
770
_sre.SRE_Pattern.prefixmatch
771
772
    cls: defining_class
773
    /
774
    string: object
775
    pos: Py_ssize_t = 0
776
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
777
778
Matches zero or more characters at the beginning of the string.
779
[clinic start generated code]*/
780
781
static PyObject *
782
_sre_SRE_Pattern_prefixmatch_impl(PatternObject *self, PyTypeObject *cls,
783
                                  PyObject *string, Py_ssize_t pos,
784
                                  Py_ssize_t endpos)
785
/*[clinic end generated code: output=a0e079fb4f875240 input=e2a7e68ea47d048c]*/
786
10.7k
{
787
10.7k
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
788
10.7k
    SRE_STATE state;
789
10.7k
    Py_ssize_t status;
790
10.7k
    PyObject *match;
791
792
10.7k
    if (!state_init(&state, self, string, pos, endpos))
793
0
        return NULL;
794
795
10.7k
    INIT_TRACE(&state);
796
10.7k
    state.ptr = state.start;
797
798
10.7k
    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
799
800
10.7k
    status = sre_match(&state, PatternObject_GetCode(self));
801
802
10.7k
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
803
10.7k
    if (PyErr_Occurred()) {
804
0
        state_fini(&state);
805
0
        return NULL;
806
0
    }
807
808
10.7k
    match = pattern_new_match(module_state, self, &state, status);
809
10.7k
    state_fini(&state);
810
10.7k
    return match;
811
10.7k
}
812
813
814
/*[clinic input]
815
_sre.SRE_Pattern.fullmatch
816
817
    cls: defining_class
818
    /
819
    string: object
820
    pos: Py_ssize_t = 0
821
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
822
823
Matches against all of the string.
824
[clinic start generated code]*/
825
826
static PyObject *
827
_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
828
                                PyObject *string, Py_ssize_t pos,
829
                                Py_ssize_t endpos)
830
/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
831
0
{
832
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
833
0
    SRE_STATE state;
834
0
    Py_ssize_t status;
835
0
    PyObject *match;
836
837
0
    if (!state_init(&state, self, string, pos, endpos))
838
0
        return NULL;
839
840
0
    INIT_TRACE(&state);
841
0
    state.ptr = state.start;
842
843
0
    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
844
845
0
    state.match_all = 1;
846
0
    status = sre_match(&state, PatternObject_GetCode(self));
847
848
0
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
849
0
    if (PyErr_Occurred()) {
850
0
        state_fini(&state);
851
0
        return NULL;
852
0
    }
853
854
0
    match = pattern_new_match(module_state, self, &state, status);
855
0
    state_fini(&state);
856
0
    return match;
857
0
}
858
859
/*[clinic input]
860
@permit_long_summary
861
_sre.SRE_Pattern.search
862
863
    cls: defining_class
864
    /
865
    string: object
866
    pos: Py_ssize_t = 0
867
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
868
869
Scan through string looking for a match, and return a corresponding match object instance.
870
871
Return None if no position in the string matches.
872
[clinic start generated code]*/
873
874
static PyObject *
875
_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
876
                             PyObject *string, Py_ssize_t pos,
877
                             Py_ssize_t endpos)
878
/*[clinic end generated code: output=bd7f2d9d583e1463 input=05e9feee0334c156]*/
879
0
{
880
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
881
0
    SRE_STATE state;
882
0
    Py_ssize_t status;
883
0
    PyObject *match;
884
885
0
    if (!state_init(&state, self, string, pos, endpos))
886
0
        return NULL;
887
888
0
    INIT_TRACE(&state);
889
0
    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
890
891
0
    status = sre_search(&state, PatternObject_GetCode(self));
892
893
0
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
894
895
0
    if (PyErr_Occurred()) {
896
0
        state_fini(&state);
897
0
        return NULL;
898
0
    }
899
900
0
    match = pattern_new_match(module_state, self, &state, status);
901
0
    state_fini(&state);
902
0
    return match;
903
0
}
904
905
/*[clinic input]
906
_sre.SRE_Pattern.findall
907
908
    string: object
909
    pos: Py_ssize_t = 0
910
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
911
912
Return a list of all non-overlapping matches of pattern in string.
913
[clinic start generated code]*/
914
915
static PyObject *
916
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
917
                              Py_ssize_t pos, Py_ssize_t endpos)
918
/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
919
0
{
920
0
    SRE_STATE state;
921
0
    PyObject* list;
922
0
    Py_ssize_t status;
923
0
    Py_ssize_t i, b, e;
924
925
0
    if (!state_init(&state, self, string, pos, endpos))
926
0
        return NULL;
927
928
0
    list = PyList_New(0);
929
0
    if (!list) {
930
0
        state_fini(&state);
931
0
        return NULL;
932
0
    }
933
934
0
    while (state.start <= state.end) {
935
936
0
        PyObject* item;
937
938
0
        state_reset(&state);
939
940
0
        state.ptr = state.start;
941
942
0
        status = sre_search(&state, PatternObject_GetCode(self));
943
0
        if (PyErr_Occurred())
944
0
            goto error;
945
946
0
        if (status <= 0) {
947
0
            if (status == 0)
948
0
                break;
949
0
            pattern_error(status);
950
0
            goto error;
951
0
        }
952
953
        /* don't bother to build a match object */
954
0
        switch (self->groups) {
955
0
        case 0:
956
0
            b = STATE_OFFSET(&state, state.start);
957
0
            e = STATE_OFFSET(&state, state.ptr);
958
0
            item = getslice(state.isbytes, state.beginning,
959
0
                            string, b, e);
960
0
            if (!item)
961
0
                goto error;
962
0
            break;
963
0
        case 1:
964
0
            item = state_getslice(&state, 1, string, 1);
965
0
            if (!item)
966
0
                goto error;
967
0
            break;
968
0
        default:
969
0
            item = PyTuple_New(self->groups);
970
0
            if (!item)
971
0
                goto error;
972
0
            for (i = 0; i < self->groups; i++) {
973
0
                PyObject* o = state_getslice(&state, i+1, string, 1);
974
0
                if (!o) {
975
0
                    Py_DECREF(item);
976
0
                    goto error;
977
0
                }
978
0
                PyTuple_SET_ITEM(item, i, o);
979
0
            }
980
0
            break;
981
0
        }
982
983
0
        status = PyList_Append(list, item);
984
0
        Py_DECREF(item);
985
0
        if (status < 0)
986
0
            goto error;
987
988
0
        state.must_advance = (state.ptr == state.start);
989
0
        state.start = state.ptr;
990
0
    }
991
992
0
    state_fini(&state);
993
0
    return list;
994
995
0
error:
996
0
    Py_DECREF(list);
997
0
    state_fini(&state);
998
0
    return NULL;
999
1000
0
}
1001
1002
/*[clinic input]
1003
@permit_long_summary
1004
_sre.SRE_Pattern.finditer
1005
1006
    cls: defining_class
1007
    /
1008
    string: object
1009
    pos: Py_ssize_t = 0
1010
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1011
1012
Return an iterator over all non-overlapping matches for the RE pattern in string.
1013
1014
For each match, the iterator returns a match object.
1015
[clinic start generated code]*/
1016
1017
static PyObject *
1018
_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
1019
                               PyObject *string, Py_ssize_t pos,
1020
                               Py_ssize_t endpos)
1021
/*[clinic end generated code: output=1791dbf3618ade56 input=ee28865796048023]*/
1022
0
{
1023
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1024
0
    PyObject* scanner;
1025
0
    PyObject* search;
1026
0
    PyObject* iterator;
1027
1028
0
    scanner = pattern_scanner(module_state, self, string, pos, endpos);
1029
0
    if (!scanner)
1030
0
        return NULL;
1031
1032
0
    search = PyObject_GetAttrString(scanner, "search");
1033
0
    Py_DECREF(scanner);
1034
0
    if (!search)
1035
0
        return NULL;
1036
1037
0
    iterator = PyCallIter_New(search, Py_None);
1038
0
    Py_DECREF(search);
1039
1040
0
    return iterator;
1041
0
}
1042
1043
/*[clinic input]
1044
_sre.SRE_Pattern.scanner
1045
1046
    cls: defining_class
1047
    /
1048
    string: object
1049
    pos: Py_ssize_t = 0
1050
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1051
1052
[clinic start generated code]*/
1053
1054
static PyObject *
1055
_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
1056
                              PyObject *string, Py_ssize_t pos,
1057
                              Py_ssize_t endpos)
1058
/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
1059
0
{
1060
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1061
1062
0
    return pattern_scanner(module_state, self, string, pos, endpos);
1063
0
}
1064
1065
/*[clinic input]
1066
_sre.SRE_Pattern.split
1067
1068
    string: object
1069
    maxsplit: Py_ssize_t = 0
1070
1071
Split string by the occurrences of pattern.
1072
[clinic start generated code]*/
1073
1074
static PyObject *
1075
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
1076
                            Py_ssize_t maxsplit)
1077
/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
1078
0
{
1079
0
    SRE_STATE state;
1080
0
    PyObject* list;
1081
0
    PyObject* item;
1082
0
    Py_ssize_t status;
1083
0
    Py_ssize_t n;
1084
0
    Py_ssize_t i;
1085
0
    const void* last;
1086
1087
0
    assert(self->codesize != 0);
1088
1089
0
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
1090
0
        return NULL;
1091
1092
0
    list = PyList_New(0);
1093
0
    if (!list) {
1094
0
        state_fini(&state);
1095
0
        return NULL;
1096
0
    }
1097
1098
0
    n = 0;
1099
0
    last = state.start;
1100
1101
0
    while (!maxsplit || n < maxsplit) {
1102
1103
0
        state_reset(&state);
1104
1105
0
        state.ptr = state.start;
1106
1107
0
        status = sre_search(&state, PatternObject_GetCode(self));
1108
0
        if (PyErr_Occurred())
1109
0
            goto error;
1110
1111
0
        if (status <= 0) {
1112
0
            if (status == 0)
1113
0
                break;
1114
0
            pattern_error(status);
1115
0
            goto error;
1116
0
        }
1117
1118
        /* get segment before this match */
1119
0
        item = getslice(state.isbytes, state.beginning,
1120
0
            string, STATE_OFFSET(&state, last),
1121
0
            STATE_OFFSET(&state, state.start)
1122
0
            );
1123
0
        if (!item)
1124
0
            goto error;
1125
0
        status = PyList_Append(list, item);
1126
0
        Py_DECREF(item);
1127
0
        if (status < 0)
1128
0
            goto error;
1129
1130
        /* add groups (if any) */
1131
0
        for (i = 0; i < self->groups; i++) {
1132
0
            item = state_getslice(&state, i+1, string, 0);
1133
0
            if (!item)
1134
0
                goto error;
1135
0
            status = PyList_Append(list, item);
1136
0
            Py_DECREF(item);
1137
0
            if (status < 0)
1138
0
                goto error;
1139
0
        }
1140
1141
0
        n = n + 1;
1142
0
        state.must_advance = (state.ptr == state.start);
1143
0
        last = state.start = state.ptr;
1144
1145
0
    }
1146
1147
    /* get segment following last match (even if empty) */
1148
0
    item = getslice(state.isbytes, state.beginning,
1149
0
        string, STATE_OFFSET(&state, last), state.endpos
1150
0
        );
1151
0
    if (!item)
1152
0
        goto error;
1153
0
    status = PyList_Append(list, item);
1154
0
    Py_DECREF(item);
1155
0
    if (status < 0)
1156
0
        goto error;
1157
1158
0
    state_fini(&state);
1159
0
    return list;
1160
1161
0
error:
1162
0
    Py_DECREF(list);
1163
0
    state_fini(&state);
1164
0
    return NULL;
1165
1166
0
}
1167
1168
static PyObject *
1169
compile_template(_sremodulestate *module_state,
1170
                 PatternObject *pattern, PyObject *template)
1171
0
{
1172
    /* delegate to Python code */
1173
0
    PyObject *func = FT_ATOMIC_LOAD_PTR(module_state->compile_template);
1174
0
    if (func == NULL) {
1175
0
        func = PyImport_ImportModuleAttrString("re", "_compile_template");
1176
0
        if (func == NULL) {
1177
0
            return NULL;
1178
0
        }
1179
#ifdef Py_GIL_DISABLED
1180
        PyObject *other_func = NULL;
1181
        if (!_Py_atomic_compare_exchange_ptr(&module_state->compile_template, &other_func, func))  {
1182
            Py_DECREF(func);
1183
            func = other_func;
1184
        }
1185
#else
1186
0
        Py_XSETREF(module_state->compile_template, func);
1187
0
#endif
1188
0
    }
1189
1190
0
    PyObject *args[] = {(PyObject *)pattern, template};
1191
0
    PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);
1192
1193
0
    if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
1194
        /* If the replacement string is unhashable (e.g. bytearray),
1195
         * convert it to the basic type (str or bytes) and repeat. */
1196
0
        if (PyUnicode_Check(template) && !PyUnicode_CheckExact(template)) {
1197
0
            PyErr_Clear();
1198
0
            template = _PyUnicode_Copy(template);
1199
0
        }
1200
0
        else if (PyObject_CheckBuffer(template) && !PyBytes_CheckExact(template)) {
1201
0
            PyErr_Clear();
1202
0
            template = PyBytes_FromObject(template);
1203
0
        }
1204
0
        else {
1205
0
            return NULL;
1206
0
        }
1207
0
        if (template == NULL) {
1208
0
            return NULL;
1209
0
        }
1210
0
        args[1] = template;
1211
0
        result = PyObject_Vectorcall(func, args, 2, NULL);
1212
0
        Py_DECREF(template);
1213
0
    }
1214
1215
0
    if (result != NULL && Py_TYPE(result) != module_state->Template_Type) {
1216
0
        PyErr_Format(PyExc_RuntimeError,
1217
0
                    "the result of compiling a replacement string is %.200s",
1218
0
                    Py_TYPE(result)->tp_name);
1219
0
        Py_DECREF(result);
1220
0
        return NULL;
1221
0
    }
1222
0
    return result;
1223
0
}
1224
1225
static PyObject *expand_template(TemplateObject *, MatchObject *); /* Forward */
1226
1227
static PyObject*
1228
pattern_subx(_sremodulestate* module_state,
1229
             PatternObject* self,
1230
             PyObject* ptemplate,
1231
             PyObject* string,
1232
             Py_ssize_t count,
1233
             Py_ssize_t subn)
1234
0
{
1235
0
    SRE_STATE state;
1236
0
    PyObject* list;
1237
0
    PyObject* joiner;
1238
0
    PyObject* item;
1239
0
    PyObject* filter;
1240
0
    PyObject* match;
1241
0
    const void* ptr;
1242
0
    Py_ssize_t status;
1243
0
    Py_ssize_t n;
1244
0
    Py_ssize_t i, b, e;
1245
0
    int isbytes, charsize;
1246
0
    enum {LITERAL, TEMPLATE, CALLABLE} filter_type;
1247
0
    Py_buffer view;
1248
1249
0
    if (PyCallable_Check(ptemplate)) {
1250
        /* sub/subn takes either a function or a template */
1251
0
        filter = Py_NewRef(ptemplate);
1252
0
        filter_type = CALLABLE;
1253
0
    } else {
1254
        /* if not callable, check if it's a literal string */
1255
0
        int literal;
1256
0
        view.buf = NULL;
1257
0
        ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1258
0
        if (ptr) {
1259
0
            if (charsize == 1)
1260
0
                literal = memchr(ptr, '\\', n) == NULL;
1261
0
            else
1262
0
                literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1263
0
        } else {
1264
0
            PyErr_Clear();
1265
0
            literal = 0;
1266
0
        }
1267
0
        if (view.buf)
1268
0
            PyBuffer_Release(&view);
1269
0
        if (literal) {
1270
0
            filter = Py_NewRef(ptemplate);
1271
0
            filter_type = LITERAL;
1272
0
        } else {
1273
            /* not a literal; hand it over to the template compiler */
1274
0
            filter = compile_template(module_state, self, ptemplate);
1275
0
            if (!filter)
1276
0
                return NULL;
1277
1278
0
            assert(Py_TYPE(filter) == module_state->Template_Type);
1279
0
            if (Py_SIZE(filter) == 0) {
1280
0
                Py_SETREF(filter,
1281
0
                          Py_NewRef(((TemplateObject *)filter)->literal));
1282
0
                filter_type = LITERAL;
1283
0
            }
1284
0
            else {
1285
0
                filter_type = TEMPLATE;
1286
0
            }
1287
0
        }
1288
0
    }
1289
1290
0
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1291
0
        Py_DECREF(filter);
1292
0
        return NULL;
1293
0
    }
1294
1295
0
    list = PyList_New(0);
1296
0
    if (!list) {
1297
0
        Py_DECREF(filter);
1298
0
        state_fini(&state);
1299
0
        return NULL;
1300
0
    }
1301
1302
0
    n = i = 0;
1303
1304
0
    while (!count || n < count) {
1305
1306
0
        state_reset(&state);
1307
1308
0
        state.ptr = state.start;
1309
1310
0
        status = sre_search(&state, PatternObject_GetCode(self));
1311
0
        if (PyErr_Occurred())
1312
0
            goto error;
1313
1314
0
        if (status <= 0) {
1315
0
            if (status == 0)
1316
0
                break;
1317
0
            pattern_error(status);
1318
0
            goto error;
1319
0
        }
1320
1321
0
        b = STATE_OFFSET(&state, state.start);
1322
0
        e = STATE_OFFSET(&state, state.ptr);
1323
1324
0
        if (i < b) {
1325
            /* get segment before this match */
1326
0
            item = getslice(state.isbytes, state.beginning,
1327
0
                string, i, b);
1328
0
            if (!item)
1329
0
                goto error;
1330
0
            status = PyList_Append(list, item);
1331
0
            Py_DECREF(item);
1332
0
            if (status < 0)
1333
0
                goto error;
1334
1335
0
        }
1336
1337
0
        if (filter_type != LITERAL) {
1338
            /* pass match object through filter */
1339
0
            match = pattern_new_match(module_state, self, &state, 1);
1340
0
            if (!match)
1341
0
                goto error;
1342
0
            if (filter_type == TEMPLATE) {
1343
0
                item = expand_template((TemplateObject *)filter,
1344
0
                                       (MatchObject *)match);
1345
0
            }
1346
0
            else {
1347
0
                assert(filter_type == CALLABLE);
1348
0
                item = PyObject_CallOneArg(filter, match);
1349
0
            }
1350
0
            Py_DECREF(match);
1351
0
            if (!item)
1352
0
                goto error;
1353
0
        } else {
1354
            /* filter is literal string */
1355
0
            item = Py_NewRef(filter);
1356
0
        }
1357
1358
        /* add to list */
1359
0
        if (item != Py_None) {
1360
0
            status = PyList_Append(list, item);
1361
0
            Py_DECREF(item);
1362
0
            if (status < 0)
1363
0
                goto error;
1364
0
        }
1365
1366
0
        i = e;
1367
0
        n = n + 1;
1368
0
        state.must_advance = (state.ptr == state.start);
1369
0
        state.start = state.ptr;
1370
0
    }
1371
1372
    /* get segment following last match */
1373
0
    if (i < state.endpos) {
1374
0
        item = getslice(state.isbytes, state.beginning,
1375
0
                        string, i, state.endpos);
1376
0
        if (!item)
1377
0
            goto error;
1378
0
        status = PyList_Append(list, item);
1379
0
        Py_DECREF(item);
1380
0
        if (status < 0)
1381
0
            goto error;
1382
0
    }
1383
1384
0
    state_fini(&state);
1385
1386
0
    Py_DECREF(filter);
1387
1388
    /* convert list to single string (also removes list) */
1389
0
    joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1390
0
    if (!joiner) {
1391
0
        Py_DECREF(list);
1392
0
        return NULL;
1393
0
    }
1394
0
    if (PyList_GET_SIZE(list) == 0) {
1395
0
        Py_DECREF(list);
1396
0
        item = joiner;
1397
0
    }
1398
0
    else {
1399
0
        if (state.isbytes)
1400
0
            item = PyBytes_Join(joiner, list);
1401
0
        else
1402
0
            item = PyUnicode_Join(joiner, list);
1403
0
        Py_DECREF(joiner);
1404
0
        Py_DECREF(list);
1405
0
        if (!item)
1406
0
            return NULL;
1407
0
    }
1408
1409
0
    if (subn)
1410
0
        return Py_BuildValue("Nn", item, n);
1411
1412
0
    return item;
1413
1414
0
error:
1415
0
    Py_DECREF(list);
1416
0
    state_fini(&state);
1417
0
    Py_DECREF(filter);
1418
0
    return NULL;
1419
1420
0
}
1421
1422
/*[clinic input]
1423
@permit_long_summary
1424
_sre.SRE_Pattern.sub
1425
1426
    cls: defining_class
1427
    /
1428
    repl: object
1429
    string: object
1430
    count: Py_ssize_t = 0
1431
1432
Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1433
[clinic start generated code]*/
1434
1435
static PyObject *
1436
_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
1437
                          PyObject *repl, PyObject *string, Py_ssize_t count)
1438
/*[clinic end generated code: output=4be141ab04bca60d input=eba511fd1c4908b7]*/
1439
0
{
1440
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1441
1442
0
    return pattern_subx(module_state, self, repl, string, count, 0);
1443
0
}
1444
1445
/*[clinic input]
1446
@permit_long_summary
1447
_sre.SRE_Pattern.subn
1448
1449
    cls: defining_class
1450
    /
1451
    repl: object
1452
    string: object
1453
    count: Py_ssize_t = 0
1454
1455
Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1456
[clinic start generated code]*/
1457
1458
static PyObject *
1459
_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
1460
                           PyObject *repl, PyObject *string,
1461
                           Py_ssize_t count)
1462
/*[clinic end generated code: output=da02fd85258b1e1f input=6a5bb5b61717abf0]*/
1463
0
{
1464
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1465
1466
0
    return pattern_subx(module_state, self, repl, string, count, 1);
1467
0
}
1468
1469
/*[clinic input]
1470
_sre.SRE_Pattern.__copy__
1471
1472
[clinic start generated code]*/
1473
1474
static PyObject *
1475
_sre_SRE_Pattern___copy___impl(PatternObject *self)
1476
/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1477
0
{
1478
0
    return Py_NewRef(self);
1479
0
}
1480
1481
/*[clinic input]
1482
_sre.SRE_Pattern.__deepcopy__
1483
1484
    memo: object
1485
    /
1486
1487
[clinic start generated code]*/
1488
1489
static PyObject *
1490
_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1491
/*[clinic end generated code: output=75efe69bd12c5d7d input=a465b1602f997bed]*/
1492
0
{
1493
0
    return Py_NewRef(self);
1494
0
}
1495
1496
#ifdef Py_DEBUG
1497
/*[clinic input]
1498
_sre.SRE_Pattern._fail_after
1499
1500
    count: int
1501
    exception: object
1502
    /
1503
1504
For debugging.
1505
[clinic start generated code]*/
1506
1507
static PyObject *
1508
_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
1509
                                  PyObject *exception)
1510
/*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/
1511
{
1512
    self->fail_after_count = count;
1513
    Py_INCREF(exception);
1514
    Py_XSETREF(self->fail_after_exc, exception);
1515
    Py_RETURN_NONE;
1516
}
1517
#endif /* Py_DEBUG */
1518
1519
/* repr() of a pattern: "re.compile(<pattern>)" or, when any flag remains
 * to be shown, "re.compile(<pattern>, <FLAG>|<FLAG>|...)".  Flag bits with
 * no entry in the name table are shown together as one hexadecimal number.
 */
static PyObject *
pattern_repr(PyObject *self)
{
    static const struct {
        const char *name;
        int value;
    } flag_names[] = {
        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
        {"re.LOCALE", SRE_FLAG_LOCALE},
        {"re.MULTILINE", SRE_FLAG_MULTILINE},
        {"re.DOTALL", SRE_FLAG_DOTALL},
        {"re.UNICODE", SRE_FLAG_UNICODE},
        {"re.VERBOSE", SRE_FLAG_VERBOSE},
        {"re.DEBUG", SRE_FLAG_DEBUG},
        {"re.ASCII", SRE_FLAG_ASCII},
    };

    PatternObject *obj = _PatternObject_CAST(self);
    PyObject *result = NULL;
    int remaining = obj->flags;

    /* Omit re.UNICODE for valid string patterns. */
    if (obj->isbytes == 0) {
        const int kind_mask =
            SRE_FLAG_LOCALE | SRE_FLAG_UNICODE | SRE_FLAG_ASCII;
        if ((remaining & kind_mask) == SRE_FLAG_UNICODE) {
            remaining &= ~SRE_FLAG_UNICODE;
        }
    }

    PyObject *flag_items = PyList_New(0);
    if (flag_items == NULL) {
        return NULL;
    }

    /* Named flags first; each one found is cleared from `remaining`. */
    for (size_t k = 0; k < Py_ARRAY_LENGTH(flag_names); k++) {
        if (!(remaining & flag_names[k].value)) {
            continue;
        }
        PyObject *name = PyUnicode_FromString(flag_names[k].name);
        if (name == NULL) {
            goto done;
        }
        int rc = PyList_Append(flag_items, name);
        Py_DECREF(name);
        if (rc < 0) {
            goto done;
        }
        remaining &= ~flag_names[k].value;
    }

    /* Whatever bits are left have no name: show them as one hex number. */
    if (remaining) {
        PyObject *hex = PyUnicode_FromFormat("0x%x", remaining);
        if (hex == NULL) {
            goto done;
        }
        int rc = PyList_Append(flag_items, hex);
        Py_DECREF(hex);
        if (rc < 0) {
            goto done;
        }
    }

    if (PyList_Size(flag_items) > 0) {
        PyObject *sep = PyUnicode_FromString("|");
        if (sep == NULL) {
            goto done;
        }
        PyObject *joined = PyUnicode_Join(sep, flag_items);
        Py_DECREF(sep);
        if (joined == NULL) {
            goto done;
        }
        result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
                                      obj->pattern, joined);
        Py_DECREF(joined);
    }
    else {
        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
    }

done:
    Py_DECREF(flag_items);
    return result;
}
1599
1600
PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1601
1602
/* PatternObject's 'groupindex' method. */
1603
static PyObject *
pattern_groupindex(PyObject *op, void *Py_UNUSED(ignored))
{
    /* Return a mapping proxy over the stored groupindex dict, or a fresh
       empty dict when the pattern stores none. */
    PyObject *mapping = _PatternObject_CAST(op)->groupindex;

    if (mapping != NULL) {
        return PyDictProxy_New(mapping);
    }
    return PyDict_New();
}
1611
1612
static int _validate(PatternObject *self); /* Forward */
1613
1614
/*[clinic input]
1615
_sre.compile
1616
1617
    pattern: object
1618
    flags: int
1619
    code: object(subclass_of='&PyList_Type')
1620
    groups: Py_ssize_t
1621
    groupindex: object(subclass_of='&PyDict_Type')
1622
    indexgroup: object(subclass_of='&PyTuple_Type')
1623
1624
[clinic start generated code]*/
1625
1626
static PyObject *
1627
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1628
                  PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1629
                  PyObject *indexgroup)
1630
/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1631
61
{
1632
    /* "compile" pattern descriptor to pattern object */
1633
1634
61
    _sremodulestate *module_state = get_sre_module_state(module);
1635
61
    PatternObject* self;
1636
61
    Py_ssize_t i, n;
1637
1638
61
    n = PyList_GET_SIZE(code);
1639
    /* coverity[ampersand_in_size] */
1640
61
    self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
1641
61
    if (!self)
1642
0
        return NULL;
1643
61
    self->weakreflist = NULL;
1644
61
    self->pattern = NULL;
1645
61
    self->groupindex = NULL;
1646
61
    self->indexgroup = NULL;
1647
#ifdef Py_DEBUG
1648
    self->fail_after_count = -1;
1649
    self->fail_after_exc = NULL;
1650
#endif
1651
1652
61
    self->codesize = n;
1653
1654
2.80k
    for (i = 0; i < n; i++) {
1655
2.74k
        PyObject *o = PyList_GET_ITEM(code, i);
1656
0
        unsigned long value = PyLong_AsUnsignedLong(o);
1657
2.74k
        if (value == (unsigned long)-1 && PyErr_Occurred()) {
1658
0
            break;
1659
0
        }
1660
2.74k
        self->code[i] = (SRE_CODE) value;
1661
2.74k
        if ((unsigned long) self->code[i] != value) {
1662
0
            PyErr_SetString(PyExc_OverflowError,
1663
0
                            "regular expression code size limit exceeded");
1664
0
            break;
1665
0
        }
1666
2.74k
    }
1667
61
    PyObject_GC_Track(self);
1668
1669
61
    if (PyErr_Occurred()) {
1670
0
        Py_DECREF(self);
1671
0
        return NULL;
1672
0
    }
1673
1674
61
    if (pattern == Py_None) {
1675
0
        self->isbytes = -1;
1676
0
    }
1677
61
    else {
1678
61
        Py_ssize_t p_length;
1679
61
        int charsize;
1680
61
        Py_buffer view;
1681
61
        view.buf = NULL;
1682
61
        if (!getstring(pattern, &p_length, &self->isbytes,
1683
61
                       &charsize, &view)) {
1684
0
            Py_DECREF(self);
1685
0
            return NULL;
1686
0
        }
1687
61
        if (view.buf)
1688
29
            PyBuffer_Release(&view);
1689
61
    }
1690
1691
61
    self->pattern = Py_NewRef(pattern);
1692
1693
61
    self->flags = flags;
1694
1695
61
    self->groups = groups;
1696
1697
61
    if (PyDict_GET_SIZE(groupindex) > 0) {
1698
6
        self->groupindex = Py_NewRef(groupindex);
1699
6
        if (PyTuple_GET_SIZE(indexgroup) > 0) {
1700
6
            self->indexgroup = Py_NewRef(indexgroup);
1701
6
        }
1702
6
    }
1703
1704
61
    if (!_validate(self)) {
1705
0
        Py_DECREF(self);
1706
0
        return NULL;
1707
0
    }
1708
1709
61
    return (PyObject*) self;
1710
61
}
1711
1712
/*[clinic input]
1713
_sre.template
1714
1715
    pattern: object
1716
    template: object(subclass_of="&PyList_Type")
1717
        A list containing interleaved literal strings (str or bytes) and group
1718
        indices (int), as returned by re._parser.parse_template():
1719
            [literal1, group1, ..., literalN, groupN]
1720
    /
1721
1722
[clinic start generated code]*/
1723
1724
static PyObject *
_sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
/*[clinic end generated code: output=d51290e596ebca86 input=af55380b27f02942]*/
{
    /* `template` is a list of odd length that alternates literals (str or
     * bytes) and group indices (int), starting and ending with a literal:
     * [literal1, group1, literal2, ..., literalN].
     * The first literal is stored in self->literal; every following
     * (group, literal) pair becomes one entry of self->items.
     */
    _sremodulestate *module_state = get_sre_module_state(module);
    const Py_ssize_t length = PyList_GET_SIZE(template);

    if (length < 1 || length % 2 == 0) {
        PyErr_SetString(PyExc_TypeError, "invalid template");
        return NULL;
    }

    const Py_ssize_t npairs = length / 2;
    TemplateObject *self = PyObject_GC_NewVar(TemplateObject,
                                              module_state->Template_Type,
                                              npairs);
    if (self == NULL) {
        return NULL;
    }
    self->chunks = 1 + 2*npairs;
    self->literal = Py_NewRef(PyList_GET_ITEM(template, 0));

    for (Py_ssize_t k = 0; k < npairs; k++) {
        Py_ssize_t group = PyLong_AsSsize_t(PyList_GET_ITEM(template, 2*k+1));
        if (group == -1 && PyErr_Occurred()) {
            /* shrink to the k entries filled in so far before releasing */
            Py_SET_SIZE(self, k);
            Py_DECREF(self);
            return NULL;
        }
        if (group < 0) {
            Py_SET_SIZE(self, k);
            PyErr_SetString(PyExc_TypeError, "invalid template");
            Py_DECREF(self);
            return NULL;
        }
        self->items[k].index = group;

        PyObject *text = PyList_GET_ITEM(template, 2*k+2);
        // Skip empty literals.
        int is_empty =
            (PyUnicode_Check(text) && PyUnicode_GET_LENGTH(text) == 0) ||
            (PyBytes_Check(text) && PyBytes_GET_SIZE(text) == 0);
        if (is_empty) {
            self->items[k].literal = NULL;
            self->chunks--;
        }
        else {
            self->items[k].literal = Py_NewRef(text);
        }
    }
    PyObject_GC_Track(self);
    return (PyObject*) self;
}
1775
1776
/* -------------------------------------------------------------------- */
1777
/* Code validation */
1778
1779
/* To learn more about this code, have a look at the _compile() function in
1780
   Lib/sre_compile.py.  The validation functions below check the code array
1781
   for conformance with the code patterns generated there.
1782
1783
   The nice thing about the generated code is that it is position-independent:
1784
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
1785
   the target of a later jump is always earlier than the target of an earlier
1786
   jump.  IOW, this is okay:
1787
1788
   J---------J-------T--------T
1789
    \         \_____/        /
1790
     \______________________/
1791
1792
   but this is not:
1793
1794
   J---------J-------T--------T
1795
    \_________\_____/        /
1796
               \____________/
1797
1798
   It also helps that SRE_CODE is always an unsigned type.
1799
*/
1800
1801
/* Defining this one enables tracing of the validator */
1802
#undef VVERBOSE
1803
1804
/* Trace macro for the validator */
1805
#if defined(VVERBOSE)
1806
#define VTRACE(v) printf v
1807
#else
1808
4.89k
#define VTRACE(v) do {} while(0)  /* do nothing */
1809
#endif
1810
1811
/* Report failure */
1812
0
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)
1813
1814
/* Extract opcode, argument, or skip count from code array */
1815
#define GET_OP                                          \
1816
924
    do {                                                \
1817
924
        VTRACE(("%p: ", code));                         \
1818
924
        if (code >= end) FAIL;                          \
1819
924
        op = *code++;                                   \
1820
924
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
1821
924
    } while (0)
1822
#define GET_ARG                                         \
1823
896
    do {                                                \
1824
896
        VTRACE(("%p= ", code));                         \
1825
896
        if (code >= end) FAIL;                          \
1826
896
        arg = *code++;                                  \
1827
896
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
1828
896
    } while (0)
1829
/* Read a skip count from *code into `skip` and advance `code` past it.
   FAIL is invoked when `code` has reached `end`, or when the skip count
   reduced by `adj` is larger than the number of code words left between
   `code` and `end`.  Uses the caller's `code`, `end` and `skip` variables.
   `adj` is parenthesised so that an expression argument is subtracted as
   a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
1840
370
#define GET_SKIP GET_SKIP_ADJ(0)
1841
1842
static int
1843
_validate_charset(SRE_CODE *code, SRE_CODE *end)
1844
102
{
1845
    /* Some variables are manipulated by the macros above */
1846
102
    SRE_CODE op;
1847
102
    SRE_CODE arg;
1848
102
    SRE_CODE offset;
1849
102
    int i;
1850
1851
254
    while (code < end) {
1852
152
        GET_OP;
1853
152
        switch (op) {
1854
1855
4
        case SRE_OP_NEGATE:
1856
4
            break;
1857
1858
77
        case SRE_OP_LITERAL:
1859
77
            GET_ARG;
1860
77
            break;
1861
1862
77
        case SRE_OP_RANGE:
1863
14
        case SRE_OP_RANGE_UNI_IGNORE:
1864
14
            GET_ARG;
1865
14
            GET_ARG;
1866
14
            break;
1867
1868
21
        case SRE_OP_CHARSET:
1869
21
            offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1870
21
            if (offset > (uintptr_t)(end - code))
1871
0
                FAIL;
1872
21
            code += offset;
1873
21
            break;
1874
1875
2
        case SRE_OP_BIGCHARSET:
1876
2
            GET_ARG; /* Number of blocks */
1877
2
            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1878
2
            if (offset > (uintptr_t)(end - code))
1879
0
                FAIL;
1880
            /* Make sure that each byte points to a valid block */
1881
514
            for (i = 0; i < 256; i++) {
1882
512
                if (((unsigned char *)code)[i] >= arg)
1883
0
                    FAIL;
1884
512
            }
1885
2
            code += offset;
1886
2
            offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1887
2
            if (offset > (uintptr_t)(end - code))
1888
0
                FAIL;
1889
2
            code += offset;
1890
2
            break;
1891
1892
34
        case SRE_OP_CATEGORY:
1893
34
            GET_ARG;
1894
34
            switch (arg) {
1895
3
            case SRE_CATEGORY_DIGIT:
1896
3
            case SRE_CATEGORY_NOT_DIGIT:
1897
3
            case SRE_CATEGORY_SPACE:
1898
3
            case SRE_CATEGORY_NOT_SPACE:
1899
5
            case SRE_CATEGORY_WORD:
1900
5
            case SRE_CATEGORY_NOT_WORD:
1901
5
            case SRE_CATEGORY_LINEBREAK:
1902
5
            case SRE_CATEGORY_NOT_LINEBREAK:
1903
5
            case SRE_CATEGORY_LOC_WORD:
1904
5
            case SRE_CATEGORY_LOC_NOT_WORD:
1905
32
            case SRE_CATEGORY_UNI_DIGIT:
1906
32
            case SRE_CATEGORY_UNI_NOT_DIGIT:
1907
34
            case SRE_CATEGORY_UNI_SPACE:
1908
34
            case SRE_CATEGORY_UNI_NOT_SPACE:
1909
34
            case SRE_CATEGORY_UNI_WORD:
1910
34
            case SRE_CATEGORY_UNI_NOT_WORD:
1911
34
            case SRE_CATEGORY_UNI_LINEBREAK:
1912
34
            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1913
34
                break;
1914
0
            default:
1915
0
                FAIL;
1916
34
            }
1917
34
            break;
1918
1919
34
        default:
1920
0
            FAIL;
1921
1922
152
        }
1923
152
    }
1924
1925
102
    return 0;
1926
102
}
1927
1928
/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
1929
static int
1930
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1931
225
{
1932
    /* Some variables are manipulated by the macros above */
1933
225
    SRE_CODE op;
1934
225
    SRE_CODE arg;
1935
225
    SRE_CODE skip;
1936
1937
225
    VTRACE(("code=%p, end=%p\n", code, end));
1938
1939
225
    if (code > end)
1940
0
        FAIL;
1941
1942
833
    while (code < end) {
1943
608
        GET_OP;
1944
608
        switch (op) {
1945
1946
152
        case SRE_OP_MARK:
1947
            /* We don't check whether marks are properly nested; the
1948
               sre_match() code is robust even if they don't, and the worst
1949
               you can get is nonsensical match results. */
1950
152
            GET_ARG;
1951
152
            if (arg >= 2 * (size_t)groups) {
1952
0
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1953
0
                FAIL;
1954
0
            }
1955
152
            break;
1956
1957
152
        case SRE_OP_LITERAL:
1958
97
        case SRE_OP_NOT_LITERAL:
1959
97
        case SRE_OP_LITERAL_IGNORE:
1960
97
        case SRE_OP_NOT_LITERAL_IGNORE:
1961
124
        case SRE_OP_LITERAL_UNI_IGNORE:
1962
124
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1963
124
        case SRE_OP_LITERAL_LOC_IGNORE:
1964
124
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1965
124
            GET_ARG;
1966
            /* The arg is just a character, nothing to check */
1967
124
            break;
1968
1969
124
        case SRE_OP_SUCCESS:
1970
0
        case SRE_OP_FAILURE:
1971
            /* Nothing to check; these normally end the matching process */
1972
0
            break;
1973
1974
27
        case SRE_OP_AT:
1975
27
            GET_ARG;
1976
27
            switch (arg) {
1977
7
            case SRE_AT_BEGINNING:
1978
10
            case SRE_AT_BEGINNING_STRING:
1979
10
            case SRE_AT_BEGINNING_LINE:
1980
20
            case SRE_AT_END:
1981
20
            case SRE_AT_END_LINE:
1982
26
            case SRE_AT_END_STRING:
1983
27
            case SRE_AT_BOUNDARY:
1984
27
            case SRE_AT_NON_BOUNDARY:
1985
27
            case SRE_AT_LOC_BOUNDARY:
1986
27
            case SRE_AT_LOC_NON_BOUNDARY:
1987
27
            case SRE_AT_UNI_BOUNDARY:
1988
27
            case SRE_AT_UNI_NON_BOUNDARY:
1989
27
                break;
1990
0
            default:
1991
0
                FAIL;
1992
27
            }
1993
27
            break;
1994
1995
27
        case SRE_OP_ANY:
1996
7
        case SRE_OP_ANY_ALL:
1997
            /* These have no operands */
1998
7
            break;
1999
2000
84
        case SRE_OP_IN:
2001
84
        case SRE_OP_IN_IGNORE:
2002
96
        case SRE_OP_IN_UNI_IGNORE:
2003
96
        case SRE_OP_IN_LOC_IGNORE:
2004
96
            GET_SKIP;
2005
            /* Stop 1 before the end; we check the FAILURE below */
2006
96
            if (_validate_charset(code, code+skip-2))
2007
0
                FAIL;
2008
96
            if (code[skip-2] != SRE_OP_FAILURE)
2009
0
                FAIL;
2010
96
            code += skip-1;
2011
96
            break;
2012
2013
61
        case SRE_OP_INFO:
2014
61
            {
2015
                /* A minimal info field is
2016
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
2017
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
2018
                   more follows. */
2019
61
                SRE_CODE flags, i;
2020
61
                SRE_CODE *newcode;
2021
61
                GET_SKIP;
2022
61
                newcode = code+skip-1;
2023
61
                GET_ARG; flags = arg;
2024
61
                GET_ARG;
2025
61
                GET_ARG;
2026
                /* Check that only valid flags are present */
2027
61
                if ((flags & ~(SRE_INFO_PREFIX |
2028
61
                               SRE_INFO_LITERAL |
2029
61
                               SRE_INFO_CHARSET)) != 0)
2030
0
                    FAIL;
2031
                /* PREFIX and CHARSET are mutually exclusive */
2032
61
                if ((flags & SRE_INFO_PREFIX) &&
2033
10
                    (flags & SRE_INFO_CHARSET))
2034
0
                    FAIL;
2035
                /* LITERAL implies PREFIX */
2036
61
                if ((flags & SRE_INFO_LITERAL) &&
2037
6
                    !(flags & SRE_INFO_PREFIX))
2038
0
                    FAIL;
2039
                /* Validate the prefix */
2040
61
                if (flags & SRE_INFO_PREFIX) {
2041
10
                    SRE_CODE prefix_len;
2042
10
                    GET_ARG; prefix_len = arg;
2043
10
                    GET_ARG;
2044
                    /* Here comes the prefix string */
2045
10
                    if (prefix_len > (uintptr_t)(newcode - code))
2046
0
                        FAIL;
2047
10
                    code += prefix_len;
2048
                    /* And here comes the overlap table */
2049
10
                    if (prefix_len > (uintptr_t)(newcode - code))
2050
0
                        FAIL;
2051
                    /* Each overlap value should be < prefix_len */
2052
27
                    for (i = 0; i < prefix_len; i++) {
2053
17
                        if (code[i] >= prefix_len)
2054
0
                            FAIL;
2055
17
                    }
2056
10
                    code += prefix_len;
2057
10
                }
2058
                /* Validate the charset */
2059
61
                if (flags & SRE_INFO_CHARSET) {
2060
6
                    if (_validate_charset(code, newcode-1))
2061
0
                        FAIL;
2062
6
                    if (newcode[-1] != SRE_OP_FAILURE)
2063
0
                        FAIL;
2064
6
                    code = newcode;
2065
6
                }
2066
55
                else if (code != newcode) {
2067
0
                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
2068
0
                    FAIL;
2069
0
                }
2070
61
            }
2071
61
            break;
2072
2073
61
        case SRE_OP_BRANCH:
2074
13
            {
2075
13
                SRE_CODE *target = NULL;
2076
49
                for (;;) {
2077
49
                    GET_SKIP;
2078
49
                    if (skip == 0)
2079
13
                        break;
2080
                    /* Stop 2 before the end; we check the JUMP below */
2081
36
                    if (_validate_inner(code, code+skip-3, groups))
2082
0
                        FAIL;
2083
36
                    code += skip-3;
2084
                    /* Check that it ends with a JUMP, and that each JUMP
2085
                       has the same target */
2086
36
                    GET_OP;
2087
36
                    if (op != SRE_OP_JUMP)
2088
0
                        FAIL;
2089
36
                    GET_SKIP;
2090
36
                    if (target == NULL)
2091
13
                        target = code+skip-1;
2092
23
                    else if (code+skip-1 != target)
2093
0
                        FAIL;
2094
36
                }
2095
13
                if (code != target)
2096
0
                    FAIL;
2097
13
            }
2098
13
            break;
2099
2100
57
        case SRE_OP_REPEAT_ONE:
2101
64
        case SRE_OP_MIN_REPEAT_ONE:
2102
64
        case SRE_OP_POSSESSIVE_REPEAT_ONE:
2103
64
            {
2104
64
                SRE_CODE min, max;
2105
64
                GET_SKIP;
2106
64
                GET_ARG; min = arg;
2107
64
                GET_ARG; max = arg;
2108
64
                if (min > max)
2109
0
                    FAIL;
2110
64
                if (max > SRE_MAXREPEAT)
2111
0
                    FAIL;
2112
64
                if (_validate_inner(code, code+skip-4, groups))
2113
0
                    FAIL;
2114
64
                code += skip-4;
2115
64
                GET_OP;
2116
64
                if (op != SRE_OP_SUCCESS)
2117
0
                    FAIL;
2118
64
            }
2119
64
            break;
2120
2121
64
        case SRE_OP_REPEAT:
2122
57
        case SRE_OP_POSSESSIVE_REPEAT:
2123
57
            {
2124
57
                SRE_CODE op1 = op, min, max;
2125
57
                GET_SKIP;
2126
57
                GET_ARG; min = arg;
2127
57
                GET_ARG; max = arg;
2128
57
                if (min > max)
2129
0
                    FAIL;
2130
57
                if (max > SRE_MAXREPEAT)
2131
0
                    FAIL;
2132
57
                if (_validate_inner(code, code+skip-3, groups))
2133
0
                    FAIL;
2134
57
                code += skip-3;
2135
57
                GET_OP;
2136
57
                if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
2137
0
                    if (op != SRE_OP_SUCCESS)
2138
0
                        FAIL;
2139
0
                }
2140
57
                else {
2141
57
                    if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
2142
0
                        FAIL;
2143
57
                }
2144
57
            }
2145
57
            break;
2146
2147
57
        case SRE_OP_ATOMIC_GROUP:
2148
0
            {
2149
0
                GET_SKIP;
2150
0
                if (_validate_inner(code, code+skip-2, groups))
2151
0
                    FAIL;
2152
0
                code += skip-2;
2153
0
                GET_OP;
2154
0
                if (op != SRE_OP_SUCCESS)
2155
0
                    FAIL;
2156
0
            }
2157
0
            break;
2158
2159
0
        case SRE_OP_GROUPREF:
2160
0
        case SRE_OP_GROUPREF_IGNORE:
2161
0
        case SRE_OP_GROUPREF_UNI_IGNORE:
2162
0
        case SRE_OP_GROUPREF_LOC_IGNORE:
2163
0
            GET_ARG;
2164
0
            if (arg >= (size_t)groups)
2165
0
                FAIL;
2166
0
            break;
2167
2168
0
        case SRE_OP_GROUPREF_EXISTS:
2169
            /* The regex syntax for this is: '(?(group)then|else)', where
2170
               'group' is either an integer group number or a group name,
2171
               'then' and 'else' are sub-regexes, and 'else' is optional. */
2172
0
            GET_ARG;
2173
0
            if (arg >= (size_t)groups)
2174
0
                FAIL;
2175
0
            GET_SKIP_ADJ(1);
2176
0
            code--; /* The skip is relative to the first arg! */
2177
            /* There are two possibilities here: if there is both a 'then'
2178
               part and an 'else' part, the generated code looks like:
2179
2180
               GROUPREF_EXISTS
2181
               <group>
2182
               <skipyes>
2183
               ...then part...
2184
               JUMP
2185
               <skipno>
2186
               (<skipyes> jumps here)
2187
               ...else part...
2188
               (<skipno> jumps here)
2189
2190
               If there is only a 'then' part, it looks like:
2191
2192
               GROUPREF_EXISTS
2193
               <group>
2194
               <skip>
2195
               ...then part...
2196
               (<skip> jumps here)
2197
2198
               There is no direct way to decide which it is, and we don't want
2199
               to allow arbitrary jumps anywhere in the code; so we just look
2200
               for a JUMP opcode preceding our skip target.
2201
            */
2202
0
            VTRACE(("then part:\n"));
2203
0
            int rc = _validate_inner(code+1, code+skip-1, groups);
2204
0
            if (rc == 1) {
2205
0
                VTRACE(("else part:\n"));
2206
0
                code += skip-2; /* Position after JUMP, at <skipno> */
2207
0
                GET_SKIP;
2208
0
                rc = _validate_inner(code, code+skip-1, groups);
2209
0
            }
2210
0
            if (rc)
2211
0
                FAIL;
2212
0
            code += skip-1;
2213
0
            break;
2214
2215
7
        case SRE_OP_ASSERT:
2216
7
        case SRE_OP_ASSERT_NOT:
2217
7
            GET_SKIP;
2218
7
            GET_ARG; /* 0 for lookahead, width for lookbehind */
2219
7
            code--; /* Back up over arg to simplify math below */
2220
            /* Stop 1 before the end; we check the SUCCESS below */
2221
7
            if (_validate_inner(code+1, code+skip-2, groups))
2222
0
                FAIL;
2223
7
            code += skip-2;
2224
7
            GET_OP;
2225
7
            if (op != SRE_OP_SUCCESS)
2226
0
                FAIL;
2227
7
            break;
2228
2229
7
        case SRE_OP_JUMP:
2230
0
            if (code + 1 != end)
2231
0
                FAIL;
2232
0
            VTRACE(("JUMP: %d\n", __LINE__));
2233
0
            return 1;
2234
2235
0
        default:
2236
0
            FAIL;
2237
2238
608
        }
2239
608
    }
2240
2241
225
    VTRACE(("okay\n"));
2242
225
    return 0;
2243
225
}
2244
2245
static int
2246
_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2247
61
{
2248
61
    if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
2249
61
        code >= end || end[-1] != SRE_OP_SUCCESS)
2250
0
        FAIL;
2251
61
    return _validate_inner(code, end-1, groups);
2252
61
}
2253
2254
static int
2255
_validate(PatternObject *self)
2256
61
{
2257
61
    if (_validate_outer(self->code, self->code+self->codesize, self->groups))
2258
0
    {
2259
0
        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
2260
0
        return 0;
2261
0
    }
2262
61
    else
2263
61
        VTRACE(("Success!\n"));
2264
61
    return 1;
2265
61
}
2266
2267
/* -------------------------------------------------------------------- */
2268
/* match methods */
2269
2270
static int
2271
match_traverse(PyObject *op, visitproc visit, void *arg)
2272
2
{
2273
2
    MatchObject *self = _MatchObject_CAST(op);
2274
2
    Py_VISIT(Py_TYPE(self));
2275
2
    Py_VISIT(self->string);
2276
2
    Py_VISIT(self->regs);
2277
2
    Py_VISIT(self->pattern);
2278
2
    return 0;
2279
2
}
2280
2281
static int
2282
match_clear(PyObject *op)
2283
10.4k
{
2284
10.4k
    MatchObject *self = _MatchObject_CAST(op);
2285
10.4k
    Py_CLEAR(self->string);
2286
10.4k
    Py_CLEAR(self->regs);
2287
10.4k
    Py_CLEAR(self->pattern);
2288
10.4k
    return 0;
2289
10.4k
}
2290
2291
static void
2292
match_dealloc(PyObject *self)
2293
10.4k
{
2294
10.4k
    PyTypeObject *tp = Py_TYPE(self);
2295
10.4k
    PyObject_GC_UnTrack(self);
2296
10.4k
    (void)match_clear(self);
2297
10.4k
    tp->tp_free(self);
2298
10.4k
    Py_DECREF(tp);
2299
10.4k
}
2300
2301
static PyObject*
2302
match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
2303
30
{
2304
30
    Py_ssize_t length;
2305
30
    int isbytes, charsize;
2306
30
    Py_buffer view;
2307
30
    PyObject *result;
2308
30
    const void* ptr;
2309
30
    Py_ssize_t i, j;
2310
2311
30
    assert(0 <= index && index < self->groups);
2312
30
    index *= 2;
2313
2314
30
    if (self->string == Py_None || self->mark[index] < 0) {
2315
        /* return default value if the string or group is undefined */
2316
24
        return Py_NewRef(def);
2317
24
    }
2318
2319
6
    ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
2320
6
    if (ptr == NULL)
2321
0
        return NULL;
2322
2323
6
    i = self->mark[index];
2324
6
    j = self->mark[index+1];
2325
6
    i = Py_MIN(i, length);
2326
6
    j = Py_MIN(j, length);
2327
6
    result = getslice(isbytes, ptr, self->string, i, j);
2328
6
    if (isbytes && view.buf != NULL)
2329
0
        PyBuffer_Release(&view);
2330
6
    return result;
2331
6
}
2332
2333
static Py_ssize_t
2334
match_getindex(MatchObject* self, PyObject* index)
2335
10.3k
{
2336
10.3k
    Py_ssize_t i;
2337
2338
10.3k
    if (index == NULL)
2339
        /* Default value */
2340
10.3k
        return 0;
2341
2342
30
    if (PyIndex_Check(index)) {
2343
0
        i = PyNumber_AsSsize_t(index, NULL);
2344
0
    }
2345
30
    else {
2346
30
        i = -1;
2347
2348
30
        if (self->pattern->groupindex) {
2349
30
            index = PyDict_GetItemWithError(self->pattern->groupindex, index);
2350
30
            if (index && PyLong_Check(index)) {
2351
30
                i = PyLong_AsSsize_t(index);
2352
30
            }
2353
30
        }
2354
30
    }
2355
30
    if (i < 0 || i >= self->groups) {
2356
        /* raise IndexError if we were given a bad group number */
2357
0
        if (!PyErr_Occurred()) {
2358
0
            PyErr_SetString(PyExc_IndexError, "no such group");
2359
0
        }
2360
0
        return -1;
2361
0
    }
2362
2363
    // Check that i*2 cannot overflow to make static analyzers happy
2364
30
    assert((size_t)i <= SRE_MAXGROUPS);
2365
30
    return i;
2366
30
}
2367
2368
static PyObject*
2369
match_getslice(MatchObject* self, PyObject* index, PyObject* def)
2370
30
{
2371
30
    Py_ssize_t i = match_getindex(self, index);
2372
2373
30
    if (i < 0) {
2374
0
        return NULL;
2375
0
    }
2376
2377
30
    return match_getslice_by_index(self, i, def);
2378
30
}
2379
2380
/*[clinic input]
2381
@permit_long_summary
2382
_sre.SRE_Match.expand
2383
2384
    template: object
2385
2386
Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2387
[clinic start generated code]*/
2388
2389
static PyObject *
2390
_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2391
/*[clinic end generated code: output=931b58ccc323c3a1 input=dc74d81265376ac3]*/
2392
0
{
2393
0
    _sremodulestate *module_state = get_sre_module_state_by_class(Py_TYPE(self));
2394
0
    PyObject *filter = compile_template(module_state, self->pattern, template);
2395
0
    if (filter == NULL) {
2396
0
        return NULL;
2397
0
    }
2398
0
    PyObject *result = expand_template((TemplateObject *)filter, self);
2399
0
    Py_DECREF(filter);
2400
0
    return result;
2401
0
}
2402
2403
/* Implementation of Match.group(): with no argument return group 0, with
   one argument return that group, with several arguments return a tuple
   holding one entry per argument.  Undefined groups yield None. */
static PyObject*
match_group(PyObject *op, PyObject* args)
{
    MatchObject *self = _MatchObject_CAST(op);
    Py_ssize_t nargs = PyTuple_GET_SIZE(args);

    if (nargs == 0) {
        return match_getslice(self, _PyLong_GetZero(), Py_None);
    }
    if (nargs == 1) {
        return match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
    }

    /* fetch multiple items */
    PyObject *items = PyTuple_New(nargs);
    if (items == NULL) {
        return NULL;
    }
    for (Py_ssize_t pos = 0; pos < nargs; pos++) {
        PyObject *slice = match_getslice(self, PyTuple_GET_ITEM(args, pos),
                                         Py_None);
        if (slice == NULL) {
            Py_DECREF(items);
            return NULL;
        }
        /* The tuple takes over the reference to 'slice' */
        PyTuple_SET_ITEM(items, pos, slice);
    }
    return items;
}
2438
2439
/* Subscript support: m[name] gives the same result as a single-argument
   group lookup with None as the default for undefined groups. */
static PyObject*
match_getitem(PyObject *op, PyObject* name)
{
    MatchObject *match = _MatchObject_CAST(op);

    return match_getslice(match, name, Py_None);
}
2445
2446
/*[clinic input]
2447
_sre.SRE_Match.groups
2448
2449
    default: object = None
2450
        Is used for groups that did not participate in the match.
2451
2452
Return a tuple containing all the subgroups of the match, from 1.
2453
[clinic start generated code]*/
2454
2455
static PyObject *
2456
_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2457
/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2458
0
{
2459
0
    PyObject* result;
2460
0
    Py_ssize_t index;
2461
2462
0
    result = PyTuple_New(self->groups-1);
2463
0
    if (!result)
2464
0
        return NULL;
2465
2466
0
    for (index = 1; index < self->groups; index++) {
2467
0
        PyObject* item;
2468
0
        item = match_getslice_by_index(self, index, default_value);
2469
0
        if (!item) {
2470
0
            Py_DECREF(result);
2471
0
            return NULL;
2472
0
        }
2473
0
        PyTuple_SET_ITEM(result, index-1, item);
2474
0
    }
2475
2476
0
    return result;
2477
0
}
2478
2479
/*[clinic input]
2480
@permit_long_summary
2481
_sre.SRE_Match.groupdict
2482
2483
    default: object = None
2484
        Is used for groups that did not participate in the match.
2485
2486
Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2487
[clinic start generated code]*/
2488
2489
static PyObject *
2490
_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2491
/*[clinic end generated code: output=29917c9073e41757 input=a8d3a1dc80336872]*/
2492
0
{
2493
0
    PyObject *result;
2494
0
    PyObject *key;
2495
0
    PyObject *value;
2496
0
    Py_ssize_t pos = 0;
2497
0
    Py_hash_t hash;
2498
2499
0
    result = PyDict_New();
2500
0
    if (!result || !self->pattern->groupindex)
2501
0
        return result;
2502
2503
0
    Py_BEGIN_CRITICAL_SECTION(self->pattern->groupindex);
2504
0
    while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2505
0
        int status;
2506
0
        Py_INCREF(key);
2507
0
        value = match_getslice(self, key, default_value);
2508
0
        if (!value) {
2509
0
            Py_DECREF(key);
2510
0
            Py_CLEAR(result);
2511
0
            goto exit;
2512
0
        }
2513
0
        status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2514
0
        Py_DECREF(value);
2515
0
        Py_DECREF(key);
2516
0
        if (status < 0) {
2517
0
            Py_CLEAR(result);
2518
0
            goto exit;
2519
0
        }
2520
0
    }
2521
0
exit:;
2522
0
    Py_END_CRITICAL_SECTION();
2523
2524
0
    return result;
2525
0
}
2526
2527
/*[clinic input]
2528
_sre.SRE_Match.start -> Py_ssize_t
2529
2530
    group: object(c_default="NULL") = 0
2531
    /
2532
2533
Return index of the start of the substring matched by group.
2534
[clinic start generated code]*/
2535
2536
static Py_ssize_t
2537
_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2538
/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2539
0
{
2540
0
    Py_ssize_t index = match_getindex(self, group);
2541
2542
0
    if (index < 0) {
2543
0
        return -1;
2544
0
    }
2545
2546
    /* mark is -1 if group is undefined */
2547
0
    return self->mark[index*2];
2548
0
}
2549
2550
/*[clinic input]
2551
_sre.SRE_Match.end -> Py_ssize_t
2552
2553
    group: object(c_default="NULL") = 0
2554
    /
2555
2556
Return index of the end of the substring matched by group.
2557
[clinic start generated code]*/
2558
2559
static Py_ssize_t
2560
_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2561
/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2562
10.3k
{
2563
10.3k
    Py_ssize_t index = match_getindex(self, group);
2564
2565
10.3k
    if (index < 0) {
2566
0
        return -1;
2567
0
    }
2568
2569
    /* mark is -1 if group is undefined */
2570
10.3k
    return self->mark[index*2+1];
2571
10.3k
}
2572
2573
/* Build a 2-tuple of Python ints from two Py_ssize_t values.
   Returns NULL if either int cannot be created. */
LOCAL(PyObject*)
_pair(Py_ssize_t i1, Py_ssize_t i2)
{
    PyObject *first = PyLong_FromSsize_t(i1);
    if (first == NULL) {
        return NULL;
    }

    PyObject *second = PyLong_FromSsize_t(i2);
    if (second == NULL) {
        Py_DECREF(first);
        return NULL;
    }

    /* Both references are handed over to the tuple constructor */
    return _PyTuple_FromPairSteal(first, second);
}
2588
2589
/*[clinic input]
2590
@permit_long_summary
2591
_sre.SRE_Match.span
2592
2593
    group: object(c_default="NULL") = 0
2594
    /
2595
2596
For match object m, return the 2-tuple (m.start(group), m.end(group)).
2597
[clinic start generated code]*/
2598
2599
static PyObject *
2600
_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2601
/*[clinic end generated code: output=f02ae40594d14fe6 input=834cfe444f0f55cf]*/
2602
0
{
2603
0
    Py_ssize_t index = match_getindex(self, group);
2604
2605
0
    if (index < 0) {
2606
0
        return NULL;
2607
0
    }
2608
2609
    /* marks are -1 if group is undefined */
2610
0
    return _pair(self->mark[index*2], self->mark[index*2+1]);
2611
0
}
2612
2613
static PyObject*
2614
match_regs(MatchObject* self)
2615
0
{
2616
0
    PyObject* regs;
2617
0
    PyObject* item;
2618
0
    Py_ssize_t index;
2619
2620
0
    regs = PyTuple_New(self->groups);
2621
0
    if (!regs)
2622
0
        return NULL;
2623
2624
0
    for (index = 0; index < self->groups; index++) {
2625
0
        item = _pair(self->mark[index*2], self->mark[index*2+1]);
2626
0
        if (!item) {
2627
0
            Py_DECREF(regs);
2628
0
            return NULL;
2629
0
        }
2630
0
        PyTuple_SET_ITEM(regs, index, item);
2631
0
    }
2632
2633
0
    self->regs = Py_NewRef(regs);
2634
2635
0
    return regs;
2636
0
}
2637
2638
/*[clinic input]
2639
_sre.SRE_Match.__copy__
2640
2641
[clinic start generated code]*/
2642
2643
static PyObject *
2644
_sre_SRE_Match___copy___impl(MatchObject *self)
2645
/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2646
0
{
2647
0
    return Py_NewRef(self);
2648
0
}
2649
2650
/*[clinic input]
2651
_sre.SRE_Match.__deepcopy__
2652
2653
    memo: object
2654
    /
2655
2656
[clinic start generated code]*/
2657
2658
static PyObject *
2659
_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2660
/*[clinic end generated code: output=2b657578eb03f4a3 input=779d12a31c2c325e]*/
2661
0
{
2662
0
    return Py_NewRef(self);
2663
0
}
2664
2665
PyDoc_STRVAR(match_doc,
2666
"The result of re.search(), re.prefixmatch(), and re.fullmatch().\n\
2667
Match objects always have a boolean value of True.");
2668
2669
PyDoc_STRVAR(match_group_doc,
2670
"group([group1, ...]) -> str or tuple.\n\
2671
    Return subgroup(s) of the match by indices or names.\n\
2672
    For 0 returns the entire match.");
2673
2674
/* Getter for Match.lastindex: the stored lastindex as an int, or None when
   it is negative. */
static PyObject *
match_lastindex_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);

    if (match->lastindex < 0) {
        Py_RETURN_NONE;
    }
    return PyLong_FromSsize_t(match->lastindex);
}
2682
2683
/* Getter for Match.lastgroup: the entry of the pattern's indexgroup tuple
   at position lastindex, or None when there is no such tuple or lastindex
   falls outside of it. */
static PyObject *
match_lastgroup_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);
    PyObject *names = match->pattern->indexgroup;

    if (!names) {
        Py_RETURN_NONE;
    }
    if (match->lastindex < 0) {
        Py_RETURN_NONE;
    }
    if (match->lastindex >= PyTuple_GET_SIZE(names)) {
        Py_RETURN_NONE;
    }

    PyObject *name = PyTuple_GET_ITEM(names, match->lastindex);
    return Py_NewRef(name);
}
2697
2698
/* Getter for Match.regs: hand out the stored tuple if there is one,
   otherwise have match_regs() build (and store) it. */
static PyObject *
match_regs_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);

    if (!match->regs) {
        return match_regs(match);
    }
    return Py_NewRef(match->regs);
}
2707
2708
/* repr() of a match object: type name, span of group 0, and the repr of
   the matched text (the format limits that part to 50 characters). */
static PyObject *
match_repr(PyObject *op)
{
    MatchObject *match = _MatchObject_CAST(op);

    PyObject *whole = match_getslice_by_index(match, 0, Py_None);
    if (whole == NULL) {
        return NULL;
    }

    PyObject *text = PyUnicode_FromFormat(
            "<%s object; span=(%zd, %zd), match=%.50R>",
            Py_TYPE(match)->tp_name,
            match->mark[0], match->mark[1], whole);

    Py_DECREF(whole);
    return text;
}
2723
2724
2725
static PyObject*
2726
pattern_new_match(_sremodulestate* module_state,
2727
                  PatternObject* pattern,
2728
                  SRE_STATE* state,
2729
                  Py_ssize_t status)
2730
10.7k
{
2731
    /* create match object (from state object) */
2732
2733
10.7k
    MatchObject* match;
2734
10.7k
    Py_ssize_t i, j;
2735
10.7k
    char* base;
2736
10.7k
    int n;
2737
2738
10.7k
    if (status > 0) {
2739
2740
        /* create match object (with room for extra group marks) */
2741
        /* coverity[ampersand_in_size] */
2742
10.4k
        match = PyObject_GC_NewVar(MatchObject,
2743
10.4k
                                   module_state->Match_Type,
2744
10.4k
                                   2*(pattern->groups+1));
2745
10.4k
        if (!match)
2746
0
            return NULL;
2747
2748
10.4k
        Py_INCREF(pattern);
2749
10.4k
        match->pattern = pattern;
2750
2751
10.4k
        match->string = Py_NewRef(state->string);
2752
2753
10.4k
        match->regs = NULL;
2754
10.4k
        match->groups = pattern->groups+1;
2755
2756
        /* fill in group slices */
2757
2758
10.4k
        base = (char*) state->beginning;
2759
10.4k
        n = state->charsize;
2760
2761
10.4k
        match->mark[0] = ((char*) state->start - base) / n;
2762
10.4k
        match->mark[1] = ((char*) state->ptr - base) / n;
2763
2764
10.5k
        for (i = j = 0; i < pattern->groups; i++, j+=2)
2765
142
            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2766
55
                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2767
55
                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2768
2769
                /* check wrong span */
2770
55
                if (match->mark[j+2] > match->mark[j+3]) {
2771
0
                    PyErr_SetString(PyExc_SystemError,
2772
0
                                    "The span of capturing group is wrong,"
2773
0
                                    " please report a bug for the re module.");
2774
0
                    Py_DECREF(match);
2775
0
                    return NULL;
2776
0
                }
2777
55
            } else
2778
87
                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2779
2780
10.4k
        match->pos = state->pos;
2781
10.4k
        match->endpos = state->endpos;
2782
2783
10.4k
        match->lastindex = state->lastindex;
2784
2785
10.4k
        PyObject_GC_Track(match);
2786
10.4k
        return (PyObject*) match;
2787
2788
10.4k
    } else if (status == 0) {
2789
2790
        /* no match */
2791
338
        Py_RETURN_NONE;
2792
2793
338
    }
2794
2795
    /* internal error */
2796
0
    pattern_error(status);
2797
0
    return NULL;
2798
10.7k
}
2799
2800
2801
/* -------------------------------------------------------------------- */
2802
/* scanner methods (experimental) */
2803
2804
static int
2805
scanner_traverse(PyObject *op, visitproc visit, void *arg)
2806
0
{
2807
0
    ScannerObject *self = _ScannerObject_CAST(op);
2808
0
    Py_VISIT(Py_TYPE(self));
2809
0
    Py_VISIT(self->pattern);
2810
0
    return 0;
2811
0
}
2812
2813
static int
2814
scanner_clear(PyObject *op)
2815
0
{
2816
0
    ScannerObject *self = _ScannerObject_CAST(op);
2817
0
    Py_CLEAR(self->pattern);
2818
0
    return 0;
2819
0
}
2820
2821
static void
2822
scanner_dealloc(PyObject *self)
2823
0
{
2824
0
    PyTypeObject *tp = Py_TYPE(self);
2825
0
    PyObject_GC_UnTrack(self);
2826
0
    ScannerObject *scanner = _ScannerObject_CAST(self);
2827
0
    state_fini(&scanner->state);
2828
0
    (void)scanner_clear(self);
2829
0
    tp->tp_free(self);
2830
0
    Py_DECREF(tp);
2831
0
}
2832
2833
static int
2834
scanner_begin(ScannerObject* self)
2835
0
{
2836
#ifdef Py_GIL_DISABLED
2837
    int was_executing = _Py_atomic_exchange_int(&self->executing, 1);
2838
#else
2839
0
    int was_executing = self->executing;
2840
0
    self->executing = 1;
2841
0
#endif
2842
0
    if (was_executing) {
2843
0
        PyErr_SetString(PyExc_ValueError,
2844
0
                        "regular expression scanner already executing");
2845
0
        return 0;
2846
0
    }
2847
0
    return 1;
2848
0
}
2849
2850
static void
2851
scanner_end(ScannerObject* self)
2852
0
{
2853
0
    assert(FT_ATOMIC_LOAD_INT_RELAXED(self->executing));
2854
0
    FT_ATOMIC_STORE_INT(self->executing, 0);
2855
0
}
2856
2857
/*[clinic input]
2858
_sre.SRE_Scanner.prefixmatch
2859
2860
    cls: defining_class
2861
    /
2862
2863
[clinic start generated code]*/
2864
2865
static PyObject *
2866
_sre_SRE_Scanner_prefixmatch_impl(ScannerObject *self, PyTypeObject *cls)
2867
/*[clinic end generated code: output=02b3b9d2954a2157 input=3049b20466c56a8e]*/
2868
0
{
2869
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2870
0
    SRE_STATE* state = &self->state;
2871
0
    PyObject* match;
2872
0
    Py_ssize_t status;
2873
2874
0
    if (!scanner_begin(self)) {
2875
0
        return NULL;
2876
0
    }
2877
0
    if (state->start == NULL) {
2878
0
        scanner_end(self);
2879
0
        Py_RETURN_NONE;
2880
0
    }
2881
2882
0
    state_reset(state);
2883
2884
0
    state->ptr = state->start;
2885
2886
0
    status = sre_match(state, PatternObject_GetCode(self->pattern));
2887
0
    if (PyErr_Occurred()) {
2888
0
        scanner_end(self);
2889
0
        return NULL;
2890
0
    }
2891
2892
0
    match = pattern_new_match(module_state, self->pattern,
2893
0
                              state, status);
2894
2895
0
    if (status == 0)
2896
0
        state->start = NULL;
2897
0
    else {
2898
0
        state->must_advance = (state->ptr == state->start);
2899
0
        state->start = state->ptr;
2900
0
    }
2901
2902
0
    scanner_end(self);
2903
0
    return match;
2904
0
}
2905
2906
2907
/*[clinic input]
2908
_sre.SRE_Scanner.search
2909
2910
    cls: defining_class
2911
    /
2912
2913
[clinic start generated code]*/
2914
2915
static PyObject *
2916
_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
2917
/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
2918
0
{
2919
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2920
0
    SRE_STATE* state = &self->state;
2921
0
    PyObject* match;
2922
0
    Py_ssize_t status;
2923
2924
0
    if (!scanner_begin(self)) {
2925
0
        return NULL;
2926
0
    }
2927
0
    if (state->start == NULL) {
2928
0
        scanner_end(self);
2929
0
        Py_RETURN_NONE;
2930
0
    }
2931
2932
0
    state_reset(state);
2933
2934
0
    state->ptr = state->start;
2935
2936
0
    status = sre_search(state, PatternObject_GetCode(self->pattern));
2937
0
    if (PyErr_Occurred()) {
2938
0
        scanner_end(self);
2939
0
        return NULL;
2940
0
    }
2941
2942
0
    match = pattern_new_match(module_state, self->pattern,
2943
0
                              state, status);
2944
2945
0
    if (status == 0)
2946
0
        state->start = NULL;
2947
0
    else {
2948
0
        state->must_advance = (state->ptr == state->start);
2949
0
        state->start = state->ptr;
2950
0
    }
2951
2952
0
    scanner_end(self);
2953
0
    return match;
2954
0
}
2955
2956
static PyObject *
2957
pattern_scanner(_sremodulestate *module_state,
2958
                PatternObject *self,
2959
                PyObject *string,
2960
                Py_ssize_t pos,
2961
                Py_ssize_t endpos)
2962
0
{
2963
0
    ScannerObject* scanner;
2964
2965
    /* create scanner object */
2966
0
    scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
2967
0
    if (!scanner)
2968
0
        return NULL;
2969
0
    scanner->pattern = NULL;
2970
0
    scanner->executing = 0;
2971
2972
    /* create search state object */
2973
0
    if (!state_init(&scanner->state, self, string, pos, endpos)) {
2974
0
        Py_DECREF(scanner);
2975
0
        return NULL;
2976
0
    }
2977
2978
0
    Py_INCREF(self);
2979
0
    scanner->pattern = self;
2980
2981
0
    PyObject_GC_Track(scanner);
2982
0
    return (PyObject*) scanner;
2983
0
}
2984
2985
/* -------------------------------------------------------------------- */
2986
/* template methods */
2987
2988
static int
2989
template_traverse(PyObject *op, visitproc visit, void *arg)
2990
0
{
2991
0
    TemplateObject *self = _TemplateObject_CAST(op);
2992
0
    Py_VISIT(Py_TYPE(self));
2993
0
    Py_VISIT(self->literal);
2994
0
    for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
2995
0
        Py_VISIT(self->items[i].literal);
2996
0
    }
2997
0
    return 0;
2998
0
}
2999
3000
static int
3001
template_clear(PyObject *op)
3002
0
{
3003
0
    TemplateObject *self = _TemplateObject_CAST(op);
3004
0
    Py_CLEAR(self->literal);
3005
0
    for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
3006
0
        Py_CLEAR(self->items[i].literal);
3007
0
    }
3008
0
    return 0;
3009
0
}
3010
3011
static void
3012
template_dealloc(PyObject *self)
3013
0
{
3014
0
    PyTypeObject *tp = Py_TYPE(self);
3015
0
    PyObject_GC_UnTrack(self);
3016
0
    (void)template_clear(self);
3017
0
    tp->tp_free(self);
3018
0
    Py_DECREF(tp);
3019
0
}
3020
3021
static PyObject *
3022
expand_template(TemplateObject *self, MatchObject *match)
3023
0
{
3024
0
    if (Py_SIZE(self) == 0) {
3025
0
        return Py_NewRef(self->literal);
3026
0
    }
3027
3028
0
    PyObject *result = NULL;
3029
0
    Py_ssize_t count = 0;  // the number of non-empty chunks
3030
    /* For small number of strings use a buffer allocated on the stack,
3031
     * otherwise use a list object. */
3032
0
    PyObject *buffer[10];
3033
0
    PyObject **out = buffer;
3034
0
    PyObject *list = NULL;
3035
0
    if (self->chunks > (int)Py_ARRAY_LENGTH(buffer) ||
3036
0
        !PyUnicode_Check(self->literal))
3037
0
    {
3038
0
        list = PyList_New(self->chunks);
3039
0
        if (!list) {
3040
0
            return NULL;
3041
0
        }
3042
0
        out = &PyList_GET_ITEM(list, 0);
3043
0
    }
3044
3045
0
    out[count++] = Py_NewRef(self->literal);
3046
0
    for (Py_ssize_t i = 0; i < Py_SIZE(self); i++) {
3047
0
        Py_ssize_t index = self->items[i].index;
3048
0
        if (index >= match->groups) {
3049
0
            PyErr_SetString(PyExc_IndexError, "no such group");
3050
0
            goto cleanup;
3051
0
        }
3052
0
        PyObject *item = match_getslice_by_index(match, index, Py_None);
3053
0
        if (item == NULL) {
3054
0
            goto cleanup;
3055
0
        }
3056
0
        if (item != Py_None) {
3057
0
            out[count++] = Py_NewRef(item);
3058
0
        }
3059
0
        Py_DECREF(item);
3060
3061
0
        PyObject *literal = self->items[i].literal;
3062
0
        if (literal != NULL) {
3063
0
            out[count++] = Py_NewRef(literal);
3064
0
        }
3065
0
    }
3066
3067
0
    if (PyUnicode_Check(self->literal)) {
3068
0
        result = _PyUnicode_JoinArray(&_Py_STR(empty), out, count);
3069
0
    }
3070
0
    else {
3071
0
        Py_SET_SIZE(list, count);
3072
0
        result = PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list);
3073
0
    }
3074
3075
0
cleanup:
3076
0
    if (list) {
3077
0
        Py_DECREF(list);
3078
0
    }
3079
0
    else {
3080
0
        for (Py_ssize_t i = 0; i < count; i++) {
3081
0
            Py_DECREF(out[i]);
3082
0
        }
3083
0
    }
3084
0
    return result;
3085
0
}
3086
3087
3088
static Py_hash_t
3089
pattern_hash(PyObject *op)
3090
0
{
3091
0
    PatternObject *self = _PatternObject_CAST(op);
3092
3093
0
    Py_hash_t hash, hash2;
3094
3095
0
    hash = PyObject_Hash(self->pattern);
3096
0
    if (hash == -1) {
3097
0
        return -1;
3098
0
    }
3099
3100
0
    hash2 = Py_HashBuffer(self->code, sizeof(self->code[0]) * self->codesize);
3101
0
    hash ^= hash2;
3102
3103
0
    hash ^= self->flags;
3104
0
    hash ^= self->isbytes;
3105
0
    hash ^= self->codesize;
3106
3107
0
    if (hash == -1) {
3108
0
        hash = -2;
3109
0
    }
3110
0
    return hash;
3111
0
}
3112
3113
/* Rich comparison for pattern objects.
 *
 * Only == and != are supported, and only against another object whose
 * type is exactly the module's Pattern type; anything else yields
 * NotImplemented.  Two patterns are equal when their flags, isbytes,
 * codesize, compiled code and source pattern all compare equal.
 * Returns NULL if comparing the source patterns raises.
 */
static PyObject*
pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
{
    PyTypeObject *tp = Py_TYPE(lefto);
    _sremodulestate *module_state = get_sre_module_state_by_class(tp);

    if ((op != Py_EQ && op != Py_NE)
        || !Py_IS_TYPE(righto, module_state->Pattern_Type))
    {
        Py_RETURN_NOTIMPLEMENTED;
    }

    if (lefto == righto) {
        /* a pattern is equal to itself */
        return PyBool_FromLong(op == Py_EQ);
    }

    PatternObject *lhs = (PatternObject *)lefto;
    PatternObject *rhs = (PatternObject *)righto;
    int equal = 0;

    /* Compare the code and the pattern because the same pattern can
       produce different codes depending on the locale used to compile the
       pattern when the re.LOCALE flag is used. Don't compare groups,
       indexgroup nor groupindex: they are derived from the pattern. */
    if (lhs->flags == rhs->flags
        && lhs->isbytes == rhs->isbytes
        && lhs->codesize == rhs->codesize
        && memcmp(lhs->code, rhs->code,
                  sizeof(lhs->code[0]) * lhs->codesize) == 0)
    {
        equal = PyObject_RichCompareBool(lhs->pattern, rhs->pattern, Py_EQ);
        if (equal < 0) {
            return NULL;
        }
    }

    return PyBool_FromLong(op == Py_NE ? !equal : equal);
}
3161
3162
#include "clinic/sre.c.h"
3163
3164
static PyMethodDef pattern_methods[] = {
3165
    _SRE_SRE_PATTERN_PREFIXMATCH_METHODDEF
3166
    /* "match" reuses the prefixmatch Clinic-generated parser and impl
3167
     * to avoid duplicating the argument parsing boilerplate code. */
3168
    {"match", _PyCFunction_CAST(_sre_SRE_Pattern_prefixmatch),
3169
     METH_METHOD|METH_FASTCALL|METH_KEYWORDS,
3170
     _sre_SRE_Pattern_prefixmatch__doc__},
3171
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
3172
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
3173
    _SRE_SRE_PATTERN_SUB_METHODDEF
3174
    _SRE_SRE_PATTERN_SUBN_METHODDEF
3175
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
3176
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
3177
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
3178
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
3179
    _SRE_SRE_PATTERN___COPY___METHODDEF
3180
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
3181
    _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
3182
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3183
     PyDoc_STR("See PEP 585")},
3184
    {NULL, NULL}
3185
};
3186
3187
static PyGetSetDef pattern_getset[] = {
3188
    {"groupindex", pattern_groupindex, NULL,
3189
      "A dictionary mapping group names to group numbers."},
3190
    {NULL}  /* Sentinel */
3191
};
3192
3193
#define PAT_OFF(x) offsetof(PatternObject, x)
3194
static PyMemberDef pattern_members[] = {
3195
    {"pattern",    _Py_T_OBJECT,    PAT_OFF(pattern),       Py_READONLY,
3196
     "The pattern string from which the RE object was compiled."},
3197
    {"flags",      Py_T_INT,       PAT_OFF(flags),         Py_READONLY,
3198
     "The regex matching flags."},
3199
    {"groups",     Py_T_PYSSIZET,  PAT_OFF(groups),        Py_READONLY,
3200
     "The number of capturing groups in the pattern."},
3201
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PatternObject, weakreflist), Py_READONLY},
3202
    {NULL}  /* Sentinel */
3203
};
3204
3205
static PyType_Slot pattern_slots[] = {
3206
    {Py_tp_dealloc, pattern_dealloc},
3207
    {Py_tp_repr, pattern_repr},
3208
    {Py_tp_hash, pattern_hash},
3209
    {Py_tp_doc, (void *)pattern_doc},
3210
    {Py_tp_richcompare, pattern_richcompare},
3211
    {Py_tp_methods, pattern_methods},
3212
    {Py_tp_members, pattern_members},
3213
    {Py_tp_getset, pattern_getset},
3214
    {Py_tp_traverse, pattern_traverse},
3215
    {Py_tp_clear, pattern_clear},
3216
    {0, NULL},
3217
};
3218
3219
static PyType_Spec pattern_spec = {
3220
    .name = "re.Pattern",
3221
    .basicsize = sizeof(PatternObject),
3222
    .itemsize = sizeof(SRE_CODE),
3223
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3224
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3225
    .slots = pattern_slots,
3226
};
3227
3228
static PyMethodDef match_methods[] = {
3229
    {"group", match_group, METH_VARARGS, match_group_doc},
3230
    _SRE_SRE_MATCH_START_METHODDEF
3231
    _SRE_SRE_MATCH_END_METHODDEF
3232
    _SRE_SRE_MATCH_SPAN_METHODDEF
3233
    _SRE_SRE_MATCH_GROUPS_METHODDEF
3234
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
3235
    _SRE_SRE_MATCH_EXPAND_METHODDEF
3236
    _SRE_SRE_MATCH___COPY___METHODDEF
3237
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
3238
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3239
     PyDoc_STR("See PEP 585")},
3240
    {NULL, NULL}
3241
};
3242
3243
static PyGetSetDef match_getset[] = {
3244
    {"lastindex", match_lastindex_get, NULL,
3245
     "The integer index of the last matched capturing group."},
3246
    {"lastgroup", match_lastgroup_get, NULL,
3247
     "The name of the last matched capturing group."},
3248
    {"regs", match_regs_get, NULL, NULL},
3249
    {NULL}
3250
};
3251
3252
#define MATCH_OFF(x) offsetof(MatchObject, x)
3253
static PyMemberDef match_members[] = {
3254
    {"string",  _Py_T_OBJECT,   MATCH_OFF(string),  Py_READONLY,
3255
     "The string passed to match() or search()."},
3256
    {"re",      _Py_T_OBJECT,   MATCH_OFF(pattern), Py_READONLY,
3257
     "The regular expression object."},
3258
    {"pos",     Py_T_PYSSIZET, MATCH_OFF(pos),     Py_READONLY,
3259
     "The index into the string at which the RE engine started looking for a match."},
3260
    {"endpos",  Py_T_PYSSIZET, MATCH_OFF(endpos),  Py_READONLY,
3261
     "The index into the string beyond which the RE engine will not go."},
3262
    {NULL}
3263
};
3264
3265
/* FIXME: implement setattr("string", None) as a special case (to
3266
   detach the associated string, if any */
3267
static PyType_Slot match_slots[] = {
3268
    {Py_tp_dealloc, match_dealloc},
3269
    {Py_tp_repr, match_repr},
3270
    {Py_tp_doc, (void *)match_doc},
3271
    {Py_tp_methods, match_methods},
3272
    {Py_tp_members, match_members},
3273
    {Py_tp_getset, match_getset},
3274
    {Py_tp_traverse, match_traverse},
3275
    {Py_tp_clear, match_clear},
3276
3277
    /* As mapping.
3278
     *
3279
     * Match objects do not support length or assignment, but do support
3280
     * __getitem__.
3281
     */
3282
    {Py_mp_subscript, match_getitem},
3283
3284
    {0, NULL},
3285
};
3286
3287
static PyType_Spec match_spec = {
3288
    .name = "re.Match",
3289
    .basicsize = sizeof(MatchObject),
3290
    .itemsize = sizeof(Py_ssize_t),
3291
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3292
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3293
    .slots = match_slots,
3294
};
3295
3296
static PyMethodDef scanner_methods[] = {
3297
    _SRE_SRE_SCANNER_PREFIXMATCH_METHODDEF
3298
    /* "match" reuses the prefixmatch Clinic-generated parser and impl
3299
     * to avoid duplicating the argument parsing boilerplate code. */
3300
    {"match", _PyCFunction_CAST(_sre_SRE_Scanner_prefixmatch),
3301
     METH_METHOD|METH_FASTCALL|METH_KEYWORDS,
3302
     _sre_SRE_Scanner_prefixmatch__doc__},
3303
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
3304
    {NULL, NULL}
3305
};
3306
3307
#define SCAN_OFF(x) offsetof(ScannerObject, x)
3308
static PyMemberDef scanner_members[] = {
3309
    {"pattern", _Py_T_OBJECT, SCAN_OFF(pattern), Py_READONLY},
3310
    {NULL}  /* Sentinel */
3311
};
3312
3313
static PyType_Slot scanner_slots[] = {
3314
    {Py_tp_dealloc, scanner_dealloc},
3315
    {Py_tp_methods, scanner_methods},
3316
    {Py_tp_members, scanner_members},
3317
    {Py_tp_traverse, scanner_traverse},
3318
    {Py_tp_clear, scanner_clear},
3319
    {0, NULL},
3320
};
3321
3322
static PyType_Spec scanner_spec = {
3323
    .name = "_sre.SRE_Scanner",
3324
    .basicsize = sizeof(ScannerObject),
3325
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3326
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3327
    .slots = scanner_slots,
3328
};
3329
3330
static PyType_Slot template_slots[] = {
3331
    {Py_tp_dealloc, template_dealloc},
3332
    {Py_tp_traverse, template_traverse},
3333
    {Py_tp_clear, template_clear},
3334
    {0, NULL},
3335
};
3336
3337
static PyType_Spec template_spec = {
3338
    .name = "_sre.SRE_Template",
3339
    .basicsize = sizeof(TemplateObject),
3340
    .itemsize = sizeof(((TemplateObject *)0)->items[0]),
3341
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3342
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3343
    .slots = template_slots,
3344
};
3345
3346
static PyMethodDef _functions[] = {
3347
    _SRE_COMPILE_METHODDEF
3348
    _SRE_TEMPLATE_METHODDEF
3349
    _SRE_GETCODESIZE_METHODDEF
3350
    _SRE_ASCII_ISCASED_METHODDEF
3351
    _SRE_UNICODE_ISCASED_METHODDEF
3352
    _SRE_ASCII_TOLOWER_METHODDEF
3353
    _SRE_UNICODE_TOLOWER_METHODDEF
3354
    {NULL, NULL}
3355
};
3356
3357
static int
3358
sre_traverse(PyObject *module, visitproc visit, void *arg)
3359
58
{
3360
58
    _sremodulestate *state = get_sre_module_state(module);
3361
3362
58
    Py_VISIT(state->Pattern_Type);
3363
58
    Py_VISIT(state->Match_Type);
3364
58
    Py_VISIT(state->Scanner_Type);
3365
58
    Py_VISIT(state->Template_Type);
3366
58
    Py_VISIT(state->compile_template);
3367
3368
58
    return 0;
3369
58
}
3370
3371
static int
3372
sre_clear(PyObject *module)
3373
0
{
3374
0
    _sremodulestate *state = get_sre_module_state(module);
3375
3376
0
    Py_CLEAR(state->Pattern_Type);
3377
0
    Py_CLEAR(state->Match_Type);
3378
0
    Py_CLEAR(state->Scanner_Type);
3379
0
    Py_CLEAR(state->Template_Type);
3380
0
    Py_CLEAR(state->compile_template);
3381
3382
0
    return 0;
3383
0
}
3384
3385
static void
3386
sre_free(void *module)
3387
0
{
3388
0
    sre_clear((PyObject *)module);
3389
0
}
3390
3391
24
/* Create a heap type from `spec` for module `m` and store it in `type`.
 * On failure, jumps to the `error` label of the enclosing function. */
#define CREATE_TYPE(m, type, spec)                                            \
    do {                                                                      \
        (type) = (PyTypeObject *)PyType_FromModuleAndSpec((m), (spec), NULL); \
        if ((type) == NULL) {                                                 \
            goto error;                                                       \
        }                                                                     \
    } while (0)

/* Add an unsigned long constant named `name` to `module`.
 * On failure, jumps to the `error` label of the enclosing function. */
#define ADD_ULONG_CONSTANT(module, name, value)                               \
    do {                                                                      \
        if (PyModule_Add((module), (name),                                    \
                         PyLong_FromUnsignedLong(value)) < 0) {               \
            goto error;                                                       \
        }                                                                     \
    } while (0)
3405
3406
3407
#ifdef Py_DEBUG
/* Debug-only sanity check on a method table: entry 0 must be
 * "prefixmatch", entry 1 must be "match", and the two entries must share
 * the same function pointer, flags and docstring pointer.
 */
static void
_assert_match_aliases_prefixmatch(PyMethodDef *methods)
{
    const PyMethodDef *canonical = methods;
    const PyMethodDef *alias = methods + 1;

    assert(strcmp(canonical->ml_name, "prefixmatch") == 0);
    assert(strcmp(alias->ml_name, "match") == 0);
    assert(alias->ml_meth == canonical->ml_meth);
    assert(alias->ml_flags == canonical->ml_flags);
    assert(alias->ml_doc == canonical->ml_doc);
}
#endif
3420
3421
static int
3422
sre_exec(PyObject *m)
3423
6
{
3424
6
    _sremodulestate *state;
3425
3426
#ifdef Py_DEBUG
3427
    _assert_match_aliases_prefixmatch(pattern_methods);
3428
    _assert_match_aliases_prefixmatch(scanner_methods);
3429
#endif
3430
3431
    /* Create heap types */
3432
6
    state = get_sre_module_state(m);
3433
6
    CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
3434
6
    CREATE_TYPE(m, state->Match_Type, &match_spec);
3435
6
    CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
3436
6
    CREATE_TYPE(m, state->Template_Type, &template_spec);
3437
3438
6
    if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
3439
0
        goto error;
3440
0
    }
3441
3442
6
    if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
3443
0
        goto error;
3444
0
    }
3445
3446
6
    ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
3447
6
    ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
3448
3449
6
    if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
3450
0
        goto error;
3451
0
    }
3452
3453
6
    return 0;
3454
3455
0
error:
3456
0
    return -1;
3457
6
}
3458
3459
static PyModuleDef_Slot sre_slots[] = {
3460
    _Py_ABI_SLOT,
3461
    {Py_mod_exec, sre_exec},
3462
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
3463
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
3464
    {0, NULL},
3465
};
3466
3467
static struct PyModuleDef sremodule = {
3468
    .m_base = PyModuleDef_HEAD_INIT,
3469
    .m_name = "_sre",
3470
    .m_size = sizeof(_sremodulestate),
3471
    .m_methods = _functions,
3472
    .m_slots = sre_slots,
3473
    .m_traverse = sre_traverse,
3474
    .m_free = sre_free,
3475
    .m_clear = sre_clear,
3476
};
3477
3478
PyMODINIT_FUNC
3479
PyInit__sre(void)
3480
6
{
3481
6
    return PyModuleDef_Init(&sremodule);
3482
6
}
3483
3484
/* vim:ts=4:sw=4:et
3485
*/