Coverage Report

Created: 2026-04-12 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Modules/_sre/sre.c
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * partial history:
7
 * 1999-10-24 fl   created (based on existing template matcher code)
8
 * 2000-03-06 fl   first alpha, sort of
9
 * 2000-08-01 fl   fixes for 1.6b1
10
 * 2000-08-07 fl   use PyOS_CheckStack() if available
11
 * 2000-09-20 fl   added expand method
12
 * 2001-03-20 fl   lots of fixes for 2.1b2
13
 * 2001-04-15 fl   export copyright as Python attribute, not global
14
 * 2001-04-28 fl   added __copy__ methods (work in progress)
15
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
16
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
17
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
18
 * 2001-10-20 fl   added split primitive; re-enable unicode for 1.6/2.0/2.1
19
 * 2001-10-21 fl   added sub/subn primitive
20
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
21
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
22
 * 2002-11-09 fl   fixed empty sub/subn return type
23
 * 2003-04-18 mvl  fully support 4-byte codes
24
 * 2003-10-17 gn   implemented non recursive scheme
25
 * 2013-02-04 mrab added fullmatch primitive
26
 *
27
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
28
 *
29
 * This version of the SRE library can be redistributed under CNRI's
30
 * Python 1.6 license.  For any other use, please contact Secret Labs
31
 * AB (info@pythonware.com).
32
 *
33
 * Portions of this engine have been developed in cooperation with
34
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
35
 * other compatibility work.
36
 */
37
38
static const char copyright[] =
39
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41
#include "Python.h"
42
#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION
43
#include "pycore_dict.h"             // _PyDict_Next()
44
#include "pycore_long.h"             // _PyLong_GetZero()
45
#include "pycore_moduleobject.h"     // _PyModule_GetState()
46
#include "pycore_tuple.h"            // _PyTuple_FromPairSteal
47
#include "pycore_unicodeobject.h"    // _PyUnicode_Copy
48
#include "pycore_weakref.h"          // FT_CLEAR_WEAKREFS()
49
50
#include "sre.h"                     // SRE_CODE
51
52
#include <ctype.h>                   // tolower(), toupper(), isalnum()
53
54
21.0M
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
55
56
// On macOS, use the wide character ctype API using btowc()
57
#if defined(__APPLE__)
58
#  define USE_CTYPE_WINT_T
59
#endif
60
61
0
/* Locale-dependent "is alphanumeric" test.
 *
 * When USE_CTYPE_WINT_T is defined the character is first widened with
 * btowc() and tested with iswalnum(); otherwise plain isalnum() is used.
 * Returns non-zero when `ch` is classified as alphanumeric. */
static int
sre_isalnum(unsigned int ch)
{
    int c = (int)ch;
#ifdef USE_CTYPE_WINT_T
    return (unsigned int)iswalnum(btowc(c));
#else
    return (unsigned int)isalnum(c);
#endif
}
68
69
0
/* Locale-dependent lowercase conversion.
 *
 * When USE_CTYPE_WINT_T is defined the character is widened with btowc()
 * and converted with towlower(); otherwise tolower() is used.
 * Returns the converted character. */
static unsigned int
sre_tolower(unsigned int ch)
{
    int c = (int)ch;
#ifdef USE_CTYPE_WINT_T
    return (unsigned int)towlower(btowc(c));
#else
    return (unsigned int)tolower(c);
#endif
}
76
77
0
/* Locale-dependent uppercase conversion.
 *
 * When USE_CTYPE_WINT_T is defined the character is widened with btowc()
 * and converted with towupper(); otherwise toupper() is used.
 * Returns the converted character. */
static unsigned int
sre_toupper(unsigned int ch)
{
    int c = (int)ch;
#ifdef USE_CTYPE_WINT_T
    return (unsigned int)towupper(btowc(c));
#else
    return (unsigned int)toupper(c);
#endif
}
84
85
/* Defining this one controls tracing:
86
 * 0 -- disabled
87
 * 1 -- only if the DEBUG flag set
88
 * 2 -- always
89
 */
90
#ifndef VERBOSE
91
#  define VERBOSE 0
92
#endif
93
94
/* -------------------------------------------------------------------- */
95
96
#if defined(_MSC_VER) && !defined(__clang__)
97
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
98
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
99
/* fastest possible local call under MSVC */
100
#define LOCAL(type) static __inline type __fastcall
101
#else
102
#define LOCAL(type) static inline type
103
#endif
104
105
/* error codes */
106
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
107
#define SRE_ERROR_STATE -2 /* illegal state */
108
0
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
109
0
#define SRE_ERROR_MEMORY -9 /* out of memory */
110
0
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
111
112
#if VERBOSE == 0
113
#  define INIT_TRACE(state)
114
#  define DO_TRACE 0
115
#  define TRACE(v)
116
#elif VERBOSE == 1
117
#  define INIT_TRACE(state) int _debug = (state)->debug
118
#  define DO_TRACE (_debug)
119
#  define TRACE(v) do {     \
120
        if (_debug) { \
121
            printf v;       \
122
        }                   \
123
    } while (0)
124
#elif VERBOSE == 2
125
#  define INIT_TRACE(state)
126
#  define DO_TRACE 1
127
#  define TRACE(v) printf v
128
#else
129
#  error VERBOSE must be 0, 1 or 2
130
#endif
131
132
/* -------------------------------------------------------------------- */
133
/* search engine state */
134
135
#define SRE_IS_DIGIT(ch)\
136
1.44M
    ((ch) <= '9' && Py_ISDIGIT(ch))
137
#define SRE_IS_SPACE(ch)\
138
0
    ((ch) <= ' ' && Py_ISSPACE(ch))
139
#define SRE_IS_LINEBREAK(ch)\
140
9
    ((ch) == '\n')
141
#define SRE_IS_WORD(ch)\
142
4
    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
143
144
/* ASCII-only lowercase: code points below 128 go through Py_TOLOWER(),
 * everything else is returned unchanged. */
static unsigned int
sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
148
149
/* locale-specific character predicates */
150
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
151
 * warnings when c's type supports only numbers < N+1 */
152
0
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? sre_isalnum((ch)) : 0)
153
0
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
154
155
/* Locale lowercase: only values below 256 are passed to sre_tolower();
 * larger values are returned unchanged. */
static unsigned int
sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)sre_tolower(ch);
}
159
160
/* Locale uppercase: only values below 256 are passed to sre_toupper();
 * larger values are returned unchanged. */
static unsigned int
sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)sre_toupper(ch);
}
164
165
/* unicode-specific character predicates */
166
167
12
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
168
0
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
169
0
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
170
0
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
171
0
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
172
173
/* Unicode lowercase mapping, delegated to Py_UNICODE_TOLOWER(). */
static unsigned int
sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
177
178
/* Unicode uppercase mapping, delegated to Py_UNICODE_TOUPPER(). */
static unsigned int
sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
182
183
LOCAL(int)
184
sre_category(SRE_CODE category, unsigned int ch)
185
1.44M
{
186
1.44M
    switch (category) {
187
188
1.44M
    case SRE_CATEGORY_DIGIT:
189
1.44M
        return SRE_IS_DIGIT(ch);
190
0
    case SRE_CATEGORY_NOT_DIGIT:
191
0
        return !SRE_IS_DIGIT(ch);
192
0
    case SRE_CATEGORY_SPACE:
193
0
        return SRE_IS_SPACE(ch);
194
0
    case SRE_CATEGORY_NOT_SPACE:
195
0
        return !SRE_IS_SPACE(ch);
196
0
    case SRE_CATEGORY_WORD:
197
0
        return SRE_IS_WORD(ch);
198
0
    case SRE_CATEGORY_NOT_WORD:
199
0
        return !SRE_IS_WORD(ch);
200
0
    case SRE_CATEGORY_LINEBREAK:
201
0
        return SRE_IS_LINEBREAK(ch);
202
0
    case SRE_CATEGORY_NOT_LINEBREAK:
203
0
        return !SRE_IS_LINEBREAK(ch);
204
205
0
    case SRE_CATEGORY_LOC_WORD:
206
0
        return SRE_LOC_IS_WORD(ch);
207
0
    case SRE_CATEGORY_LOC_NOT_WORD:
208
0
        return !SRE_LOC_IS_WORD(ch);
209
210
12
    case SRE_CATEGORY_UNI_DIGIT:
211
12
        return SRE_UNI_IS_DIGIT(ch);
212
0
    case SRE_CATEGORY_UNI_NOT_DIGIT:
213
0
        return !SRE_UNI_IS_DIGIT(ch);
214
0
    case SRE_CATEGORY_UNI_SPACE:
215
0
        return SRE_UNI_IS_SPACE(ch);
216
0
    case SRE_CATEGORY_UNI_NOT_SPACE:
217
0
        return !SRE_UNI_IS_SPACE(ch);
218
0
    case SRE_CATEGORY_UNI_WORD:
219
0
        return SRE_UNI_IS_WORD(ch);
220
0
    case SRE_CATEGORY_UNI_NOT_WORD:
221
0
        return !SRE_UNI_IS_WORD(ch);
222
0
    case SRE_CATEGORY_UNI_LINEBREAK:
223
0
        return SRE_UNI_IS_LINEBREAK(ch);
224
0
    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
225
0
        return !SRE_UNI_IS_LINEBREAK(ch);
226
1.44M
    }
227
0
    return 0;
228
1.44M
}
229
230
LOCAL(int)
231
char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
232
0
{
233
0
    return ch == pattern
234
0
        || (SRE_CODE) sre_lower_locale(ch) == pattern
235
0
        || (SRE_CODE) sre_upper_locale(ch) == pattern;
236
0
}
237
238
239
/* helpers */
240
241
static void
242
data_stack_dealloc(SRE_STATE* state)
243
10.7k
{
244
10.7k
    if (state->data_stack) {
245
10.7k
        PyMem_Free(state->data_stack);
246
10.7k
        state->data_stack = NULL;
247
10.7k
    }
248
10.7k
    state->data_stack_size = state->data_stack_base = 0;
249
10.7k
}
250
251
static int
252
data_stack_grow(SRE_STATE* state, Py_ssize_t size)
253
13.8k
{
254
13.8k
    INIT_TRACE(state);
255
13.8k
    Py_ssize_t minsize, cursize;
256
13.8k
    minsize = state->data_stack_base+size;
257
13.8k
    cursize = state->data_stack_size;
258
13.8k
    if (cursize < minsize) {
259
13.8k
        void* stack;
260
13.8k
        cursize = minsize+minsize/4+1024;
261
13.8k
        TRACE(("allocate/grow stack %zd\n", cursize));
262
13.8k
        stack = PyMem_Realloc(state->data_stack, cursize);
263
13.8k
        if (!stack) {
264
0
            data_stack_dealloc(state);
265
0
            return SRE_ERROR_MEMORY;
266
0
        }
267
13.8k
        state->data_stack = (char *)stack;
268
13.8k
        state->data_stack_size = cursize;
269
13.8k
    }
270
13.8k
    return 0;
271
13.8k
}
272
273
/* memory pool functions for SRE_REPEAT, this can avoid memory
274
   leak when SRE(match) function terminates abruptly.
275
   state->repeat_pool_used is a doubly-linked list, so that we
276
   can remove a SRE_REPEAT node from it.
277
   state->repeat_pool_unused is a singly-linked list, we put/get
278
   node at the head. */
279
static SRE_REPEAT *
280
repeat_pool_malloc(SRE_STATE *state)
281
3.21k
{
282
3.21k
    SRE_REPEAT *repeat;
283
284
3.21k
    if (state->repeat_pool_unused) {
285
        /* remove from unused pool (singly-linked list) */
286
2.71k
        repeat = state->repeat_pool_unused;
287
2.71k
        state->repeat_pool_unused = repeat->pool_next;
288
2.71k
    }
289
507
    else {
290
507
        repeat = PyMem_Malloc(sizeof(SRE_REPEAT));
291
507
        if (!repeat) {
292
0
            return NULL;
293
0
        }
294
507
    }
295
296
    /* add to used pool (doubly-linked list) */
297
3.21k
    SRE_REPEAT *temp = state->repeat_pool_used;
298
3.21k
    if (temp) {
299
201
        temp->pool_prev = repeat;
300
201
    }
301
3.21k
    repeat->pool_prev = NULL;
302
3.21k
    repeat->pool_next = temp;
303
3.21k
    state->repeat_pool_used = repeat;
304
305
3.21k
    return repeat;
306
3.21k
}
307
308
static void
309
repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
310
3.21k
{
311
3.21k
    SRE_REPEAT *prev = repeat->pool_prev;
312
3.21k
    SRE_REPEAT *next = repeat->pool_next;
313
314
    /* remove from used pool (doubly-linked list) */
315
3.21k
    if (prev) {
316
0
        prev->pool_next = next;
317
0
    }
318
3.21k
    else {
319
3.21k
        state->repeat_pool_used = next;
320
3.21k
    }
321
3.21k
    if (next) {
322
201
        next->pool_prev = prev;
323
201
    }
324
325
    /* add to unused pool (singly-linked list) */
326
3.21k
    repeat->pool_next = state->repeat_pool_unused;
327
3.21k
    state->repeat_pool_unused = repeat;
328
3.21k
}
329
330
static void
331
repeat_pool_clear(SRE_STATE *state)
332
10.7k
{
333
    /* clear used pool */
334
10.7k
    SRE_REPEAT *next = state->repeat_pool_used;
335
10.7k
    state->repeat_pool_used = NULL;
336
10.7k
    while (next) {
337
0
        SRE_REPEAT *temp = next;
338
0
        next = temp->pool_next;
339
0
        PyMem_Free(temp);
340
0
    }
341
342
    /* clear unused pool */
343
10.7k
    next = state->repeat_pool_unused;
344
10.7k
    state->repeat_pool_unused = NULL;
345
11.3k
    while (next) {
346
507
        SRE_REPEAT *temp = next;
347
507
        next = temp->pool_next;
348
507
        PyMem_Free(temp);
349
507
    }
350
10.7k
}
351
352
/* generate 8-bit version */
353
354
1.44M
#define SRE_CHAR Py_UCS1
355
#define SIZEOF_SRE_CHAR 1
356
9.76M
#define SRE(F) sre_ucs1_##F
357
#include "sre_lib.h"
358
359
/* generate 16-bit unicode version */
360
361
1.80k
#define SRE_CHAR Py_UCS2
362
#define SIZEOF_SRE_CHAR 2
363
2.61M
#define SRE(F) sre_ucs2_##F
364
#include "sre_lib.h"
365
366
/* generate 32-bit unicode version */
367
368
2.23k
#define SRE_CHAR Py_UCS4
369
#define SIZEOF_SRE_CHAR 4
370
2.49M
#define SRE(F) sre_ucs4_##F
371
#include "sre_lib.h"
372
373
/* -------------------------------------------------------------------- */
374
/* factories and destructors */
375
376
/* module state */
377
typedef struct {
378
    PyTypeObject *Pattern_Type;
379
    PyTypeObject *Match_Type;
380
    PyTypeObject *Scanner_Type;
381
    PyTypeObject *Template_Type;
382
    PyObject *compile_template;  // reference to re._compile_template
383
} _sremodulestate;
384
385
static _sremodulestate *
386
get_sre_module_state(PyObject *m)
387
13.4k
{
388
13.4k
    _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
389
13.4k
    assert(state);
390
13.4k
    return state;
391
13.4k
}
392
393
static struct PyModuleDef sremodule;
394
#define get_sre_module_state_by_class(cls) \
395
10.7k
    (get_sre_module_state(PyType_GetModule(cls)))
396
397
/* see sre.h for object declarations */
398
static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
399
static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
400
401
124k
#define _PatternObject_CAST(op)     ((PatternObject *)(op))
402
10.4k
#define _MatchObject_CAST(op)       ((MatchObject *)(op))
403
0
#define _TemplateObject_CAST(op)    ((TemplateObject *)(op))
404
0
#define _ScannerObject_CAST(op)     ((ScannerObject *)(op))
405
406
/*[clinic input]
407
module _sre
408
class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
409
class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
410
class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
411
[clinic start generated code]*/
412
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
413
414
/*[clinic input]
415
_sre.getcodesize -> int
416
[clinic start generated code]*/
417
418
static int
419
_sre_getcodesize_impl(PyObject *module)
420
/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
421
0
{
422
0
    return sizeof(SRE_CODE);
423
0
}
424
425
/*[clinic input]
426
_sre.ascii_iscased -> bool
427
428
    character: int
429
    /
430
431
[clinic start generated code]*/
432
433
static int
434
_sre_ascii_iscased_impl(PyObject *module, int character)
435
/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
436
2.03M
{
437
2.03M
    unsigned int ch = (unsigned int)character;
438
2.03M
    return ch < 128 && Py_ISALPHA(ch);
439
2.03M
}
440
441
/*[clinic input]
442
_sre.unicode_iscased -> bool
443
444
    character: int
445
    /
446
447
[clinic start generated code]*/
448
449
static int
450
_sre_unicode_iscased_impl(PyObject *module, int character)
451
/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
452
76
{
453
76
    unsigned int ch = (unsigned int)character;
454
76
    return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
455
76
}
456
457
/*[clinic input]
458
_sre.ascii_tolower -> int
459
460
    character: int
461
    /
462
463
[clinic start generated code]*/
464
465
static int
466
_sre_ascii_tolower_impl(PyObject *module, int character)
467
/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
468
4.62M
{
469
4.62M
    return sre_lower_ascii(character);
470
4.62M
}
471
472
/*[clinic input]
473
_sre.unicode_tolower -> int
474
475
    character: int
476
    /
477
478
[clinic start generated code]*/
479
480
static int
481
_sre_unicode_tolower_impl(PyObject *module, int character)
482
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
483
68
{
484
68
    return sre_lower_unicode(character);
485
68
}
486
487
LOCAL(void)
488
state_reset(SRE_STATE* state)
489
0
{
490
    /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
491
    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
492
493
0
    state->lastmark = -1;
494
0
    state->lastindex = -1;
495
496
0
    state->repeat = NULL;
497
498
0
    data_stack_dealloc(state);
499
0
}
500
501
static const void*
502
getstring(PyObject* string, Py_ssize_t* p_length,
503
          int* p_isbytes, int* p_charsize,
504
          Py_buffer *view)
505
13.0k
{
506
    /* given a python object, return a data pointer, a length (in
507
       characters), and a character size.  return NULL if the object
508
       is not a string (or not compatible) */
509
510
    /* Unicode objects do not support the buffer API. So, get the data
511
       directly instead. */
512
13.0k
    if (PyUnicode_Check(string)) {
513
10.3k
        *p_length = PyUnicode_GET_LENGTH(string);
514
10.3k
        *p_charsize = PyUnicode_KIND(string);
515
0
        *p_isbytes = 0;
516
10.3k
        return PyUnicode_DATA(string);
517
10.3k
    }
518
519
    /* get pointer to byte string buffer */
520
2.62k
    if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
521
0
        PyErr_Format(PyExc_TypeError, "expected string or bytes-like "
522
0
                     "object, got '%.200s'", Py_TYPE(string)->tp_name);
523
0
        return NULL;
524
0
    }
525
526
2.62k
    *p_length = view->len;
527
2.62k
    *p_charsize = 1;
528
2.62k
    *p_isbytes = 1;
529
530
2.62k
    if (view->buf == NULL) {
531
0
        PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
532
0
        PyBuffer_Release(view);
533
0
        view->buf = NULL;
534
0
        return NULL;
535
0
    }
536
2.62k
    return view->buf;
537
2.62k
}
538
539
/* Prepare `state` for matching `pattern` against `string` within the
 * slice [start, end).
 *
 * The state is zeroed, the mark array is allocated (two slots per group),
 * the subject's data is obtained through getstring(), the pattern and
 * subject are checked to be both str or both bytes-like, and start/end
 * are clamped to [0, length].  On success a new reference to `string` is
 * stored in the state and `string` itself is returned.
 *
 * On failure NULL is returned with an exception set; the mark array and
 * any acquired buffer are released. */
LOCAL(PyObject*)
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
           Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t length;
    int isbytes, charsize;
    const void* data;

    memset(state, 0, sizeof(SRE_STATE));

    state->mark = PyMem_New(const void *, pattern->groups * 2);
    if (state->mark == NULL) {
        PyErr_NoMemory();
        goto err;
    }
    state->lastmark = -1;
    state->lastindex = -1;

    state->buffer.buf = NULL;
    data = getstring(string, &length, &isbytes, &charsize, &state->buffer);
    if (data == NULL) {
        goto err;
    }

    /* the pattern and the subject must be of the same kind */
    if (isbytes) {
        if (pattern->isbytes == 0) {
            PyErr_SetString(PyExc_TypeError,
                            "cannot use a string pattern on a bytes-like object");
            goto err;
        }
    }
    else if (pattern->isbytes > 0) {
        PyErr_SetString(PyExc_TypeError,
                        "cannot use a bytes pattern on a string-like object");
        goto err;
    }

    /* clamp the boundaries to [0, length] */
    start = (start < 0) ? 0 : ((start > length) ? length : start);
    end = (end < 0) ? 0 : ((end > length) ? length : end);

    state->isbytes = isbytes;
    state->charsize = charsize;
    state->match_all = 0;
    state->must_advance = 0;
    state->debug = ((pattern->flags & SRE_FLAG_DEBUG) != 0);

    /* start/end are character indices; scale them by the character size
       to obtain byte addresses */
    state->beginning = data;
    state->start = (void*) ((char*) data + start * state->charsize);
    state->end = (void*) ((char*) data + end * state->charsize);

    state->string = Py_NewRef(string);
    state->pos = start;
    state->endpos = end;

#ifdef Py_DEBUG
    state->fail_after_count = pattern->fail_after_count;
    state->fail_after_exc = pattern->fail_after_exc; // borrowed ref
#endif

    return string;

  err:
    /* The explicit cast is needed because MSVC has a bug when compiling
       C code where it believes that `const void**` cannot be safely cast
       to `void*`, see bpo-39943 for details. */
    PyMem_Free((void*) state->mark);
    state->mark = NULL;
    if (state->buffer.buf != NULL) {
        PyBuffer_Release(&state->buffer);
    }
    return NULL;
}
617
618
LOCAL(void)
619
state_fini(SRE_STATE* state)
620
10.7k
{
621
10.7k
    if (state->buffer.buf)
622
466
        PyBuffer_Release(&state->buffer);
623
10.7k
    Py_XDECREF(state->string);
624
10.7k
    data_stack_dealloc(state);
625
    /* See above PyMem_Free() for why we explicitly cast here. */
626
10.7k
    PyMem_Free((void*) state->mark);
627
10.7k
    state->mark = NULL;
628
    /* SRE_REPEAT pool */
629
10.7k
    repeat_pool_clear(state);
630
10.7k
}
631
632
/* calculate offset from start of string */
633
#define STATE_OFFSET(state, member)\
634
0
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
635
636
/* Build the object for characters [start, end) of the subject.
 *
 * For str subjects this is PyUnicode_Substring(string, start, end).
 * For bytes-like subjects a new bytes object is created from `ptr`,
 * except that an exact bytes object sliced over its full length is
 * returned as a new reference to itself. */
LOCAL(PyObject*)
getslice(int isbytes, const void *ptr,
         PyObject* string, Py_ssize_t start, Py_ssize_t end)
{
    if (!isbytes) {
        return PyUnicode_Substring(string, start, end);
    }

    if (PyBytes_CheckExact(string) &&
        start == 0 && end == PyBytes_GET_SIZE(string)) {
        /* whole-string slice of an exact bytes object: no copy needed */
        return Py_NewRef(string);
    }

    const char *first = (const char *)ptr + start;
    return PyBytes_FromStringAndSize(first, end - start);
}
652
653
/* Return the text captured by group number `index` (callers pass 1 for
 * the first group) as a slice of `string`.
 *
 * If `string` is None, or the group's marks were not set, the result is
 * an empty slice when `empty` is true and None otherwise.  A group whose
 * start lies after its end raises SystemError and returns NULL. */
LOCAL(PyObject*)
state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
{
    Py_ssize_t lo, hi;

    /* convert the group number into the offset of its mark pair */
    index = (index - 1) * 2;

    /* The order of the tests matters: the mark array is only read once
       the index has been checked against lastmark. */
    int unmatched = (string == Py_None
                     || index >= state->lastmark
                     || !state->mark[index]
                     || !state->mark[index+1]);

    if (unmatched) {
        if (!empty) {
            Py_RETURN_NONE;
        }
        /* want empty string */
        lo = 0;
        hi = 0;
    }
    else {
        lo = STATE_OFFSET(state, state->mark[index]);
        hi = STATE_OFFSET(state, state->mark[index+1]);

        /* check wrong span */
        if (lo > hi) {
            PyErr_SetString(PyExc_SystemError,
                            "The span of capturing group is wrong,"
                            " please report a bug for the re module.");
            return NULL;
        }
    }

    return getslice(state->isbytes, state->beginning, string, lo, hi);
}
682
683
static void
684
pattern_error(Py_ssize_t status)
685
0
{
686
0
    switch (status) {
687
0
    case SRE_ERROR_RECURSION_LIMIT:
688
        /* This error code seems to be unused. */
689
0
        PyErr_SetString(
690
0
            PyExc_RecursionError,
691
0
            "maximum recursion limit exceeded"
692
0
            );
693
0
        break;
694
0
    case SRE_ERROR_MEMORY:
695
0
        PyErr_NoMemory();
696
0
        break;
697
0
    case SRE_ERROR_INTERRUPTED:
698
    /* An exception has already been raised, so let it fly */
699
0
        break;
700
0
    default:
701
        /* other error codes indicate compiler/engine bugs */
702
0
        PyErr_SetString(
703
0
            PyExc_RuntimeError,
704
0
            "internal error in regular expression engine"
705
0
            );
706
0
    }
707
0
}
708
709
static int
710
pattern_traverse(PyObject *op, visitproc visit, void *arg)
711
122k
{
712
122k
    PatternObject *self = _PatternObject_CAST(op);
713
122k
    Py_VISIT(Py_TYPE(self));
714
122k
    Py_VISIT(self->groupindex);
715
122k
    Py_VISIT(self->indexgroup);
716
122k
    Py_VISIT(self->pattern);
717
#ifdef Py_DEBUG
718
    Py_VISIT(self->fail_after_exc);
719
#endif
720
122k
    return 0;
721
122k
}
722
723
static int
724
pattern_clear(PyObject *op)
725
1.63k
{
726
1.63k
    PatternObject *self = _PatternObject_CAST(op);
727
1.63k
    Py_CLEAR(self->groupindex);
728
1.63k
    Py_CLEAR(self->indexgroup);
729
1.63k
    Py_CLEAR(self->pattern);
730
#ifdef Py_DEBUG
731
    Py_CLEAR(self->fail_after_exc);
732
#endif
733
1.63k
    return 0;
734
1.63k
}
735
736
static void
737
pattern_dealloc(PyObject *self)
738
1.63k
{
739
1.63k
    PyTypeObject *tp = Py_TYPE(self);
740
1.63k
    PyObject_GC_UnTrack(self);
741
1.63k
    FT_CLEAR_WEAKREFS(self, _PatternObject_CAST(self)->weakreflist);
742
1.63k
    (void)pattern_clear(self);
743
1.63k
    tp->tp_free(self);
744
1.63k
    Py_DECREF(tp);
745
1.63k
}
746
747
LOCAL(Py_ssize_t)
748
sre_match(SRE_STATE* state, SRE_CODE* pattern)
749
10.7k
{
750
10.7k
    if (state->charsize == 1)
751
6.75k
        return sre_ucs1_match(state, pattern, 1);
752
4.04k
    if (state->charsize == 2)
753
1.80k
        return sre_ucs2_match(state, pattern, 1);
754
4.04k
    assert(state->charsize == 4);
755
2.23k
    return sre_ucs4_match(state, pattern, 1);
756
2.23k
}
757
758
LOCAL(Py_ssize_t)
759
sre_search(SRE_STATE* state, SRE_CODE* pattern)
760
0
{
761
0
    if (state->charsize == 1)
762
0
        return sre_ucs1_search(state, pattern);
763
0
    if (state->charsize == 2)
764
0
        return sre_ucs2_search(state, pattern);
765
0
    assert(state->charsize == 4);
766
0
    return sre_ucs4_search(state, pattern);
767
0
}
768
769
/*[clinic input]
770
_sre.SRE_Pattern.prefixmatch
771
772
    cls: defining_class
773
    /
774
    string: object
775
    pos: Py_ssize_t = 0
776
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
777
778
Matches zero or more characters at the beginning of the string.
779
[clinic start generated code]*/
780
781
static PyObject *
782
_sre_SRE_Pattern_prefixmatch_impl(PatternObject *self, PyTypeObject *cls,
783
                                  PyObject *string, Py_ssize_t pos,
784
                                  Py_ssize_t endpos)
785
/*[clinic end generated code: output=a0e079fb4f875240 input=e2a7e68ea47d048c]*/
786
10.7k
{
787
10.7k
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
788
10.7k
    SRE_STATE state;
789
10.7k
    Py_ssize_t status;
790
10.7k
    PyObject *match;
791
792
10.7k
    if (!state_init(&state, self, string, pos, endpos))
793
0
        return NULL;
794
795
10.7k
    INIT_TRACE(&state);
796
10.7k
    state.ptr = state.start;
797
798
10.7k
    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
799
800
10.7k
    status = sre_match(&state, PatternObject_GetCode(self));
801
802
10.7k
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
803
10.7k
    if (PyErr_Occurred()) {
804
0
        state_fini(&state);
805
0
        return NULL;
806
0
    }
807
808
10.7k
    match = pattern_new_match(module_state, self, &state, status);
809
10.7k
    state_fini(&state);
810
10.7k
    return match;
811
10.7k
}
812
813
814
/*[clinic input]
815
_sre.SRE_Pattern.fullmatch
816
817
    cls: defining_class
818
    /
819
    string: object
820
    pos: Py_ssize_t = 0
821
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
822
823
Matches against all of the string.
824
[clinic start generated code]*/
825
826
static PyObject *
827
_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
828
                                PyObject *string, Py_ssize_t pos,
829
                                Py_ssize_t endpos)
830
/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
831
0
{
832
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
833
0
    SRE_STATE state;
834
0
    Py_ssize_t status;
835
0
    PyObject *match;
836
837
0
    if (!state_init(&state, self, string, pos, endpos))
838
0
        return NULL;
839
840
0
    INIT_TRACE(&state);
841
0
    state.ptr = state.start;
842
843
0
    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
844
845
0
    state.match_all = 1;
846
0
    status = sre_match(&state, PatternObject_GetCode(self));
847
848
0
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
849
0
    if (PyErr_Occurred()) {
850
0
        state_fini(&state);
851
0
        return NULL;
852
0
    }
853
854
0
    match = pattern_new_match(module_state, self, &state, status);
855
0
    state_fini(&state);
856
0
    return match;
857
0
}
858
859
/*[clinic input]
860
@permit_long_summary
861
_sre.SRE_Pattern.search
862
863
    cls: defining_class
864
    /
865
    string: object
866
    pos: Py_ssize_t = 0
867
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
868
869
Scan through string looking for a match, and return a corresponding match object instance.
870
871
Return None if no position in the string matches.
872
[clinic start generated code]*/
873
874
static PyObject *
875
_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
876
                             PyObject *string, Py_ssize_t pos,
877
                             Py_ssize_t endpos)
878
/*[clinic end generated code: output=bd7f2d9d583e1463 input=05e9feee0334c156]*/
879
0
{
880
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
881
0
    SRE_STATE state;
882
0
    Py_ssize_t status;
883
0
    PyObject *match;
884
885
0
    if (!state_init(&state, self, string, pos, endpos))
886
0
        return NULL;
887
888
0
    INIT_TRACE(&state);
889
0
    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
890
891
0
    status = sre_search(&state, PatternObject_GetCode(self));
892
893
0
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
894
895
0
    if (PyErr_Occurred()) {
896
0
        state_fini(&state);
897
0
        return NULL;
898
0
    }
899
900
0
    match = pattern_new_match(module_state, self, &state, status);
901
0
    state_fini(&state);
902
0
    return match;
903
0
}
904
905
/*[clinic input]
906
_sre.SRE_Pattern.findall
907
908
    string: object
909
    pos: Py_ssize_t = 0
910
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
911
912
Return a list of all non-overlapping matches of pattern in string.
913
[clinic start generated code]*/
914
915
static PyObject *
916
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
917
                              Py_ssize_t pos, Py_ssize_t endpos)
918
/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
919
0
{
920
0
    SRE_STATE state;
921
0
    PyObject* list;
922
0
    Py_ssize_t status;
923
0
    Py_ssize_t i, b, e;
924
925
0
    if (!state_init(&state, self, string, pos, endpos))
926
0
        return NULL;
927
928
0
    list = PyList_New(0);
929
0
    if (!list) {
930
0
        state_fini(&state);
931
0
        return NULL;
932
0
    }
933
934
0
    while (state.start <= state.end) {
935
936
0
        PyObject* item;
937
938
0
        state_reset(&state);
939
940
0
        state.ptr = state.start;
941
942
0
        status = sre_search(&state, PatternObject_GetCode(self));
943
0
        if (PyErr_Occurred())
944
0
            goto error;
945
946
0
        if (status <= 0) {
947
0
            if (status == 0)
948
0
                break;
949
0
            pattern_error(status);
950
0
            goto error;
951
0
        }
952
953
        /* don't bother to build a match object */
954
0
        switch (self->groups) {
955
0
        case 0:
956
0
            b = STATE_OFFSET(&state, state.start);
957
0
            e = STATE_OFFSET(&state, state.ptr);
958
0
            item = getslice(state.isbytes, state.beginning,
959
0
                            string, b, e);
960
0
            if (!item)
961
0
                goto error;
962
0
            break;
963
0
        case 1:
964
0
            item = state_getslice(&state, 1, string, 1);
965
0
            if (!item)
966
0
                goto error;
967
0
            break;
968
0
        default:
969
0
            item = PyTuple_New(self->groups);
970
0
            if (!item)
971
0
                goto error;
972
0
            for (i = 0; i < self->groups; i++) {
973
0
                PyObject* o = state_getslice(&state, i+1, string, 1);
974
0
                if (!o) {
975
0
                    Py_DECREF(item);
976
0
                    goto error;
977
0
                }
978
0
                PyTuple_SET_ITEM(item, i, o);
979
0
            }
980
0
            break;
981
0
        }
982
983
0
        status = PyList_Append(list, item);
984
0
        Py_DECREF(item);
985
0
        if (status < 0)
986
0
            goto error;
987
988
0
        state.must_advance = (state.ptr == state.start);
989
0
        state.start = state.ptr;
990
0
    }
991
992
0
    state_fini(&state);
993
0
    return list;
994
995
0
error:
996
0
    Py_DECREF(list);
997
0
    state_fini(&state);
998
0
    return NULL;
999
1000
0
}
1001
1002
/*[clinic input]
1003
@permit_long_summary
1004
_sre.SRE_Pattern.finditer
1005
1006
    cls: defining_class
1007
    /
1008
    string: object
1009
    pos: Py_ssize_t = 0
1010
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1011
1012
Return an iterator over all non-overlapping matches for the RE pattern in string.
1013
1014
For each match, the iterator returns a match object.
1015
[clinic start generated code]*/
1016
1017
static PyObject *
1018
_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
1019
                               PyObject *string, Py_ssize_t pos,
1020
                               Py_ssize_t endpos)
1021
/*[clinic end generated code: output=1791dbf3618ade56 input=ee28865796048023]*/
1022
0
{
1023
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1024
0
    PyObject* scanner;
1025
0
    PyObject* search;
1026
0
    PyObject* iterator;
1027
1028
0
    scanner = pattern_scanner(module_state, self, string, pos, endpos);
1029
0
    if (!scanner)
1030
0
        return NULL;
1031
1032
0
    search = PyObject_GetAttrString(scanner, "search");
1033
0
    Py_DECREF(scanner);
1034
0
    if (!search)
1035
0
        return NULL;
1036
1037
0
    iterator = PyCallIter_New(search, Py_None);
1038
0
    Py_DECREF(search);
1039
1040
0
    return iterator;
1041
0
}
1042
1043
/*[clinic input]
1044
_sre.SRE_Pattern.scanner
1045
1046
    cls: defining_class
1047
    /
1048
    string: object
1049
    pos: Py_ssize_t = 0
1050
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1051
1052
[clinic start generated code]*/
1053
1054
static PyObject *
1055
_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
1056
                              PyObject *string, Py_ssize_t pos,
1057
                              Py_ssize_t endpos)
1058
/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
1059
0
{
1060
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1061
1062
0
    return pattern_scanner(module_state, self, string, pos, endpos);
1063
0
}
1064
1065
/*[clinic input]
1066
_sre.SRE_Pattern.split
1067
1068
    string: object
1069
    maxsplit: Py_ssize_t = 0
1070
1071
Split string by the occurrences of pattern.
1072
[clinic start generated code]*/
1073
1074
static PyObject *
1075
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
1076
                            Py_ssize_t maxsplit)
1077
/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
1078
0
{
1079
0
    SRE_STATE state;
1080
0
    PyObject* list;
1081
0
    PyObject* item;
1082
0
    Py_ssize_t status;
1083
0
    Py_ssize_t n;
1084
0
    Py_ssize_t i;
1085
0
    const void* last;
1086
1087
0
    assert(self->codesize != 0);
1088
1089
0
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
1090
0
        return NULL;
1091
1092
0
    list = PyList_New(0);
1093
0
    if (!list) {
1094
0
        state_fini(&state);
1095
0
        return NULL;
1096
0
    }
1097
1098
0
    n = 0;
1099
0
    last = state.start;
1100
1101
0
    while (!maxsplit || n < maxsplit) {
1102
1103
0
        state_reset(&state);
1104
1105
0
        state.ptr = state.start;
1106
1107
0
        status = sre_search(&state, PatternObject_GetCode(self));
1108
0
        if (PyErr_Occurred())
1109
0
            goto error;
1110
1111
0
        if (status <= 0) {
1112
0
            if (status == 0)
1113
0
                break;
1114
0
            pattern_error(status);
1115
0
            goto error;
1116
0
        }
1117
1118
        /* get segment before this match */
1119
0
        item = getslice(state.isbytes, state.beginning,
1120
0
            string, STATE_OFFSET(&state, last),
1121
0
            STATE_OFFSET(&state, state.start)
1122
0
            );
1123
0
        if (!item)
1124
0
            goto error;
1125
0
        status = PyList_Append(list, item);
1126
0
        Py_DECREF(item);
1127
0
        if (status < 0)
1128
0
            goto error;
1129
1130
        /* add groups (if any) */
1131
0
        for (i = 0; i < self->groups; i++) {
1132
0
            item = state_getslice(&state, i+1, string, 0);
1133
0
            if (!item)
1134
0
                goto error;
1135
0
            status = PyList_Append(list, item);
1136
0
            Py_DECREF(item);
1137
0
            if (status < 0)
1138
0
                goto error;
1139
0
        }
1140
1141
0
        n = n + 1;
1142
0
        state.must_advance = (state.ptr == state.start);
1143
0
        last = state.start = state.ptr;
1144
1145
0
    }
1146
1147
    /* get segment following last match (even if empty) */
1148
0
    item = getslice(state.isbytes, state.beginning,
1149
0
        string, STATE_OFFSET(&state, last), state.endpos
1150
0
        );
1151
0
    if (!item)
1152
0
        goto error;
1153
0
    status = PyList_Append(list, item);
1154
0
    Py_DECREF(item);
1155
0
    if (status < 0)
1156
0
        goto error;
1157
1158
0
    state_fini(&state);
1159
0
    return list;
1160
1161
0
error:
1162
0
    Py_DECREF(list);
1163
0
    state_fini(&state);
1164
0
    return NULL;
1165
1166
0
}
1167
1168
static PyObject *
1169
compile_template(_sremodulestate *module_state,
1170
                 PatternObject *pattern, PyObject *template)
1171
0
{
1172
    /* delegate to Python code */
1173
0
    PyObject *func = FT_ATOMIC_LOAD_PTR(module_state->compile_template);
1174
0
    if (func == NULL) {
1175
0
        func = PyImport_ImportModuleAttrString("re", "_compile_template");
1176
0
        if (func == NULL) {
1177
0
            return NULL;
1178
0
        }
1179
#ifdef Py_GIL_DISABLED
1180
        PyObject *other_func = NULL;
1181
        if (!_Py_atomic_compare_exchange_ptr(&module_state->compile_template, &other_func, func))  {
1182
            Py_DECREF(func);
1183
            func = other_func;
1184
        }
1185
#else
1186
0
        Py_XSETREF(module_state->compile_template, func);
1187
0
#endif
1188
0
    }
1189
1190
0
    PyObject *args[] = {(PyObject *)pattern, template};
1191
0
    PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);
1192
1193
0
    if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
1194
        /* If the replacement string is unhashable (e.g. bytearray),
1195
         * convert it to the basic type (str or bytes) and repeat. */
1196
0
        if (PyUnicode_Check(template) && !PyUnicode_CheckExact(template)) {
1197
0
            PyErr_Clear();
1198
0
            template = _PyUnicode_Copy(template);
1199
0
        }
1200
0
        else if (PyObject_CheckBuffer(template) && !PyBytes_CheckExact(template)) {
1201
0
            PyErr_Clear();
1202
0
            template = PyBytes_FromObject(template);
1203
0
        }
1204
0
        else {
1205
0
            return NULL;
1206
0
        }
1207
0
        if (template == NULL) {
1208
0
            return NULL;
1209
0
        }
1210
0
        args[1] = template;
1211
0
        result = PyObject_Vectorcall(func, args, 2, NULL);
1212
0
        Py_DECREF(template);
1213
0
    }
1214
1215
0
    if (result != NULL && Py_TYPE(result) != module_state->Template_Type) {
1216
0
        PyErr_Format(PyExc_RuntimeError,
1217
0
                    "the result of compiling a replacement string is %.200s",
1218
0
                    Py_TYPE(result)->tp_name);
1219
0
        Py_DECREF(result);
1220
0
        return NULL;
1221
0
    }
1222
0
    return result;
1223
0
}
1224
1225
static PyObject *expand_template(TemplateObject *, MatchObject *); /* Forward */
1226
1227
static PyObject*
1228
pattern_subx(_sremodulestate* module_state,
1229
             PatternObject* self,
1230
             PyObject* ptemplate,
1231
             PyObject* string,
1232
             Py_ssize_t count,
1233
             Py_ssize_t subn)
1234
0
{
1235
0
    SRE_STATE state;
1236
0
    PyObject* list;
1237
0
    PyObject* joiner;
1238
0
    PyObject* item;
1239
0
    PyObject* filter;
1240
0
    PyObject* match;
1241
0
    const void* ptr;
1242
0
    Py_ssize_t status;
1243
0
    Py_ssize_t n;
1244
0
    Py_ssize_t i, b, e;
1245
0
    int isbytes, charsize;
1246
0
    enum {LITERAL, TEMPLATE, CALLABLE} filter_type;
1247
0
    Py_buffer view;
1248
1249
0
    if (PyCallable_Check(ptemplate)) {
1250
        /* sub/subn takes either a function or a template */
1251
0
        filter = Py_NewRef(ptemplate);
1252
0
        filter_type = CALLABLE;
1253
0
    } else {
1254
        /* if not callable, check if it's a literal string */
1255
0
        int literal;
1256
0
        view.buf = NULL;
1257
0
        ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1258
0
        if (ptr) {
1259
0
            if (charsize == 1)
1260
0
                literal = memchr(ptr, '\\', n) == NULL;
1261
0
            else
1262
0
                literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1263
0
        } else {
1264
0
            PyErr_Clear();
1265
0
            literal = 0;
1266
0
        }
1267
0
        if (view.buf)
1268
0
            PyBuffer_Release(&view);
1269
0
        if (literal) {
1270
0
            filter = Py_NewRef(ptemplate);
1271
0
            filter_type = LITERAL;
1272
0
        } else {
1273
            /* not a literal; hand it over to the template compiler */
1274
0
            filter = compile_template(module_state, self, ptemplate);
1275
0
            if (!filter)
1276
0
                return NULL;
1277
1278
0
            assert(Py_TYPE(filter) == module_state->Template_Type);
1279
0
            if (Py_SIZE(filter) == 0) {
1280
0
                Py_SETREF(filter,
1281
0
                          Py_NewRef(((TemplateObject *)filter)->literal));
1282
0
                filter_type = LITERAL;
1283
0
            }
1284
0
            else {
1285
0
                filter_type = TEMPLATE;
1286
0
            }
1287
0
        }
1288
0
    }
1289
1290
0
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1291
0
        Py_DECREF(filter);
1292
0
        return NULL;
1293
0
    }
1294
1295
0
    list = PyList_New(0);
1296
0
    if (!list) {
1297
0
        Py_DECREF(filter);
1298
0
        state_fini(&state);
1299
0
        return NULL;
1300
0
    }
1301
1302
0
    n = i = 0;
1303
1304
0
    while (!count || n < count) {
1305
1306
0
        state_reset(&state);
1307
1308
0
        state.ptr = state.start;
1309
1310
0
        status = sre_search(&state, PatternObject_GetCode(self));
1311
0
        if (PyErr_Occurred())
1312
0
            goto error;
1313
1314
0
        if (status <= 0) {
1315
0
            if (status == 0)
1316
0
                break;
1317
0
            pattern_error(status);
1318
0
            goto error;
1319
0
        }
1320
1321
0
        b = STATE_OFFSET(&state, state.start);
1322
0
        e = STATE_OFFSET(&state, state.ptr);
1323
1324
0
        if (i < b) {
1325
            /* get segment before this match */
1326
0
            item = getslice(state.isbytes, state.beginning,
1327
0
                string, i, b);
1328
0
            if (!item)
1329
0
                goto error;
1330
0
            status = PyList_Append(list, item);
1331
0
            Py_DECREF(item);
1332
0
            if (status < 0)
1333
0
                goto error;
1334
1335
0
        }
1336
1337
0
        if (filter_type != LITERAL) {
1338
            /* pass match object through filter */
1339
0
            match = pattern_new_match(module_state, self, &state, 1);
1340
0
            if (!match)
1341
0
                goto error;
1342
0
            if (filter_type == TEMPLATE) {
1343
0
                item = expand_template((TemplateObject *)filter,
1344
0
                                       (MatchObject *)match);
1345
0
            }
1346
0
            else {
1347
0
                assert(filter_type == CALLABLE);
1348
0
                item = PyObject_CallOneArg(filter, match);
1349
0
            }
1350
0
            Py_DECREF(match);
1351
0
            if (!item)
1352
0
                goto error;
1353
0
        } else {
1354
            /* filter is literal string */
1355
0
            item = Py_NewRef(filter);
1356
0
        }
1357
1358
        /* add to list */
1359
0
        if (item != Py_None) {
1360
0
            status = PyList_Append(list, item);
1361
0
            Py_DECREF(item);
1362
0
            if (status < 0)
1363
0
                goto error;
1364
0
        }
1365
1366
0
        i = e;
1367
0
        n = n + 1;
1368
0
        state.must_advance = (state.ptr == state.start);
1369
0
        state.start = state.ptr;
1370
0
    }
1371
1372
    /* get segment following last match */
1373
0
    if (i < state.endpos) {
1374
0
        item = getslice(state.isbytes, state.beginning,
1375
0
                        string, i, state.endpos);
1376
0
        if (!item)
1377
0
            goto error;
1378
0
        status = PyList_Append(list, item);
1379
0
        Py_DECREF(item);
1380
0
        if (status < 0)
1381
0
            goto error;
1382
0
    }
1383
1384
0
    state_fini(&state);
1385
1386
0
    Py_DECREF(filter);
1387
1388
    /* convert list to single string (also removes list) */
1389
0
    joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1390
0
    if (!joiner) {
1391
0
        Py_DECREF(list);
1392
0
        return NULL;
1393
0
    }
1394
0
    if (PyList_GET_SIZE(list) == 0) {
1395
0
        Py_DECREF(list);
1396
0
        item = joiner;
1397
0
    }
1398
0
    else {
1399
0
        if (state.isbytes)
1400
0
            item = PyBytes_Join(joiner, list);
1401
0
        else
1402
0
            item = PyUnicode_Join(joiner, list);
1403
0
        Py_DECREF(joiner);
1404
0
        Py_DECREF(list);
1405
0
        if (!item)
1406
0
            return NULL;
1407
0
    }
1408
1409
0
    if (subn)
1410
0
        return Py_BuildValue("Nn", item, n);
1411
1412
0
    return item;
1413
1414
0
error:
1415
0
    Py_DECREF(list);
1416
0
    state_fini(&state);
1417
0
    Py_DECREF(filter);
1418
0
    return NULL;
1419
1420
0
}
1421
1422
/*[clinic input]
1423
@permit_long_summary
1424
_sre.SRE_Pattern.sub
1425
1426
    cls: defining_class
1427
    /
1428
    repl: object
1429
    string: object
1430
    count: Py_ssize_t = 0
1431
1432
Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1433
[clinic start generated code]*/
1434
1435
static PyObject *
1436
_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
1437
                          PyObject *repl, PyObject *string, Py_ssize_t count)
1438
/*[clinic end generated code: output=4be141ab04bca60d input=eba511fd1c4908b7]*/
1439
0
{
1440
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1441
1442
0
    return pattern_subx(module_state, self, repl, string, count, 0);
1443
0
}
1444
1445
/*[clinic input]
1446
@permit_long_summary
1447
_sre.SRE_Pattern.subn
1448
1449
    cls: defining_class
1450
    /
1451
    repl: object
1452
    string: object
1453
    count: Py_ssize_t = 0
1454
1455
Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1456
[clinic start generated code]*/
1457
1458
static PyObject *
1459
_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
1460
                           PyObject *repl, PyObject *string,
1461
                           Py_ssize_t count)
1462
/*[clinic end generated code: output=da02fd85258b1e1f input=6a5bb5b61717abf0]*/
1463
0
{
1464
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1465
1466
0
    return pattern_subx(module_state, self, repl, string, count, 1);
1467
0
}
1468
1469
/*[clinic input]
1470
_sre.SRE_Pattern.__copy__
1471
1472
[clinic start generated code]*/
1473
1474
static PyObject *
1475
_sre_SRE_Pattern___copy___impl(PatternObject *self)
1476
/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1477
0
{
1478
0
    return Py_NewRef(self);
1479
0
}
1480
1481
/*[clinic input]
1482
_sre.SRE_Pattern.__deepcopy__
1483
1484
    memo: object
1485
    /
1486
1487
[clinic start generated code]*/
1488
1489
static PyObject *
1490
_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1491
/*[clinic end generated code: output=75efe69bd12c5d7d input=a465b1602f997bed]*/
1492
0
{
1493
0
    return Py_NewRef(self);
1494
0
}
1495
1496
#ifdef Py_DEBUG
1497
/*[clinic input]
1498
_sre.SRE_Pattern._fail_after
1499
1500
    count: int
1501
    exception: object
1502
    /
1503
1504
For debugging.
1505
[clinic start generated code]*/
1506
1507
static PyObject *
1508
_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
1509
                                  PyObject *exception)
1510
/*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/
1511
{
1512
    self->fail_after_count = count;
1513
    Py_INCREF(exception);
1514
    Py_XSETREF(self->fail_after_exc, exception);
1515
    Py_RETURN_NONE;
1516
}
1517
#endif /* Py_DEBUG */
1518
1519
/* Append `item` to `list` and release the caller's reference to `item`.
 * Returns 0 on success, -1 when `item` is NULL or the append failed. */
static int
pattern_repr_add_flag(PyObject *list, PyObject *item)
{
    if (item == NULL) {
        return -1;
    }
    int rc = PyList_Append(list, item);
    Py_DECREF(item);
    return (rc < 0) ? -1 : 0;
}

/* repr() of a pattern: "re.compile(<pattern>)" or, when any flag remains
 * to be shown, "re.compile(<pattern>, FLAG|FLAG|0x...)".  Flag bits without
 * a name in the table are rendered as a single hexadecimal number. */
static PyObject *
pattern_repr(PyObject *self)
{
    static const struct {
        const char *name;
        int value;
    } flag_names[] = {
        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
        {"re.LOCALE", SRE_FLAG_LOCALE},
        {"re.MULTILINE", SRE_FLAG_MULTILINE},
        {"re.DOTALL", SRE_FLAG_DOTALL},
        {"re.UNICODE", SRE_FLAG_UNICODE},
        {"re.VERBOSE", SRE_FLAG_VERBOSE},
        {"re.DEBUG", SRE_FLAG_DEBUG},
        {"re.ASCII", SRE_FLAG_ASCII},
    };

    PatternObject *obj = _PatternObject_CAST(self);
    PyObject *result = NULL;
    PyObject *parts;
    int remaining = obj->flags;

    /* Omit re.UNICODE for valid string patterns. */
    if (obj->isbytes == 0 &&
        (remaining & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
         SRE_FLAG_UNICODE)
    {
        remaining &= ~SRE_FLAG_UNICODE;
    }

    parts = PyList_New(0);
    if (parts == NULL) {
        return NULL;
    }

    /* Named flags first; every bit that got a name is cleared. */
    for (size_t k = 0; k < Py_ARRAY_LENGTH(flag_names); k++) {
        if (!(remaining & flag_names[k].value)) {
            continue;
        }
        if (pattern_repr_add_flag(
                parts, PyUnicode_FromString(flag_names[k].name)) < 0)
        {
            goto done;
        }
        remaining &= ~flag_names[k].value;
    }

    /* Whatever is left has no name and is shown as hex. */
    if (remaining) {
        if (pattern_repr_add_flag(
                parts, PyUnicode_FromFormat("0x%x", remaining)) < 0)
        {
            goto done;
        }
    }

    if (PyList_Size(parts) <= 0) {
        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
    }
    else {
        PyObject *sep = PyUnicode_FromString("|");
        if (sep == NULL) {
            goto done;
        }
        PyObject *joined = PyUnicode_Join(sep, parts);
        Py_DECREF(sep);
        if (joined == NULL) {
            goto done;
        }
        result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
                                      obj->pattern, joined);
        Py_DECREF(joined);
    }

done:
    Py_DECREF(parts);
    return result;
}
1599
1600
PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1601
1602
/* PatternObject's 'groupindex' method. */
1603
static PyObject *
1604
pattern_groupindex(PyObject *op, void *Py_UNUSED(ignored))
1605
0
{
1606
0
    PatternObject *self = _PatternObject_CAST(op);
1607
0
    if (self->groupindex == NULL)
1608
0
        return PyDict_New();
1609
0
    return PyDictProxy_New(self->groupindex);
1610
0
}
1611
1612
static int _validate(PatternObject *self); /* Forward */
1613
1614
/*[clinic input]
1615
_sre.compile
1616
1617
    pattern: object
1618
    flags: int
1619
    code: object(subclass_of='&PyList_Type')
1620
    groups: Py_ssize_t
1621
    groupindex: object(subclass_of='&PyDict_Type')
1622
    indexgroup: object(subclass_of='&PyTuple_Type')
1623
1624
[clinic start generated code]*/
1625
1626
static PyObject *
1627
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1628
                  PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1629
                  PyObject *indexgroup)
1630
/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1631
2.20k
{
1632
    /* "compile" pattern descriptor to pattern object */
1633
1634
2.20k
    _sremodulestate *module_state = get_sre_module_state(module);
1635
2.20k
    PatternObject* self;
1636
2.20k
    Py_ssize_t i, n;
1637
1638
2.20k
    n = PyList_GET_SIZE(code);
1639
    /* coverity[ampersand_in_size] */
1640
2.20k
    self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
1641
2.20k
    if (!self)
1642
0
        return NULL;
1643
2.20k
    self->weakreflist = NULL;
1644
2.20k
    self->pattern = NULL;
1645
2.20k
    self->groupindex = NULL;
1646
2.20k
    self->indexgroup = NULL;
1647
#ifdef Py_DEBUG
1648
    self->fail_after_count = -1;
1649
    self->fail_after_exc = NULL;
1650
#endif
1651
1652
2.20k
    self->codesize = n;
1653
1654
18.0M
    for (i = 0; i < n; i++) {
1655
18.0M
        PyObject *o = PyList_GET_ITEM(code, i);
1656
0
        unsigned long value = PyLong_AsUnsignedLong(o);
1657
18.0M
        if (value == (unsigned long)-1 && PyErr_Occurred()) {
1658
0
            break;
1659
0
        }
1660
18.0M
        self->code[i] = (SRE_CODE) value;
1661
18.0M
        if ((unsigned long) self->code[i] != value) {
1662
0
            PyErr_SetString(PyExc_OverflowError,
1663
0
                            "regular expression code size limit exceeded");
1664
0
            break;
1665
0
        }
1666
18.0M
    }
1667
2.20k
    PyObject_GC_Track(self);
1668
1669
2.20k
    if (PyErr_Occurred()) {
1670
0
        Py_DECREF(self);
1671
0
        return NULL;
1672
0
    }
1673
1674
2.20k
    if (pattern == Py_None) {
1675
0
        self->isbytes = -1;
1676
0
    }
1677
2.20k
    else {
1678
2.20k
        Py_ssize_t p_length;
1679
2.20k
        int charsize;
1680
2.20k
        Py_buffer view;
1681
2.20k
        view.buf = NULL;
1682
2.20k
        if (!getstring(pattern, &p_length, &self->isbytes,
1683
2.20k
                       &charsize, &view)) {
1684
0
            Py_DECREF(self);
1685
0
            return NULL;
1686
0
        }
1687
2.20k
        if (view.buf)
1688
2.16k
            PyBuffer_Release(&view);
1689
2.20k
    }
1690
1691
2.20k
    self->pattern = Py_NewRef(pattern);
1692
1693
2.20k
    self->flags = flags;
1694
1695
2.20k
    self->groups = groups;
1696
1697
2.20k
    if (PyDict_GET_SIZE(groupindex) > 0) {
1698
39
        self->groupindex = Py_NewRef(groupindex);
1699
39
        if (PyTuple_GET_SIZE(indexgroup) > 0) {
1700
39
            self->indexgroup = Py_NewRef(indexgroup);
1701
39
        }
1702
39
    }
1703
1704
2.20k
    if (!_validate(self)) {
1705
0
        Py_DECREF(self);
1706
0
        return NULL;
1707
0
    }
1708
1709
2.20k
    return (PyObject*) self;
1710
2.20k
}
1711
1712
/*[clinic input]
1713
_sre.template
1714
1715
    pattern: object
1716
    template: object(subclass_of="&PyList_Type")
1717
        A list containing interleaved literal strings (str or bytes) and group
1718
        indices (int), as returned by re._parser.parse_template():
1719
            [literal1, group1, ..., literalN, groupN]
1720
    /
1721
1722
[clinic start generated code]*/
1723
1724
static PyObject *
_sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
/*[clinic end generated code: output=d51290e596ebca86 input=af55380b27f02942]*/
{
    /* template is a list containing interleaved literal strings (str or bytes)
     * and group indices (int), as returned by _parser.parse_template:
     * [literal1, group1, literal2, ..., literalN].
     *
     * The list therefore has an odd length of at least 1.  Any other length,
     * or a negative group index, raises TypeError("invalid template").
     */
    _sremodulestate *module_state = get_sre_module_state(module);
    const Py_ssize_t length = PyList_GET_SIZE(template);

    if (length < 1 || (length & 1) == 0) {
        PyErr_SetString(PyExc_TypeError, "invalid template");
        return NULL;
    }

    /* number of (group, literal) pairs that follow the leading literal */
    const Py_ssize_t pairs = length / 2;

    TemplateObject *self = PyObject_GC_NewVar(TemplateObject,
                                              module_state->Template_Type,
                                              pairs);
    if (self == NULL) {
        return NULL;
    }
    self->chunks = 1 + 2*pairs;
    self->literal = Py_NewRef(PyList_GET_ITEM(template, 0));

    for (Py_ssize_t k = 0; k < pairs; k++) {
        Py_ssize_t index = PyLong_AsSsize_t(PyList_GET_ITEM(template, 2*k+1));
        if (index < 0) {
            /* Shrink the object to the items filled in so far before it
             * is released. */
            Py_SET_SIZE(self, k);
            if (!(index == -1 && PyErr_Occurred())) {
                /* a genuine negative index, not a conversion failure */
                PyErr_SetString(PyExc_TypeError, "invalid template");
            }
            Py_DECREF(self);
            return NULL;
        }
        self->items[k].index = index;

        PyObject *text = PyList_GET_ITEM(template, 2*k+2);
        int is_empty =
            (PyUnicode_Check(text) && !PyUnicode_GET_LENGTH(text)) ||
            (PyBytes_Check(text) && !PyBytes_GET_SIZE(text));

        if (is_empty) {
            // Skip empty literals.
            self->items[k].literal = NULL;
            self->chunks--;
        }
        else {
            self->items[k].literal = Py_NewRef(text);
        }
    }

    PyObject_GC_Track(self);
    return (PyObject*) self;
}
1775
1776
/* -------------------------------------------------------------------- */
1777
/* Code validation */
1778
1779
/* To learn more about this code, have a look at the _compile() function in
1780
   Lib/sre_compile.py.  The validation functions below check the code array
1781
   for conformance with the code patterns generated there.
1782
1783
   The nice thing about the generated code is that it is position-independent:
1784
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
1785
   the target of a later jump is always earlier than the target of an earlier
1786
   jump.  IOW, this is okay:
1787
1788
   J---------J-------T--------T
1789
    \         \_____/        /
1790
     \______________________/
1791
1792
   but this is not:
1793
1794
   J---------J-------T--------T
1795
    \_________\_____/        /
1796
               \____________/
1797
1798
   It also helps that SRE_CODE is always an unsigned type.
1799
*/
1800
1801
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing function return -1 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)

/* Extract opcode, argument, or skip count from code array.
 *
 * These macros read and update variables of the enclosing function:
 * `code` (cursor) and `end` (one past the last word), plus `op`, `arg`
 * or `skip` as the destination.  Each one FAILs when the cursor is
 * already at or past `end`.
 */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL unless `skip - adj` words are available from
 * the position of the skip word itself.  `adj` is parenthesised so that a
 * compound argument such as `a + b` is subtracted as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1841
1842
static int
1843
_validate_charset(SRE_CODE *code, SRE_CODE *end)
1844
143k
{
1845
    /* Some variables are manipulated by the macros above */
1846
143k
    SRE_CODE op;
1847
143k
    SRE_CODE arg;
1848
143k
    SRE_CODE offset;
1849
143k
    int i;
1850
1851
345k
    while (code < end) {
1852
201k
        GET_OP;
1853
201k
        switch (op) {
1854
1855
7.48k
        case SRE_OP_NEGATE:
1856
7.48k
            break;
1857
1858
92.3k
        case SRE_OP_LITERAL:
1859
92.3k
            GET_ARG;
1860
92.3k
            break;
1861
1862
92.3k
        case SRE_OP_RANGE:
1863
3.58k
        case SRE_OP_RANGE_UNI_IGNORE:
1864
3.58k
            GET_ARG;
1865
3.58k
            GET_ARG;
1866
3.58k
            break;
1867
1868
44.5k
        case SRE_OP_CHARSET:
1869
44.5k
            offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1870
44.5k
            if (offset > (uintptr_t)(end - code))
1871
0
                FAIL;
1872
44.5k
            code += offset;
1873
44.5k
            break;
1874
1875
2
        case SRE_OP_BIGCHARSET:
1876
2
            GET_ARG; /* Number of blocks */
1877
2
            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1878
2
            if (offset > (uintptr_t)(end - code))
1879
0
                FAIL;
1880
            /* Make sure that each byte points to a valid block */
1881
514
            for (i = 0; i < 256; i++) {
1882
512
                if (((unsigned char *)code)[i] >= arg)
1883
0
                    FAIL;
1884
512
            }
1885
2
            code += offset;
1886
2
            offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1887
2
            if (offset > (uintptr_t)(end - code))
1888
0
                FAIL;
1889
2
            code += offset;
1890
2
            break;
1891
1892
53.9k
        case SRE_OP_CATEGORY:
1893
53.9k
            GET_ARG;
1894
53.9k
            switch (arg) {
1895
27.1k
            case SRE_CATEGORY_DIGIT:
1896
40.3k
            case SRE_CATEGORY_NOT_DIGIT:
1897
44.9k
            case SRE_CATEGORY_SPACE:
1898
45.0k
            case SRE_CATEGORY_NOT_SPACE:
1899
53.7k
            case SRE_CATEGORY_WORD:
1900
53.7k
            case SRE_CATEGORY_NOT_WORD:
1901
53.7k
            case SRE_CATEGORY_LINEBREAK:
1902
53.7k
            case SRE_CATEGORY_NOT_LINEBREAK:
1903
53.8k
            case SRE_CATEGORY_LOC_WORD:
1904
53.8k
            case SRE_CATEGORY_LOC_NOT_WORD:
1905
53.9k
            case SRE_CATEGORY_UNI_DIGIT:
1906
53.9k
            case SRE_CATEGORY_UNI_NOT_DIGIT:
1907
53.9k
            case SRE_CATEGORY_UNI_SPACE:
1908
53.9k
            case SRE_CATEGORY_UNI_NOT_SPACE:
1909
53.9k
            case SRE_CATEGORY_UNI_WORD:
1910
53.9k
            case SRE_CATEGORY_UNI_NOT_WORD:
1911
53.9k
            case SRE_CATEGORY_UNI_LINEBREAK:
1912
53.9k
            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1913
53.9k
                break;
1914
0
            default:
1915
0
                FAIL;
1916
53.9k
            }
1917
53.9k
            break;
1918
1919
53.9k
        default:
1920
0
            FAIL;
1921
1922
201k
        }
1923
201k
    }
1924
1925
143k
    return 0;
1926
143k
}
1927
1928
/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
1929
static int
1930
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1931
1.65M
{
1932
    /* Some variables are manipulated by the macros above */
1933
1.65M
    SRE_CODE op;
1934
1.65M
    SRE_CODE arg;
1935
1.65M
    SRE_CODE skip;
1936
1937
1.65M
    VTRACE(("code=%p, end=%p\n", code, end));
1938
1939
1.65M
    if (code > end)
1940
0
        FAIL;
1941
1942
6.64M
    while (code < end) {
1943
4.99M
        GET_OP;
1944
4.99M
        switch (op) {
1945
1946
817k
        case SRE_OP_MARK:
1947
            /* We don't check whether marks are properly nested; the
1948
               sre_match() code is robust even if they don't, and the worst
1949
               you can get is nonsensical match results. */
1950
817k
            GET_ARG;
1951
817k
            if (arg >= 2 * (size_t)groups) {
1952
0
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1953
0
                FAIL;
1954
0
            }
1955
817k
            break;
1956
1957
2.44M
        case SRE_OP_LITERAL:
1958
2.44M
        case SRE_OP_NOT_LITERAL:
1959
2.56M
        case SRE_OP_LITERAL_IGNORE:
1960
2.56M
        case SRE_OP_NOT_LITERAL_IGNORE:
1961
2.56M
        case SRE_OP_LITERAL_UNI_IGNORE:
1962
2.56M
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1963
2.76M
        case SRE_OP_LITERAL_LOC_IGNORE:
1964
2.76M
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1965
2.76M
            GET_ARG;
1966
            /* The arg is just a character, nothing to check */
1967
2.76M
            break;
1968
1969
2.76M
        case SRE_OP_SUCCESS:
1970
243
        case SRE_OP_FAILURE:
1971
            /* Nothing to check; these normally end the matching process */
1972
243
            break;
1973
1974
158k
        case SRE_OP_AT:
1975
158k
            GET_ARG;
1976
158k
            switch (arg) {
1977
120k
            case SRE_AT_BEGINNING:
1978
120k
            case SRE_AT_BEGINNING_STRING:
1979
155k
            case SRE_AT_BEGINNING_LINE:
1980
157k
            case SRE_AT_END:
1981
157k
            case SRE_AT_END_LINE:
1982
157k
            case SRE_AT_END_STRING:
1983
157k
            case SRE_AT_BOUNDARY:
1984
158k
            case SRE_AT_NON_BOUNDARY:
1985
158k
            case SRE_AT_LOC_BOUNDARY:
1986
158k
            case SRE_AT_LOC_NON_BOUNDARY:
1987
158k
            case SRE_AT_UNI_BOUNDARY:
1988
158k
            case SRE_AT_UNI_NON_BOUNDARY:
1989
158k
                break;
1990
0
            default:
1991
0
                FAIL;
1992
158k
            }
1993
158k
            break;
1994
1995
158k
        case SRE_OP_ANY:
1996
9.50k
        case SRE_OP_ANY_ALL:
1997
            /* These have no operands */
1998
9.50k
            break;
1999
2000
75.6k
        case SRE_OP_IN:
2001
100k
        case SRE_OP_IN_IGNORE:
2002
100k
        case SRE_OP_IN_UNI_IGNORE:
2003
142k
        case SRE_OP_IN_LOC_IGNORE:
2004
142k
            GET_SKIP;
2005
            /* Stop 1 before the end; we check the FAILURE below */
2006
142k
            if (_validate_charset(code, code+skip-2))
2007
0
                FAIL;
2008
142k
            if (code[skip-2] != SRE_OP_FAILURE)
2009
0
                FAIL;
2010
142k
            code += skip-1;
2011
142k
            break;
2012
2013
2.20k
        case SRE_OP_INFO:
2014
2.20k
            {
2015
                /* A minimal info field is
2016
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
2017
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
2018
                   more follows. */
2019
2.20k
                SRE_CODE flags, i;
2020
2.20k
                SRE_CODE *newcode;
2021
2.20k
                GET_SKIP;
2022
2.20k
                newcode = code+skip-1;
2023
2.20k
                GET_ARG; flags = arg;
2024
2.20k
                GET_ARG;
2025
2.20k
                GET_ARG;
2026
                /* Check that only valid flags are present */
2027
2.20k
                if ((flags & ~(SRE_INFO_PREFIX |
2028
2.20k
                               SRE_INFO_LITERAL |
2029
2.20k
                               SRE_INFO_CHARSET)) != 0)
2030
0
                    FAIL;
2031
                /* PREFIX and CHARSET are mutually exclusive */
2032
2.20k
                if ((flags & SRE_INFO_PREFIX) &&
2033
625
                    (flags & SRE_INFO_CHARSET))
2034
0
                    FAIL;
2035
                /* LITERAL implies PREFIX */
2036
2.20k
                if ((flags & SRE_INFO_LITERAL) &&
2037
249
                    !(flags & SRE_INFO_PREFIX))
2038
0
                    FAIL;
2039
                /* Validate the prefix */
2040
2.20k
                if (flags & SRE_INFO_PREFIX) {
2041
625
                    SRE_CODE prefix_len;
2042
625
                    GET_ARG; prefix_len = arg;
2043
625
                    GET_ARG;
2044
                    /* Here comes the prefix string */
2045
625
                    if (prefix_len > (uintptr_t)(newcode - code))
2046
0
                        FAIL;
2047
625
                    code += prefix_len;
2048
                    /* And here comes the overlap table */
2049
625
                    if (prefix_len > (uintptr_t)(newcode - code))
2050
0
                        FAIL;
2051
                    /* Each overlap value should be < prefix_len */
2052
1.14M
                    for (i = 0; i < prefix_len; i++) {
2053
1.13M
                        if (code[i] >= prefix_len)
2054
0
                            FAIL;
2055
1.13M
                    }
2056
625
                    code += prefix_len;
2057
625
                }
2058
                /* Validate the charset */
2059
2.20k
                if (flags & SRE_INFO_CHARSET) {
2060
265
                    if (_validate_charset(code, newcode-1))
2061
0
                        FAIL;
2062
265
                    if (newcode[-1] != SRE_OP_FAILURE)
2063
0
                        FAIL;
2064
265
                    code = newcode;
2065
265
                }
2066
1.93k
                else if (code != newcode) {
2067
0
                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
2068
0
                    FAIL;
2069
0
                }
2070
2.20k
            }
2071
2.20k
            break;
2072
2073
239k
        case SRE_OP_BRANCH:
2074
239k
            {
2075
239k
                SRE_CODE *target = NULL;
2076
1.04M
                for (;;) {
2077
1.04M
                    GET_SKIP;
2078
1.04M
                    if (skip == 0)
2079
239k
                        break;
2080
                    /* Stop 2 before the end; we check the JUMP below */
2081
803k
                    if (_validate_inner(code, code+skip-3, groups))
2082
0
                        FAIL;
2083
803k
                    code += skip-3;
2084
                    /* Check that it ends with a JUMP, and that each JUMP
2085
                       has the same target */
2086
803k
                    GET_OP;
2087
803k
                    if (op != SRE_OP_JUMP)
2088
0
                        FAIL;
2089
803k
                    GET_SKIP;
2090
803k
                    if (target == NULL)
2091
239k
                        target = code+skip-1;
2092
563k
                    else if (code+skip-1 != target)
2093
0
                        FAIL;
2094
803k
                }
2095
239k
                if (code != target)
2096
0
                    FAIL;
2097
239k
            }
2098
239k
            break;
2099
2100
596k
        case SRE_OP_REPEAT_ONE:
2101
605k
        case SRE_OP_MIN_REPEAT_ONE:
2102
607k
        case SRE_OP_POSSESSIVE_REPEAT_ONE:
2103
607k
            {
2104
607k
                SRE_CODE min, max;
2105
607k
                GET_SKIP;
2106
607k
                GET_ARG; min = arg;
2107
607k
                GET_ARG; max = arg;
2108
607k
                if (min > max)
2109
0
                    FAIL;
2110
607k
                if (max > SRE_MAXREPEAT)
2111
0
                    FAIL;
2112
607k
                if (_validate_inner(code, code+skip-4, groups))
2113
0
                    FAIL;
2114
607k
                code += skip-4;
2115
607k
                GET_OP;
2116
607k
                if (op != SRE_OP_SUCCESS)
2117
0
                    FAIL;
2118
607k
            }
2119
607k
            break;
2120
2121
607k
        case SRE_OP_REPEAT:
2122
233k
        case SRE_OP_POSSESSIVE_REPEAT:
2123
233k
            {
2124
233k
                SRE_CODE op1 = op, min, max;
2125
233k
                GET_SKIP;
2126
233k
                GET_ARG; min = arg;
2127
233k
                GET_ARG; max = arg;
2128
233k
                if (min > max)
2129
0
                    FAIL;
2130
233k
                if (max > SRE_MAXREPEAT)
2131
0
                    FAIL;
2132
233k
                if (_validate_inner(code, code+skip-3, groups))
2133
0
                    FAIL;
2134
233k
                code += skip-3;
2135
233k
                GET_OP;
2136
233k
                if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
2137
375
                    if (op != SRE_OP_SUCCESS)
2138
0
                        FAIL;
2139
375
                }
2140
232k
                else {
2141
232k
                    if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
2142
0
                        FAIL;
2143
232k
                }
2144
233k
            }
2145
233k
            break;
2146
2147
233k
        case SRE_OP_ATOMIC_GROUP:
2148
1.63k
            {
2149
1.63k
                GET_SKIP;
2150
1.63k
                if (_validate_inner(code, code+skip-2, groups))
2151
0
                    FAIL;
2152
1.63k
                code += skip-2;
2153
1.63k
                GET_OP;
2154
1.63k
                if (op != SRE_OP_SUCCESS)
2155
0
                    FAIL;
2156
1.63k
            }
2157
1.63k
            break;
2158
2159
1.82k
        case SRE_OP_GROUPREF:
2160
4.60k
        case SRE_OP_GROUPREF_IGNORE:
2161
4.60k
        case SRE_OP_GROUPREF_UNI_IGNORE:
2162
5.03k
        case SRE_OP_GROUPREF_LOC_IGNORE:
2163
5.03k
            GET_ARG;
2164
5.03k
            if (arg >= (size_t)groups)
2165
0
                FAIL;
2166
5.03k
            break;
2167
2168
5.03k
        case SRE_OP_GROUPREF_EXISTS:
2169
            /* The regex syntax for this is: '(?(group)then|else)', where
2170
               'group' is either an integer group number or a group name,
2171
               'then' and 'else' are sub-regexes, and 'else' is optional. */
2172
1.62k
            GET_ARG;
2173
1.62k
            if (arg >= (size_t)groups)
2174
0
                FAIL;
2175
1.62k
            GET_SKIP_ADJ(1);
2176
1.62k
            code--; /* The skip is relative to the first arg! */
2177
            /* There are two possibilities here: if there is both a 'then'
2178
               part and an 'else' part, the generated code looks like:
2179
2180
               GROUPREF_EXISTS
2181
               <group>
2182
               <skipyes>
2183
               ...then part...
2184
               JUMP
2185
               <skipno>
2186
               (<skipyes> jumps here)
2187
               ...else part...
2188
               (<skipno> jumps here)
2189
2190
               If there is only a 'then' part, it looks like:
2191
2192
               GROUPREF_EXISTS
2193
               <group>
2194
               <skip>
2195
               ...then part...
2196
               (<skip> jumps here)
2197
2198
               There is no direct way to decide which it is, and we don't want
2199
               to allow arbitrary jumps anywhere in the code; so we just look
2200
               for a JUMP opcode preceding our skip target.
2201
            */
2202
1.62k
            VTRACE(("then part:\n"));
2203
1.62k
            int rc = _validate_inner(code+1, code+skip-1, groups);
2204
1.62k
            if (rc == 1) {
2205
1.27k
                VTRACE(("else part:\n"));
2206
1.27k
                code += skip-2; /* Position after JUMP, at <skipno> */
2207
1.27k
                GET_SKIP;
2208
1.27k
                rc = _validate_inner(code, code+skip-1, groups);
2209
1.27k
            }
2210
1.62k
            if (rc)
2211
0
                FAIL;
2212
1.62k
            code += skip-1;
2213
1.62k
            break;
2214
2215
2.17k
        case SRE_OP_ASSERT:
2216
2.75k
        case SRE_OP_ASSERT_NOT:
2217
2.75k
            GET_SKIP;
2218
2.75k
            GET_ARG; /* 0 for lookahead, width for lookbehind */
2219
2.75k
            code--; /* Back up over arg to simplify math below */
2220
            /* Stop 1 before the end; we check the SUCCESS below */
2221
2.75k
            if (_validate_inner(code+1, code+skip-2, groups))
2222
0
                FAIL;
2223
2.75k
            code += skip-2;
2224
2.75k
            GET_OP;
2225
2.75k
            if (op != SRE_OP_SUCCESS)
2226
0
                FAIL;
2227
2.75k
            break;
2228
2229
2.75k
        case SRE_OP_JUMP:
2230
1.27k
            if (code + 1 != end)
2231
0
                FAIL;
2232
1.27k
            VTRACE(("JUMP: %d\n", __LINE__));
2233
1.27k
            return 1;
2234
2235
0
        default:
2236
0
            FAIL;
2237
2238
4.99M
        }
2239
4.99M
    }
2240
2241
1.65M
    VTRACE(("okay\n"));
2242
1.65M
    return 0;
2243
1.65M
}
2244
2245
static int
2246
_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2247
2.20k
{
2248
2.20k
    if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
2249
2.20k
        code >= end || end[-1] != SRE_OP_SUCCESS)
2250
0
        FAIL;
2251
2.20k
    return _validate_inner(code, end-1, groups);
2252
2.20k
}
2253
2254
static int
2255
_validate(PatternObject *self)
2256
2.20k
{
2257
2.20k
    if (_validate_outer(self->code, self->code+self->codesize, self->groups))
2258
0
    {
2259
0
        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
2260
0
        return 0;
2261
0
    }
2262
2.20k
    else
2263
2.20k
        VTRACE(("Success!\n"));
2264
2.20k
    return 1;
2265
2.20k
}
2266
2267
/* -------------------------------------------------------------------- */
2268
/* match methods */
2269
2270
static int
2271
match_traverse(PyObject *op, visitproc visit, void *arg)
2272
0
{
2273
0
    MatchObject *self = _MatchObject_CAST(op);
2274
0
    Py_VISIT(Py_TYPE(self));
2275
0
    Py_VISIT(self->string);
2276
0
    Py_VISIT(self->regs);
2277
0
    Py_VISIT(self->pattern);
2278
0
    return 0;
2279
0
}
2280
2281
static int
2282
match_clear(PyObject *op)
2283
10.4k
{
2284
10.4k
    MatchObject *self = _MatchObject_CAST(op);
2285
10.4k
    Py_CLEAR(self->string);
2286
10.4k
    Py_CLEAR(self->regs);
2287
10.4k
    Py_CLEAR(self->pattern);
2288
10.4k
    return 0;
2289
10.4k
}
2290
2291
static void
2292
match_dealloc(PyObject *self)
2293
10.4k
{
2294
10.4k
    PyTypeObject *tp = Py_TYPE(self);
2295
10.4k
    PyObject_GC_UnTrack(self);
2296
10.4k
    (void)match_clear(self);
2297
10.4k
    tp->tp_free(self);
2298
10.4k
    Py_DECREF(tp);
2299
10.4k
}
2300
2301
static PyObject*
2302
match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
2303
40
{
2304
40
    Py_ssize_t length;
2305
40
    int isbytes, charsize;
2306
40
    Py_buffer view;
2307
40
    PyObject *result;
2308
40
    const void* ptr;
2309
40
    Py_ssize_t i, j;
2310
2311
40
    assert(0 <= index && index < self->groups);
2312
40
    index *= 2;
2313
2314
40
    if (self->string == Py_None || self->mark[index] < 0) {
2315
        /* return default value if the string or group is undefined */
2316
32
        return Py_NewRef(def);
2317
32
    }
2318
2319
8
    ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
2320
8
    if (ptr == NULL)
2321
0
        return NULL;
2322
2323
8
    i = self->mark[index];
2324
8
    j = self->mark[index+1];
2325
8
    i = Py_MIN(i, length);
2326
8
    j = Py_MIN(j, length);
2327
8
    result = getslice(isbytes, ptr, self->string, i, j);
2328
8
    if (isbytes && view.buf != NULL)
2329
0
        PyBuffer_Release(&view);
2330
8
    return result;
2331
8
}
2332
2333
static Py_ssize_t
2334
match_getindex(MatchObject* self, PyObject* index)
2335
10.3k
{
2336
10.3k
    Py_ssize_t i;
2337
2338
10.3k
    if (index == NULL)
2339
        /* Default value */
2340
10.3k
        return 0;
2341
2342
40
    if (PyIndex_Check(index)) {
2343
0
        i = PyNumber_AsSsize_t(index, NULL);
2344
0
    }
2345
40
    else {
2346
40
        i = -1;
2347
2348
40
        if (self->pattern->groupindex) {
2349
40
            index = PyDict_GetItemWithError(self->pattern->groupindex, index);
2350
40
            if (index && PyLong_Check(index)) {
2351
40
                i = PyLong_AsSsize_t(index);
2352
40
            }
2353
40
        }
2354
40
    }
2355
40
    if (i < 0 || i >= self->groups) {
2356
        /* raise IndexError if we were given a bad group number */
2357
0
        if (!PyErr_Occurred()) {
2358
0
            PyErr_SetString(PyExc_IndexError, "no such group");
2359
0
        }
2360
0
        return -1;
2361
0
    }
2362
2363
    // Check that i*2 cannot overflow to make static analyzers happy
2364
40
    assert((size_t)i <= SRE_MAXGROUPS);
2365
40
    return i;
2366
40
}
2367
2368
static PyObject*
2369
match_getslice(MatchObject* self, PyObject* index, PyObject* def)
2370
40
{
2371
40
    Py_ssize_t i = match_getindex(self, index);
2372
2373
40
    if (i < 0) {
2374
0
        return NULL;
2375
0
    }
2376
2377
40
    return match_getslice_by_index(self, i, def);
2378
40
}
2379
2380
/*[clinic input]
2381
@permit_long_summary
2382
_sre.SRE_Match.expand
2383
2384
    template: object
2385
2386
Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2387
[clinic start generated code]*/
2388
2389
static PyObject *
2390
_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2391
/*[clinic end generated code: output=931b58ccc323c3a1 input=dc74d81265376ac3]*/
2392
0
{
2393
0
    _sremodulestate *module_state = get_sre_module_state_by_class(Py_TYPE(self));
2394
0
    PyObject *filter = compile_template(module_state, self->pattern, template);
2395
0
    if (filter == NULL) {
2396
0
        return NULL;
2397
0
    }
2398
0
    PyObject *result = expand_template((TemplateObject *)filter, self);
2399
0
    Py_DECREF(filter);
2400
0
    return result;
2401
0
}
2402
2403
/* Implementation of Match.group(*args).

   With no argument returns group 0, with one argument returns that group,
   and with several arguments returns a tuple holding one entry per
   argument.  Undefined groups yield None.  Returns NULL with an exception
   set on failure. */
static PyObject*
match_group(PyObject *op, PyObject* args)
{
    MatchObject *self = _MatchObject_CAST(op);
    Py_ssize_t nargs = PyTuple_GET_SIZE(args);

    if (nargs == 0) {
        return match_getslice(self, _PyLong_GetZero(), Py_None);
    }
    if (nargs == 1) {
        return match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
    }

    /* fetch multiple items */
    PyObject *result = PyTuple_New(nargs);
    if (result == NULL) {
        return NULL;
    }
    for (Py_ssize_t pos = 0; pos < nargs; pos++) {
        PyObject *item = match_getslice(self, PyTuple_GET_ITEM(args, pos),
                                        Py_None);
        if (item == NULL) {
            Py_DECREF(result);
            return NULL;
        }
        /* The tuple takes over the reference to item */
        PyTuple_SET_ITEM(result, pos, item);
    }
    return result;
}
2438
2439
/* Subscript handler: match[name] behaves like a single-argument group()
   call, with None as the value for undefined groups. */
static PyObject*
match_getitem(PyObject *op, PyObject* name)
{
    MatchObject *match = _MatchObject_CAST(op);

    return match_getslice(match, name, Py_None);
}
2445
2446
/*[clinic input]
2447
_sre.SRE_Match.groups
2448
2449
    default: object = None
2450
        Is used for groups that did not participate in the match.
2451
2452
Return a tuple containing all the subgroups of the match, from 1.
2453
[clinic start generated code]*/
2454
2455
static PyObject *
2456
_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2457
/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2458
0
{
2459
0
    PyObject* result;
2460
0
    Py_ssize_t index;
2461
2462
0
    result = PyTuple_New(self->groups-1);
2463
0
    if (!result)
2464
0
        return NULL;
2465
2466
0
    for (index = 1; index < self->groups; index++) {
2467
0
        PyObject* item;
2468
0
        item = match_getslice_by_index(self, index, default_value);
2469
0
        if (!item) {
2470
0
            Py_DECREF(result);
2471
0
            return NULL;
2472
0
        }
2473
0
        PyTuple_SET_ITEM(result, index-1, item);
2474
0
    }
2475
2476
0
    return result;
2477
0
}
2478
2479
/*[clinic input]
2480
@permit_long_summary
2481
_sre.SRE_Match.groupdict
2482
2483
    default: object = None
2484
        Is used for groups that did not participate in the match.
2485
2486
Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2487
[clinic start generated code]*/
2488
2489
static PyObject *
2490
_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2491
/*[clinic end generated code: output=29917c9073e41757 input=a8d3a1dc80336872]*/
2492
0
{
2493
0
    PyObject *result;
2494
0
    PyObject *key;
2495
0
    PyObject *value;
2496
0
    Py_ssize_t pos = 0;
2497
0
    Py_hash_t hash;
2498
2499
0
    result = PyDict_New();
2500
0
    if (!result || !self->pattern->groupindex)
2501
0
        return result;
2502
2503
0
    Py_BEGIN_CRITICAL_SECTION(self->pattern->groupindex);
2504
0
    while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2505
0
        int status;
2506
0
        Py_INCREF(key);
2507
0
        value = match_getslice(self, key, default_value);
2508
0
        if (!value) {
2509
0
            Py_DECREF(key);
2510
0
            Py_CLEAR(result);
2511
0
            goto exit;
2512
0
        }
2513
0
        status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2514
0
        Py_DECREF(value);
2515
0
        Py_DECREF(key);
2516
0
        if (status < 0) {
2517
0
            Py_CLEAR(result);
2518
0
            goto exit;
2519
0
        }
2520
0
    }
2521
0
exit:;
2522
0
    Py_END_CRITICAL_SECTION();
2523
2524
0
    return result;
2525
0
}
2526
2527
/*[clinic input]
2528
_sre.SRE_Match.start -> Py_ssize_t
2529
2530
    group: object(c_default="NULL") = 0
2531
    /
2532
2533
Return index of the start of the substring matched by group.
2534
[clinic start generated code]*/
2535
2536
static Py_ssize_t
2537
_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2538
/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2539
0
{
2540
0
    Py_ssize_t index = match_getindex(self, group);
2541
2542
0
    if (index < 0) {
2543
0
        return -1;
2544
0
    }
2545
2546
    /* mark is -1 if group is undefined */
2547
0
    return self->mark[index*2];
2548
0
}
2549
2550
/*[clinic input]
2551
_sre.SRE_Match.end -> Py_ssize_t
2552
2553
    group: object(c_default="NULL") = 0
2554
    /
2555
2556
Return index of the end of the substring matched by group.
2557
[clinic start generated code]*/
2558
2559
static Py_ssize_t
2560
_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2561
/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2562
10.3k
{
2563
10.3k
    Py_ssize_t index = match_getindex(self, group);
2564
2565
10.3k
    if (index < 0) {
2566
0
        return -1;
2567
0
    }
2568
2569
    /* mark is -1 if group is undefined */
2570
10.3k
    return self->mark[index*2+1];
2571
10.3k
}
2572
2573
/* Build the 2-tuple (i1, i2) of Python ints.  Returns NULL if either
   integer cannot be created or the tuple construction fails. */
LOCAL(PyObject*)
_pair(Py_ssize_t i1, Py_ssize_t i2)
{
    PyObject *first = PyLong_FromSsize_t(i1);
    if (first == NULL) {
        return NULL;
    }

    PyObject *second = PyLong_FromSsize_t(i2);
    if (second == NULL) {
        Py_DECREF(first);
        return NULL;
    }

    /* Both references are handed over to the tuple constructor */
    return _PyTuple_FromPairSteal(first, second);
}
2588
2589
/*[clinic input]
2590
_sre.SRE_Match.span
2591
2592
    group: object(c_default="NULL") = 0
2593
    /
2594
2595
For match object m, return the 2-tuple (m.start(group), m.end(group)).
2596
[clinic start generated code]*/
2597
2598
static PyObject *
2599
_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2600
/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2601
0
{
2602
0
    Py_ssize_t index = match_getindex(self, group);
2603
2604
0
    if (index < 0) {
2605
0
        return NULL;
2606
0
    }
2607
2608
    /* marks are -1 if group is undefined */
2609
0
    return _pair(self->mark[index*2], self->mark[index*2+1]);
2610
0
}
2611
2612
static PyObject*
2613
match_regs(MatchObject* self)
2614
0
{
2615
0
    PyObject* regs;
2616
0
    PyObject* item;
2617
0
    Py_ssize_t index;
2618
2619
0
    regs = PyTuple_New(self->groups);
2620
0
    if (!regs)
2621
0
        return NULL;
2622
2623
0
    for (index = 0; index < self->groups; index++) {
2624
0
        item = _pair(self->mark[index*2], self->mark[index*2+1]);
2625
0
        if (!item) {
2626
0
            Py_DECREF(regs);
2627
0
            return NULL;
2628
0
        }
2629
0
        PyTuple_SET_ITEM(regs, index, item);
2630
0
    }
2631
2632
0
    self->regs = Py_NewRef(regs);
2633
2634
0
    return regs;
2635
0
}
2636
2637
/*[clinic input]
2638
_sre.SRE_Match.__copy__
2639
2640
[clinic start generated code]*/
2641
2642
static PyObject *
2643
_sre_SRE_Match___copy___impl(MatchObject *self)
2644
/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2645
0
{
2646
0
    return Py_NewRef(self);
2647
0
}
2648
2649
/*[clinic input]
2650
_sre.SRE_Match.__deepcopy__
2651
2652
    memo: object
2653
    /
2654
2655
[clinic start generated code]*/
2656
2657
static PyObject *
2658
_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2659
/*[clinic end generated code: output=2b657578eb03f4a3 input=779d12a31c2c325e]*/
2660
0
{
2661
0
    return Py_NewRef(self);
2662
0
}
2663
2664
/* Docstring text for the Match type. */
PyDoc_STRVAR(match_doc,
"The result of re.search(), re.prefixmatch(), and re.fullmatch().\n\
Match objects always have a boolean value of True.");

/* Docstring text for Match.group(); this method is not clinic-generated,
   so its signature line is written by hand. */
PyDoc_STRVAR(match_group_doc,
"group([group1, ...]) -> str or tuple.\n\
    Return subgroup(s) of the match by indices or names.\n\
    For 0 returns the entire match.");
2672
2673
/* Getter for Match.lastindex: the stored lastindex as an int, or None when
   it is negative. */
static PyObject *
match_lastindex_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);

    if (match->lastindex < 0) {
        Py_RETURN_NONE;
    }
    return PyLong_FromSsize_t(match->lastindex);
}
2681
2682
/* Getter for Match.lastgroup: the entry of the pattern's indexgroup tuple
   at position lastindex, or None when the pattern has no indexgroup or
   lastindex falls outside that tuple. */
static PyObject *
match_lastgroup_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);
    PyObject *names = match->pattern->indexgroup;

    if (names == NULL ||
        match->lastindex < 0 ||
        match->lastindex >= PyTuple_GET_SIZE(names))
    {
        Py_RETURN_NONE;
    }

    return Py_NewRef(PyTuple_GET_ITEM(names, match->lastindex));
}
2696
2697
/* Getter for Match.regs: returns the cached tuple if one was already
   built, otherwise builds it via match_regs(). */
static PyObject *
match_regs_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);

    if (match->regs == NULL) {
        return match_regs(match);
    }
    return Py_NewRef(match->regs);
}
2706
2707
/* repr() of a match object: type name, span of group 0 and the repr of
   the matched text cut off by the %.50R precision.  Returns NULL on
   failure. */
static PyObject *
match_repr(PyObject *op)
{
    MatchObject *match = _MatchObject_CAST(op);

    PyObject *whole = match_getslice_by_index(match, 0, Py_None);
    if (whole == NULL) {
        return NULL;
    }

    PyObject *text = PyUnicode_FromFormat(
            "<%s object; span=(%zd, %zd), match=%.50R>",
            Py_TYPE(match)->tp_name,
            match->mark[0], match->mark[1], whole);

    Py_DECREF(whole);
    return text;
}
2722
2723
2724
static PyObject*
2725
pattern_new_match(_sremodulestate* module_state,
2726
                  PatternObject* pattern,
2727
                  SRE_STATE* state,
2728
                  Py_ssize_t status)
2729
10.7k
{
2730
    /* create match object (from state object) */
2731
2732
10.7k
    MatchObject* match;
2733
10.7k
    Py_ssize_t i, j;
2734
10.7k
    char* base;
2735
10.7k
    int n;
2736
2737
10.7k
    if (status > 0) {
2738
2739
        /* create match object (with room for extra group marks) */
2740
        /* coverity[ampersand_in_size] */
2741
10.4k
        match = PyObject_GC_NewVar(MatchObject,
2742
10.4k
                                   module_state->Match_Type,
2743
10.4k
                                   2*(pattern->groups+1));
2744
10.4k
        if (!match)
2745
0
            return NULL;
2746
2747
10.4k
        Py_INCREF(pattern);
2748
10.4k
        match->pattern = pattern;
2749
2750
10.4k
        match->string = Py_NewRef(state->string);
2751
2752
10.4k
        match->regs = NULL;
2753
10.4k
        match->groups = pattern->groups+1;
2754
2755
        /* fill in group slices */
2756
2757
10.4k
        base = (char*) state->beginning;
2758
10.4k
        n = state->charsize;
2759
2760
10.4k
        match->mark[0] = ((char*) state->start - base) / n;
2761
10.4k
        match->mark[1] = ((char*) state->ptr - base) / n;
2762
2763
10.6k
        for (i = j = 0; i < pattern->groups; i++, j+=2)
2764
191
            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2765
75
                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2766
75
                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2767
2768
                /* check wrong span */
2769
75
                if (match->mark[j+2] > match->mark[j+3]) {
2770
0
                    PyErr_SetString(PyExc_SystemError,
2771
0
                                    "The span of capturing group is wrong,"
2772
0
                                    " please report a bug for the re module.");
2773
0
                    Py_DECREF(match);
2774
0
                    return NULL;
2775
0
                }
2776
75
            } else
2777
116
                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2778
2779
10.4k
        match->pos = state->pos;
2780
10.4k
        match->endpos = state->endpos;
2781
2782
10.4k
        match->lastindex = state->lastindex;
2783
2784
10.4k
        PyObject_GC_Track(match);
2785
10.4k
        return (PyObject*) match;
2786
2787
10.4k
    } else if (status == 0) {
2788
2789
        /* no match */
2790
363
        Py_RETURN_NONE;
2791
2792
363
    }
2793
2794
    /* internal error */
2795
0
    pattern_error(status);
2796
0
    return NULL;
2797
10.7k
}
2798
2799
2800
/* -------------------------------------------------------------------- */
2801
/* scanner methods (experimental) */
2802
2803
static int
2804
scanner_traverse(PyObject *op, visitproc visit, void *arg)
2805
0
{
2806
0
    ScannerObject *self = _ScannerObject_CAST(op);
2807
0
    Py_VISIT(Py_TYPE(self));
2808
0
    Py_VISIT(self->pattern);
2809
0
    return 0;
2810
0
}
2811
2812
static int
2813
scanner_clear(PyObject *op)
2814
0
{
2815
0
    ScannerObject *self = _ScannerObject_CAST(op);
2816
0
    Py_CLEAR(self->pattern);
2817
0
    return 0;
2818
0
}
2819
2820
static void
2821
scanner_dealloc(PyObject *self)
2822
0
{
2823
0
    PyTypeObject *tp = Py_TYPE(self);
2824
0
    PyObject_GC_UnTrack(self);
2825
0
    ScannerObject *scanner = _ScannerObject_CAST(self);
2826
0
    state_fini(&scanner->state);
2827
0
    (void)scanner_clear(self);
2828
0
    tp->tp_free(self);
2829
0
    Py_DECREF(tp);
2830
0
}
2831
2832
static int
2833
scanner_begin(ScannerObject* self)
2834
0
{
2835
#ifdef Py_GIL_DISABLED
2836
    int was_executing = _Py_atomic_exchange_int(&self->executing, 1);
2837
#else
2838
0
    int was_executing = self->executing;
2839
0
    self->executing = 1;
2840
0
#endif
2841
0
    if (was_executing) {
2842
0
        PyErr_SetString(PyExc_ValueError,
2843
0
                        "regular expression scanner already executing");
2844
0
        return 0;
2845
0
    }
2846
0
    return 1;
2847
0
}
2848
2849
/* Counterpart of scanner_begin(): asserts that the executing flag is
 * currently set and then stores 0 into it. */
static void
2850
scanner_end(ScannerObject* self)
2851
0
{
2852
0
    assert(FT_ATOMIC_LOAD_INT_RELAXED(self->executing));
2853
0
    FT_ATOMIC_STORE_INT(self->executing, 0);
2854
0
}
2855
2856
/*[clinic input]
2857
_sre.SRE_Scanner.prefixmatch
2858
2859
    cls: defining_class
2860
    /
2861
2862
[clinic start generated code]*/
2863
2864
static PyObject *
2865
_sre_SRE_Scanner_prefixmatch_impl(ScannerObject *self, PyTypeObject *cls)
2866
/*[clinic end generated code: output=02b3b9d2954a2157 input=3049b20466c56a8e]*/
2867
0
{
2868
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2869
0
    SRE_STATE* state = &self->state;
2870
0
    PyObject* match;
2871
0
    Py_ssize_t status;
2872
2873
0
    if (!scanner_begin(self)) {
2874
0
        return NULL;
2875
0
    }
2876
0
    if (state->start == NULL) {
2877
0
        scanner_end(self);
2878
0
        Py_RETURN_NONE;
2879
0
    }
2880
2881
0
    state_reset(state);
2882
2883
0
    state->ptr = state->start;
2884
2885
0
    status = sre_match(state, PatternObject_GetCode(self->pattern));
2886
0
    if (PyErr_Occurred()) {
2887
0
        scanner_end(self);
2888
0
        return NULL;
2889
0
    }
2890
2891
0
    match = pattern_new_match(module_state, self->pattern,
2892
0
                              state, status);
2893
2894
0
    if (status == 0)
2895
0
        state->start = NULL;
2896
0
    else {
2897
0
        state->must_advance = (state->ptr == state->start);
2898
0
        state->start = state->ptr;
2899
0
    }
2900
2901
0
    scanner_end(self);
2902
0
    return match;
2903
0
}
2904
2905
2906
/*[clinic input]
2907
_sre.SRE_Scanner.search
2908
2909
    cls: defining_class
2910
    /
2911
2912
[clinic start generated code]*/
2913
2914
static PyObject *
2915
_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
2916
/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
2917
0
{
2918
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2919
0
    SRE_STATE* state = &self->state;
2920
0
    PyObject* match;
2921
0
    Py_ssize_t status;
2922
2923
0
    if (!scanner_begin(self)) {
2924
0
        return NULL;
2925
0
    }
2926
0
    if (state->start == NULL) {
2927
0
        scanner_end(self);
2928
0
        Py_RETURN_NONE;
2929
0
    }
2930
2931
0
    state_reset(state);
2932
2933
0
    state->ptr = state->start;
2934
2935
0
    status = sre_search(state, PatternObject_GetCode(self->pattern));
2936
0
    if (PyErr_Occurred()) {
2937
0
        scanner_end(self);
2938
0
        return NULL;
2939
0
    }
2940
2941
0
    match = pattern_new_match(module_state, self->pattern,
2942
0
                              state, status);
2943
2944
0
    if (status == 0)
2945
0
        state->start = NULL;
2946
0
    else {
2947
0
        state->must_advance = (state->ptr == state->start);
2948
0
        state->start = state->ptr;
2949
0
    }
2950
2951
0
    scanner_end(self);
2952
0
    return match;
2953
0
}
2954
2955
static PyObject *
2956
pattern_scanner(_sremodulestate *module_state,
2957
                PatternObject *self,
2958
                PyObject *string,
2959
                Py_ssize_t pos,
2960
                Py_ssize_t endpos)
2961
0
{
2962
0
    ScannerObject* scanner;
2963
2964
    /* create scanner object */
2965
0
    scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
2966
0
    if (!scanner)
2967
0
        return NULL;
2968
0
    scanner->pattern = NULL;
2969
0
    scanner->executing = 0;
2970
2971
    /* create search state object */
2972
0
    if (!state_init(&scanner->state, self, string, pos, endpos)) {
2973
0
        Py_DECREF(scanner);
2974
0
        return NULL;
2975
0
    }
2976
2977
0
    Py_INCREF(self);
2978
0
    scanner->pattern = self;
2979
2980
0
    PyObject_GC_Track(scanner);
2981
0
    return (PyObject*) scanner;
2982
0
}
2983
2984
/* -------------------------------------------------------------------- */
2985
/* template methods */
2986
2987
static int
2988
template_traverse(PyObject *op, visitproc visit, void *arg)
2989
0
{
2990
0
    TemplateObject *self = _TemplateObject_CAST(op);
2991
0
    Py_VISIT(Py_TYPE(self));
2992
0
    Py_VISIT(self->literal);
2993
0
    for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
2994
0
        Py_VISIT(self->items[i].literal);
2995
0
    }
2996
0
    return 0;
2997
0
}
2998
2999
static int
3000
template_clear(PyObject *op)
3001
0
{
3002
0
    TemplateObject *self = _TemplateObject_CAST(op);
3003
0
    Py_CLEAR(self->literal);
3004
0
    for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
3005
0
        Py_CLEAR(self->items[i].literal);
3006
0
    }
3007
0
    return 0;
3008
0
}
3009
3010
static void
3011
template_dealloc(PyObject *self)
3012
0
{
3013
0
    PyTypeObject *tp = Py_TYPE(self);
3014
0
    PyObject_GC_UnTrack(self);
3015
0
    (void)template_clear(self);
3016
0
    tp->tp_free(self);
3017
0
    Py_DECREF(tp);
3018
0
}
3019
3020
static PyObject *
3021
expand_template(TemplateObject *self, MatchObject *match)
3022
0
{
3023
0
    if (Py_SIZE(self) == 0) {
3024
0
        return Py_NewRef(self->literal);
3025
0
    }
3026
3027
0
    PyObject *result = NULL;
3028
0
    Py_ssize_t count = 0;  // the number of non-empty chunks
3029
    /* For small number of strings use a buffer allocated on the stack,
3030
     * otherwise use a list object. */
3031
0
    PyObject *buffer[10];
3032
0
    PyObject **out = buffer;
3033
0
    PyObject *list = NULL;
3034
0
    if (self->chunks > (int)Py_ARRAY_LENGTH(buffer) ||
3035
0
        !PyUnicode_Check(self->literal))
3036
0
    {
3037
0
        list = PyList_New(self->chunks);
3038
0
        if (!list) {
3039
0
            return NULL;
3040
0
        }
3041
0
        out = &PyList_GET_ITEM(list, 0);
3042
0
    }
3043
3044
0
    out[count++] = Py_NewRef(self->literal);
3045
0
    for (Py_ssize_t i = 0; i < Py_SIZE(self); i++) {
3046
0
        Py_ssize_t index = self->items[i].index;
3047
0
        if (index >= match->groups) {
3048
0
            PyErr_SetString(PyExc_IndexError, "no such group");
3049
0
            goto cleanup;
3050
0
        }
3051
0
        PyObject *item = match_getslice_by_index(match, index, Py_None);
3052
0
        if (item == NULL) {
3053
0
            goto cleanup;
3054
0
        }
3055
0
        if (item != Py_None) {
3056
0
            out[count++] = Py_NewRef(item);
3057
0
        }
3058
0
        Py_DECREF(item);
3059
3060
0
        PyObject *literal = self->items[i].literal;
3061
0
        if (literal != NULL) {
3062
0
            out[count++] = Py_NewRef(literal);
3063
0
        }
3064
0
    }
3065
3066
0
    if (PyUnicode_Check(self->literal)) {
3067
0
        result = _PyUnicode_JoinArray(&_Py_STR(empty), out, count);
3068
0
    }
3069
0
    else {
3070
0
        Py_SET_SIZE(list, count);
3071
0
        result = PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list);
3072
0
    }
3073
3074
0
cleanup:
3075
0
    if (list) {
3076
0
        Py_DECREF(list);
3077
0
    }
3078
0
    else {
3079
0
        for (Py_ssize_t i = 0; i < count; i++) {
3080
0
            Py_DECREF(out[i]);
3081
0
        }
3082
0
    }
3083
0
    return result;
3084
0
}
3085
3086
3087
static Py_hash_t
3088
pattern_hash(PyObject *op)
3089
0
{
3090
0
    PatternObject *self = _PatternObject_CAST(op);
3091
3092
0
    Py_hash_t hash, hash2;
3093
3094
0
    hash = PyObject_Hash(self->pattern);
3095
0
    if (hash == -1) {
3096
0
        return -1;
3097
0
    }
3098
3099
0
    hash2 = Py_HashBuffer(self->code, sizeof(self->code[0]) * self->codesize);
3100
0
    hash ^= hash2;
3101
3102
0
    hash ^= self->flags;
3103
0
    hash ^= self->isbytes;
3104
0
    hash ^= self->codesize;
3105
3106
0
    if (hash == -1) {
3107
0
        hash = -2;
3108
0
    }
3109
0
    return hash;
3110
0
}
3111
3112
/* tp_richcompare for Pattern objects.
 *
 * Only == and != are supported, and only against another object whose
 * exact type is the module's Pattern type; anything else returns
 * NotImplemented.  Two patterns are equal when flags, isbytes, codesize,
 * the compiled code bytes and the pattern source all compare equal.
 * Returns NULL if comparing the pattern sources raises.
 */
static PyObject*
pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
{
    _sremodulestate *module_state =
        get_sre_module_state_by_class(Py_TYPE(lefto));
    const int want_equal = (op == Py_EQ);

    if (!want_equal && op != Py_NE) {
        Py_RETURN_NOTIMPLEMENTED;
    }
    if (!Py_IS_TYPE(righto, module_state->Pattern_Type)) {
        Py_RETURN_NOTIMPLEMENTED;
    }
    if (lefto == righto) {
        /* a pattern is equal to itself */
        return PyBool_FromLong(want_equal);
    }

    PatternObject *a = (PatternObject *)lefto;
    PatternObject *b = (PatternObject *)righto;
    int equal = 0;

    /* Compare the code and the pattern because the same pattern can
       produce different codes depending on the locale used to compile the
       pattern when the re.LOCALE flag is used. Don't compare groups,
       indexgroup nor groupindex: they are derived from the pattern. */
    if (a->flags == b->flags
        && a->isbytes == b->isbytes
        && a->codesize == b->codesize
        && memcmp(a->code, b->code,
                  sizeof(a->code[0]) * a->codesize) == 0)
    {
        equal = PyObject_RichCompareBool(a->pattern, b->pattern, Py_EQ);
        if (equal < 0) {
            return NULL;
        }
    }

    return PyBool_FromLong(want_equal ? equal : !equal);
}
3160
3161
#include "clinic/sre.c.h"
3162
3163
/* Method table for Pattern objects (installed through pattern_slots).
 * Keep the prefixmatch entry first and the "match" alias second: debug
 * builds verify exactly those two positions with
 * _assert_match_aliases_prefixmatch(). */
static PyMethodDef pattern_methods[] = {
3164
    _SRE_SRE_PATTERN_PREFIXMATCH_METHODDEF
3165
    /* "match" reuses the prefixmatch Clinic-generated parser and impl
3166
     * to avoid duplicating the argument parsing boilerplate code. */
3167
    {"match", _PyCFunction_CAST(_sre_SRE_Pattern_prefixmatch),
3168
     METH_METHOD|METH_FASTCALL|METH_KEYWORDS,
3169
     _sre_SRE_Pattern_prefixmatch__doc__},
3170
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
3171
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
3172
    _SRE_SRE_PATTERN_SUB_METHODDEF
3173
    _SRE_SRE_PATTERN_SUBN_METHODDEF
3174
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
3175
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
3176
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
3177
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
3178
    _SRE_SRE_PATTERN___COPY___METHODDEF
3179
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
3180
    _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
3181
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3182
     PyDoc_STR("See PEP 585")},
3183
    {NULL, NULL}
3184
};
3185
3186
/* Computed attributes of Pattern objects; "groupindex" has a getter
 * (pattern_groupindex) and no setter. */
static PyGetSetDef pattern_getset[] = {
3187
    {"groupindex", pattern_groupindex, NULL,
3188
      "A dictionary mapping group names to group numbers."},
3189
    {NULL}  /* Sentinel */
3190
};
3191
3192
/* PAT_OFF(x): byte offset of field x inside PatternObject. */
#define PAT_OFF(x) offsetof(PatternObject, x)
3193
/* Read-only attributes of Pattern objects that map directly onto struct
 * fields, plus the __weaklistoffset__ entry pointing at weakreflist. */
static PyMemberDef pattern_members[] = {
3194
    {"pattern",    _Py_T_OBJECT,    PAT_OFF(pattern),       Py_READONLY,
3195
     "The pattern string from which the RE object was compiled."},
3196
    {"flags",      Py_T_INT,       PAT_OFF(flags),         Py_READONLY,
3197
     "The regex matching flags."},
3198
    {"groups",     Py_T_PYSSIZET,  PAT_OFF(groups),        Py_READONLY,
3199
     "The number of capturing groups in the pattern."},
3200
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PatternObject, weakreflist), Py_READONLY},
3201
    {NULL}  /* Sentinel */
3202
};
3203
3204
/* Type slots for the Pattern type; referenced by pattern_spec.slots.
 * Ties together the dealloc/repr/hash/compare functions, the docstring,
 * the method/member/getset tables and the GC traverse/clear hooks. */
static PyType_Slot pattern_slots[] = {
3205
    {Py_tp_dealloc, pattern_dealloc},
3206
    {Py_tp_repr, pattern_repr},
3207
    {Py_tp_hash, pattern_hash},
3208
    {Py_tp_doc, (void *)pattern_doc},
3209
    {Py_tp_richcompare, pattern_richcompare},
3210
    {Py_tp_methods, pattern_methods},
3211
    {Py_tp_members, pattern_members},
3212
    {Py_tp_getset, pattern_getset},
3213
    {Py_tp_traverse, pattern_traverse},
3214
    {Py_tp_clear, pattern_clear},
3215
    {0, NULL},
3216
};
3217
3218
/* Spec from which sre_exec() creates the heap type stored in
 * state->Pattern_Type.  Instances are variable-sized with items of
 * sizeof(SRE_CODE) bytes each. */
static PyType_Spec pattern_spec = {
3219
    .name = "re.Pattern",
3220
    .basicsize = sizeof(PatternObject),
3221
    .itemsize = sizeof(SRE_CODE),
3222
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3223
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3224
    .slots = pattern_slots,
3225
};
3226
3227
/* Method table for Match objects (installed through match_slots).
 * "group" is declared by hand with METH_VARARGS; the remaining entries
 * come from *_METHODDEF macros. */
static PyMethodDef match_methods[] = {
3228
    {"group", match_group, METH_VARARGS, match_group_doc},
3229
    _SRE_SRE_MATCH_START_METHODDEF
3230
    _SRE_SRE_MATCH_END_METHODDEF
3231
    _SRE_SRE_MATCH_SPAN_METHODDEF
3232
    _SRE_SRE_MATCH_GROUPS_METHODDEF
3233
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
3234
    _SRE_SRE_MATCH_EXPAND_METHODDEF
3235
    _SRE_SRE_MATCH___COPY___METHODDEF
3236
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
3237
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3238
     PyDoc_STR("See PEP 585")},
3239
    {NULL, NULL}
3240
};
3241
3242
/* Computed, getter-only attributes of Match objects.  "regs" is exposed
 * without a docstring. */
static PyGetSetDef match_getset[] = {
3243
    {"lastindex", match_lastindex_get, NULL,
3244
     "The integer index of the last matched capturing group."},
3245
    {"lastgroup", match_lastgroup_get, NULL,
3246
     "The name of the last matched capturing group."},
3247
    {"regs", match_regs_get, NULL, NULL},
3248
    {NULL}
3249
};
3250
3251
/* MATCH_OFF(x): byte offset of field x inside MatchObject. */
#define MATCH_OFF(x) offsetof(MatchObject, x)
3252
/* Read-only attributes of Match objects that map directly onto struct
 * fields.  Note that the Python-level name "re" reads the `pattern`
 * field. */
static PyMemberDef match_members[] = {
3253
    {"string",  _Py_T_OBJECT,   MATCH_OFF(string),  Py_READONLY,
3254
     "The string passed to match() or search()."},
3255
    {"re",      _Py_T_OBJECT,   MATCH_OFF(pattern), Py_READONLY,
3256
     "The regular expression object."},
3257
    {"pos",     Py_T_PYSSIZET, MATCH_OFF(pos),     Py_READONLY,
3258
     "The index into the string at which the RE engine started looking for a match."},
3259
    {"endpos",  Py_T_PYSSIZET, MATCH_OFF(endpos),  Py_READONLY,
3260
     "The index into the string beyond which the RE engine will not go."},
3261
    {NULL}
3262
};
3263
3264
/* FIXME: implement setattr("string", None) as a special case (to
3265
   detach the associated string, if any) */
3266
/* Type slots for the Match type; referenced by match_spec.slots. */
static PyType_Slot match_slots[] = {
3267
    {Py_tp_dealloc, match_dealloc},
3268
    {Py_tp_repr, match_repr},
3269
    {Py_tp_doc, (void *)match_doc},
3270
    {Py_tp_methods, match_methods},
3271
    {Py_tp_members, match_members},
3272
    {Py_tp_getset, match_getset},
3273
    {Py_tp_traverse, match_traverse},
3274
    {Py_tp_clear, match_clear},
3275
3276
    /* As mapping.
3277
     *
3278
     * Match objects do not support length or assignment, but do support
3279
     * __getitem__.
3280
     */
3281
    {Py_mp_subscript, match_getitem},
3282
3283
    {0, NULL},
3284
};
3285
3286
/* Spec from which sre_exec() creates the heap type stored in
 * state->Match_Type.  Instances are variable-sized with items of
 * sizeof(Py_ssize_t) bytes each; pattern_new_match() allocates
 * 2*(groups+1) of them for the mark array. */
static PyType_Spec match_spec = {
3287
    .name = "re.Match",
3288
    .basicsize = sizeof(MatchObject),
3289
    .itemsize = sizeof(Py_ssize_t),
3290
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3291
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3292
    .slots = match_slots,
3293
};
3294
3295
/* Method table for Scanner objects (installed through scanner_slots).
 * Keep the prefixmatch entry first and the "match" alias second: debug
 * builds verify exactly those two positions with
 * _assert_match_aliases_prefixmatch(). */
static PyMethodDef scanner_methods[] = {
3296
    _SRE_SRE_SCANNER_PREFIXMATCH_METHODDEF
3297
    /* "match" reuses the prefixmatch Clinic-generated parser and impl
3298
     * to avoid duplicating the argument parsing boilerplate code. */
3299
    {"match", _PyCFunction_CAST(_sre_SRE_Scanner_prefixmatch),
3300
     METH_METHOD|METH_FASTCALL|METH_KEYWORDS,
3301
     _sre_SRE_Scanner_prefixmatch__doc__},
3302
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
3303
    {NULL, NULL}
3304
};
3305
3306
/* SCAN_OFF(x): byte offset of field x inside ScannerObject. */
#define SCAN_OFF(x) offsetof(ScannerObject, x)
3307
/* Scanner objects expose a single read-only attribute, "pattern",
 * without a docstring. */
static PyMemberDef scanner_members[] = {
3308
    {"pattern", _Py_T_OBJECT, SCAN_OFF(pattern), Py_READONLY},
3309
    {NULL}  /* Sentinel */
3310
};
3311
3312
/* Type slots for the Scanner type; referenced by scanner_spec.slots. */
static PyType_Slot scanner_slots[] = {
3313
    {Py_tp_dealloc, scanner_dealloc},
3314
    {Py_tp_methods, scanner_methods},
3315
    {Py_tp_members, scanner_members},
3316
    {Py_tp_traverse, scanner_traverse},
3317
    {Py_tp_clear, scanner_clear},
3318
    {0, NULL},
3319
};
3320
3321
/* Spec from which sre_exec() creates the heap type stored in
 * state->Scanner_Type.  No itemsize is given, unlike the Pattern, Match
 * and Template specs. */
static PyType_Spec scanner_spec = {
3322
    .name = "_sre.SRE_Scanner",
3323
    .basicsize = sizeof(ScannerObject),
3324
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3325
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3326
    .slots = scanner_slots,
3327
};
3328
3329
/* Type slots for the Template type; referenced by template_spec.slots.
 * Only deallocation and the GC hooks are provided: no methods, members
 * or getset table. */
static PyType_Slot template_slots[] = {
3330
    {Py_tp_dealloc, template_dealloc},
3331
    {Py_tp_traverse, template_traverse},
3332
    {Py_tp_clear, template_clear},
3333
    {0, NULL},
3334
};
3335
3336
/* Spec from which sre_exec() creates the heap type stored in
 * state->Template_Type.  The item size is the size of one element of
 * TemplateObject.items, taken via sizeof on a null-pointer expression
 * (sizeof does not evaluate its operand). */
static PyType_Spec template_spec = {
3337
    .name = "_sre.SRE_Template",
3338
    .basicsize = sizeof(TemplateObject),
3339
    .itemsize = sizeof(((TemplateObject *)0)->items[0]),
3340
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3341
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3342
    .slots = template_slots,
3343
};
3344
3345
/* Module-level function table; attached to the module through
 * sremodule.m_methods. */
static PyMethodDef _functions[] = {
3346
    _SRE_COMPILE_METHODDEF
3347
    _SRE_TEMPLATE_METHODDEF
3348
    _SRE_GETCODESIZE_METHODDEF
3349
    _SRE_ASCII_ISCASED_METHODDEF
3350
    _SRE_UNICODE_ISCASED_METHODDEF
3351
    _SRE_ASCII_TOLOWER_METHODDEF
3352
    _SRE_UNICODE_TOLOWER_METHODDEF
3353
    {NULL, NULL}
3354
};
3355
3356
static int
3357
sre_traverse(PyObject *module, visitproc visit, void *arg)
3358
405
{
3359
405
    _sremodulestate *state = get_sre_module_state(module);
3360
3361
405
    Py_VISIT(state->Pattern_Type);
3362
405
    Py_VISIT(state->Match_Type);
3363
405
    Py_VISIT(state->Scanner_Type);
3364
405
    Py_VISIT(state->Template_Type);
3365
405
    Py_VISIT(state->compile_template);
3366
3367
405
    return 0;
3368
405
}
3369
3370
static int
3371
sre_clear(PyObject *module)
3372
0
{
3373
0
    _sremodulestate *state = get_sre_module_state(module);
3374
3375
0
    Py_CLEAR(state->Pattern_Type);
3376
0
    Py_CLEAR(state->Match_Type);
3377
0
    Py_CLEAR(state->Scanner_Type);
3378
0
    Py_CLEAR(state->Template_Type);
3379
0
    Py_CLEAR(state->compile_template);
3380
3381
0
    return 0;
3382
0
}
3383
3384
static void
3385
sre_free(void *module)
3386
0
{
3387
0
    sre_clear((PyObject *)module);
3388
0
}
3389
3390
28
/* CREATE_TYPE(m, type, spec): create a heap type for module `m` from
 * `spec` and store it in the lvalue `type`.  On failure, jumps to a
 * label named `error`, which the enclosing function must provide. */
#define CREATE_TYPE(m, type, spec)                                         \
    do {                                                                   \
        (type) = (PyTypeObject *)PyType_FromModuleAndSpec((m), (spec),     \
                                                          NULL);           \
        if ((type) == NULL) {                                              \
            goto error;                                                    \
        }                                                                  \
    } while (0)
3397
3398
/* ADD_ULONG_CONSTANT(module, name, value): add `value` to `module` under
 * `name` as an int built with PyLong_FromUnsignedLong().  On failure,
 * jumps to a label named `error`, which the enclosing function must
 * provide. */
#define ADD_ULONG_CONSTANT(module, name, value)                            \
    do {                                                                   \
        if (PyModule_Add((module), (name),                                 \
                         PyLong_FromUnsignedLong((value))) < 0) {          \
            goto error;                                                    \
        }                                                                  \
    } while (0)
3404
3405
3406
#ifdef Py_DEBUG
/* Debug-build sanity check for a method table: entry 0 must be named
 * "prefixmatch", entry 1 must be named "match", and the two must share
 * the same function pointer, flags and docstring pointer. */
static void
_assert_match_aliases_prefixmatch(PyMethodDef *methods)
{
    assert(strcmp(methods[0].ml_name, "prefixmatch") == 0);
    assert(strcmp(methods[1].ml_name, "match") == 0);
    assert(methods[1].ml_meth == methods[0].ml_meth);
    assert(methods[1].ml_flags == methods[0].ml_flags);
    assert(methods[1].ml_doc == methods[0].ml_doc);
}
#endif
3419
3420
static int
3421
sre_exec(PyObject *m)
3422
7
{
3423
7
    _sremodulestate *state;
3424
3425
#ifdef Py_DEBUG
3426
    _assert_match_aliases_prefixmatch(pattern_methods);
3427
    _assert_match_aliases_prefixmatch(scanner_methods);
3428
#endif
3429
3430
    /* Create heap types */
3431
7
    state = get_sre_module_state(m);
3432
7
    CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
3433
7
    CREATE_TYPE(m, state->Match_Type, &match_spec);
3434
7
    CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
3435
7
    CREATE_TYPE(m, state->Template_Type, &template_spec);
3436
3437
7
    if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
3438
0
        goto error;
3439
0
    }
3440
3441
7
    if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
3442
0
        goto error;
3443
0
    }
3444
3445
7
    ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
3446
7
    ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
3447
3448
7
    if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
3449
0
        goto error;
3450
0
    }
3451
3452
7
    return 0;
3453
3454
0
error:
3455
0
    return -1;
3456
7
}
3457
3458
/* Module slots referenced by sremodule.m_slots: sre_exec is registered
 * as the exec function; the remaining entries set the module's
 * multiple-interpreter and GIL slot values to the constants shown. */
static PyModuleDef_Slot sre_slots[] = {
3459
    _Py_ABI_SLOT,
3460
    {Py_mod_exec, sre_exec},
3461
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
3462
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
3463
    {0, NULL},
3464
};
3465
3466
/* Definition of the _sre module: per-module state of
 * sizeof(_sremodulestate) bytes, the module function table, the slot
 * table, and the traverse/clear/free hooks for the module state. */
static struct PyModuleDef sremodule = {
3467
    .m_base = PyModuleDef_HEAD_INIT,
3468
    .m_name = "_sre",
3469
    .m_size = sizeof(_sremodulestate),
3470
    .m_methods = _functions,
3471
    .m_slots = sre_slots,
3472
    .m_traverse = sre_traverse,
3473
    .m_free = sre_free,
3474
    .m_clear = sre_clear,
3475
};
3476
3477
/* Module entry point: returns the result of PyModuleDef_Init() on
 * sremodule; the module contents are filled in later by the sre_exec
 * slot. */
PyMODINIT_FUNC
3478
PyInit__sre(void)
3479
7
{
3480
7
    return PyModuleDef_Init(&sremodule);
3481
7
}
3482
3483
/* vim:ts=4:sw=4:et
3484
*/