Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Modules/_sre/sre.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * partial history:
7
 * 1999-10-24 fl   created (based on existing template matcher code)
8
 * 2000-03-06 fl   first alpha, sort of
9
 * 2000-08-01 fl   fixes for 1.6b1
10
 * 2000-08-07 fl   use PyOS_CheckStack() if available
11
 * 2000-09-20 fl   added expand method
12
 * 2001-03-20 fl   lots of fixes for 2.1b2
13
 * 2001-04-15 fl   export copyright as Python attribute, not global
14
 * 2001-04-28 fl   added __copy__ methods (work in progress)
15
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
16
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
17
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
18
 * 2001-10-20 fl   added split primitive; re-enable unicode for 1.6/2.0/2.1
19
 * 2001-10-21 fl   added sub/subn primitive
20
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
21
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
22
 * 2002-11-09 fl   fixed empty sub/subn return type
23
 * 2003-04-18 mvl  fully support 4-byte codes
24
 * 2003-10-17 gn   implemented non recursive scheme
25
 * 2013-02-04 mrab added fullmatch primitive
26
 *
27
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
28
 *
29
 * This version of the SRE library can be redistributed under CNRI's
30
 * Python 1.6 license.  For any other use, please contact Secret Labs
31
 * AB (info@pythonware.com).
32
 *
33
 * Portions of this engine have been developed in cooperation with
34
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
35
 * other compatibility work.
36
 */
37
38
static const char copyright[] =
39
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41
#include "Python.h"
42
#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION
43
#include "pycore_dict.h"             // _PyDict_Next()
44
#include "pycore_long.h"             // _PyLong_GetZero()
45
#include "pycore_moduleobject.h"     // _PyModule_GetState()
46
#include "pycore_unicodeobject.h"    // _PyUnicode_Copy
47
#include "pycore_weakref.h"          // FT_CLEAR_WEAKREFS()
48
49
#include "sre.h"                     // SRE_CODE
50
51
#include <ctype.h>                   // tolower(), toupper(), isalnum()
52
53
2.17G
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
54
55
// On macOS, use the wide character ctype API using btowc()
56
#if defined(__APPLE__)
57
#  define USE_CTYPE_WINT_T
58
#endif
59
60
0
/* Locale-dependent alphanumeric test for one character code.
 *
 * Uses the wide-character classification API (via btowc()) when
 * USE_CTYPE_WINT_T is defined, and the narrow <ctype.h> API otherwise.
 * Returns non-zero when `ch` is alphanumeric. */
static int
sre_isalnum(unsigned int ch)
{
    const int c = (int)ch;
#ifdef USE_CTYPE_WINT_T
    const unsigned int is_alnum = (unsigned int)iswalnum(btowc(c));
#else
    const unsigned int is_alnum = (unsigned int)isalnum(c);
#endif
    return is_alnum;
}
67
68
0
/* Locale-dependent lower-casing of one character code.
 *
 * Uses towlower(btowc()) when USE_CTYPE_WINT_T is defined and
 * tolower() otherwise.  Returns the converted code. */
static unsigned int
sre_tolower(unsigned int ch)
{
    const int c = (int)ch;
#ifdef USE_CTYPE_WINT_T
    return (unsigned int)towlower(btowc(c));
#else
    return (unsigned int)tolower(c);
#endif
}
75
76
0
/* Locale-dependent upper-casing of one character code.
 *
 * Uses towupper(btowc()) when USE_CTYPE_WINT_T is defined and
 * toupper() otherwise.  Returns the converted code. */
static unsigned int
sre_toupper(unsigned int ch)
{
    const int c = (int)ch;
#ifdef USE_CTYPE_WINT_T
    return (unsigned int)towupper(btowc(c));
#else
    return (unsigned int)toupper(c);
#endif
}
83
84
/* Defining this one controls tracing:
85
 * 0 -- disabled
86
 * 1 -- only if the DEBUG flag set
87
 * 2 -- always
88
 */
89
#ifndef VERBOSE
90
#  define VERBOSE 0
91
#endif
92
93
/* -------------------------------------------------------------------- */
94
95
#if defined(_MSC_VER) && !defined(__clang__)
96
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
97
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
98
/* fastest possible local call under MSVC */
99
#define LOCAL(type) static __inline type __fastcall
100
#else
101
#define LOCAL(type) static inline type
102
#endif
103
104
/* error codes */
105
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
106
#define SRE_ERROR_STATE -2 /* illegal state */
107
0
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
108
0
#define SRE_ERROR_MEMORY -9 /* out of memory */
109
0
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
110
111
#if VERBOSE == 0
112
#  define INIT_TRACE(state)
113
#  define DO_TRACE 0
114
#  define TRACE(v)
115
#elif VERBOSE == 1
116
#  define INIT_TRACE(state) int _debug = (state)->debug
117
#  define DO_TRACE (_debug)
118
#  define TRACE(v) do {     \
119
        if (_debug) { \
120
            printf v;       \
121
        }                   \
122
    } while (0)
123
#elif VERBOSE == 2
124
#  define INIT_TRACE(state)
125
#  define DO_TRACE 1
126
#  define TRACE(v) printf v
127
#else
128
#  error VERBOSE must be 0, 1 or 2
129
#endif
130
131
/* -------------------------------------------------------------------- */
132
/* search engine state */
133
134
/* ASCII-only character predicates.  Each range comparison short-circuits
 * before the corresponding Py_IS* classification is consulted.
 * Note that `ch` is evaluated more than once by these macros. */
#define SRE_IS_DIGIT(ch)     ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)     ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_WORD(ch)      ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
142
143
/* Lower-case `ch` using the ASCII table only; codes of 128 and above
 * are returned unchanged. */
static unsigned int
sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
147
148
/* locale-specific character predicates */
149
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
150
 * warnings when c's type supports only numbers < N+1 */
151
0
/* Locale predicates: only codes that fit in 8 bits are handed to
 * sre_isalnum(); anything larger is reported as not alphanumeric. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? sre_isalnum(ch) : 0)
/* A "word" character is a locale alphanumeric or an underscore. */
#define SRE_LOC_IS_WORD(ch)  (SRE_LOC_IS_ALNUM(ch) || (ch) == '_')
153
154
/* Lower-case `ch` with the locale-aware helper when it is below 256;
 * larger codes are returned unchanged. */
static unsigned int
sre_lower_locale(unsigned int ch)
{
    if (ch < 256) {
        return (unsigned int)sre_tolower(ch);
    }
    return ch;
}
158
159
/* Upper-case `ch` with the locale-aware helper when it is below 256;
 * larger codes are returned unchanged. */
static unsigned int
sre_upper_locale(unsigned int ch)
{
    if (ch < 256) {
        return (unsigned int)sre_toupper(ch);
    }
    return ch;
}
163
164
/* unicode-specific character predicates */
165
166
0
/* Unicode predicates: thin aliases for the Py_UNICODE_* classifiers. */
#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
/* A "word" character is a Unicode alphanumeric or an underscore. */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
171
172
/* Lower-case `ch` through Py_UNICODE_TOLOWER and return the result as
 * an unsigned int. */
static unsigned int
sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
176
177
/* Upper-case `ch` through Py_UNICODE_TOUPPER and return the result as
 * an unsigned int. */
static unsigned int
sre_upper_unicode(unsigned int ch)
{
    const unsigned int uppered = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return uppered;
}
181
182
LOCAL(int)
183
sre_category(SRE_CODE category, unsigned int ch)
184
10.9M
{
185
10.9M
    switch (category) {
186
187
0
    case SRE_CATEGORY_DIGIT:
188
0
        return SRE_IS_DIGIT(ch);
189
0
    case SRE_CATEGORY_NOT_DIGIT:
190
0
        return !SRE_IS_DIGIT(ch);
191
0
    case SRE_CATEGORY_SPACE:
192
0
        return SRE_IS_SPACE(ch);
193
0
    case SRE_CATEGORY_NOT_SPACE:
194
0
        return !SRE_IS_SPACE(ch);
195
10.9M
    case SRE_CATEGORY_WORD:
196
10.9M
        return SRE_IS_WORD(ch);
197
0
    case SRE_CATEGORY_NOT_WORD:
198
0
        return !SRE_IS_WORD(ch);
199
0
    case SRE_CATEGORY_LINEBREAK:
200
0
        return SRE_IS_LINEBREAK(ch);
201
0
    case SRE_CATEGORY_NOT_LINEBREAK:
202
0
        return !SRE_IS_LINEBREAK(ch);
203
204
0
    case SRE_CATEGORY_LOC_WORD:
205
0
        return SRE_LOC_IS_WORD(ch);
206
0
    case SRE_CATEGORY_LOC_NOT_WORD:
207
0
        return !SRE_LOC_IS_WORD(ch);
208
209
0
    case SRE_CATEGORY_UNI_DIGIT:
210
0
        return SRE_UNI_IS_DIGIT(ch);
211
0
    case SRE_CATEGORY_UNI_NOT_DIGIT:
212
0
        return !SRE_UNI_IS_DIGIT(ch);
213
53.5k
    case SRE_CATEGORY_UNI_SPACE:
214
53.5k
        return SRE_UNI_IS_SPACE(ch);
215
0
    case SRE_CATEGORY_UNI_NOT_SPACE:
216
0
        return !SRE_UNI_IS_SPACE(ch);
217
0
    case SRE_CATEGORY_UNI_WORD:
218
0
        return SRE_UNI_IS_WORD(ch);
219
0
    case SRE_CATEGORY_UNI_NOT_WORD:
220
0
        return !SRE_UNI_IS_WORD(ch);
221
0
    case SRE_CATEGORY_UNI_LINEBREAK:
222
0
        return SRE_UNI_IS_LINEBREAK(ch);
223
0
    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
224
0
        return !SRE_UNI_IS_LINEBREAK(ch);
225
10.9M
    }
226
0
    return 0;
227
10.9M
}
228
229
LOCAL(int)
230
char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
231
0
{
232
0
    return ch == pattern
233
0
        || (SRE_CODE) sre_lower_locale(ch) == pattern
234
0
        || (SRE_CODE) sre_upper_locale(ch) == pattern;
235
0
}
236
237
238
/* helpers */
239
240
static void
241
data_stack_dealloc(SRE_STATE* state)
242
188M
{
243
188M
    if (state->data_stack) {
244
176M
        PyMem_Free(state->data_stack);
245
176M
        state->data_stack = NULL;
246
176M
    }
247
188M
    state->data_stack_size = state->data_stack_base = 0;
248
188M
}
249
250
static int
251
data_stack_grow(SRE_STATE* state, Py_ssize_t size)
252
176M
{
253
176M
    INIT_TRACE(state);
254
176M
    Py_ssize_t minsize, cursize;
255
176M
    minsize = state->data_stack_base+size;
256
176M
    cursize = state->data_stack_size;
257
176M
    if (cursize < minsize) {
258
176M
        void* stack;
259
176M
        cursize = minsize+minsize/4+1024;
260
176M
        TRACE(("allocate/grow stack %zd\n", cursize));
261
176M
        stack = PyMem_Realloc(state->data_stack, cursize);
262
176M
        if (!stack) {
263
0
            data_stack_dealloc(state);
264
0
            return SRE_ERROR_MEMORY;
265
0
        }
266
176M
        state->data_stack = (char *)stack;
267
176M
        state->data_stack_size = cursize;
268
176M
    }
269
176M
    return 0;
270
176M
}
271
272
/* memory pool functions for SRE_REPEAT, this can avoid memory
273
   leak when SRE(match) function terminates abruptly.
274
   state->repeat_pool_used is a doubly-linked list, so that we
275
   can remove a SRE_REPEAT node from it.
276
   state->repeat_pool_unused is a singly-linked list, we put/get
277
   node at the head. */
278
static SRE_REPEAT *
279
repeat_pool_malloc(SRE_STATE *state)
280
180M
{
281
180M
    SRE_REPEAT *repeat;
282
283
180M
    if (state->repeat_pool_unused) {
284
        /* remove from unused pool (singly-linked list) */
285
512
        repeat = state->repeat_pool_unused;
286
512
        state->repeat_pool_unused = repeat->pool_next;
287
512
    }
288
180M
    else {
289
180M
        repeat = PyMem_Malloc(sizeof(SRE_REPEAT));
290
180M
        if (!repeat) {
291
0
            return NULL;
292
0
        }
293
180M
    }
294
295
    /* add to used pool (doubly-linked list) */
296
180M
    SRE_REPEAT *temp = state->repeat_pool_used;
297
180M
    if (temp) {
298
118M
        temp->pool_prev = repeat;
299
118M
    }
300
180M
    repeat->pool_prev = NULL;
301
180M
    repeat->pool_next = temp;
302
180M
    state->repeat_pool_used = repeat;
303
304
180M
    return repeat;
305
180M
}
306
307
static void
308
repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
309
180M
{
310
180M
    SRE_REPEAT *prev = repeat->pool_prev;
311
180M
    SRE_REPEAT *next = repeat->pool_next;
312
313
    /* remove from used pool (doubly-linked list) */
314
180M
    if (prev) {
315
0
        prev->pool_next = next;
316
0
    }
317
180M
    else {
318
180M
        state->repeat_pool_used = next;
319
180M
    }
320
180M
    if (next) {
321
118M
        next->pool_prev = prev;
322
118M
    }
323
324
    /* add to unused pool (singly-linked list) */
325
180M
    repeat->pool_next = state->repeat_pool_unused;
326
180M
    state->repeat_pool_unused = repeat;
327
180M
}
328
329
static void
330
repeat_pool_clear(SRE_STATE *state)
331
103M
{
332
    /* clear used pool */
333
103M
    SRE_REPEAT *next = state->repeat_pool_used;
334
103M
    state->repeat_pool_used = NULL;
335
103M
    while (next) {
336
0
        SRE_REPEAT *temp = next;
337
0
        next = temp->pool_next;
338
0
        PyMem_Free(temp);
339
0
    }
340
341
    /* clear unused pool */
342
103M
    next = state->repeat_pool_unused;
343
103M
    state->repeat_pool_unused = NULL;
344
283M
    while (next) {
345
180M
        SRE_REPEAT *temp = next;
346
180M
        next = temp->pool_next;
347
180M
        PyMem_Free(temp);
348
180M
    }
349
103M
}
350
351
/* generate 8-bit version */
352
353
267M
#define SRE_CHAR Py_UCS1
354
#define SIZEOF_SRE_CHAR 1
355
1.18G
#define SRE(F) sre_ucs1_##F
356
#include "sre_lib.h"
357
358
/* generate 16-bit unicode version */
359
360
405M
#define SRE_CHAR Py_UCS2
361
#define SIZEOF_SRE_CHAR 2
362
2.02G
#define SRE(F) sre_ucs2_##F
363
#include "sre_lib.h"
364
365
/* generate 32-bit unicode version */
366
367
223M
#define SRE_CHAR Py_UCS4
368
#define SIZEOF_SRE_CHAR 4
369
1.26G
#define SRE(F) sre_ucs4_##F
370
#include "sre_lib.h"
371
372
/* -------------------------------------------------------------------- */
373
/* factories and destructors */
374
375
/* module state */
376
typedef struct {
377
    PyTypeObject *Pattern_Type;
378
    PyTypeObject *Match_Type;
379
    PyTypeObject *Scanner_Type;
380
    PyTypeObject *Template_Type;
381
    PyObject *compile_template;  // reference to re._compile_template
382
} _sremodulestate;
383
384
static _sremodulestate *
385
get_sre_module_state(PyObject *m)
386
100M
{
387
100M
    _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
388
100M
    assert(state);
389
100M
    return state;
390
100M
}
391
392
static struct PyModuleDef sremodule;
393
#define get_sre_module_state_by_class(cls) \
394
100M
    (get_sre_module_state(PyType_GetModule(cls)))
395
396
/* see sre.h for object declarations */
397
static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
398
static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
399
400
195k
#define _PatternObject_CAST(op)     ((PatternObject *)(op))
401
157M
#define _MatchObject_CAST(op)       ((MatchObject *)(op))
402
0
#define _TemplateObject_CAST(op)    ((TemplateObject *)(op))
403
678k
#define _ScannerObject_CAST(op)     ((ScannerObject *)(op))
404
405
/*[clinic input]
406
module _sre
407
class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
408
class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
409
class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
410
[clinic start generated code]*/
411
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
412
413
/*[clinic input]
414
_sre.getcodesize -> int
415
[clinic start generated code]*/
416
417
static int
418
_sre_getcodesize_impl(PyObject *module)
419
/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
420
0
{
421
0
    return sizeof(SRE_CODE);
422
0
}
423
424
/*[clinic input]
425
_sre.ascii_iscased -> bool
426
427
    character: int
428
    /
429
430
[clinic start generated code]*/
431
432
static int
433
_sre_ascii_iscased_impl(PyObject *module, int character)
434
/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
435
35
{
436
35
    unsigned int ch = (unsigned int)character;
437
35
    return ch < 128 && Py_ISALPHA(ch);
438
35
}
439
440
/*[clinic input]
441
_sre.unicode_iscased -> bool
442
443
    character: int
444
    /
445
446
[clinic start generated code]*/
447
448
static int
449
_sre_unicode_iscased_impl(PyObject *module, int character)
450
/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
451
36
{
452
36
    unsigned int ch = (unsigned int)character;
453
36
    return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
454
36
}
455
456
/*[clinic input]
457
_sre.ascii_tolower -> int
458
459
    character: int
460
    /
461
462
[clinic start generated code]*/
463
464
static int
465
_sre_ascii_tolower_impl(PyObject *module, int character)
466
/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
467
25
{
468
25
    return sre_lower_ascii(character);
469
25
}
470
471
/*[clinic input]
472
_sre.unicode_tolower -> int
473
474
    character: int
475
    /
476
477
[clinic start generated code]*/
478
479
static int
480
_sre_unicode_tolower_impl(PyObject *module, int character)
481
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
482
32
{
483
32
    return sre_lower_unicode(character);
484
32
}
485
486
LOCAL(void)
487
state_reset(SRE_STATE* state)
488
85.1M
{
489
    /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
490
    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
491
492
85.1M
    state->lastmark = -1;
493
85.1M
    state->lastindex = -1;
494
495
85.1M
    state->repeat = NULL;
496
497
85.1M
    data_stack_dealloc(state);
498
85.1M
}
499
500
static const void*
501
getstring(PyObject* string, Py_ssize_t* p_length,
502
          int* p_isbytes, int* p_charsize,
503
          Py_buffer *view)
504
179M
{
505
    /* given a python object, return a data pointer, a length (in
506
       characters), and a character size.  return NULL if the object
507
       is not a string (or not compatible) */
508
509
    /* Unicode objects do not support the buffer API. So, get the data
510
       directly instead. */
511
179M
    if (PyUnicode_Check(string)) {
512
179M
        *p_length = PyUnicode_GET_LENGTH(string);
513
179M
        *p_charsize = PyUnicode_KIND(string);
514
179M
        *p_isbytes = 0;
515
179M
        return PyUnicode_DATA(string);
516
179M
    }
517
518
    /* get pointer to byte string buffer */
519
54.6k
    if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
520
0
        PyErr_Format(PyExc_TypeError, "expected string or bytes-like "
521
0
                     "object, got '%.200s'", Py_TYPE(string)->tp_name);
522
0
        return NULL;
523
0
    }
524
525
54.6k
    *p_length = view->len;
526
54.6k
    *p_charsize = 1;
527
54.6k
    *p_isbytes = 1;
528
529
54.6k
    if (view->buf == NULL) {
530
0
        PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
531
0
        PyBuffer_Release(view);
532
0
        view->buf = NULL;
533
0
        return NULL;
534
0
    }
535
54.6k
    return view->buf;
536
54.6k
}
537
538
/* Initialise `state` for matching `pattern` against `string`.
 *
 * Zeroes the state, allocates the mark array (two slots per group),
 * obtains the character data of `string`, checks that a str pattern is
 * not used on bytes-like data and vice versa, and clamps `start` and
 * `end` to [0, length].  On success the state holds a new reference to
 * `string` and `string` itself is returned.  On failure the mark array
 * and any acquired buffer are released and NULL is returned with an
 * exception set. */
LOCAL(PyObject*)
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
           Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t length;
    int isbytes, charsize;
    const void* ptr;

    memset(state, 0, sizeof(SRE_STATE));

    state->mark = PyMem_New(const void *, pattern->groups * 2);
    if (state->mark == NULL) {
        PyErr_NoMemory();
        goto err;
    }
    state->lastindex = -1;
    state->lastmark = -1;

    state->buffer.buf = NULL;
    ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
    if (ptr == NULL) {
        goto err;
    }

    /* the pattern kind and the subject kind must agree */
    if (isbytes) {
        if (pattern->isbytes == 0) {
            PyErr_SetString(PyExc_TypeError,
                            "cannot use a string pattern on a bytes-like object");
            goto err;
        }
    }
    else if (pattern->isbytes > 0) {
        PyErr_SetString(PyExc_TypeError,
                        "cannot use a bytes pattern on a string-like object");
        goto err;
    }

    /* adjust boundaries */
    if (start < 0) {
        start = 0;
    }
    else if (start > length) {
        start = length;
    }

    if (end < 0) {
        end = 0;
    }
    else if (end > length) {
        end = length;
    }

    state->debug = ((pattern->flags & SRE_FLAG_DEBUG) != 0);
    state->must_advance = 0;
    state->match_all = 0;
    state->charsize = charsize;
    state->isbytes = isbytes;

    state->beginning = ptr;
    state->start = (void*) ((char*) ptr + start * state->charsize);
    state->end = (void*) ((char*) ptr + end * state->charsize);

    state->pos = start;
    state->endpos = end;
    state->string = Py_NewRef(string);

#ifdef Py_DEBUG
    state->fail_after_count = pattern->fail_after_count;
    state->fail_after_exc = pattern->fail_after_exc; // borrowed ref
#endif

    return string;

  err:
    /* The explicit cast is needed because MSVC has a bug when compiling
       C code where it believes that `const void**` cannot be safely
       casted to `void*`, see bpo-39943 for details. */
    PyMem_Free((void*) state->mark);
    state->mark = NULL;
    if (state->buffer.buf != NULL) {
        PyBuffer_Release(&state->buffer);
    }
    return NULL;
}
616
617
LOCAL(void)
618
state_fini(SRE_STATE* state)
619
103M
{
620
103M
    if (state->buffer.buf)
621
28.0k
        PyBuffer_Release(&state->buffer);
622
103M
    Py_XDECREF(state->string);
623
103M
    data_stack_dealloc(state);
624
    /* See above PyMem_Free() for why we explicitly cast here. */
625
103M
    PyMem_Free((void*) state->mark);
626
103M
    state->mark = NULL;
627
    /* SRE_REPEAT pool */
628
103M
    repeat_pool_clear(state);
629
103M
}
630
631
/* calculate offset from start of string */
632
#define STATE_OFFSET(state, member)\
633
157M
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
634
635
/* Return a new reference to string[start:end].
 *
 * For str input this is PyUnicode_Substring().  For bytes-like input a
 * bytes object is built from `ptr`, except that an exact bytes object
 * sliced over its full length is returned as-is with a new reference. */
LOCAL(PyObject*)
getslice(int isbytes, const void *ptr,
         PyObject* string, Py_ssize_t start, Py_ssize_t end)
{
    if (!isbytes) {
        return PyUnicode_Substring(string, start, end);
    }

    const int whole_bytes = PyBytes_CheckExact(string) &&
                            start == 0 && end == PyBytes_GET_SIZE(string);
    if (whole_bytes) {
        return Py_NewRef(string);
    }

    const char *first = (const char *)ptr + start;
    return PyBytes_FromStringAndSize(first, end - start);
}
651
652
/* Return the text captured by group `index` (1-based) as a new object.
 *
 * When `string` is None or the group has no recorded marks, the result
 * is None if `empty` is false, or the empty slice [0:0] if it is true.
 * Raises SystemError and returns NULL when the recorded span starts
 * after it ends. */
LOCAL(PyObject*)
state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
{
    /* each group owns two consecutive mark slots */
    const Py_ssize_t slot = (index - 1) * 2;

    const int unset = string == Py_None || slot >= state->lastmark ||
                      !state->mark[slot] || !state->mark[slot+1];
    if (unset) {
        if (!empty) {
            Py_RETURN_NONE;
        }
        /* want empty string */
        return getslice(state->isbytes, state->beginning, string, 0, 0);
    }

    const Py_ssize_t lo = STATE_OFFSET(state, state->mark[slot]);
    const Py_ssize_t hi = STATE_OFFSET(state, state->mark[slot+1]);

    /* check wrong span */
    if (lo > hi) {
        PyErr_SetString(PyExc_SystemError,
                        "The span of capturing group is wrong,"
                        " please report a bug for the re module.");
        return NULL;
    }

    return getslice(state->isbytes, state->beginning, string, lo, hi);
}
681
682
static void
683
pattern_error(Py_ssize_t status)
684
0
{
685
0
    switch (status) {
686
0
    case SRE_ERROR_RECURSION_LIMIT:
687
        /* This error code seems to be unused. */
688
0
        PyErr_SetString(
689
0
            PyExc_RecursionError,
690
0
            "maximum recursion limit exceeded"
691
0
            );
692
0
        break;
693
0
    case SRE_ERROR_MEMORY:
694
0
        PyErr_NoMemory();
695
0
        break;
696
0
    case SRE_ERROR_INTERRUPTED:
697
    /* An exception has already been raised, so let it fly */
698
0
        break;
699
0
    default:
700
        /* other error codes indicate compiler/engine bugs */
701
0
        PyErr_SetString(
702
0
            PyExc_RuntimeError,
703
0
            "internal error in regular expression engine"
704
0
            );
705
0
    }
706
0
}
707
708
static int
709
pattern_traverse(PyObject *op, visitproc visit, void *arg)
710
195k
{
711
195k
    PatternObject *self = _PatternObject_CAST(op);
712
195k
    Py_VISIT(Py_TYPE(self));
713
195k
    Py_VISIT(self->groupindex);
714
195k
    Py_VISIT(self->indexgroup);
715
195k
    Py_VISIT(self->pattern);
716
#ifdef Py_DEBUG
717
    Py_VISIT(self->fail_after_exc);
718
#endif
719
195k
    return 0;
720
195k
}
721
722
static int
723
pattern_clear(PyObject *op)
724
22
{
725
22
    PatternObject *self = _PatternObject_CAST(op);
726
22
    Py_CLEAR(self->groupindex);
727
22
    Py_CLEAR(self->indexgroup);
728
22
    Py_CLEAR(self->pattern);
729
#ifdef Py_DEBUG
730
    Py_CLEAR(self->fail_after_exc);
731
#endif
732
22
    return 0;
733
22
}
734
735
static void
736
pattern_dealloc(PyObject *self)
737
22
{
738
22
    PyTypeObject *tp = Py_TYPE(self);
739
22
    PyObject_GC_UnTrack(self);
740
22
    FT_CLEAR_WEAKREFS(self, _PatternObject_CAST(self)->weakreflist);
741
22
    (void)pattern_clear(self);
742
22
    tp->tp_free(self);
743
22
    Py_DECREF(tp);
744
22
}
745
746
LOCAL(Py_ssize_t)
747
sre_match(SRE_STATE* state, SRE_CODE* pattern)
748
96.0M
{
749
96.0M
    if (state->charsize == 1)
750
29.7M
        return sre_ucs1_match(state, pattern, 1);
751
66.2M
    if (state->charsize == 2)
752
26.1M
        return sre_ucs2_match(state, pattern, 1);
753
40.1M
    assert(state->charsize == 4);
754
40.1M
    return sre_ucs4_match(state, pattern, 1);
755
66.2M
}
756
757
LOCAL(Py_ssize_t)
758
sre_search(SRE_STATE* state, SRE_CODE* pattern)
759
85.6M
{
760
85.6M
    if (state->charsize == 1)
761
32.7M
        return sre_ucs1_search(state, pattern);
762
52.8M
    if (state->charsize == 2)
763
45.4M
        return sre_ucs2_search(state, pattern);
764
7.44M
    assert(state->charsize == 4);
765
7.44M
    return sre_ucs4_search(state, pattern);
766
52.8M
}
767
768
/*[clinic input]
769
_sre.SRE_Pattern.match
770
771
    cls: defining_class
772
    /
773
    string: object
774
    pos: Py_ssize_t = 0
775
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
776
777
Matches zero or more characters at the beginning of the string.
778
[clinic start generated code]*/
779
780
static PyObject *
781
_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
782
                            PyObject *string, Py_ssize_t pos,
783
                            Py_ssize_t endpos)
784
/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/
785
96.0M
{
786
96.0M
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
787
96.0M
    SRE_STATE state;
788
96.0M
    Py_ssize_t status;
789
96.0M
    PyObject *match;
790
791
96.0M
    if (!state_init(&state, self, string, pos, endpos))
792
0
        return NULL;
793
794
96.0M
    INIT_TRACE(&state);
795
96.0M
    state.ptr = state.start;
796
797
96.0M
    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
798
799
96.0M
    status = sre_match(&state, PatternObject_GetCode(self));
800
801
96.0M
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
802
96.0M
    if (PyErr_Occurred()) {
803
0
        state_fini(&state);
804
0
        return NULL;
805
0
    }
806
807
96.0M
    match = pattern_new_match(module_state, self, &state, status);
808
96.0M
    state_fini(&state);
809
96.0M
    return match;
810
96.0M
}
811
812
/*[clinic input]
813
_sre.SRE_Pattern.fullmatch
814
815
    cls: defining_class
816
    /
817
    string: object
818
    pos: Py_ssize_t = 0
819
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
820
821
Matches against all of the string.
822
[clinic start generated code]*/
823
824
static PyObject *
825
_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
826
                                PyObject *string, Py_ssize_t pos,
827
                                Py_ssize_t endpos)
828
/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
829
0
{
830
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
831
0
    SRE_STATE state;
832
0
    Py_ssize_t status;
833
0
    PyObject *match;
834
835
0
    if (!state_init(&state, self, string, pos, endpos))
836
0
        return NULL;
837
838
0
    INIT_TRACE(&state);
839
0
    state.ptr = state.start;
840
841
0
    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
842
843
0
    state.match_all = 1;
844
0
    status = sre_match(&state, PatternObject_GetCode(self));
845
846
0
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
847
0
    if (PyErr_Occurred()) {
848
0
        state_fini(&state);
849
0
        return NULL;
850
0
    }
851
852
0
    match = pattern_new_match(module_state, self, &state, status);
853
0
    state_fini(&state);
854
0
    return match;
855
0
}
856
857
/*[clinic input]
858
_sre.SRE_Pattern.search
859
860
    cls: defining_class
861
    /
862
    string: object
863
    pos: Py_ssize_t = 0
864
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
865
866
Scan through string looking for a match, and return a corresponding match object instance.
867
868
Return None if no position in the string matches.
869
[clinic start generated code]*/
870
871
static PyObject *
872
_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
873
                             PyObject *string, Py_ssize_t pos,
874
                             Py_ssize_t endpos)
875
/*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/
876
508k
{
877
508k
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
878
508k
    SRE_STATE state;
879
508k
    Py_ssize_t status;
880
508k
    PyObject *match;
881
882
508k
    if (!state_init(&state, self, string, pos, endpos))
883
0
        return NULL;
884
885
508k
    INIT_TRACE(&state);
886
508k
    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
887
888
508k
    status = sre_search(&state, PatternObject_GetCode(self));
889
890
508k
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
891
892
508k
    if (PyErr_Occurred()) {
893
0
        state_fini(&state);
894
0
        return NULL;
895
0
    }
896
897
508k
    match = pattern_new_match(module_state, self, &state, status);
898
508k
    state_fini(&state);
899
508k
    return match;
900
508k
}
901
902
/*[clinic input]
903
_sre.SRE_Pattern.findall
904
905
    string: object
906
    pos: Py_ssize_t = 0
907
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
908
909
Return a list of all non-overlapping matches of pattern in string.
910
[clinic start generated code]*/
911
912
static PyObject *
913
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
914
                              Py_ssize_t pos, Py_ssize_t endpos)
915
/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
916
4.03M
{
917
4.03M
    SRE_STATE state;
918
4.03M
    PyObject* list;
919
4.03M
    Py_ssize_t status;
920
4.03M
    Py_ssize_t i, b, e;
921
922
4.03M
    if (!state_init(&state, self, string, pos, endpos))
923
0
        return NULL;
924
925
4.03M
    list = PyList_New(0);
926
4.03M
    if (!list) {
927
0
        state_fini(&state);
928
0
        return NULL;
929
0
    }
930
931
75.7M
    while (state.start <= state.end) {
932
933
75.7M
        PyObject* item;
934
935
75.7M
        state_reset(&state);
936
937
75.7M
        state.ptr = state.start;
938
939
75.7M
        status = sre_search(&state, PatternObject_GetCode(self));
940
75.7M
        if (PyErr_Occurred())
941
0
            goto error;
942
943
75.7M
        if (status <= 0) {
944
4.03M
            if (status == 0)
945
4.03M
                break;
946
0
            pattern_error(status);
947
0
            goto error;
948
4.03M
        }
949
950
        /* don't bother to build a match object */
951
71.7M
        switch (self->groups) {
952
71.7M
        case 0:
953
71.7M
            b = STATE_OFFSET(&state, state.start);
954
71.7M
            e = STATE_OFFSET(&state, state.ptr);
955
71.7M
            item = getslice(state.isbytes, state.beginning,
956
71.7M
                            string, b, e);
957
71.7M
            if (!item)
958
0
                goto error;
959
71.7M
            break;
960
71.7M
        case 1:
961
0
            item = state_getslice(&state, 1, string, 1);
962
0
            if (!item)
963
0
                goto error;
964
0
            break;
965
0
        default:
966
0
            item = PyTuple_New(self->groups);
967
0
            if (!item)
968
0
                goto error;
969
0
            for (i = 0; i < self->groups; i++) {
970
0
                PyObject* o = state_getslice(&state, i+1, string, 1);
971
0
                if (!o) {
972
0
                    Py_DECREF(item);
973
0
                    goto error;
974
0
                }
975
0
                PyTuple_SET_ITEM(item, i, o);
976
0
            }
977
0
            break;
978
71.7M
        }
979
980
71.7M
        status = PyList_Append(list, item);
981
71.7M
        Py_DECREF(item);
982
71.7M
        if (status < 0)
983
0
            goto error;
984
985
71.7M
        state.must_advance = (state.ptr == state.start);
986
71.7M
        state.start = state.ptr;
987
71.7M
    }
988
989
4.03M
    state_fini(&state);
990
4.03M
    return list;
991
992
0
error:
993
0
    Py_DECREF(list);
994
0
    state_fini(&state);
995
0
    return NULL;
996
997
4.03M
}
998
999
/*[clinic input]
1000
_sre.SRE_Pattern.finditer
1001
1002
    cls: defining_class
1003
    /
1004
    string: object
1005
    pos: Py_ssize_t = 0
1006
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1007
1008
Return an iterator over all non-overlapping matches for the RE pattern in string.
1009
1010
For each match, the iterator returns a match object.
1011
[clinic start generated code]*/
1012
1013
static PyObject *
1014
_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
1015
                               PyObject *string, Py_ssize_t pos,
1016
                               Py_ssize_t endpos)
1017
/*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/
1018
338k
{
1019
338k
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1020
338k
    PyObject* scanner;
1021
338k
    PyObject* search;
1022
338k
    PyObject* iterator;
1023
1024
338k
    scanner = pattern_scanner(module_state, self, string, pos, endpos);
1025
338k
    if (!scanner)
1026
0
        return NULL;
1027
1028
338k
    search = PyObject_GetAttrString(scanner, "search");
1029
338k
    Py_DECREF(scanner);
1030
338k
    if (!search)
1031
0
        return NULL;
1032
1033
338k
    iterator = PyCallIter_New(search, Py_None);
1034
338k
    Py_DECREF(search);
1035
1036
338k
    return iterator;
1037
338k
}
1038
1039
/*[clinic input]
1040
_sre.SRE_Pattern.scanner
1041
1042
    cls: defining_class
1043
    /
1044
    string: object
1045
    pos: Py_ssize_t = 0
1046
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1047
1048
[clinic start generated code]*/
1049
1050
static PyObject *
1051
_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
1052
                              PyObject *string, Py_ssize_t pos,
1053
                              Py_ssize_t endpos)
1054
/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
1055
0
{
1056
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1057
1058
0
    return pattern_scanner(module_state, self, string, pos, endpos);
1059
0
}
1060
1061
/*[clinic input]
1062
_sre.SRE_Pattern.split
1063
1064
    string: object
1065
    maxsplit: Py_ssize_t = 0
1066
1067
Split string by the occurrences of pattern.
1068
[clinic start generated code]*/
1069
1070
static PyObject *
1071
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
1072
                            Py_ssize_t maxsplit)
1073
/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
1074
1.51M
{
1075
1.51M
    SRE_STATE state;
1076
1.51M
    PyObject* list;
1077
1.51M
    PyObject* item;
1078
1.51M
    Py_ssize_t status;
1079
1.51M
    Py_ssize_t n;
1080
1.51M
    Py_ssize_t i;
1081
1.51M
    const void* last;
1082
1083
1.51M
    assert(self->codesize != 0);
1084
1085
1.51M
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
1086
0
        return NULL;
1087
1088
1.51M
    list = PyList_New(0);
1089
1.51M
    if (!list) {
1090
0
        state_fini(&state);
1091
0
        return NULL;
1092
0
    }
1093
1094
1.51M
    n = 0;
1095
1.51M
    last = state.start;
1096
1097
2.50M
    while (!maxsplit || n < maxsplit) {
1098
1099
1.58M
        state_reset(&state);
1100
1101
1.58M
        state.ptr = state.start;
1102
1103
1.58M
        status = sre_search(&state, PatternObject_GetCode(self));
1104
1.58M
        if (PyErr_Occurred())
1105
0
            goto error;
1106
1107
1.58M
        if (status <= 0) {
1108
590k
            if (status == 0)
1109
590k
                break;
1110
0
            pattern_error(status);
1111
0
            goto error;
1112
590k
        }
1113
1114
        /* get segment before this match */
1115
991k
        item = getslice(state.isbytes, state.beginning,
1116
991k
            string, STATE_OFFSET(&state, last),
1117
991k
            STATE_OFFSET(&state, state.start)
1118
991k
            );
1119
991k
        if (!item)
1120
0
            goto error;
1121
991k
        status = PyList_Append(list, item);
1122
991k
        Py_DECREF(item);
1123
991k
        if (status < 0)
1124
0
            goto error;
1125
1126
        /* add groups (if any) */
1127
1.91M
        for (i = 0; i < self->groups; i++) {
1128
925k
            item = state_getslice(&state, i+1, string, 0);
1129
925k
            if (!item)
1130
0
                goto error;
1131
925k
            status = PyList_Append(list, item);
1132
925k
            Py_DECREF(item);
1133
925k
            if (status < 0)
1134
0
                goto error;
1135
925k
        }
1136
1137
991k
        n = n + 1;
1138
991k
        state.must_advance = (state.ptr == state.start);
1139
991k
        last = state.start = state.ptr;
1140
1141
991k
    }
1142
1143
    /* get segment following last match (even if empty) */
1144
1.51M
    item = getslice(state.isbytes, state.beginning,
1145
1.51M
        string, STATE_OFFSET(&state, last), state.endpos
1146
1.51M
        );
1147
1.51M
    if (!item)
1148
0
        goto error;
1149
1.51M
    status = PyList_Append(list, item);
1150
1.51M
    Py_DECREF(item);
1151
1.51M
    if (status < 0)
1152
0
        goto error;
1153
1154
1.51M
    state_fini(&state);
1155
1.51M
    return list;
1156
1157
0
error:
1158
0
    Py_DECREF(list);
1159
0
    state_fini(&state);
1160
0
    return NULL;
1161
1162
1.51M
}
1163
1164
static PyObject *
1165
compile_template(_sremodulestate *module_state,
1166
                 PatternObject *pattern, PyObject *template)
1167
0
{
1168
    /* delegate to Python code */
1169
0
    PyObject *func = FT_ATOMIC_LOAD_PTR(module_state->compile_template);
1170
0
    if (func == NULL) {
1171
0
        func = PyImport_ImportModuleAttrString("re", "_compile_template");
1172
0
        if (func == NULL) {
1173
0
            return NULL;
1174
0
        }
1175
#ifdef Py_GIL_DISABLED
1176
        PyObject *other_func = NULL;
1177
        if (!_Py_atomic_compare_exchange_ptr(&module_state->compile_template, &other_func, func))  {
1178
            Py_DECREF(func);
1179
            func = other_func;
1180
        }
1181
#else
1182
0
        Py_XSETREF(module_state->compile_template, func);
1183
0
#endif
1184
0
    }
1185
1186
0
    PyObject *args[] = {(PyObject *)pattern, template};
1187
0
    PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);
1188
1189
0
    if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
1190
        /* If the replacement string is unhashable (e.g. bytearray),
1191
         * convert it to the basic type (str or bytes) and repeat. */
1192
0
        if (PyUnicode_Check(template) && !PyUnicode_CheckExact(template)) {
1193
0
            PyErr_Clear();
1194
0
            template = _PyUnicode_Copy(template);
1195
0
        }
1196
0
        else if (PyObject_CheckBuffer(template) && !PyBytes_CheckExact(template)) {
1197
0
            PyErr_Clear();
1198
0
            template = PyBytes_FromObject(template);
1199
0
        }
1200
0
        else {
1201
0
            return NULL;
1202
0
        }
1203
0
        if (template == NULL) {
1204
0
            return NULL;
1205
0
        }
1206
0
        args[1] = template;
1207
0
        result = PyObject_Vectorcall(func, args, 2, NULL);
1208
0
        Py_DECREF(template);
1209
0
    }
1210
1211
0
    if (result != NULL && Py_TYPE(result) != module_state->Template_Type) {
1212
0
        PyErr_Format(PyExc_RuntimeError,
1213
0
                    "the result of compiling a replacement string is %.200s",
1214
0
                    Py_TYPE(result)->tp_name);
1215
0
        Py_DECREF(result);
1216
0
        return NULL;
1217
0
    }
1218
0
    return result;
1219
0
}
1220
1221
static PyObject *expand_template(TemplateObject *, MatchObject *); /* Forward */
1222
1223
static PyObject*
1224
pattern_subx(_sremodulestate* module_state,
1225
             PatternObject* self,
1226
             PyObject* ptemplate,
1227
             PyObject* string,
1228
             Py_ssize_t count,
1229
             Py_ssize_t subn)
1230
889k
{
1231
889k
    SRE_STATE state;
1232
889k
    PyObject* list;
1233
889k
    PyObject* joiner;
1234
889k
    PyObject* item;
1235
889k
    PyObject* filter;
1236
889k
    PyObject* match;
1237
889k
    const void* ptr;
1238
889k
    Py_ssize_t status;
1239
889k
    Py_ssize_t n;
1240
889k
    Py_ssize_t i, b, e;
1241
889k
    int isbytes, charsize;
1242
889k
    enum {LITERAL, TEMPLATE, CALLABLE} filter_type;
1243
889k
    Py_buffer view;
1244
1245
889k
    if (PyCallable_Check(ptemplate)) {
1246
        /* sub/subn takes either a function or a template */
1247
889k
        filter = Py_NewRef(ptemplate);
1248
889k
        filter_type = CALLABLE;
1249
889k
    } else {
1250
        /* if not callable, check if it's a literal string */
1251
0
        int literal;
1252
0
        view.buf = NULL;
1253
0
        ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1254
0
        if (ptr) {
1255
0
            if (charsize == 1)
1256
0
                literal = memchr(ptr, '\\', n) == NULL;
1257
0
            else
1258
0
                literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1259
0
        } else {
1260
0
            PyErr_Clear();
1261
0
            literal = 0;
1262
0
        }
1263
0
        if (view.buf)
1264
0
            PyBuffer_Release(&view);
1265
0
        if (literal) {
1266
0
            filter = Py_NewRef(ptemplate);
1267
0
            filter_type = LITERAL;
1268
0
        } else {
1269
            /* not a literal; hand it over to the template compiler */
1270
0
            filter = compile_template(module_state, self, ptemplate);
1271
0
            if (!filter)
1272
0
                return NULL;
1273
1274
0
            assert(Py_TYPE(filter) == module_state->Template_Type);
1275
0
            if (Py_SIZE(filter) == 0) {
1276
0
                Py_SETREF(filter,
1277
0
                          Py_NewRef(((TemplateObject *)filter)->literal));
1278
0
                filter_type = LITERAL;
1279
0
            }
1280
0
            else {
1281
0
                filter_type = TEMPLATE;
1282
0
            }
1283
0
        }
1284
0
    }
1285
1286
889k
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1287
0
        Py_DECREF(filter);
1288
0
        return NULL;
1289
0
    }
1290
1291
889k
    list = PyList_New(0);
1292
889k
    if (!list) {
1293
0
        Py_DECREF(filter);
1294
0
        state_fini(&state);
1295
0
        return NULL;
1296
0
    }
1297
1298
889k
    n = i = 0;
1299
1300
5.04M
    while (!count || n < count) {
1301
1302
5.04M
        state_reset(&state);
1303
1304
5.04M
        state.ptr = state.start;
1305
1306
5.04M
        status = sre_search(&state, PatternObject_GetCode(self));
1307
5.04M
        if (PyErr_Occurred())
1308
0
            goto error;
1309
1310
5.04M
        if (status <= 0) {
1311
889k
            if (status == 0)
1312
889k
                break;
1313
0
            pattern_error(status);
1314
0
            goto error;
1315
889k
        }
1316
1317
4.15M
        b = STATE_OFFSET(&state, state.start);
1318
4.15M
        e = STATE_OFFSET(&state, state.ptr);
1319
1320
4.15M
        if (i < b) {
1321
            /* get segment before this match */
1322
2.48M
            item = getslice(state.isbytes, state.beginning,
1323
2.48M
                string, i, b);
1324
2.48M
            if (!item)
1325
0
                goto error;
1326
2.48M
            status = PyList_Append(list, item);
1327
2.48M
            Py_DECREF(item);
1328
2.48M
            if (status < 0)
1329
0
                goto error;
1330
1331
2.48M
        }
1332
1333
4.15M
        if (filter_type != LITERAL) {
1334
            /* pass match object through filter */
1335
4.15M
            match = pattern_new_match(module_state, self, &state, 1);
1336
4.15M
            if (!match)
1337
0
                goto error;
1338
4.15M
            if (filter_type == TEMPLATE) {
1339
0
                item = expand_template((TemplateObject *)filter,
1340
0
                                       (MatchObject *)match);
1341
0
            }
1342
4.15M
            else {
1343
4.15M
                assert(filter_type == CALLABLE);
1344
4.15M
                item = PyObject_CallOneArg(filter, match);
1345
4.15M
            }
1346
4.15M
            Py_DECREF(match);
1347
4.15M
            if (!item)
1348
15
                goto error;
1349
4.15M
        } else {
1350
            /* filter is literal string */
1351
0
            item = Py_NewRef(filter);
1352
0
        }
1353
1354
        /* add to list */
1355
4.15M
        if (item != Py_None) {
1356
4.15M
            status = PyList_Append(list, item);
1357
4.15M
            Py_DECREF(item);
1358
4.15M
            if (status < 0)
1359
0
                goto error;
1360
4.15M
        }
1361
1362
4.15M
        i = e;
1363
4.15M
        n = n + 1;
1364
4.15M
        state.must_advance = (state.ptr == state.start);
1365
4.15M
        state.start = state.ptr;
1366
4.15M
    }
1367
1368
    /* get segment following last match */
1369
889k
    if (i < state.endpos) {
1370
522k
        item = getslice(state.isbytes, state.beginning,
1371
522k
                        string, i, state.endpos);
1372
522k
        if (!item)
1373
0
            goto error;
1374
522k
        status = PyList_Append(list, item);
1375
522k
        Py_DECREF(item);
1376
522k
        if (status < 0)
1377
0
            goto error;
1378
522k
    }
1379
1380
889k
    state_fini(&state);
1381
1382
889k
    Py_DECREF(filter);
1383
1384
    /* convert list to single string (also removes list) */
1385
889k
    joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1386
889k
    if (!joiner) {
1387
0
        Py_DECREF(list);
1388
0
        return NULL;
1389
0
    }
1390
889k
    if (PyList_GET_SIZE(list) == 0) {
1391
271
        Py_DECREF(list);
1392
271
        item = joiner;
1393
271
    }
1394
889k
    else {
1395
889k
        if (state.isbytes)
1396
27.8k
            item = PyBytes_Join(joiner, list);
1397
861k
        else
1398
861k
            item = PyUnicode_Join(joiner, list);
1399
889k
        Py_DECREF(joiner);
1400
889k
        Py_DECREF(list);
1401
889k
        if (!item)
1402
0
            return NULL;
1403
889k
    }
1404
1405
889k
    if (subn)
1406
0
        return Py_BuildValue("Nn", item, n);
1407
1408
889k
    return item;
1409
1410
15
error:
1411
15
    Py_DECREF(list);
1412
15
    state_fini(&state);
1413
15
    Py_DECREF(filter);
1414
15
    return NULL;
1415
1416
889k
}
1417
1418
/*[clinic input]
1419
_sre.SRE_Pattern.sub
1420
1421
    cls: defining_class
1422
    /
1423
    repl: object
1424
    string: object
1425
    count: Py_ssize_t = 0
1426
1427
Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1428
[clinic start generated code]*/
1429
1430
static PyObject *
1431
_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
1432
                          PyObject *repl, PyObject *string, Py_ssize_t count)
1433
/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/
1434
889k
{
1435
889k
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1436
1437
889k
    return pattern_subx(module_state, self, repl, string, count, 0);
1438
889k
}
1439
1440
/*[clinic input]
1441
_sre.SRE_Pattern.subn
1442
1443
    cls: defining_class
1444
    /
1445
    repl: object
1446
    string: object
1447
    count: Py_ssize_t = 0
1448
1449
Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1450
[clinic start generated code]*/
1451
1452
static PyObject *
1453
_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
1454
                           PyObject *repl, PyObject *string,
1455
                           Py_ssize_t count)
1456
/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/
1457
0
{
1458
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1459
1460
0
    return pattern_subx(module_state, self, repl, string, count, 1);
1461
0
}
1462
1463
/*[clinic input]
1464
_sre.SRE_Pattern.__copy__
1465
1466
[clinic start generated code]*/
1467
1468
static PyObject *
1469
_sre_SRE_Pattern___copy___impl(PatternObject *self)
1470
/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1471
0
{
1472
0
    return Py_NewRef(self);
1473
0
}
1474
1475
/*[clinic input]
1476
_sre.SRE_Pattern.__deepcopy__
1477
1478
    memo: object
1479
    /
1480
1481
[clinic start generated code]*/
1482
1483
static PyObject *
1484
_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1485
/*[clinic end generated code: output=75efe69bd12c5d7d input=a465b1602f997bed]*/
1486
0
{
1487
0
    return Py_NewRef(self);
1488
0
}
1489
1490
#ifdef Py_DEBUG
1491
/*[clinic input]
1492
_sre.SRE_Pattern._fail_after
1493
1494
    count: int
1495
    exception: object
1496
    /
1497
1498
For debugging.
1499
[clinic start generated code]*/
1500
1501
static PyObject *
1502
_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
1503
                                  PyObject *exception)
1504
/*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/
1505
{
1506
    self->fail_after_count = count;
1507
    Py_INCREF(exception);
1508
    Py_XSETREF(self->fail_after_exc, exception);
1509
    Py_RETURN_NONE;
1510
}
1511
#endif /* Py_DEBUG */
1512
1513
static PyObject *
pattern_repr(PyObject *self)
{
    /* Produce "re.compile(<pattern>)" or "re.compile(<pattern>, <flags>)",
       where <flags> is a "|"-separated list of the known flag names plus,
       if any unnamed bits remain, their value in hex. */
    static const struct {
        const char *name;
        int value;
    } flag_names[] = {
        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
        {"re.LOCALE", SRE_FLAG_LOCALE},
        {"re.MULTILINE", SRE_FLAG_MULTILINE},
        {"re.DOTALL", SRE_FLAG_DOTALL},
        {"re.UNICODE", SRE_FLAG_UNICODE},
        {"re.VERBOSE", SRE_FLAG_VERBOSE},
        {"re.DEBUG", SRE_FLAG_DEBUG},
        {"re.ASCII", SRE_FLAG_ASCII},
    };

    PatternObject *pat = _PatternObject_CAST(self);
    PyObject *repr = NULL;
    PyObject *parts;
    int remaining = pat->flags;

    /* Omit re.UNICODE for valid string patterns. */
    if (pat->isbytes == 0) {
        int mode = remaining &
                   (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII);
        if (mode == SRE_FLAG_UNICODE) {
            remaining &= ~SRE_FLAG_UNICODE;
        }
    }

    parts = PyList_New(0);
    if (parts == NULL) {
        return NULL;
    }

    /* Named flags first; each one found is cleared from `remaining`. */
    for (size_t k = 0; k < Py_ARRAY_LENGTH(flag_names); k++) {
        if (!(remaining & flag_names[k].value)) {
            continue;
        }
        PyObject *label = PyUnicode_FromString(flag_names[k].name);
        if (label == NULL) {
            goto done;
        }
        int rc = PyList_Append(parts, label);
        Py_DECREF(label);
        if (rc < 0) {
            goto done;
        }
        remaining &= ~flag_names[k].value;
    }

    /* Whatever bits are left have no name: show them numerically. */
    if (remaining) {
        PyObject *numeric = PyUnicode_FromFormat("0x%x", remaining);
        if (numeric == NULL) {
            goto done;
        }
        int rc = PyList_Append(parts, numeric);
        Py_DECREF(numeric);
        if (rc < 0) {
            goto done;
        }
    }

    if (PyList_Size(parts) <= 0) {
        repr = PyUnicode_FromFormat("re.compile(%.200R)", pat->pattern);
    }
    else {
        PyObject *bar = PyUnicode_FromString("|");
        if (bar == NULL) {
            goto done;
        }
        PyObject *joined = PyUnicode_Join(bar, parts);
        Py_DECREF(bar);
        if (joined == NULL) {
            goto done;
        }
        repr = PyUnicode_FromFormat("re.compile(%.200R, %S)",
                                    pat->pattern, joined);
        Py_DECREF(joined);
    }

done:
    Py_DECREF(parts);
    return repr;
}
1593
1594
PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1595
1596
/* PatternObject's 'groupindex' method. */
1597
static PyObject *
pattern_groupindex(PyObject *op, void *Py_UNUSED(ignored))
{
    /* Return a read-only proxy of the stored groupindex dict, or a fresh
       empty dict when the pattern has none stored. */
    PatternObject *self = _PatternObject_CAST(op);
    return (self->groupindex != NULL)
        ? PyDictProxy_New(self->groupindex)
        : PyDict_New();
}
1605
1606
static int _validate(PatternObject *self); /* Forward */
1607
1608
/*[clinic input]
1609
_sre.compile
1610
1611
    pattern: object
1612
    flags: int
1613
    code: object(subclass_of='&PyList_Type')
1614
    groups: Py_ssize_t
1615
    groupindex: object(subclass_of='&PyDict_Type')
1616
    indexgroup: object(subclass_of='&PyTuple_Type')
1617
1618
[clinic start generated code]*/
1619
1620
static PyObject *
1621
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1622
                  PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1623
                  PyObject *indexgroup)
1624
/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1625
208
{
1626
    /* "compile" pattern descriptor to pattern object */
1627
1628
208
    _sremodulestate *module_state = get_sre_module_state(module);
1629
208
    PatternObject* self;
1630
208
    Py_ssize_t i, n;
1631
1632
208
    n = PyList_GET_SIZE(code);
1633
    /* coverity[ampersand_in_size] */
1634
208
    self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
1635
208
    if (!self)
1636
0
        return NULL;
1637
208
    self->weakreflist = NULL;
1638
208
    self->pattern = NULL;
1639
208
    self->groupindex = NULL;
1640
208
    self->indexgroup = NULL;
1641
#ifdef Py_DEBUG
1642
    self->fail_after_count = -1;
1643
    self->fail_after_exc = NULL;
1644
#endif
1645
1646
208
    self->codesize = n;
1647
1648
8.66k
    for (i = 0; i < n; i++) {
1649
8.45k
        PyObject *o = PyList_GET_ITEM(code, i);
1650
8.45k
        unsigned long value = PyLong_AsUnsignedLong(o);
1651
8.45k
        if (value == (unsigned long)-1 && PyErr_Occurred()) {
1652
0
            break;
1653
0
        }
1654
8.45k
        self->code[i] = (SRE_CODE) value;
1655
8.45k
        if ((unsigned long) self->code[i] != value) {
1656
0
            PyErr_SetString(PyExc_OverflowError,
1657
0
                            "regular expression code size limit exceeded");
1658
0
            break;
1659
0
        }
1660
8.45k
    }
1661
208
    PyObject_GC_Track(self);
1662
1663
208
    if (PyErr_Occurred()) {
1664
0
        Py_DECREF(self);
1665
0
        return NULL;
1666
0
    }
1667
1668
208
    if (pattern == Py_None) {
1669
0
        self->isbytes = -1;
1670
0
    }
1671
208
    else {
1672
208
        Py_ssize_t p_length;
1673
208
        int charsize;
1674
208
        Py_buffer view;
1675
208
        view.buf = NULL;
1676
208
        if (!getstring(pattern, &p_length, &self->isbytes,
1677
208
                       &charsize, &view)) {
1678
0
            Py_DECREF(self);
1679
0
            return NULL;
1680
0
        }
1681
208
        if (view.buf)
1682
16
            PyBuffer_Release(&view);
1683
208
    }
1684
1685
208
    self->pattern = Py_NewRef(pattern);
1686
1687
208
    self->flags = flags;
1688
1689
208
    self->groups = groups;
1690
1691
208
    if (PyDict_GET_SIZE(groupindex) > 0) {
1692
18
        self->groupindex = Py_NewRef(groupindex);
1693
18
        if (PyTuple_GET_SIZE(indexgroup) > 0) {
1694
18
            self->indexgroup = Py_NewRef(indexgroup);
1695
18
        }
1696
18
    }
1697
1698
208
    if (!_validate(self)) {
1699
0
        Py_DECREF(self);
1700
0
        return NULL;
1701
0
    }
1702
1703
208
    return (PyObject*) self;
1704
208
}
1705
1706
/*[clinic input]
1707
_sre.template
1708
1709
    pattern: object
1710
    template: object(subclass_of="&PyList_Type")
1711
        A list containing interleaved literal strings (str or bytes) and group
1712
        indices (int), as returned by re._parser.parse_template():
1713
            [literal1, group1, ..., literalN, groupN]
1714
    /
1715
1716
[clinic start generated code]*/
1717
1718
static PyObject *
_sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
/*[clinic end generated code: output=d51290e596ebca86 input=af55380b27f02942]*/
{
    /* `template` must be a list of odd length: element 0 is the leading
     * literal, followed by (group index, literal) pairs taken from
     * positions 2*i+1 and 2*i+2.  Anything else, or a negative group
     * index, raises TypeError("invalid template").
     */
    _sremodulestate *module_state = get_sre_module_state(module);
    TemplateObject *self = NULL;
    Py_ssize_t total = PyList_GET_SIZE(template);
    Py_ssize_t npairs;

    if (total < 1 || (total & 1) == 0) {
        goto bad_template;
    }
    npairs = total / 2;

    self = PyObject_GC_NewVar(TemplateObject, module_state->Template_Type,
                              npairs);
    if (self == NULL) {
        return NULL;
    }
    /* Start from "every literal present"; empty ones are subtracted in
       the loop below. */
    self->chunks = 1 + 2*npairs;
    self->literal = Py_NewRef(PyList_GET_ITEM(template, 0));

    for (Py_ssize_t i = 0; i < npairs; i++) {
        Py_ssize_t group = PyLong_AsSsize_t(PyList_GET_ITEM(template, 2*i+1));
        if (group == -1 && PyErr_Occurred()) {
            /* shrink to the items filled in so far before releasing */
            Py_SET_SIZE(self, i);
            Py_DECREF(self);
            return NULL;
        }
        if (group < 0) {
            Py_SET_SIZE(self, i);
            goto bad_template;
        }
        self->items[i].index = group;

        PyObject *text = PyList_GET_ITEM(template, 2*i+2);
        int is_empty =
            (PyUnicode_Check(text) && !PyUnicode_GET_LENGTH(text)) ||
            (PyBytes_Check(text) && !PyBytes_GET_SIZE(text));
        if (is_empty) {
            // Skip empty literals.
            self->items[i].literal = NULL;
            self->chunks--;
        }
        else {
            self->items[i].literal = Py_NewRef(text);
        }
    }
    PyObject_GC_Track(self);
    return (PyObject*) self;

bad_template:
    PyErr_SetString(PyExc_TypeError, "invalid template");
    Py_XDECREF(self);
    return NULL;
}
1769
1770
/* -------------------------------------------------------------------- */
1771
/* Code validation */
1772
1773
/* To learn more about this code, have a look at the _compile() function in
1774
   Lib/sre_compile.py.  The validation functions below check the code array
1775
   for conformance with the code patterns generated there.
1776
1777
   The nice thing about the generated code is that it is position-independent:
1778
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
1779
   the target of a later jump is always earlier than the target of an earlier
1780
   jump.  IOW, this is okay:
1781
1782
   J---------J-------T--------T
1783
    \         \_____/        /
1784
     \______________________/
1785
1786
   but this is not:
1787
1788
   J---------J-------T--------T
1789
    \_________\_____/        /
1790
               \____________/
1791
1792
   It also helps that SRE_CODE is always an unsigned type.
1793
*/
1794
1795
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns -1 from the function that expands the macro. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)

/* Extract opcode, argument, or skip count from code array.

   Every macro below expects the expanding function to have `code` (the
   read cursor) and `end` (one past the last valid word) in scope, plus
   the variable it assigns: `op`, `arg` or `skip`.  Each one FAILs when the
   cursor is already at `end`.

   GET_OP / GET_ARG    read one word into `op` / `arg` and advance `code`.
   GET_SKIP_ADJ(adj)   read one word into `skip`, FAIL if `skip - adj` is
                       larger than the number of words left (counted from
                       the skip word itself), then advance `code`.
   GET_SKIP            GET_SKIP_ADJ with no adjustment.

   `adj` is parenthesised in the expansion so that a compound argument
   such as `a + b` is subtracted as a whole. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip - (adj) > (uintptr_t)(end - code))     \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1835
1836
static int
1837
_validate_charset(SRE_CODE *code, SRE_CODE *end)
1838
370
{
1839
    /* Some variables are manipulated by the macros above */
1840
370
    SRE_CODE op;
1841
370
    SRE_CODE arg;
1842
370
    SRE_CODE offset;
1843
370
    int i;
1844
1845
970
    while (code < end) {
1846
600
        GET_OP;
1847
600
        switch (op) {
1848
1849
62
        case SRE_OP_NEGATE:
1850
62
            break;
1851
1852
222
        case SRE_OP_LITERAL:
1853
222
            GET_ARG;
1854
222
            break;
1855
1856
222
        case SRE_OP_RANGE:
1857
136
        case SRE_OP_RANGE_UNI_IGNORE:
1858
136
            GET_ARG;
1859
136
            GET_ARG;
1860
136
            break;
1861
1862
136
        case SRE_OP_CHARSET:
1863
124
            offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1864
124
            if (offset > (uintptr_t)(end - code))
1865
0
                FAIL;
1866
124
            code += offset;
1867
124
            break;
1868
1869
4
        case SRE_OP_BIGCHARSET:
1870
4
            GET_ARG; /* Number of blocks */
1871
4
            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1872
4
            if (offset > (uintptr_t)(end - code))
1873
0
                FAIL;
1874
            /* Make sure that each byte points to a valid block */
1875
1.02k
            for (i = 0; i < 256; i++) {
1876
1.02k
                if (((unsigned char *)code)[i] >= arg)
1877
0
                    FAIL;
1878
1.02k
            }
1879
4
            code += offset;
1880
4
            offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1881
4
            if (offset > (uintptr_t)(end - code))
1882
0
                FAIL;
1883
4
            code += offset;
1884
4
            break;
1885
1886
52
        case SRE_OP_CATEGORY:
1887
52
            GET_ARG;
1888
52
            switch (arg) {
1889
2
            case SRE_CATEGORY_DIGIT:
1890
2
            case SRE_CATEGORY_NOT_DIGIT:
1891
10
            case SRE_CATEGORY_SPACE:
1892
10
            case SRE_CATEGORY_NOT_SPACE:
1893
16
            case SRE_CATEGORY_WORD:
1894
16
            case SRE_CATEGORY_NOT_WORD:
1895
16
            case SRE_CATEGORY_LINEBREAK:
1896
16
            case SRE_CATEGORY_NOT_LINEBREAK:
1897
16
            case SRE_CATEGORY_LOC_WORD:
1898
16
            case SRE_CATEGORY_LOC_NOT_WORD:
1899
16
            case SRE_CATEGORY_UNI_DIGIT:
1900
16
            case SRE_CATEGORY_UNI_NOT_DIGIT:
1901
52
            case SRE_CATEGORY_UNI_SPACE:
1902
52
            case SRE_CATEGORY_UNI_NOT_SPACE:
1903
52
            case SRE_CATEGORY_UNI_WORD:
1904
52
            case SRE_CATEGORY_UNI_NOT_WORD:
1905
52
            case SRE_CATEGORY_UNI_LINEBREAK:
1906
52
            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1907
52
                break;
1908
0
            default:
1909
0
                FAIL;
1910
52
            }
1911
52
            break;
1912
1913
52
        default:
1914
0
            FAIL;
1915
1916
600
        }
1917
600
    }
1918
1919
370
    return 0;
1920
370
}
1921
1922
/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
1923
static int
1924
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1925
664
{
1926
    /* Some variables are manipulated by the macros above */
1927
664
    SRE_CODE op;
1928
664
    SRE_CODE arg;
1929
664
    SRE_CODE skip;
1930
1931
664
    VTRACE(("code=%p, end=%p\n", code, end));
1932
1933
664
    if (code > end)
1934
0
        FAIL;
1935
1936
2.24k
    while (code < end) {
1937
1.57k
        GET_OP;
1938
1.57k
        switch (op) {
1939
1940
250
        case SRE_OP_MARK:
1941
            /* We don't check whether marks are properly nested; the
1942
               sre_match() code is robust even if they don't, and the worst
1943
               you can get is nonsensical match results. */
1944
250
            GET_ARG;
1945
250
            if (arg > 2 * (size_t)groups + 1) {
1946
0
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1947
0
                FAIL;
1948
0
            }
1949
250
            break;
1950
1951
371
        case SRE_OP_LITERAL:
1952
395
        case SRE_OP_NOT_LITERAL:
1953
406
        case SRE_OP_LITERAL_IGNORE:
1954
406
        case SRE_OP_NOT_LITERAL_IGNORE:
1955
416
        case SRE_OP_LITERAL_UNI_IGNORE:
1956
416
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1957
416
        case SRE_OP_LITERAL_LOC_IGNORE:
1958
416
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1959
416
            GET_ARG;
1960
            /* The arg is just a character, nothing to check */
1961
416
            break;
1962
1963
416
        case SRE_OP_SUCCESS:
1964
0
        case SRE_OP_FAILURE:
1965
            /* Nothing to check; these normally end the matching process */
1966
0
            break;
1967
1968
46
        case SRE_OP_AT:
1969
46
            GET_ARG;
1970
46
            switch (arg) {
1971
14
            case SRE_AT_BEGINNING:
1972
14
            case SRE_AT_BEGINNING_STRING:
1973
14
            case SRE_AT_BEGINNING_LINE:
1974
40
            case SRE_AT_END:
1975
40
            case SRE_AT_END_LINE:
1976
46
            case SRE_AT_END_STRING:
1977
46
            case SRE_AT_BOUNDARY:
1978
46
            case SRE_AT_NON_BOUNDARY:
1979
46
            case SRE_AT_LOC_BOUNDARY:
1980
46
            case SRE_AT_LOC_NON_BOUNDARY:
1981
46
            case SRE_AT_UNI_BOUNDARY:
1982
46
            case SRE_AT_UNI_NON_BOUNDARY:
1983
46
                break;
1984
0
            default:
1985
0
                FAIL;
1986
46
            }
1987
46
            break;
1988
1989
46
        case SRE_OP_ANY:
1990
10
        case SRE_OP_ANY_ALL:
1991
            /* These have no operands */
1992
10
            break;
1993
1994
295
        case SRE_OP_IN:
1995
295
        case SRE_OP_IN_IGNORE:
1996
295
        case SRE_OP_IN_UNI_IGNORE:
1997
295
        case SRE_OP_IN_LOC_IGNORE:
1998
295
            GET_SKIP;
1999
            /* Stop 1 before the end; we check the FAILURE below */
2000
295
            if (_validate_charset(code, code+skip-2))
2001
0
                FAIL;
2002
295
            if (code[skip-2] != SRE_OP_FAILURE)
2003
0
                FAIL;
2004
295
            code += skip-1;
2005
295
            break;
2006
2007
208
        case SRE_OP_INFO:
2008
208
            {
2009
                /* A minimal info field is
2010
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
2011
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
2012
                   more follows. */
2013
208
                SRE_CODE flags, i;
2014
208
                SRE_CODE *newcode;
2015
208
                GET_SKIP;
2016
208
                newcode = code+skip-1;
2017
208
                GET_ARG; flags = arg;
2018
208
                GET_ARG;
2019
208
                GET_ARG;
2020
                /* Check that only valid flags are present */
2021
208
                if ((flags & ~(SRE_INFO_PREFIX |
2022
208
                               SRE_INFO_LITERAL |
2023
208
                               SRE_INFO_CHARSET)) != 0)
2024
0
                    FAIL;
2025
                /* PREFIX and CHARSET are mutually exclusive */
2026
208
                if ((flags & SRE_INFO_PREFIX) &&
2027
208
                    (flags & SRE_INFO_CHARSET))
2028
0
                    FAIL;
2029
                /* LITERAL implies PREFIX */
2030
208
                if ((flags & SRE_INFO_LITERAL) &&
2031
208
                    !(flags & SRE_INFO_PREFIX))
2032
0
                    FAIL;
2033
                /* Validate the prefix */
2034
208
                if (flags & SRE_INFO_PREFIX) {
2035
50
                    SRE_CODE prefix_len;
2036
50
                    GET_ARG; prefix_len = arg;
2037
50
                    GET_ARG;
2038
                    /* Here comes the prefix string */
2039
50
                    if (prefix_len > (uintptr_t)(newcode - code))
2040
0
                        FAIL;
2041
50
                    code += prefix_len;
2042
                    /* And here comes the overlap table */
2043
50
                    if (prefix_len > (uintptr_t)(newcode - code))
2044
0
                        FAIL;
2045
                    /* Each overlap value should be < prefix_len */
2046
118
                    for (i = 0; i < prefix_len; i++) {
2047
68
                        if (code[i] >= prefix_len)
2048
0
                            FAIL;
2049
68
                    }
2050
50
                    code += prefix_len;
2051
50
                }
2052
                /* Validate the charset */
2053
208
                if (flags & SRE_INFO_CHARSET) {
2054
75
                    if (_validate_charset(code, newcode-1))
2055
0
                        FAIL;
2056
75
                    if (newcode[-1] != SRE_OP_FAILURE)
2057
0
                        FAIL;
2058
75
                    code = newcode;
2059
75
                }
2060
133
                else if (code != newcode) {
2061
0
                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
2062
0
                    FAIL;
2063
0
                }
2064
208
            }
2065
208
            break;
2066
2067
208
        case SRE_OP_BRANCH:
2068
63
            {
2069
63
                SRE_CODE *target = NULL;
2070
230
                for (;;) {
2071
230
                    GET_SKIP;
2072
230
                    if (skip == 0)
2073
63
                        break;
2074
                    /* Stop 2 before the end; we check the JUMP below */
2075
167
                    if (_validate_inner(code, code+skip-3, groups))
2076
0
                        FAIL;
2077
167
                    code += skip-3;
2078
                    /* Check that it ends with a JUMP, and that each JUMP
2079
                       has the same target */
2080
167
                    GET_OP;
2081
167
                    if (op != SRE_OP_JUMP)
2082
0
                        FAIL;
2083
167
                    GET_SKIP;
2084
167
                    if (target == NULL)
2085
63
                        target = code+skip-1;
2086
104
                    else if (code+skip-1 != target)
2087
0
                        FAIL;
2088
167
                }
2089
63
                if (code != target)
2090
0
                    FAIL;
2091
63
            }
2092
63
            break;
2093
2094
205
        case SRE_OP_REPEAT_ONE:
2095
221
        case SRE_OP_MIN_REPEAT_ONE:
2096
221
        case SRE_OP_POSSESSIVE_REPEAT_ONE:
2097
221
            {
2098
221
                SRE_CODE min, max;
2099
221
                GET_SKIP;
2100
221
                GET_ARG; min = arg;
2101
221
                GET_ARG; max = arg;
2102
221
                if (min > max)
2103
0
                    FAIL;
2104
221
                if (max > SRE_MAXREPEAT)
2105
0
                    FAIL;
2106
221
                if (_validate_inner(code, code+skip-4, groups))
2107
0
                    FAIL;
2108
221
                code += skip-4;
2109
221
                GET_OP;
2110
221
                if (op != SRE_OP_SUCCESS)
2111
0
                    FAIL;
2112
221
            }
2113
221
            break;
2114
2115
221
        case SRE_OP_REPEAT:
2116
40
        case SRE_OP_POSSESSIVE_REPEAT:
2117
40
            {
2118
40
                SRE_CODE op1 = op, min, max;
2119
40
                GET_SKIP;
2120
40
                GET_ARG; min = arg;
2121
40
                GET_ARG; max = arg;
2122
40
                if (min > max)
2123
0
                    FAIL;
2124
40
                if (max > SRE_MAXREPEAT)
2125
0
                    FAIL;
2126
40
                if (_validate_inner(code, code+skip-3, groups))
2127
0
                    FAIL;
2128
40
                code += skip-3;
2129
40
                GET_OP;
2130
40
                if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
2131
0
                    if (op != SRE_OP_SUCCESS)
2132
0
                        FAIL;
2133
0
                }
2134
40
                else {
2135
40
                    if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
2136
0
                        FAIL;
2137
40
                }
2138
40
            }
2139
40
            break;
2140
2141
40
        case SRE_OP_ATOMIC_GROUP:
2142
0
            {
2143
0
                GET_SKIP;
2144
0
                if (_validate_inner(code, code+skip-2, groups))
2145
0
                    FAIL;
2146
0
                code += skip-2;
2147
0
                GET_OP;
2148
0
                if (op != SRE_OP_SUCCESS)
2149
0
                    FAIL;
2150
0
            }
2151
0
            break;
2152
2153
0
        case SRE_OP_GROUPREF:
2154
0
        case SRE_OP_GROUPREF_IGNORE:
2155
2
        case SRE_OP_GROUPREF_UNI_IGNORE:
2156
2
        case SRE_OP_GROUPREF_LOC_IGNORE:
2157
2
            GET_ARG;
2158
2
            if (arg >= (size_t)groups)
2159
0
                FAIL;
2160
2
            break;
2161
2162
2
        case SRE_OP_GROUPREF_EXISTS:
2163
            /* The regex syntax for this is: '(?(group)then|else)', where
2164
               'group' is either an integer group number or a group name,
2165
               'then' and 'else' are sub-regexes, and 'else' is optional. */
2166
0
            GET_ARG;
2167
0
            if (arg >= (size_t)groups)
2168
0
                FAIL;
2169
0
            GET_SKIP_ADJ(1);
2170
0
            code--; /* The skip is relative to the first arg! */
2171
            /* There are two possibilities here: if there is both a 'then'
2172
               part and an 'else' part, the generated code looks like:
2173
2174
               GROUPREF_EXISTS
2175
               <group>
2176
               <skipyes>
2177
               ...then part...
2178
               JUMP
2179
               <skipno>
2180
               (<skipyes> jumps here)
2181
               ...else part...
2182
               (<skipno> jumps here)
2183
2184
               If there is only a 'then' part, it looks like:
2185
2186
               GROUPREF_EXISTS
2187
               <group>
2188
               <skip>
2189
               ...then part...
2190
               (<skip> jumps here)
2191
2192
               There is no direct way to decide which it is, and we don't want
2193
               to allow arbitrary jumps anywhere in the code; so we just look
2194
               for a JUMP opcode preceding our skip target.
2195
            */
2196
0
            VTRACE(("then part:\n"));
2197
0
            int rc = _validate_inner(code+1, code+skip-1, groups);
2198
0
            if (rc == 1) {
2199
0
                VTRACE(("else part:\n"));
2200
0
                code += skip-2; /* Position after JUMP, at <skipno> */
2201
0
                GET_SKIP;
2202
0
                rc = _validate_inner(code, code+skip-1, groups);
2203
0
            }
2204
0
            if (rc)
2205
0
                FAIL;
2206
0
            code += skip-1;
2207
0
            break;
2208
2209
8
        case SRE_OP_ASSERT:
2210
28
        case SRE_OP_ASSERT_NOT:
2211
28
            GET_SKIP;
2212
28
            GET_ARG; /* 0 for lookahead, width for lookbehind */
2213
28
            code--; /* Back up over arg to simplify math below */
2214
            /* Stop 1 before the end; we check the SUCCESS below */
2215
28
            if (_validate_inner(code+1, code+skip-2, groups))
2216
0
                FAIL;
2217
28
            code += skip-2;
2218
28
            GET_OP;
2219
28
            if (op != SRE_OP_SUCCESS)
2220
0
                FAIL;
2221
28
            break;
2222
2223
28
        case SRE_OP_JUMP:
2224
0
            if (code + 1 != end)
2225
0
                FAIL;
2226
0
            VTRACE(("JUMP: %d\n", __LINE__));
2227
0
            return 1;
2228
2229
0
        default:
2230
0
            FAIL;
2231
2232
1.57k
        }
2233
1.57k
    }
2234
2235
664
    VTRACE(("okay\n"));
2236
664
    return 0;
2237
664
}
2238
2239
static int
2240
_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2241
208
{
2242
208
    if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
2243
208
        code >= end || end[-1] != SRE_OP_SUCCESS)
2244
0
        FAIL;
2245
208
    return _validate_inner(code, end-1, groups);
2246
208
}
2247
2248
static int
2249
_validate(PatternObject *self)
2250
208
{
2251
208
    if (_validate_outer(self->code, self->code+self->codesize, self->groups))
2252
0
    {
2253
0
        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
2254
0
        return 0;
2255
0
    }
2256
208
    else
2257
208
        VTRACE(("Success!\n"));
2258
208
    return 1;
2259
208
}
2260
2261
/* -------------------------------------------------------------------- */
2262
/* match methods */
2263
2264
static int
2265
match_traverse(PyObject *op, visitproc visit, void *arg)
2266
63.0k
{
2267
63.0k
    MatchObject *self = _MatchObject_CAST(op);
2268
63.0k
    Py_VISIT(Py_TYPE(self));
2269
63.0k
    Py_VISIT(self->string);
2270
63.0k
    Py_VISIT(self->regs);
2271
63.0k
    Py_VISIT(self->pattern);
2272
63.0k
    return 0;
2273
63.0k
}
2274
2275
static int
2276
match_clear(PyObject *op)
2277
87.7M
{
2278
87.7M
    MatchObject *self = _MatchObject_CAST(op);
2279
87.7M
    Py_CLEAR(self->string);
2280
87.7M
    Py_CLEAR(self->regs);
2281
87.7M
    Py_CLEAR(self->pattern);
2282
87.7M
    return 0;
2283
87.7M
}
2284
2285
static void
2286
match_dealloc(PyObject *self)
2287
87.7M
{
2288
87.7M
    PyTypeObject *tp = Py_TYPE(self);
2289
87.7M
    PyObject_GC_UnTrack(self);
2290
87.7M
    (void)match_clear(self);
2291
87.7M
    tp->tp_free(self);
2292
87.7M
    Py_DECREF(tp);
2293
87.7M
}
2294
2295
static PyObject*
2296
match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
2297
182M
{
2298
182M
    Py_ssize_t length;
2299
182M
    int isbytes, charsize;
2300
182M
    Py_buffer view;
2301
182M
    PyObject *result;
2302
182M
    const void* ptr;
2303
182M
    Py_ssize_t i, j;
2304
2305
182M
    assert(0 <= index && index < self->groups);
2306
182M
    index *= 2;
2307
2308
182M
    if (self->string == Py_None || self->mark[index] < 0) {
2309
        /* return default value if the string or group is undefined */
2310
107M
        return Py_NewRef(def);
2311
107M
    }
2312
2313
75.8M
    ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
2314
75.8M
    if (ptr == NULL)
2315
0
        return NULL;
2316
2317
75.8M
    i = self->mark[index];
2318
75.8M
    j = self->mark[index+1];
2319
75.8M
    i = Py_MIN(i, length);
2320
75.8M
    j = Py_MIN(j, length);
2321
75.8M
    result = getslice(isbytes, ptr, self->string, i, j);
2322
75.8M
    if (isbytes && view.buf != NULL)
2323
26.5k
        PyBuffer_Release(&view);
2324
75.8M
    return result;
2325
75.8M
}
2326
2327
static Py_ssize_t
2328
match_getindex(MatchObject* self, PyObject* index)
2329
244M
{
2330
244M
    Py_ssize_t i;
2331
2332
244M
    if (index == NULL)
2333
        /* Default value */
2334
61.5M
        return 0;
2335
2336
182M
    if (PyIndex_Check(index)) {
2337
176M
        i = PyNumber_AsSsize_t(index, NULL);
2338
176M
    }
2339
6.41M
    else {
2340
6.41M
        i = -1;
2341
2342
6.41M
        if (self->pattern->groupindex) {
2343
6.41M
            index = PyDict_GetItemWithError(self->pattern->groupindex, index);
2344
6.41M
            if (index && PyLong_Check(index)) {
2345
6.41M
                i = PyLong_AsSsize_t(index);
2346
6.41M
            }
2347
6.41M
        }
2348
6.41M
    }
2349
182M
    if (i < 0 || i >= self->groups) {
2350
        /* raise IndexError if we were given a bad group number */
2351
0
        if (!PyErr_Occurred()) {
2352
0
            PyErr_SetString(PyExc_IndexError, "no such group");
2353
0
        }
2354
0
        return -1;
2355
0
    }
2356
2357
    // Check that i*2 cannot overflow to make static analyzers happy
2358
182M
    assert(i <= SRE_MAXGROUPS);
2359
182M
    return i;
2360
182M
}
2361
2362
static PyObject*
2363
match_getslice(MatchObject* self, PyObject* index, PyObject* def)
2364
182M
{
2365
182M
    Py_ssize_t i = match_getindex(self, index);
2366
2367
182M
    if (i < 0) {
2368
0
        return NULL;
2369
0
    }
2370
2371
182M
    return match_getslice_by_index(self, i, def);
2372
182M
}
2373
2374
/*[clinic input]
2375
_sre.SRE_Match.expand
2376
2377
    template: object
2378
2379
Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2380
[clinic start generated code]*/
2381
2382
static PyObject *
2383
_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2384
/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
2385
0
{
2386
0
    _sremodulestate *module_state = get_sre_module_state_by_class(Py_TYPE(self));
2387
0
    PyObject *filter = compile_template(module_state, self->pattern, template);
2388
0
    if (filter == NULL) {
2389
0
        return NULL;
2390
0
    }
2391
0
    PyObject *result = expand_template((TemplateObject *)filter, self);
2392
0
    Py_DECREF(filter);
2393
0
    return result;
2394
0
}
2395
2396
/* Implementation of Match.group(*args).

   With no argument the whole match (group 0) is returned, with exactly
   one argument that group, and with several arguments a tuple holding one
   entry per argument.  Groups that are undefined yield None.  Returns
   NULL with an exception set on an invalid group or allocation failure. */
static PyObject*
match_group(PyObject *op, PyObject* args)
{
    MatchObject *self = _MatchObject_CAST(op);
    const Py_ssize_t nargs = PyTuple_GET_SIZE(args);

    if (nargs == 0) {
        return match_getslice(self, _PyLong_GetZero(), Py_None);
    }
    if (nargs == 1) {
        return match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
    }

    /* fetch multiple items */
    PyObject *pieces = PyTuple_New(nargs);
    if (pieces == NULL) {
        return NULL;
    }
    for (Py_ssize_t pos = 0; pos < nargs; pos++) {
        PyObject *piece = match_getslice(
            self, PyTuple_GET_ITEM(args, pos), Py_None);
        if (piece == NULL) {
            Py_DECREF(pieces);
            return NULL;
        }
        /* The tuple takes over the reference to `piece` */
        PyTuple_SET_ITEM(pieces, pos, piece);
    }
    return pieces;
}
2431
2432
/* Subscript handler: match[name] returns the same value as
   match_getslice() with None as the default for undefined groups. */
static PyObject*
match_getitem(PyObject *op, PyObject* name)
{
    return match_getslice(_MatchObject_CAST(op), name, Py_None);
}
2438
2439
/*[clinic input]
2440
_sre.SRE_Match.groups
2441
2442
    default: object = None
2443
        Is used for groups that did not participate in the match.
2444
2445
Return a tuple containing all the subgroups of the match, from 1.
2446
[clinic start generated code]*/
2447
2448
static PyObject *
2449
_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2450
/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2451
0
{
2452
0
    PyObject* result;
2453
0
    Py_ssize_t index;
2454
2455
0
    result = PyTuple_New(self->groups-1);
2456
0
    if (!result)
2457
0
        return NULL;
2458
2459
0
    for (index = 1; index < self->groups; index++) {
2460
0
        PyObject* item;
2461
0
        item = match_getslice_by_index(self, index, default_value);
2462
0
        if (!item) {
2463
0
            Py_DECREF(result);
2464
0
            return NULL;
2465
0
        }
2466
0
        PyTuple_SET_ITEM(result, index-1, item);
2467
0
    }
2468
2469
0
    return result;
2470
0
}
2471
2472
/*[clinic input]
2473
_sre.SRE_Match.groupdict
2474
2475
    default: object = None
2476
        Is used for groups that did not participate in the match.
2477
2478
Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2479
[clinic start generated code]*/
2480
2481
static PyObject *
2482
_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2483
/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2484
0
{
2485
0
    PyObject *result;
2486
0
    PyObject *key;
2487
0
    PyObject *value;
2488
0
    Py_ssize_t pos = 0;
2489
0
    Py_hash_t hash;
2490
2491
0
    result = PyDict_New();
2492
0
    if (!result || !self->pattern->groupindex)
2493
0
        return result;
2494
2495
0
    Py_BEGIN_CRITICAL_SECTION(self->pattern->groupindex);
2496
0
    while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2497
0
        int status;
2498
0
        Py_INCREF(key);
2499
0
        value = match_getslice(self, key, default_value);
2500
0
        if (!value) {
2501
0
            Py_DECREF(key);
2502
0
            Py_CLEAR(result);
2503
0
            goto exit;
2504
0
        }
2505
0
        status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2506
0
        Py_DECREF(value);
2507
0
        Py_DECREF(key);
2508
0
        if (status < 0) {
2509
0
            Py_CLEAR(result);
2510
0
            goto exit;
2511
0
        }
2512
0
    }
2513
0
exit:;
2514
0
    Py_END_CRITICAL_SECTION();
2515
2516
0
    return result;
2517
0
}
2518
2519
/*[clinic input]
2520
_sre.SRE_Match.start -> Py_ssize_t
2521
2522
    group: object(c_default="NULL") = 0
2523
    /
2524
2525
Return index of the start of the substring matched by group.
2526
[clinic start generated code]*/
2527
2528
static Py_ssize_t
2529
_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2530
/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2531
498k
{
2532
498k
    Py_ssize_t index = match_getindex(self, group);
2533
2534
498k
    if (index < 0) {
2535
0
        return -1;
2536
0
    }
2537
2538
    /* mark is -1 if group is undefined */
2539
498k
    return self->mark[index*2];
2540
498k
}
2541
2542
/*[clinic input]
2543
_sre.SRE_Match.end -> Py_ssize_t
2544
2545
    group: object(c_default="NULL") = 0
2546
    /
2547
2548
Return index of the end of the substring matched by group.
2549
[clinic start generated code]*/
2550
2551
static Py_ssize_t
2552
_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2553
/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2554
58.5M
{
2555
58.5M
    Py_ssize_t index = match_getindex(self, group);
2556
2557
58.5M
    if (index < 0) {
2558
0
        return -1;
2559
0
    }
2560
2561
    /* mark is -1 if group is undefined */
2562
58.5M
    return self->mark[index*2+1];
2563
58.5M
}
2564
2565
/* Build the 2-tuple (i1, i2) of Python ints.
   Returns a new reference, or NULL when an allocation fails. */
LOCAL(PyObject*)
_pair(Py_ssize_t i1, Py_ssize_t i2)
{
    const Py_ssize_t values[2] = {i1, i2};

    PyObject *pair = PyTuple_New(2);
    if (pair == NULL) {
        return NULL;
    }

    for (Py_ssize_t slot = 0; slot < 2; slot++) {
        PyObject *number = PyLong_FromSsize_t(values[slot]);
        if (number == NULL) {
            /* Releasing the tuple also releases any item already stored */
            Py_DECREF(pair);
            return NULL;
        }
        PyTuple_SET_ITEM(pair, slot, number);
    }

    return pair;
}
2591
2592
/*[clinic input]
2593
_sre.SRE_Match.span
2594
2595
    group: object(c_default="NULL") = 0
2596
    /
2597
2598
For match object m, return the 2-tuple (m.start(group), m.end(group)).
2599
[clinic start generated code]*/
2600
2601
static PyObject *
2602
_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2603
/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2604
2.53M
{
2605
2.53M
    Py_ssize_t index = match_getindex(self, group);
2606
2607
2.53M
    if (index < 0) {
2608
0
        return NULL;
2609
0
    }
2610
2611
    /* marks are -1 if group is undefined */
2612
2.53M
    return _pair(self->mark[index*2], self->mark[index*2+1]);
2613
2.53M
}
2614
2615
static PyObject*
2616
match_regs(MatchObject* self)
2617
0
{
2618
0
    PyObject* regs;
2619
0
    PyObject* item;
2620
0
    Py_ssize_t index;
2621
2622
0
    regs = PyTuple_New(self->groups);
2623
0
    if (!regs)
2624
0
        return NULL;
2625
2626
0
    for (index = 0; index < self->groups; index++) {
2627
0
        item = _pair(self->mark[index*2], self->mark[index*2+1]);
2628
0
        if (!item) {
2629
0
            Py_DECREF(regs);
2630
0
            return NULL;
2631
0
        }
2632
0
        PyTuple_SET_ITEM(regs, index, item);
2633
0
    }
2634
2635
0
    self->regs = Py_NewRef(regs);
2636
2637
0
    return regs;
2638
0
}
2639
2640
/*[clinic input]
2641
_sre.SRE_Match.__copy__
2642
2643
[clinic start generated code]*/
2644
2645
static PyObject *
2646
_sre_SRE_Match___copy___impl(MatchObject *self)
2647
/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2648
0
{
2649
0
    return Py_NewRef(self);
2650
0
}
2651
2652
/*[clinic input]
2653
_sre.SRE_Match.__deepcopy__
2654
2655
    memo: object
2656
    /
2657
2658
[clinic start generated code]*/
2659
2660
static PyObject *
2661
_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2662
/*[clinic end generated code: output=2b657578eb03f4a3 input=779d12a31c2c325e]*/
2663
0
{
2664
0
    return Py_NewRef(self);
2665
0
}
2666
2667
PyDoc_STRVAR(match_doc,
2668
"The result of re.match() and re.search().\n\
2669
Match objects always have a boolean value of True.");
2670
2671
PyDoc_STRVAR(match_group_doc,
2672
"group([group1, ...]) -> str or tuple.\n\
2673
    Return subgroup(s) of the match by indices or names.\n\
2674
    For 0 returns the entire match.");
2675
2676
/* Getter for Match.lastindex: the stored lastindex as an int, or None
   when it is negative. */
static PyObject *
match_lastindex_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);

    if (match->lastindex < 0) {
        Py_RETURN_NONE;
    }
    return PyLong_FromSsize_t(match->lastindex);
}
2684
2685
/* Getter for Match.lastgroup: the entry of the pattern's indexgroup tuple
   at position lastindex.  None is returned when the pattern has no
   indexgroup or lastindex lies outside that tuple. */
static PyObject *
match_lastgroup_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);
    PyObject *names = match->pattern->indexgroup;

    if (!names) {
        Py_RETURN_NONE;
    }
    if (match->lastindex < 0 ||
        match->lastindex >= PyTuple_GET_SIZE(names))
    {
        Py_RETURN_NONE;
    }

    return Py_NewRef(PyTuple_GET_ITEM(names, match->lastindex));
}
2699
2700
/* Getter for Match.regs: returns the cached tuple when one is stored in
   self->regs, otherwise builds it with match_regs(). */
static PyObject *
match_regs_get(PyObject *op, void *Py_UNUSED(ignored))
{
    MatchObject *match = _MatchObject_CAST(op);

    if (!match->regs) {
        return match_regs(match);
    }
    return Py_NewRef(match->regs);
}
2709
2710
/* repr() of a match object: type name, the span of group 0 and the repr
   of the matched text, limited by the %.50R format precision.
   Returns NULL when the text of group 0 cannot be fetched or formatting
   fails. */
static PyObject *
match_repr(PyObject *op)
{
    MatchObject *match = _MatchObject_CAST(op);

    PyObject *whole = match_getslice_by_index(match, 0, Py_None);
    if (!whole) {
        return NULL;
    }

    PyObject *text = PyUnicode_FromFormat(
            "<%s object; span=(%zd, %zd), match=%.50R>",
            Py_TYPE(match)->tp_name,
            match->mark[0], match->mark[1], whole);
    Py_DECREF(whole);
    return text;
}
2725
2726
2727
static PyObject*
2728
pattern_new_match(_sremodulestate* module_state,
2729
                  PatternObject* pattern,
2730
                  SRE_STATE* state,
2731
                  Py_ssize_t status)
2732
103M
{
2733
    /* create match object (from state object) */
2734
2735
103M
    MatchObject* match;
2736
103M
    Py_ssize_t i, j;
2737
103M
    char* base;
2738
103M
    int n;
2739
2740
103M
    if (status > 0) {
2741
2742
        /* create match object (with room for extra group marks) */
2743
        /* coverity[ampersand_in_size] */
2744
87.7M
        match = PyObject_GC_NewVar(MatchObject,
2745
87.7M
                                   module_state->Match_Type,
2746
87.7M
                                   2*(pattern->groups+1));
2747
87.7M
        if (!match)
2748
0
            return NULL;
2749
2750
87.7M
        Py_INCREF(pattern);
2751
87.7M
        match->pattern = pattern;
2752
2753
87.7M
        match->string = Py_NewRef(state->string);
2754
2755
87.7M
        match->regs = NULL;
2756
87.7M
        match->groups = pattern->groups+1;
2757
2758
        /* fill in group slices */
2759
2760
87.7M
        base = (char*) state->beginning;
2761
87.7M
        n = state->charsize;
2762
2763
87.7M
        match->mark[0] = ((char*) state->start - base) / n;
2764
87.7M
        match->mark[1] = ((char*) state->ptr - base) / n;
2765
2766
272M
        for (i = j = 0; i < pattern->groups; i++, j+=2)
2767
184M
            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2768
77.0M
                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2769
77.0M
                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2770
2771
                /* check wrong span */
2772
77.0M
                if (match->mark[j+2] > match->mark[j+3]) {
2773
0
                    PyErr_SetString(PyExc_SystemError,
2774
0
                                    "The span of capturing group is wrong,"
2775
0
                                    " please report a bug for the re module.");
2776
0
                    Py_DECREF(match);
2777
0
                    return NULL;
2778
0
                }
2779
77.0M
            } else
2780
107M
                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2781
2782
87.7M
        match->pos = state->pos;
2783
87.7M
        match->endpos = state->endpos;
2784
2785
87.7M
        match->lastindex = state->lastindex;
2786
2787
87.7M
        PyObject_GC_Track(match);
2788
87.7M
        return (PyObject*) match;
2789
2790
87.7M
    } else if (status == 0) {
2791
2792
        /* no match */
2793
15.6M
        Py_RETURN_NONE;
2794
2795
15.6M
    }
2796
2797
    /* internal error */
2798
0
    pattern_error(status);
2799
0
    return NULL;
2800
103M
}
2801
2802
2803
/* -------------------------------------------------------------------- */
2804
/* scanner methods (experimental) */
2805
2806
static int
2807
scanner_traverse(PyObject *op, visitproc visit, void *arg)
2808
373
{
2809
373
    ScannerObject *self = _ScannerObject_CAST(op);
2810
373
    Py_VISIT(Py_TYPE(self));
2811
373
    Py_VISIT(self->pattern);
2812
373
    return 0;
2813
373
}
2814
2815
static int
2816
scanner_clear(PyObject *op)
2817
338k
{
2818
338k
    ScannerObject *self = _ScannerObject_CAST(op);
2819
338k
    Py_CLEAR(self->pattern);
2820
338k
    return 0;
2821
338k
}
2822
2823
static void
2824
scanner_dealloc(PyObject *self)
2825
338k
{
2826
338k
    PyTypeObject *tp = Py_TYPE(self);
2827
338k
    PyObject_GC_UnTrack(self);
2828
338k
    ScannerObject *scanner = _ScannerObject_CAST(self);
2829
338k
    state_fini(&scanner->state);
2830
338k
    (void)scanner_clear(self);
2831
338k
    tp->tp_free(self);
2832
338k
    Py_DECREF(tp);
2833
338k
}
2834
2835
static int
2836
scanner_begin(ScannerObject* self)
2837
2.73M
{
2838
2.73M
    if (self->executing) {
2839
0
        PyErr_SetString(PyExc_ValueError,
2840
0
                        "regular expression scanner already executing");
2841
0
        return 0;
2842
0
    }
2843
2.73M
    self->executing = 1;
2844
2.73M
    return 1;
2845
2.73M
}
2846
2847
static void
2848
scanner_end(ScannerObject* self)
2849
2.73M
{
2850
2.73M
    assert(self->executing);
2851
2.73M
    self->executing = 0;
2852
2.73M
}
2853
2854
/*[clinic input]
2855
_sre.SRE_Scanner.match
2856
2857
    cls: defining_class
2858
    /
2859
2860
[clinic start generated code]*/
2861
2862
static PyObject *
2863
_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls)
2864
/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/
2865
0
{
2866
0
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2867
0
    SRE_STATE* state = &self->state;
2868
0
    PyObject* match;
2869
0
    Py_ssize_t status;
2870
2871
0
    if (!scanner_begin(self)) {
2872
0
        return NULL;
2873
0
    }
2874
0
    if (state->start == NULL) {
2875
0
        scanner_end(self);
2876
0
        Py_RETURN_NONE;
2877
0
    }
2878
2879
0
    state_reset(state);
2880
2881
0
    state->ptr = state->start;
2882
2883
0
    status = sre_match(state, PatternObject_GetCode(self->pattern));
2884
0
    if (PyErr_Occurred()) {
2885
0
        scanner_end(self);
2886
0
        return NULL;
2887
0
    }
2888
2889
0
    match = pattern_new_match(module_state, self->pattern,
2890
0
                              state, status);
2891
2892
0
    if (status == 0)
2893
0
        state->start = NULL;
2894
0
    else {
2895
0
        state->must_advance = (state->ptr == state->start);
2896
0
        state->start = state->ptr;
2897
0
    }
2898
2899
0
    scanner_end(self);
2900
0
    return match;
2901
0
}
2902
2903
2904
/*[clinic input]
2905
_sre.SRE_Scanner.search
2906
2907
    cls: defining_class
2908
    /
2909
2910
[clinic start generated code]*/
2911
2912
static PyObject *
2913
_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
2914
/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
2915
2.73M
{
2916
2.73M
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2917
2.73M
    SRE_STATE* state = &self->state;
2918
2.73M
    PyObject* match;
2919
2.73M
    Py_ssize_t status;
2920
2921
2.73M
    if (!scanner_begin(self)) {
2922
0
        return NULL;
2923
0
    }
2924
2.73M
    if (state->start == NULL) {
2925
0
        scanner_end(self);
2926
0
        Py_RETURN_NONE;
2927
0
    }
2928
2929
2.73M
    state_reset(state);
2930
2931
2.73M
    state->ptr = state->start;
2932
2933
2.73M
    status = sre_search(state, PatternObject_GetCode(self->pattern));
2934
2.73M
    if (PyErr_Occurred()) {
2935
0
        scanner_end(self);
2936
0
        return NULL;
2937
0
    }
2938
2939
2.73M
    match = pattern_new_match(module_state, self->pattern,
2940
2.73M
                              state, status);
2941
2942
2.73M
    if (status == 0)
2943
338k
        state->start = NULL;
2944
2.39M
    else {
2945
2.39M
        state->must_advance = (state->ptr == state->start);
2946
2.39M
        state->start = state->ptr;
2947
2.39M
    }
2948
2949
2.73M
    scanner_end(self);
2950
2.73M
    return match;
2951
2.73M
}
2952
2953
static PyObject *
2954
pattern_scanner(_sremodulestate *module_state,
2955
                PatternObject *self,
2956
                PyObject *string,
2957
                Py_ssize_t pos,
2958
                Py_ssize_t endpos)
2959
338k
{
2960
338k
    ScannerObject* scanner;
2961
2962
    /* create scanner object */
2963
338k
    scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
2964
338k
    if (!scanner)
2965
0
        return NULL;
2966
338k
    scanner->pattern = NULL;
2967
338k
    scanner->executing = 0;
2968
2969
    /* create search state object */
2970
338k
    if (!state_init(&scanner->state, self, string, pos, endpos)) {
2971
0
        Py_DECREF(scanner);
2972
0
        return NULL;
2973
0
    }
2974
2975
338k
    Py_INCREF(self);
2976
338k
    scanner->pattern = self;
2977
2978
338k
    PyObject_GC_Track(scanner);
2979
338k
    return (PyObject*) scanner;
2980
338k
}
2981
2982
/* -------------------------------------------------------------------- */
2983
/* template methods */
2984
2985
static int
2986
template_traverse(PyObject *op, visitproc visit, void *arg)
2987
0
{
2988
0
    TemplateObject *self = _TemplateObject_CAST(op);
2989
0
    Py_VISIT(Py_TYPE(self));
2990
0
    Py_VISIT(self->literal);
2991
0
    for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
2992
0
        Py_VISIT(self->items[i].literal);
2993
0
    }
2994
0
    return 0;
2995
0
}
2996
2997
static int
2998
template_clear(PyObject *op)
2999
0
{
3000
0
    TemplateObject *self = _TemplateObject_CAST(op);
3001
0
    Py_CLEAR(self->literal);
3002
0
    for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
3003
0
        Py_CLEAR(self->items[i].literal);
3004
0
    }
3005
0
    return 0;
3006
0
}
3007
3008
static void
3009
template_dealloc(PyObject *self)
3010
0
{
3011
0
    PyTypeObject *tp = Py_TYPE(self);
3012
0
    PyObject_GC_UnTrack(self);
3013
0
    (void)template_clear(self);
3014
0
    tp->tp_free(self);
3015
0
    Py_DECREF(tp);
3016
0
}
3017
3018
static PyObject *
3019
expand_template(TemplateObject *self, MatchObject *match)
3020
0
{
3021
0
    if (Py_SIZE(self) == 0) {
3022
0
        return Py_NewRef(self->literal);
3023
0
    }
3024
3025
0
    PyObject *result = NULL;
3026
0
    Py_ssize_t count = 0;  // the number of non-empty chunks
3027
    /* For small number of strings use a buffer allocated on the stack,
3028
     * otherwise use a list object. */
3029
0
    PyObject *buffer[10];
3030
0
    PyObject **out = buffer;
3031
0
    PyObject *list = NULL;
3032
0
    if (self->chunks > (int)Py_ARRAY_LENGTH(buffer) ||
3033
0
        !PyUnicode_Check(self->literal))
3034
0
    {
3035
0
        list = PyList_New(self->chunks);
3036
0
        if (!list) {
3037
0
            return NULL;
3038
0
        }
3039
0
        out = &PyList_GET_ITEM(list, 0);
3040
0
    }
3041
3042
0
    out[count++] = Py_NewRef(self->literal);
3043
0
    for (Py_ssize_t i = 0; i < Py_SIZE(self); i++) {
3044
0
        Py_ssize_t index = self->items[i].index;
3045
0
        if (index >= match->groups) {
3046
0
            PyErr_SetString(PyExc_IndexError, "no such group");
3047
0
            goto cleanup;
3048
0
        }
3049
0
        PyObject *item = match_getslice_by_index(match, index, Py_None);
3050
0
        if (item == NULL) {
3051
0
            goto cleanup;
3052
0
        }
3053
0
        if (item != Py_None) {
3054
0
            out[count++] = Py_NewRef(item);
3055
0
        }
3056
0
        Py_DECREF(item);
3057
3058
0
        PyObject *literal = self->items[i].literal;
3059
0
        if (literal != NULL) {
3060
0
            out[count++] = Py_NewRef(literal);
3061
0
        }
3062
0
    }
3063
3064
0
    if (PyUnicode_Check(self->literal)) {
3065
0
        result = _PyUnicode_JoinArray(&_Py_STR(empty), out, count);
3066
0
    }
3067
0
    else {
3068
0
        Py_SET_SIZE(list, count);
3069
0
        result = PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list);
3070
0
    }
3071
3072
0
cleanup:
3073
0
    if (list) {
3074
0
        Py_DECREF(list);
3075
0
    }
3076
0
    else {
3077
0
        for (Py_ssize_t i = 0; i < count; i++) {
3078
0
            Py_DECREF(out[i]);
3079
0
        }
3080
0
    }
3081
0
    return result;
3082
0
}
3083
3084
3085
static Py_hash_t
3086
pattern_hash(PyObject *op)
3087
0
{
3088
0
    PatternObject *self = _PatternObject_CAST(op);
3089
3090
0
    Py_hash_t hash, hash2;
3091
3092
0
    hash = PyObject_Hash(self->pattern);
3093
0
    if (hash == -1) {
3094
0
        return -1;
3095
0
    }
3096
3097
0
    hash2 = Py_HashBuffer(self->code, sizeof(self->code[0]) * self->codesize);
3098
0
    hash ^= hash2;
3099
3100
0
    hash ^= self->flags;
3101
0
    hash ^= self->isbytes;
3102
0
    hash ^= self->codesize;
3103
3104
0
    if (hash == -1) {
3105
0
        hash = -2;
3106
0
    }
3107
0
    return hash;
3108
0
}
3109
3110
/* Rich comparison for Pattern objects.
 *
 * Only == and != are supported, and only against an object whose type is
 * exactly the module's Pattern type; everything else yields
 * NotImplemented.  Returns NULL if comparing the pattern objects fails.
 */
static PyObject*
pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
{
    _sremodulestate *module_state =
        get_sre_module_state_by_class(Py_TYPE(lefto));

    if ((op != Py_EQ && op != Py_NE)
        || !Py_IS_TYPE(righto, module_state->Pattern_Type))
    {
        Py_RETURN_NOTIMPLEMENTED;
    }

    if (lefto == righto) {
        /* a pattern is equal to itself */
        return PyBool_FromLong(op == Py_EQ);
    }

    PatternObject *a = (PatternObject *)lefto;
    PatternObject *b = (PatternObject *)righto;

    /* Compare the code and the pattern because the same pattern can
       produce different codes depending on the locale used to compile the
       pattern when the re.LOCALE flag is used. Don't compare groups,
       indexgroup nor groupindex: they are derived from the pattern. */
    int equal = (a->flags == b->flags
                 && a->isbytes == b->isbytes
                 && a->codesize == b->codesize
                 && memcmp(a->code, b->code,
                           sizeof(a->code[0]) * a->codesize) == 0);
    if (equal) {
        equal = PyObject_RichCompareBool(a->pattern, b->pattern, Py_EQ);
        if (equal < 0) {
            return NULL;
        }
    }

    return PyBool_FromLong(op == Py_NE ? !equal : equal);
}
3158
3159
#include "clinic/sre.c.h"
3160
3161
/* Method table for the Pattern type (installed through Py_tp_methods in
 * pattern_slots).  The *_METHODDEF entries are macros that presumably come
 * from the "clinic/sre.c.h" include above -- each is expected to expand to
 * one table entry.  __class_getitem__ is added by hand. */
static PyMethodDef pattern_methods[] = {
3162
    _SRE_SRE_PATTERN_MATCH_METHODDEF
3163
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
3164
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
3165
    _SRE_SRE_PATTERN_SUB_METHODDEF
3166
    _SRE_SRE_PATTERN_SUBN_METHODDEF
3167
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
3168
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
3169
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
3170
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
3171
    _SRE_SRE_PATTERN___COPY___METHODDEF
3172
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
3173
    _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
3174
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3175
     PyDoc_STR("See PEP 585")},
3176
    {NULL, NULL}  /* sentinel */
3177
};
3178
3179
/* Computed attributes of the Pattern type.  "groupindex" is served by the
 * pattern_groupindex getter and has no setter (NULL). */
static PyGetSetDef pattern_getset[] = {
3180
    {"groupindex", pattern_groupindex, NULL,
3181
      "A dictionary mapping group names to group numbers."},
3182
    {NULL}  /* Sentinel */
3183
};
3184
3185
/* PAT_OFF(x): byte offset of field x inside PatternObject. */
#define PAT_OFF(x) offsetof(PatternObject, x)
3186
/* Read-only attributes of the Pattern type that map directly onto
 * PatternObject fields.  The "__weaklistoffset__" entry carries no
 * docstring; it reports the offset of the weakreflist field. */
static PyMemberDef pattern_members[] = {
3187
    {"pattern",    _Py_T_OBJECT,    PAT_OFF(pattern),       Py_READONLY,
3188
     "The pattern string from which the RE object was compiled."},
3189
    {"flags",      Py_T_INT,       PAT_OFF(flags),         Py_READONLY,
3190
     "The regex matching flags."},
3191
    {"groups",     Py_T_PYSSIZET,  PAT_OFF(groups),        Py_READONLY,
3192
     "The number of capturing groups in the pattern."},
3193
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PatternObject, weakreflist), Py_READONLY},
3194
    {NULL}  /* Sentinel */
3195
};
3196
3197
/* Slot table for the Pattern heap type: wires the deallocator, repr, hash,
 * docstring, rich comparison, the method/member/getset tables and the GC
 * traverse/clear hooks.  Used by pattern_spec. */
static PyType_Slot pattern_slots[] = {
3198
    {Py_tp_dealloc, pattern_dealloc},
3199
    {Py_tp_repr, pattern_repr},
3200
    {Py_tp_hash, pattern_hash},
3201
    {Py_tp_doc, (void *)pattern_doc},
3202
    {Py_tp_richcompare, pattern_richcompare},
3203
    {Py_tp_methods, pattern_methods},
3204
    {Py_tp_members, pattern_members},
3205
    {Py_tp_getset, pattern_getset},
3206
    {Py_tp_traverse, pattern_traverse},
3207
    {Py_tp_clear, pattern_clear},
3208
    {0, NULL},  /* sentinel */
3209
};
3210
3211
/* Type spec for "re.Pattern".  Instances are variable-sized with items of
 * sizeof(SRE_CODE) bytes.  The type is immutable, GC-enabled and cannot be
 * instantiated directly from Python (DISALLOW_INSTANTIATION). */
static PyType_Spec pattern_spec = {
3212
    .name = "re.Pattern",
3213
    .basicsize = sizeof(PatternObject),
3214
    .itemsize = sizeof(SRE_CODE),
3215
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3216
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3217
    .slots = pattern_slots,
3218
};
3219
3220
/* Method table for the Match type (installed through Py_tp_methods in
 * match_slots).  "group" is written out by hand as a METH_VARARGS method
 * with match_group_doc as its docstring; the *_METHODDEF entries are
 * macros, presumably from "clinic/sre.c.h". */
static PyMethodDef match_methods[] = {
3221
    {"group", match_group, METH_VARARGS, match_group_doc},
3222
    _SRE_SRE_MATCH_START_METHODDEF
3223
    _SRE_SRE_MATCH_END_METHODDEF
3224
    _SRE_SRE_MATCH_SPAN_METHODDEF
3225
    _SRE_SRE_MATCH_GROUPS_METHODDEF
3226
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
3227
    _SRE_SRE_MATCH_EXPAND_METHODDEF
3228
    _SRE_SRE_MATCH___COPY___METHODDEF
3229
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
3230
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3231
     PyDoc_STR("See PEP 585")},
3232
    {NULL, NULL}  /* sentinel */
3233
};
3234
3235
/* Computed, read-only attributes of the Match type (every setter is
 * NULL).  "regs" has no docstring. */
static PyGetSetDef match_getset[] = {
3236
    {"lastindex", match_lastindex_get, NULL,
3237
     "The integer index of the last matched capturing group."},
3238
    {"lastgroup", match_lastgroup_get, NULL,
3239
     "The name of the last matched capturing group."},
3240
    {"regs", match_regs_get, NULL, NULL},
3241
    {NULL}  /* sentinel */
3242
};
3243
3244
/* MATCH_OFF(x): byte offset of field x inside MatchObject. */
#define MATCH_OFF(x) offsetof(MatchObject, x)
3245
/* Read-only attributes of the Match type that map directly onto
 * MatchObject fields.  Note that the Python-level name "re" exposes the
 * `pattern` field. */
static PyMemberDef match_members[] = {
3246
    {"string",  _Py_T_OBJECT,   MATCH_OFF(string),  Py_READONLY,
3247
     "The string passed to match() or search()."},
3248
    {"re",      _Py_T_OBJECT,   MATCH_OFF(pattern), Py_READONLY,
3249
     "The regular expression object."},
3250
    {"pos",     Py_T_PYSSIZET, MATCH_OFF(pos),     Py_READONLY,
3251
     "The index into the string at which the RE engine started looking for a match."},
3252
    {"endpos",  Py_T_PYSSIZET, MATCH_OFF(endpos),  Py_READONLY,
3253
     "The index into the string beyond which the RE engine will not go."},
3254
    {NULL}  /* sentinel */
3255
};
3256
3257
/* FIXME: implement setattr("string", None) as a special case (to
3258
   detach the associated string, if any) */
3259
/* Slot table for the Match heap type: deallocator, repr, docstring, the
 * method/member/getset tables, GC traverse/clear hooks and a subscript
 * handler.  Used by match_spec. */
static PyType_Slot match_slots[] = {
3260
    {Py_tp_dealloc, match_dealloc},
3261
    {Py_tp_repr, match_repr},
3262
    {Py_tp_doc, (void *)match_doc},
3263
    {Py_tp_methods, match_methods},
3264
    {Py_tp_members, match_members},
3265
    {Py_tp_getset, match_getset},
3266
    {Py_tp_traverse, match_traverse},
3267
    {Py_tp_clear, match_clear},
3268
3269
    /* As mapping.
3270
     *
3271
     * Match objects do not support length or assignment, but do support
3272
     * __getitem__.
3273
     */
3274
    {Py_mp_subscript, match_getitem},
3275
3276
    {0, NULL},  /* sentinel */
3277
};
3278
3279
/* Type spec for "re.Match".  Instances are variable-sized with items of
 * sizeof(Py_ssize_t) bytes (pattern_new_match() allocates
 * 2*(groups+1) of them for the mark array).  The type is immutable,
 * GC-enabled and cannot be instantiated directly from Python. */
static PyType_Spec match_spec = {
3280
    .name = "re.Match",
3281
    .basicsize = sizeof(MatchObject),
3282
    .itemsize = sizeof(Py_ssize_t),
3283
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3284
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3285
    .slots = match_slots,
3286
};
3287
3288
/* Method table for the scanner type: the match and search methods, given
 * as *_METHODDEF macros (presumably from "clinic/sre.c.h"). */
static PyMethodDef scanner_methods[] = {
3289
    _SRE_SRE_SCANNER_MATCH_METHODDEF
3290
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
3291
    {NULL, NULL}  /* sentinel */
3292
};
3293
3294
/* SCAN_OFF(x): byte offset of field x inside ScannerObject. */
#define SCAN_OFF(x) offsetof(ScannerObject, x)
3295
/* Read-only attributes of the scanner type: only "pattern", without a
 * docstring. */
static PyMemberDef scanner_members[] = {
3296
    {"pattern", _Py_T_OBJECT, SCAN_OFF(pattern), Py_READONLY},
3297
    {NULL}  /* Sentinel */
3298
};
3299
3300
/* Slot table for the scanner heap type: deallocator, method and member
 * tables, and the GC traverse/clear hooks.  Used by scanner_spec. */
static PyType_Slot scanner_slots[] = {
3301
    {Py_tp_dealloc, scanner_dealloc},
3302
    {Py_tp_methods, scanner_methods},
3303
    {Py_tp_members, scanner_members},
3304
    {Py_tp_traverse, scanner_traverse},
3305
    {Py_tp_clear, scanner_clear},
3306
    {0, NULL},  /* sentinel */
3307
};
3308
3309
/* Type spec for "_sre.SRE_Scanner".  Fixed-size instances (no itemsize
 * given); the type is immutable, GC-enabled and cannot be instantiated
 * directly from Python. */
static PyType_Spec scanner_spec = {
3310
    .name = "_sre.SRE_Scanner",
3311
    .basicsize = sizeof(ScannerObject),
3312
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3313
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3314
    .slots = scanner_slots,
3315
};
3316
3317
/* Slot table for the template heap type: only the deallocator and the GC
 * traverse/clear hooks -- no methods or members are exposed.  Used by
 * template_spec. */
static PyType_Slot template_slots[] = {
3318
    {Py_tp_dealloc, template_dealloc},
3319
    {Py_tp_traverse, template_traverse},
3320
    {Py_tp_clear, template_clear},
3321
    {0, NULL},  /* sentinel */
3322
};
3323
3324
/* Type spec for "_sre.SRE_Template".  Instances are variable-sized; the
 * item size is the size of one element of TemplateObject.items.  The type
 * is immutable, GC-enabled and cannot be instantiated directly from
 * Python. */
static PyType_Spec template_spec = {
3325
    .name = "_sre.SRE_Template",
3326
    .basicsize = sizeof(TemplateObject),
3327
    .itemsize = sizeof(((TemplateObject *)0)->items[0]),
3328
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3329
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3330
    .slots = template_slots,
3331
};
3332
3333
/* Module-level function table of _sre (installed as m_methods in
 * sremodule).  The entries are *_METHODDEF macros, presumably from
 * "clinic/sre.c.h". */
static PyMethodDef _functions[] = {
3334
    _SRE_COMPILE_METHODDEF
3335
    _SRE_TEMPLATE_METHODDEF
3336
    _SRE_GETCODESIZE_METHODDEF
3337
    _SRE_ASCII_ISCASED_METHODDEF
3338
    _SRE_UNICODE_ISCASED_METHODDEF
3339
    _SRE_ASCII_TOLOWER_METHODDEF
3340
    _SRE_UNICODE_TOLOWER_METHODDEF
3341
    {NULL, NULL}  /* sentinel */
3342
};
3343
3344
static int
3345
sre_traverse(PyObject *module, visitproc visit, void *arg)
3346
5.57k
{
3347
5.57k
    _sremodulestate *state = get_sre_module_state(module);
3348
3349
5.57k
    Py_VISIT(state->Pattern_Type);
3350
5.57k
    Py_VISIT(state->Match_Type);
3351
5.57k
    Py_VISIT(state->Scanner_Type);
3352
5.57k
    Py_VISIT(state->Template_Type);
3353
5.57k
    Py_VISIT(state->compile_template);
3354
3355
5.57k
    return 0;
3356
5.57k
}
3357
3358
static int
3359
sre_clear(PyObject *module)
3360
0
{
3361
0
    _sremodulestate *state = get_sre_module_state(module);
3362
3363
0
    Py_CLEAR(state->Pattern_Type);
3364
0
    Py_CLEAR(state->Match_Type);
3365
0
    Py_CLEAR(state->Scanner_Type);
3366
0
    Py_CLEAR(state->Template_Type);
3367
0
    Py_CLEAR(state->compile_template);
3368
3369
0
    return 0;
3370
0
}
3371
3372
static void
3373
sre_free(void *module)
3374
0
{
3375
0
    sre_clear((PyObject *)module);
3376
0
}
3377
3378
44
/* CREATE_TYPE(m, type, spec): create a heap type from `spec` bound to
 * module `m` and store it in `type`.  On failure jumps to a label named
 * `error`, which the enclosing function must provide. */
#define CREATE_TYPE(m, type, spec)                                  \
3379
44
do {                                                                \
3380
44
    type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \
3381
44
    if (type == NULL) {                                             \
3382
0
        goto error;                                                 \
3383
0
    }                                                               \
3384
44
} while (0)
3385
3386
/* ADD_ULONG_CONSTANT(module, name, value): add `value` to `module` under
 * `name` as a Python int built with PyLong_FromUnsignedLong().  On failure
 * jumps to a label named `error`, which the enclosing function must
 * provide. */
#define ADD_ULONG_CONSTANT(module, name, value)           \
3387
22
    do {                                                  \
3388
22
        if (PyModule_Add(module, name, PyLong_FromUnsignedLong(value)) < 0) { \
3389
0
            goto error;                                   \
3390
0
        }                                                 \
3391
22
} while (0)
3392
3393
static int
3394
sre_exec(PyObject *m)
3395
11
{
3396
11
    _sremodulestate *state;
3397
3398
    /* Create heap types */
3399
11
    state = get_sre_module_state(m);
3400
11
    CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
3401
11
    CREATE_TYPE(m, state->Match_Type, &match_spec);
3402
11
    CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
3403
11
    CREATE_TYPE(m, state->Template_Type, &template_spec);
3404
3405
11
    if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
3406
0
        goto error;
3407
0
    }
3408
3409
11
    if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
3410
0
        goto error;
3411
0
    }
3412
3413
11
    ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
3414
11
    ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
3415
3416
11
    if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
3417
0
        goto error;
3418
0
    }
3419
3420
11
    return 0;
3421
3422
0
error:
3423
0
    return -1;
3424
11
}
3425
3426
/* Module slots for _sre (multi-phase initialization): sre_exec populates
 * the module; the other two entries declare support for per-interpreter
 * GILs and that the module does not need the GIL. */
static PyModuleDef_Slot sre_slots[] = {
3427
    {Py_mod_exec, sre_exec},
3428
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
3429
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
3430
    {0, NULL},  /* sentinel */
3431
};
3432
3433
/* Module definition for "_sre".  m_size reserves one _sremodulestate per
 * module instance; sre_traverse/sre_clear/sre_free manage the references
 * kept in that state. */
static struct PyModuleDef sremodule = {
3434
    .m_base = PyModuleDef_HEAD_INIT,
3435
    .m_name = "_sre",
3436
    .m_size = sizeof(_sremodulestate),
3437
    .m_methods = _functions,
3438
    .m_slots = sre_slots,
3439
    .m_traverse = sre_traverse,
3440
    .m_free = sre_free,
3441
    .m_clear = sre_clear,
3442
};
3443
3444
PyMODINIT_FUNC
3445
PyInit__sre(void)
3446
11
{
3447
11
    return PyModuleDef_Init(&sremodule);
3448
11
}
3449
3450
/* vim:ts=4:sw=4:et
3451
*/