Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Modules/_sre.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * partial history:
7
 * 1999-10-24 fl   created (based on existing template matcher code)
8
 * 2000-03-06 fl   first alpha, sort of
9
 * 2000-08-01 fl   fixes for 1.6b1
10
 * 2000-08-07 fl   use PyOS_CheckStack() if available
11
 * 2000-09-20 fl   added expand method
12
 * 2001-03-20 fl   lots of fixes for 2.1b2
13
 * 2001-04-15 fl   export copyright as Python attribute, not global
14
 * 2001-04-28 fl   added __copy__ methods (work in progress)
15
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
16
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
17
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
18
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
19
 * 2001-10-21 fl   added sub/subn primitive
20
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
21
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
22
 * 2002-11-09 fl   fixed empty sub/subn return type
23
 * 2003-04-18 mvl  fully support 4-byte codes
24
 * 2003-10-17 gn   implemented non recursive scheme
25
 * 2013-02-04 mrab added fullmatch primitive
26
 *
27
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
28
 *
29
 * This version of the SRE library can be redistributed under CNRI's
30
 * Python 1.6 license.  For any other use, please contact Secret Labs
31
 * AB (info@pythonware.com).
32
 *
33
 * Portions of this engine have been developed in cooperation with
34
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
35
 * other compatibility work.
36
 */
37
38
static const char copyright[] =
39
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41
#define PY_SSIZE_T_CLEAN
42
43
#include "Python.h"
44
#include "structmember.h" /* offsetof */
45
46
#include "sre.h"
47
48
32
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
50
#include <ctype.h>
51
52
/* name of this module, minus the leading underscore */
53
#if !defined(SRE_MODULE)
54
#define SRE_MODULE "sre"
55
#endif
56
57
0
#define SRE_PY_MODULE "re"
58
59
/* defining this one enables tracing */
60
#undef VERBOSE
61
62
/* -------------------------------------------------------------------- */
63
64
#if defined(_MSC_VER)
65
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
66
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
67
/* fastest possible local call under MSVC */
68
#define LOCAL(type) static __inline type __fastcall
69
#else
70
#define LOCAL(type) static inline type
71
#endif
72
73
/* Negative status codes returned by the matching engine.  The values are
 * parenthesised so they stay intact inside larger expressions. */
#define SRE_ERROR_ILLEGAL (-1)          /* illegal opcode */
#define SRE_ERROR_STATE (-2)            /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3)  /* runaway recursion */
#define SRE_ERROR_MEMORY (-9)           /* out of memory */
#define SRE_ERROR_INTERRUPTED (-10)     /* signal handler raised exception */
79
80
#if defined(VERBOSE)
81
#define TRACE(v) printf v
82
#else
83
#define TRACE(v)
84
#endif
85
86
/* -------------------------------------------------------------------- */
87
/* search engine state */
88
89
/* ASCII character predicates.  Each one first bounds `ch` by the highest
 * character that can satisfy the class, then applies the matching Py_IS*
 * test (presumably because those tests only look at the low byte --
 * TODO confirm against pyctype.h). */
#define SRE_IS_DIGIT(ch) ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_WORD(ch) ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
97
98
/* Lowercase an ASCII code point with Py_TOLOWER; any value of 128 or above
 * is returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
102
103
/* Locale-specific character predicates.
 *
 * The range test is spelled !(c & ~N) rather than (c < N+1): the two are
 * equivalent for any unsigned c, and the mask form avoids compiler warnings
 * when c's type cannot hold values >= N+1. */

/* isalnum() for values that fit in 8 bits, 0 for anything larger. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)

/* A word character is a locale alphanumeric or an underscore. */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
108
109
/* Lowercase `ch` with the C library's tolower() when it is below 256;
 * larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)tolower(ch);
}
113
114
/* Uppercase `ch` with the C library's toupper() when it is below 256;
 * larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)toupper(ch);
}
118
119
/* Unicode character predicates: thin aliases for the Py_UNICODE_IS* tests. */

#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)

/* A word character is a Unicode alphanumeric or an underscore. */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
126
127
/* Return Py_UNICODE_TOLOWER(ch) converted to unsigned int. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered;

    lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
131
132
/* Return Py_UNICODE_TOUPPER(ch) converted to unsigned int. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised;

    raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
136
137
LOCAL(int)
138
sre_category(SRE_CODE category, unsigned int ch)
139
20
{
140
20
    switch (category) {
141
142
0
    case SRE_CATEGORY_DIGIT:
143
0
        return SRE_IS_DIGIT(ch);
144
0
    case SRE_CATEGORY_NOT_DIGIT:
145
0
        return !SRE_IS_DIGIT(ch);
146
0
    case SRE_CATEGORY_SPACE:
147
0
        return SRE_IS_SPACE(ch);
148
0
    case SRE_CATEGORY_NOT_SPACE:
149
0
        return !SRE_IS_SPACE(ch);
150
0
    case SRE_CATEGORY_WORD:
151
0
        return SRE_IS_WORD(ch);
152
0
    case SRE_CATEGORY_NOT_WORD:
153
0
        return !SRE_IS_WORD(ch);
154
0
    case SRE_CATEGORY_LINEBREAK:
155
0
        return SRE_IS_LINEBREAK(ch);
156
0
    case SRE_CATEGORY_NOT_LINEBREAK:
157
0
        return !SRE_IS_LINEBREAK(ch);
158
159
0
    case SRE_CATEGORY_LOC_WORD:
160
0
        return SRE_LOC_IS_WORD(ch);
161
0
    case SRE_CATEGORY_LOC_NOT_WORD:
162
0
        return !SRE_LOC_IS_WORD(ch);
163
164
2
    case SRE_CATEGORY_UNI_DIGIT:
165
2
        return SRE_UNI_IS_DIGIT(ch);
166
0
    case SRE_CATEGORY_UNI_NOT_DIGIT:
167
0
        return !SRE_UNI_IS_DIGIT(ch);
168
0
    case SRE_CATEGORY_UNI_SPACE:
169
0
        return SRE_UNI_IS_SPACE(ch);
170
0
    case SRE_CATEGORY_UNI_NOT_SPACE:
171
0
        return !SRE_UNI_IS_SPACE(ch);
172
18
    case SRE_CATEGORY_UNI_WORD:
173
18
        return SRE_UNI_IS_WORD(ch);
174
0
    case SRE_CATEGORY_UNI_NOT_WORD:
175
0
        return !SRE_UNI_IS_WORD(ch);
176
0
    case SRE_CATEGORY_UNI_LINEBREAK:
177
0
        return SRE_UNI_IS_LINEBREAK(ch);
178
0
    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
179
0
        return !SRE_UNI_IS_LINEBREAK(ch);
180
20
    }
181
0
    return 0;
182
20
}
183
184
LOCAL(int)
185
char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
186
0
{
187
0
    return ch == pattern
188
0
        || (SRE_CODE) sre_lower_locale(ch) == pattern
189
0
        || (SRE_CODE) sre_upper_locale(ch) == pattern;
190
0
}
191
192
193
/* helpers */
194
195
static void
196
data_stack_dealloc(SRE_STATE* state)
197
6
{
198
6
    if (state->data_stack) {
199
6
        PyMem_FREE(state->data_stack);
200
6
        state->data_stack = NULL;
201
6
    }
202
6
    state->data_stack_size = state->data_stack_base = 0;
203
6
}
204
205
static int
206
data_stack_grow(SRE_STATE* state, Py_ssize_t size)
207
6
{
208
6
    Py_ssize_t minsize, cursize;
209
6
    minsize = state->data_stack_base+size;
210
6
    cursize = state->data_stack_size;
211
6
    if (cursize < minsize) {
212
6
        void* stack;
213
6
        cursize = minsize+minsize/4+1024;
214
6
        TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
215
6
        stack = PyMem_REALLOC(state->data_stack, cursize);
216
6
        if (!stack) {
217
0
            data_stack_dealloc(state);
218
0
            return SRE_ERROR_MEMORY;
219
0
        }
220
6
        state->data_stack = (char *)stack;
221
6
        state->data_stack_size = cursize;
222
6
    }
223
6
    return 0;
224
6
}
225
226
/* generate 8-bit version */
227
228
378
#define SRE_CHAR Py_UCS1
229
#define SIZEOF_SRE_CHAR 1
230
168
#define SRE(F) sre_ucs1_##F
231
#include "sre_lib.h"
232
233
/* generate 16-bit unicode version */
234
235
0
#define SRE_CHAR Py_UCS2
236
#define SIZEOF_SRE_CHAR 2
237
0
#define SRE(F) sre_ucs2_##F
238
#include "sre_lib.h"
239
240
/* generate 32-bit unicode version */
241
242
0
#define SRE_CHAR Py_UCS4
243
#define SIZEOF_SRE_CHAR 4
244
0
#define SRE(F) sre_ucs4_##F
245
#include "sre_lib.h"
246
247
/* -------------------------------------------------------------------- */
248
/* factories and destructors */
249
250
/* see sre.h for object declarations */
251
static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
252
static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
253
254
255
/*[clinic input]
256
module _sre
257
class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
258
class _sre.SRE_Match "MatchObject *" "&Match_Type"
259
class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
260
[clinic start generated code]*/
261
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
262
263
static PyTypeObject Pattern_Type;
264
static PyTypeObject Match_Type;
265
static PyTypeObject Scanner_Type;
266
267
/*[clinic input]
268
_sre.getcodesize -> int
269
[clinic start generated code]*/
270
271
static int
272
_sre_getcodesize_impl(PyObject *module)
273
/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
274
0
{
275
0
    return sizeof(SRE_CODE);
276
0
}
277
278
/*[clinic input]
279
_sre.ascii_iscased -> bool
280
281
    character: int
282
    /
283
284
[clinic start generated code]*/
285
286
static int
287
_sre_ascii_iscased_impl(PyObject *module, int character)
288
/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
289
8
{
290
8
    unsigned int ch = (unsigned int)character;
291
8
    return ch < 128 && Py_ISALPHA(ch);
292
8
}
293
294
/*[clinic input]
295
_sre.unicode_iscased -> bool
296
297
    character: int
298
    /
299
300
[clinic start generated code]*/
301
302
static int
303
_sre_unicode_iscased_impl(PyObject *module, int character)
304
/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
305
36
{
306
36
    unsigned int ch = (unsigned int)character;
307
36
    return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
308
36
}
309
310
/*[clinic input]
311
_sre.ascii_tolower -> int
312
313
    character: int
314
    /
315
316
[clinic start generated code]*/
317
318
static int
319
_sre_ascii_tolower_impl(PyObject *module, int character)
320
/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
321
128
{
322
128
    return sre_lower_ascii(character);
323
128
}
324
325
/*[clinic input]
326
_sre.unicode_tolower -> int
327
328
    character: int
329
    /
330
331
[clinic start generated code]*/
332
333
static int
334
_sre_unicode_tolower_impl(PyObject *module, int character)
335
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
336
40
{
337
40
    return sre_lower_unicode(character);
338
40
}
339
340
LOCAL(void)
341
state_reset(SRE_STATE* state)
342
0
{
343
    /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
344
    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
345
346
0
    state->lastmark = -1;
347
0
    state->lastindex = -1;
348
349
0
    state->repeat = NULL;
350
351
0
    data_stack_dealloc(state);
352
0
}
353
354
static void*
355
getstring(PyObject* string, Py_ssize_t* p_length,
356
          int* p_isbytes, int* p_charsize,
357
          Py_buffer *view)
358
14
{
359
    /* given a python object, return a data pointer, a length (in
360
       characters), and a character size.  return NULL if the object
361
       is not a string (or not compatible) */
362
363
    /* Unicode objects do not support the buffer API. So, get the data
364
       directly instead. */
365
14
    if (PyUnicode_Check(string)) {
366
12
        if (PyUnicode_READY(string) == -1)
367
0
            return NULL;
368
12
        *p_length = PyUnicode_GET_LENGTH(string);
369
12
        *p_charsize = PyUnicode_KIND(string);
370
12
        *p_isbytes = 0;
371
12
        return PyUnicode_DATA(string);
372
12
    }
373
374
    /* get pointer to byte string buffer */
375
2
    if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
376
0
        PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
377
0
        return NULL;
378
0
    }
379
380
2
    *p_length = view->len;
381
2
    *p_charsize = 1;
382
2
    *p_isbytes = 1;
383
384
2
    if (view->buf == NULL) {
385
0
        PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
386
0
        PyBuffer_Release(view);
387
0
        view->buf = NULL;
388
0
        return NULL;
389
0
    }
390
2
    return view->buf;
391
2
}
392
393
/* Prepare `state` for matching `pattern` against `string`, restricted to
 * the character range [start, end).
 *
 * The state is zeroed, a mark array with two slots per group is allocated,
 * and the string data is obtained via getstring().  `start` and `end` are
 * clamped to [0, length].  On success a new reference to `string` is stored
 * in the state and `string` is returned (borrowed).  On failure an
 * exception is set, the mark array and any acquired buffer are released,
 * and NULL is returned. */
LOCAL(PyObject*)
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
           Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t nchars;
    int isbytes, charsize;
    void* data;

    memset(state, 0, sizeof(SRE_STATE));

    state->mark = PyMem_New(void *, pattern->groups * 2);
    if (state->mark == NULL) {
        PyErr_NoMemory();
        goto err;
    }
    state->lastmark = -1;
    state->lastindex = -1;

    /* lets the error path tell whether a buffer was acquired */
    state->buffer.buf = NULL;
    data = getstring(string, &nchars, &isbytes, &charsize, &state->buffer);
    if (data == NULL) {
        goto err;
    }

    /* reject str/bytes mismatches between pattern and subject */
    if (isbytes && pattern->isbytes == 0) {
        PyErr_SetString(PyExc_TypeError,
                        "cannot use a string pattern on a bytes-like object");
        goto err;
    }
    if (!isbytes && pattern->isbytes > 0) {
        PyErr_SetString(PyExc_TypeError,
                        "cannot use a bytes pattern on a string-like object");
        goto err;
    }

    /* clamp both boundaries into [0, nchars] */
    start = (start < 0) ? 0 : ((start > nchars) ? nchars : start);
    end = (end < 0) ? 0 : ((end > nchars) ? nchars : end);

    state->isbytes = isbytes;
    state->charsize = charsize;
    state->match_all = 0;
    state->must_advance = 0;

    /* character offsets are scaled by charsize to get byte addresses */
    state->beginning = data;
    state->start = (void*) ((char*) data + start * state->charsize);
    state->end = (void*) ((char*) data + end * state->charsize);

    Py_INCREF(string);
    state->string = string;
    state->pos = start;
    state->endpos = end;

    return string;

  err:
    PyMem_Del(state->mark);
    state->mark = NULL;
    if (state->buffer.buf) {
        PyBuffer_Release(&state->buffer);
    }
    return NULL;
}
463
464
LOCAL(void)
465
state_fini(SRE_STATE* state)
466
6
{
467
6
    if (state->buffer.buf)
468
1
        PyBuffer_Release(&state->buffer);
469
6
    Py_XDECREF(state->string);
470
6
    data_stack_dealloc(state);
471
6
    PyMem_Del(state->mark);
472
6
    state->mark = NULL;
473
6
}
474
475
/* Convert the pointer `member` into a character offset from the start of
 * the string: the byte distance from state->beginning divided by the
 * state's character size. */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
478
479
/* Return a new reference to the slice [start, end) of `string`.
 *
 * For str subjects (`isbytes` false) this is PyUnicode_Substring().  For
 * bytes-like subjects a new bytes object is built from `ptr`, except that
 * an exact bytes object sliced over its full length is returned as-is
 * with its reference count incremented. */
LOCAL(PyObject*)
getslice(int isbytes, const void *ptr,
         PyObject* string, Py_ssize_t start, Py_ssize_t end)
{
    if (!isbytes) {
        return PyUnicode_Substring(string, start, end);
    }

    if (PyBytes_CheckExact(string) &&
        start == 0 && end == PyBytes_GET_SIZE(string)) {
        /* whole-object slice: reuse the original */
        Py_INCREF(string);
        return string;
    }

    return PyBytes_FromStringAndSize((const char *)ptr + start, end - start);
}
496
497
/* Return the text captured by group `index` (1-based) as a new reference.
 *
 * The group counts as unset when `string` is None, when its mark slot is
 * not below state->lastmark, or when either of its two marks is NULL.
 * An unset group yields an empty slice if `empty` is true, otherwise
 * None. */
LOCAL(PyObject*)
state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
{
    /* each group owns two consecutive mark slots: start and end */
    const Py_ssize_t slot = (index - 1) * 2;
    Py_ssize_t begin, finish;

    /* evaluation order matters: the marks are only read once the slot is
       known to be below lastmark */
    if (string == Py_None || slot >= state->lastmark ||
        !state->mark[slot] || !state->mark[slot + 1]) {
        if (!empty) {
            Py_RETURN_NONE;
        }
        /* want empty string */
        begin = 0;
        finish = 0;
    }
    else {
        begin = STATE_OFFSET(state, state->mark[slot]);
        finish = STATE_OFFSET(state, state->mark[slot + 1]);
    }

    return getslice(state->isbytes, state->beginning, string, begin, finish);
}
518
519
static void
520
pattern_error(Py_ssize_t status)
521
0
{
522
0
    switch (status) {
523
0
    case SRE_ERROR_RECURSION_LIMIT:
524
        /* This error code seems to be unused. */
525
0
        PyErr_SetString(
526
0
            PyExc_RecursionError,
527
0
            "maximum recursion limit exceeded"
528
0
            );
529
0
        break;
530
0
    case SRE_ERROR_MEMORY:
531
0
        PyErr_NoMemory();
532
0
        break;
533
0
    case SRE_ERROR_INTERRUPTED:
534
    /* An exception has already been raised, so let it fly */
535
0
        break;
536
0
    default:
537
        /* other error codes indicate compiler/engine bugs */
538
0
        PyErr_SetString(
539
0
            PyExc_RuntimeError,
540
0
            "internal error in regular expression engine"
541
0
            );
542
0
    }
543
0
}
544
545
static void
546
pattern_dealloc(PatternObject* self)
547
2
{
548
2
    if (self->weakreflist != NULL)
549
0
        PyObject_ClearWeakRefs((PyObject *) self);
550
2
    Py_XDECREF(self->pattern);
551
2
    Py_XDECREF(self->groupindex);
552
2
    Py_XDECREF(self->indexgroup);
553
2
    PyObject_DEL(self);
554
2
}
555
556
LOCAL(Py_ssize_t)
557
sre_match(SRE_STATE* state, SRE_CODE* pattern)
558
4
{
559
4
    if (state->charsize == 1)
560
4
        return sre_ucs1_match(state, pattern, 1);
561
0
    if (state->charsize == 2)
562
0
        return sre_ucs2_match(state, pattern, 1);
563
0
    assert(state->charsize == 4);
564
0
    return sre_ucs4_match(state, pattern, 1);
565
0
}
566
567
LOCAL(Py_ssize_t)
568
sre_search(SRE_STATE* state, SRE_CODE* pattern)
569
2
{
570
2
    if (state->charsize == 1)
571
2
        return sre_ucs1_search(state, pattern);
572
0
    if (state->charsize == 2)
573
0
        return sre_ucs2_search(state, pattern);
574
0
    assert(state->charsize == 4);
575
0
    return sre_ucs4_search(state, pattern);
576
0
}
577
578
/*[clinic input]
579
_sre.SRE_Pattern.match
580
581
    string: object
582
    pos: Py_ssize_t = 0
583
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
584
585
Matches zero or more characters at the beginning of the string.
586
[clinic start generated code]*/
587
588
static PyObject *
589
_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
590
                            Py_ssize_t pos, Py_ssize_t endpos)
591
/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
592
4
{
593
4
    SRE_STATE state;
594
4
    Py_ssize_t status;
595
4
    PyObject *match;
596
597
4
    if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
598
0
        return NULL;
599
600
4
    state.ptr = state.start;
601
602
4
    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
603
604
4
    status = sre_match(&state, PatternObject_GetCode(self));
605
606
4
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
607
4
    if (PyErr_Occurred()) {
608
0
        state_fini(&state);
609
0
        return NULL;
610
0
    }
611
612
4
    match = pattern_new_match(self, &state, status);
613
4
    state_fini(&state);
614
4
    return match;
615
4
}
616
617
/*[clinic input]
618
_sre.SRE_Pattern.fullmatch
619
620
    string: object
621
    pos: Py_ssize_t = 0
622
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
623
624
Matches against all of the string.
625
[clinic start generated code]*/
626
627
static PyObject *
628
_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
629
                                Py_ssize_t pos, Py_ssize_t endpos)
630
/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
631
0
{
632
0
    SRE_STATE state;
633
0
    Py_ssize_t status;
634
0
    PyObject *match;
635
636
0
    if (!state_init(&state, self, string, pos, endpos))
637
0
        return NULL;
638
639
0
    state.ptr = state.start;
640
641
0
    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
642
643
0
    state.match_all = 1;
644
0
    status = sre_match(&state, PatternObject_GetCode(self));
645
646
0
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
647
0
    if (PyErr_Occurred()) {
648
0
        state_fini(&state);
649
0
        return NULL;
650
0
    }
651
652
0
    match = pattern_new_match(self, &state, status);
653
0
    state_fini(&state);
654
0
    return match;
655
0
}
656
657
/*[clinic input]
658
_sre.SRE_Pattern.search
659
660
    string: object
661
    pos: Py_ssize_t = 0
662
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
663
664
Scan through string looking for a match, and return a corresponding match object instance.
665
666
Return None if no position in the string matches.
667
[clinic start generated code]*/
668
669
static PyObject *
670
_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
671
                             Py_ssize_t pos, Py_ssize_t endpos)
672
/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
673
2
{
674
2
    SRE_STATE state;
675
2
    Py_ssize_t status;
676
2
    PyObject *match;
677
678
2
    if (!state_init(&state, self, string, pos, endpos))
679
0
        return NULL;
680
681
2
    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
682
683
2
    status = sre_search(&state, PatternObject_GetCode(self));
684
685
2
    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
686
687
2
    if (PyErr_Occurred()) {
688
0
        state_fini(&state);
689
0
        return NULL;
690
0
    }
691
692
2
    match = pattern_new_match(self, &state, status);
693
2
    state_fini(&state);
694
2
    return match;
695
2
}
696
697
/* Import `module`, look up `function` in it and call it with the argument
 * tuple `args`.
 *
 * `args` may be NULL (e.g. a failed tuple construction passed straight
 * through by the caller); in that case NULL is returned immediately and
 * any pending exception is left as it is.
 *
 * This function consumes the caller's reference to `args` on every path,
 * including the error paths.  The reference was previously leaked when the
 * module name could not be created, the import failed, or the attribute
 * lookup failed.
 *
 * Returns a new reference to the call result, or NULL with an exception
 * set. */
static PyObject*
call(const char* module, const char* function, PyObject* args)
{
    PyObject* name;
    PyObject* mod;
    PyObject* func;
    PyObject* result = NULL;

    if (!args)
        return NULL;

    name = PyUnicode_FromString(module);
    if (!name)
        goto done;

    mod = PyImport_Import(name);
    Py_DECREF(name);
    if (!mod)
        goto done;

    func = PyObject_GetAttrString(mod, function);
    Py_DECREF(mod);
    if (!func)
        goto done;

    result = PyObject_CallObject(func, args);
    Py_DECREF(func);

done:
    /* single release point for the argument tuple, success or failure */
    Py_DECREF(args);
    return result;
}
723
724
/*[clinic input]
725
_sre.SRE_Pattern.findall
726
727
    string: object
728
    pos: Py_ssize_t = 0
729
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
730
731
Return a list of all non-overlapping matches of pattern in string.
732
[clinic start generated code]*/
733
734
static PyObject *
735
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
736
                              Py_ssize_t pos, Py_ssize_t endpos)
737
/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
738
0
{
739
0
    SRE_STATE state;
740
0
    PyObject* list;
741
0
    Py_ssize_t status;
742
0
    Py_ssize_t i, b, e;
743
744
0
    if (!state_init(&state, self, string, pos, endpos))
745
0
        return NULL;
746
747
0
    list = PyList_New(0);
748
0
    if (!list) {
749
0
        state_fini(&state);
750
0
        return NULL;
751
0
    }
752
753
0
    while (state.start <= state.end) {
754
755
0
        PyObject* item;
756
757
0
        state_reset(&state);
758
759
0
        state.ptr = state.start;
760
761
0
        status = sre_search(&state, PatternObject_GetCode(self));
762
0
        if (PyErr_Occurred())
763
0
            goto error;
764
765
0
        if (status <= 0) {
766
0
            if (status == 0)
767
0
                break;
768
0
            pattern_error(status);
769
0
            goto error;
770
0
        }
771
772
        /* don't bother to build a match object */
773
0
        switch (self->groups) {
774
0
        case 0:
775
0
            b = STATE_OFFSET(&state, state.start);
776
0
            e = STATE_OFFSET(&state, state.ptr);
777
0
            item = getslice(state.isbytes, state.beginning,
778
0
                            string, b, e);
779
0
            if (!item)
780
0
                goto error;
781
0
            break;
782
0
        case 1:
783
0
            item = state_getslice(&state, 1, string, 1);
784
0
            if (!item)
785
0
                goto error;
786
0
            break;
787
0
        default:
788
0
            item = PyTuple_New(self->groups);
789
0
            if (!item)
790
0
                goto error;
791
0
            for (i = 0; i < self->groups; i++) {
792
0
                PyObject* o = state_getslice(&state, i+1, string, 1);
793
0
                if (!o) {
794
0
                    Py_DECREF(item);
795
0
                    goto error;
796
0
                }
797
0
                PyTuple_SET_ITEM(item, i, o);
798
0
            }
799
0
            break;
800
0
        }
801
802
0
        status = PyList_Append(list, item);
803
0
        Py_DECREF(item);
804
0
        if (status < 0)
805
0
            goto error;
806
807
0
        state.must_advance = (state.ptr == state.start);
808
0
        state.start = state.ptr;
809
0
    }
810
811
0
    state_fini(&state);
812
0
    return list;
813
814
0
error:
815
0
    Py_DECREF(list);
816
0
    state_fini(&state);
817
0
    return NULL;
818
819
0
}
820
821
/*[clinic input]
822
_sre.SRE_Pattern.finditer
823
824
    string: object
825
    pos: Py_ssize_t = 0
826
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
827
828
Return an iterator over all non-overlapping matches for the RE pattern in string.
829
830
For each match, the iterator returns a match object.
831
[clinic start generated code]*/
832
833
static PyObject *
834
_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
835
                               Py_ssize_t pos, Py_ssize_t endpos)
836
/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
837
0
{
838
0
    PyObject* scanner;
839
0
    PyObject* search;
840
0
    PyObject* iterator;
841
842
0
    scanner = pattern_scanner(self, string, pos, endpos);
843
0
    if (!scanner)
844
0
        return NULL;
845
846
0
    search = PyObject_GetAttrString(scanner, "search");
847
0
    Py_DECREF(scanner);
848
0
    if (!search)
849
0
        return NULL;
850
851
0
    iterator = PyCallIter_New(search, Py_None);
852
0
    Py_DECREF(search);
853
854
0
    return iterator;
855
0
}
856
857
/*[clinic input]
858
_sre.SRE_Pattern.scanner
859
860
    string: object
861
    pos: Py_ssize_t = 0
862
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
863
864
[clinic start generated code]*/
865
866
static PyObject *
867
_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
868
                              Py_ssize_t pos, Py_ssize_t endpos)
869
/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
870
0
{
871
0
    return pattern_scanner(self, string, pos, endpos);
872
0
}
873
874
/*[clinic input]
875
_sre.SRE_Pattern.split
876
877
    string: object
878
    maxsplit: Py_ssize_t = 0
879
880
Split string by the occurrences of pattern.
881
[clinic start generated code]*/
882
883
static PyObject *
884
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
885
                            Py_ssize_t maxsplit)
886
/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
887
0
{
888
0
    SRE_STATE state;
889
0
    PyObject* list;
890
0
    PyObject* item;
891
0
    Py_ssize_t status;
892
0
    Py_ssize_t n;
893
0
    Py_ssize_t i;
894
0
    void* last;
895
896
0
    assert(self->codesize != 0);
897
898
0
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
899
0
        return NULL;
900
901
0
    list = PyList_New(0);
902
0
    if (!list) {
903
0
        state_fini(&state);
904
0
        return NULL;
905
0
    }
906
907
0
    n = 0;
908
0
    last = state.start;
909
910
0
    while (!maxsplit || n < maxsplit) {
911
912
0
        state_reset(&state);
913
914
0
        state.ptr = state.start;
915
916
0
        status = sre_search(&state, PatternObject_GetCode(self));
917
0
        if (PyErr_Occurred())
918
0
            goto error;
919
920
0
        if (status <= 0) {
921
0
            if (status == 0)
922
0
                break;
923
0
            pattern_error(status);
924
0
            goto error;
925
0
        }
926
927
        /* get segment before this match */
928
0
        item = getslice(state.isbytes, state.beginning,
929
0
            string, STATE_OFFSET(&state, last),
930
0
            STATE_OFFSET(&state, state.start)
931
0
            );
932
0
        if (!item)
933
0
            goto error;
934
0
        status = PyList_Append(list, item);
935
0
        Py_DECREF(item);
936
0
        if (status < 0)
937
0
            goto error;
938
939
        /* add groups (if any) */
940
0
        for (i = 0; i < self->groups; i++) {
941
0
            item = state_getslice(&state, i+1, string, 0);
942
0
            if (!item)
943
0
                goto error;
944
0
            status = PyList_Append(list, item);
945
0
            Py_DECREF(item);
946
0
            if (status < 0)
947
0
                goto error;
948
0
        }
949
950
0
        n = n + 1;
951
0
        state.must_advance = (state.ptr == state.start);
952
0
        last = state.start = state.ptr;
953
954
0
    }
955
956
    /* get segment following last match (even if empty) */
957
0
    item = getslice(state.isbytes, state.beginning,
958
0
        string, STATE_OFFSET(&state, last), state.endpos
959
0
        );
960
0
    if (!item)
961
0
        goto error;
962
0
    status = PyList_Append(list, item);
963
0
    Py_DECREF(item);
964
0
    if (status < 0)
965
0
        goto error;
966
967
0
    state_fini(&state);
968
0
    return list;
969
970
0
error:
971
0
    Py_DECREF(list);
972
0
    state_fini(&state);
973
0
    return NULL;
974
975
0
}
976
977
static PyObject*
978
pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
979
             Py_ssize_t count, Py_ssize_t subn)
980
0
{
981
0
    SRE_STATE state;
982
0
    PyObject* list;
983
0
    PyObject* joiner;
984
0
    PyObject* item;
985
0
    PyObject* filter;
986
0
    PyObject* match;
987
0
    void* ptr;
988
0
    Py_ssize_t status;
989
0
    Py_ssize_t n;
990
0
    Py_ssize_t i, b, e;
991
0
    int isbytes, charsize;
992
0
    int filter_is_callable;
993
0
    Py_buffer view;
994
995
0
    if (PyCallable_Check(ptemplate)) {
996
        /* sub/subn takes either a function or a template */
997
0
        filter = ptemplate;
998
0
        Py_INCREF(filter);
999
0
        filter_is_callable = 1;
1000
0
    } else {
1001
        /* if not callable, check if it's a literal string */
1002
0
        int literal;
1003
0
        view.buf = NULL;
1004
0
        ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1005
0
        b = charsize;
1006
0
        if (ptr) {
1007
0
            if (charsize == 1)
1008
0
                literal = memchr(ptr, '\\', n) == NULL;
1009
0
            else
1010
0
                literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1011
0
        } else {
1012
0
            PyErr_Clear();
1013
0
            literal = 0;
1014
0
        }
1015
0
        if (view.buf)
1016
0
            PyBuffer_Release(&view);
1017
0
        if (literal) {
1018
0
            filter = ptemplate;
1019
0
            Py_INCREF(filter);
1020
0
            filter_is_callable = 0;
1021
0
        } else {
1022
            /* not a literal; hand it over to the template compiler */
1023
0
            filter = call(
1024
0
                SRE_PY_MODULE, "_subx",
1025
0
                PyTuple_Pack(2, self, ptemplate)
1026
0
                );
1027
0
            if (!filter)
1028
0
                return NULL;
1029
0
            filter_is_callable = PyCallable_Check(filter);
1030
0
        }
1031
0
    }
1032
1033
0
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1034
0
        Py_DECREF(filter);
1035
0
        return NULL;
1036
0
    }
1037
1038
0
    list = PyList_New(0);
1039
0
    if (!list) {
1040
0
        Py_DECREF(filter);
1041
0
        state_fini(&state);
1042
0
        return NULL;
1043
0
    }
1044
1045
0
    n = i = 0;
1046
1047
0
    while (!count || n < count) {
1048
1049
0
        state_reset(&state);
1050
1051
0
        state.ptr = state.start;
1052
1053
0
        status = sre_search(&state, PatternObject_GetCode(self));
1054
0
        if (PyErr_Occurred())
1055
0
            goto error;
1056
1057
0
        if (status <= 0) {
1058
0
            if (status == 0)
1059
0
                break;
1060
0
            pattern_error(status);
1061
0
            goto error;
1062
0
        }
1063
1064
0
        b = STATE_OFFSET(&state, state.start);
1065
0
        e = STATE_OFFSET(&state, state.ptr);
1066
1067
0
        if (i < b) {
1068
            /* get segment before this match */
1069
0
            item = getslice(state.isbytes, state.beginning,
1070
0
                string, i, b);
1071
0
            if (!item)
1072
0
                goto error;
1073
0
            status = PyList_Append(list, item);
1074
0
            Py_DECREF(item);
1075
0
            if (status < 0)
1076
0
                goto error;
1077
1078
0
        }
1079
1080
0
        if (filter_is_callable) {
1081
            /* pass match object through filter */
1082
0
            match = pattern_new_match(self, &state, 1);
1083
0
            if (!match)
1084
0
                goto error;
1085
0
            item = PyObject_CallFunctionObjArgs(filter, match, NULL);
1086
0
            Py_DECREF(match);
1087
0
            if (!item)
1088
0
                goto error;
1089
0
        } else {
1090
            /* filter is literal string */
1091
0
            item = filter;
1092
0
            Py_INCREF(item);
1093
0
        }
1094
1095
        /* add to list */
1096
0
        if (item != Py_None) {
1097
0
            status = PyList_Append(list, item);
1098
0
            Py_DECREF(item);
1099
0
            if (status < 0)
1100
0
                goto error;
1101
0
        }
1102
1103
0
        i = e;
1104
0
        n = n + 1;
1105
0
        state.must_advance = (state.ptr == state.start);
1106
0
        state.start = state.ptr;
1107
0
    }
1108
1109
    /* get segment following last match */
1110
0
    if (i < state.endpos) {
1111
0
        item = getslice(state.isbytes, state.beginning,
1112
0
                        string, i, state.endpos);
1113
0
        if (!item)
1114
0
            goto error;
1115
0
        status = PyList_Append(list, item);
1116
0
        Py_DECREF(item);
1117
0
        if (status < 0)
1118
0
            goto error;
1119
0
    }
1120
1121
0
    state_fini(&state);
1122
1123
0
    Py_DECREF(filter);
1124
1125
    /* convert list to single string (also removes list) */
1126
0
    joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1127
0
    if (!joiner) {
1128
0
        Py_DECREF(list);
1129
0
        return NULL;
1130
0
    }
1131
0
    if (PyList_GET_SIZE(list) == 0) {
1132
0
        Py_DECREF(list);
1133
0
        item = joiner;
1134
0
    }
1135
0
    else {
1136
0
        if (state.isbytes)
1137
0
            item = _PyBytes_Join(joiner, list);
1138
0
        else
1139
0
            item = PyUnicode_Join(joiner, list);
1140
0
        Py_DECREF(joiner);
1141
0
        Py_DECREF(list);
1142
0
        if (!item)
1143
0
            return NULL;
1144
0
    }
1145
1146
0
    if (subn)
1147
0
        return Py_BuildValue("Nn", item, n);
1148
1149
0
    return item;
1150
1151
0
error:
1152
0
    Py_DECREF(list);
1153
0
    state_fini(&state);
1154
0
    Py_DECREF(filter);
1155
0
    return NULL;
1156
1157
0
}
1158
1159
/*[clinic input]
1160
_sre.SRE_Pattern.sub
1161
1162
    repl: object
1163
    string: object
1164
    count: Py_ssize_t = 0
1165
1166
Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1167
[clinic start generated code]*/
1168
1169
static PyObject *
1170
_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1171
                          PyObject *string, Py_ssize_t count)
1172
/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1173
0
{
1174
0
    return pattern_subx(self, repl, string, count, 0);
1175
0
}
1176
1177
/*[clinic input]
1178
_sre.SRE_Pattern.subn
1179
1180
    repl: object
1181
    string: object
1182
    count: Py_ssize_t = 0
1183
1184
Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1185
[clinic start generated code]*/
1186
1187
static PyObject *
1188
_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1189
                           PyObject *string, Py_ssize_t count)
1190
/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1191
0
{
1192
0
    return pattern_subx(self, repl, string, count, 1);
1193
0
}
1194
1195
/*[clinic input]
1196
_sre.SRE_Pattern.__copy__
1197
1198
[clinic start generated code]*/
1199
1200
static PyObject *
1201
_sre_SRE_Pattern___copy___impl(PatternObject *self)
1202
/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1203
0
{
1204
0
    Py_INCREF(self);
1205
0
    return (PyObject *)self;
1206
0
}
1207
1208
/*[clinic input]
1209
_sre.SRE_Pattern.__deepcopy__
1210
1211
    memo: object
1212
    /
1213
1214
[clinic start generated code]*/
1215
1216
static PyObject *
1217
_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1218
/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1219
0
{
1220
0
    Py_INCREF(self);
1221
0
    return (PyObject *)self;
1222
0
}
1223
1224
static PyObject *
1225
pattern_repr(PatternObject *obj)
1226
0
{
1227
0
    static const struct {
1228
0
        const char *name;
1229
0
        int value;
1230
0
    } flag_names[] = {
1231
0
        {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1232
0
        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1233
0
        {"re.LOCALE", SRE_FLAG_LOCALE},
1234
0
        {"re.MULTILINE", SRE_FLAG_MULTILINE},
1235
0
        {"re.DOTALL", SRE_FLAG_DOTALL},
1236
0
        {"re.UNICODE", SRE_FLAG_UNICODE},
1237
0
        {"re.VERBOSE", SRE_FLAG_VERBOSE},
1238
0
        {"re.DEBUG", SRE_FLAG_DEBUG},
1239
0
        {"re.ASCII", SRE_FLAG_ASCII},
1240
0
    };
1241
0
    PyObject *result = NULL;
1242
0
    PyObject *flag_items;
1243
0
    size_t i;
1244
0
    int flags = obj->flags;
1245
1246
    /* Omit re.UNICODE for valid string patterns. */
1247
0
    if (obj->isbytes == 0 &&
1248
0
        (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1249
0
         SRE_FLAG_UNICODE)
1250
0
        flags &= ~SRE_FLAG_UNICODE;
1251
1252
0
    flag_items = PyList_New(0);
1253
0
    if (!flag_items)
1254
0
        return NULL;
1255
1256
0
    for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1257
0
        if (flags & flag_names[i].value) {
1258
0
            PyObject *item = PyUnicode_FromString(flag_names[i].name);
1259
0
            if (!item)
1260
0
                goto done;
1261
1262
0
            if (PyList_Append(flag_items, item) < 0) {
1263
0
                Py_DECREF(item);
1264
0
                goto done;
1265
0
            }
1266
0
            Py_DECREF(item);
1267
0
            flags &= ~flag_names[i].value;
1268
0
        }
1269
0
    }
1270
0
    if (flags) {
1271
0
        PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1272
0
        if (!item)
1273
0
            goto done;
1274
1275
0
        if (PyList_Append(flag_items, item) < 0) {
1276
0
            Py_DECREF(item);
1277
0
            goto done;
1278
0
        }
1279
0
        Py_DECREF(item);
1280
0
    }
1281
1282
0
    if (PyList_Size(flag_items) > 0) {
1283
0
        PyObject *flags_result;
1284
0
        PyObject *sep = PyUnicode_FromString("|");
1285
0
        if (!sep)
1286
0
            goto done;
1287
0
        flags_result = PyUnicode_Join(sep, flag_items);
1288
0
        Py_DECREF(sep);
1289
0
        if (!flags_result)
1290
0
            goto done;
1291
0
        result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1292
0
                                      obj->pattern, flags_result);
1293
0
        Py_DECREF(flags_result);
1294
0
    }
1295
0
    else {
1296
0
        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1297
0
    }
1298
1299
0
done:
1300
0
    Py_DECREF(flag_items);
1301
0
    return result;
1302
0
}
1303
1304
PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1305
1306
/* PatternObject's 'groupindex' method. */
1307
static PyObject *
1308
pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1309
0
{
1310
0
    if (self->groupindex == NULL)
1311
0
        return PyDict_New();
1312
0
    return PyDictProxy_New(self->groupindex);
1313
0
}
1314
1315
static int _validate(PatternObject *self); /* Forward */
1316
1317
/*[clinic input]
1318
_sre.compile
1319
1320
    pattern: object
1321
    flags: int
1322
    code: object(subclass_of='&PyList_Type')
1323
    groups: Py_ssize_t
1324
    groupindex: object(subclass_of='&PyDict_Type')
1325
    indexgroup: object(subclass_of='&PyTuple_Type')
1326
1327
[clinic start generated code]*/
1328
1329
static PyObject *
1330
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1331
                  PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1332
                  PyObject *indexgroup)
1333
/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1334
8
{
1335
    /* "compile" pattern descriptor to pattern object */
1336
1337
8
    PatternObject* self;
1338
8
    Py_ssize_t i, n;
1339
1340
8
    n = PyList_GET_SIZE(code);
1341
    /* coverity[ampersand_in_size] */
1342
8
    self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1343
8
    if (!self)
1344
0
        return NULL;
1345
8
    self->weakreflist = NULL;
1346
8
    self->pattern = NULL;
1347
8
    self->groupindex = NULL;
1348
8
    self->indexgroup = NULL;
1349
1350
8
    self->codesize = n;
1351
1352
792
    for (i = 0; i < n; i++) {
1353
784
        PyObject *o = PyList_GET_ITEM(code, i);
1354
784
        unsigned long value = PyLong_AsUnsignedLong(o);
1355
784
        self->code[i] = (SRE_CODE) value;
1356
784
        if ((unsigned long) self->code[i] != value) {
1357
0
            PyErr_SetString(PyExc_OverflowError,
1358
0
                            "regular expression code size limit exceeded");
1359
0
            break;
1360
0
        }
1361
784
    }
1362
1363
8
    if (PyErr_Occurred()) {
1364
0
        Py_DECREF(self);
1365
0
        return NULL;
1366
0
    }
1367
1368
8
    if (pattern == Py_None) {
1369
0
        self->isbytes = -1;
1370
0
    }
1371
8
    else {
1372
8
        Py_ssize_t p_length;
1373
8
        int charsize;
1374
8
        Py_buffer view;
1375
8
        view.buf = NULL;
1376
8
        if (!getstring(pattern, &p_length, &self->isbytes,
1377
8
                       &charsize, &view)) {
1378
0
            Py_DECREF(self);
1379
0
            return NULL;
1380
0
        }
1381
8
        if (view.buf)
1382
1
            PyBuffer_Release(&view);
1383
8
    }
1384
1385
8
    Py_INCREF(pattern);
1386
8
    self->pattern = pattern;
1387
1388
8
    self->flags = flags;
1389
1390
8
    self->groups = groups;
1391
1392
8
    if (PyDict_GET_SIZE(groupindex) > 0) {
1393
1
        Py_INCREF(groupindex);
1394
1
        self->groupindex = groupindex;
1395
1
        if (PyTuple_GET_SIZE(indexgroup) > 0) {
1396
1
            Py_INCREF(indexgroup);
1397
1
            self->indexgroup = indexgroup;
1398
1
        }
1399
1
    }
1400
1401
8
    if (!_validate(self)) {
1402
0
        Py_DECREF(self);
1403
0
        return NULL;
1404
0
    }
1405
1406
8
    return (PyObject*) self;
1407
8
}
1408
1409
/* -------------------------------------------------------------------- */
1410
/* Code validation */
1411
1412
/* To learn more about this code, have a look at the _compile() function in
1413
   Lib/sre_compile.py.  The validation functions below check the code array
1414
   for conformance with the code patterns generated there.
1415
1416
   The nice thing about the generated code is that it is position-independent:
1417
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
1418
   the target of a later jump is always earlier than the target of an earlier
1419
   jump.  IOW, this is okay:
1420
1421
   J---------J-------T--------T
1422
    \         \_____/        /
1423
     \______________________/
1424
1425
   but this is not:
1426
1427
   J---------J-------T--------T
1428
    \_________\_____/        /
1429
               \____________/
1430
1431
   It also helps that SRE_CODE is always an unsigned type.
1432
*/
1433
1434
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesised printf argument list. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the function the macro is used in */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros read and advance the caller's local `code` pointer, compare
   it against the caller's local `end`, store into the caller's local `op`,
   `arg` or `skip`, and FAIL when no word is left to read. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Reads the skip word and FAILs unless skip-adj words remain between the
   skip word and `end`.  `adj` is parenthesised so that a compound argument
   is subtracted as a whole.  With an unsigned `skip`, a value smaller than
   `adj` wraps around and is rejected by the same comparison. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1474
1475
static int
1476
_validate_charset(SRE_CODE *code, SRE_CODE *end)
1477
26
{
1478
    /* Some variables are manipulated by the macros above */
1479
26
    SRE_CODE op;
1480
26
    SRE_CODE arg;
1481
26
    SRE_CODE offset;
1482
26
    int i;
1483
1484
59
    while (code < end) {
1485
33
        GET_OP;
1486
33
        switch (op) {
1487
1488
0
        case SRE_OP_NEGATE:
1489
0
            break;
1490
1491
9
        case SRE_OP_LITERAL:
1492
9
            GET_ARG;
1493
9
            break;
1494
1495
9
        case SRE_OP_RANGE:
1496
4
        case SRE_OP_RANGE_UNI_IGNORE:
1497
4
            GET_ARG;
1498
4
            GET_ARG;
1499
4
            break;
1500
1501
7
        case SRE_OP_CHARSET:
1502
7
            offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1503
7
            if (offset > (uintptr_t)(end - code))
1504
0
                FAIL;
1505
7
            code += offset;
1506
7
            break;
1507
1508
2
        case SRE_OP_BIGCHARSET:
1509
2
            GET_ARG; /* Number of blocks */
1510
2
            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1511
2
            if (offset > (uintptr_t)(end - code))
1512
0
                FAIL;
1513
            /* Make sure that each byte points to a valid block */
1514
514
            for (i = 0; i < 256; i++) {
1515
512
                if (((unsigned char *)code)[i] >= arg)
1516
0
                    FAIL;
1517
512
            }
1518
2
            code += offset;
1519
2
            offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1520
2
            if (offset > (uintptr_t)(end - code))
1521
0
                FAIL;
1522
2
            code += offset;
1523
2
            break;
1524
1525
11
        case SRE_OP_CATEGORY:
1526
11
            GET_ARG;
1527
11
            switch (arg) {
1528
0
            case SRE_CATEGORY_DIGIT:
1529
0
            case SRE_CATEGORY_NOT_DIGIT:
1530
0
            case SRE_CATEGORY_SPACE:
1531
0
            case SRE_CATEGORY_NOT_SPACE:
1532
1
            case SRE_CATEGORY_WORD:
1533
1
            case SRE_CATEGORY_NOT_WORD:
1534
1
            case SRE_CATEGORY_LINEBREAK:
1535
1
            case SRE_CATEGORY_NOT_LINEBREAK:
1536
1
            case SRE_CATEGORY_LOC_WORD:
1537
1
            case SRE_CATEGORY_LOC_NOT_WORD:
1538
6
            case SRE_CATEGORY_UNI_DIGIT:
1539
6
            case SRE_CATEGORY_UNI_NOT_DIGIT:
1540
6
            case SRE_CATEGORY_UNI_SPACE:
1541
6
            case SRE_CATEGORY_UNI_NOT_SPACE:
1542
11
            case SRE_CATEGORY_UNI_WORD:
1543
11
            case SRE_CATEGORY_UNI_NOT_WORD:
1544
11
            case SRE_CATEGORY_UNI_LINEBREAK:
1545
11
            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1546
11
                break;
1547
0
            default:
1548
0
                FAIL;
1549
11
            }
1550
11
            break;
1551
1552
11
        default:
1553
0
            FAIL;
1554
1555
33
        }
1556
33
    }
1557
1558
26
    return 1;
1559
26
}
1560
1561
static int
1562
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1563
57
{
1564
    /* Some variables are manipulated by the macros above */
1565
57
    SRE_CODE op;
1566
57
    SRE_CODE arg;
1567
57
    SRE_CODE skip;
1568
1569
57
    VTRACE(("code=%p, end=%p\n", code, end));
1570
1571
57
    if (code > end)
1572
0
        FAIL;
1573
1574
195
    while (code < end) {
1575
138
        GET_OP;
1576
138
        switch (op) {
1577
1578
28
        case SRE_OP_MARK:
1579
            /* We don't check whether marks are properly nested; the
1580
               sre_match() code is robust even if they don't, and the worst
1581
               you can get is nonsensical match results. */
1582
28
            GET_ARG;
1583
28
            if (arg > 2 * (size_t)groups + 1) {
1584
0
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1585
0
                FAIL;
1586
0
            }
1587
28
            break;
1588
1589
28
        case SRE_OP_LITERAL:
1590
28
        case SRE_OP_NOT_LITERAL:
1591
28
        case SRE_OP_LITERAL_IGNORE:
1592
28
        case SRE_OP_NOT_LITERAL_IGNORE:
1593
28
        case SRE_OP_LITERAL_UNI_IGNORE:
1594
28
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1595
28
        case SRE_OP_LITERAL_LOC_IGNORE:
1596
28
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1597
28
            GET_ARG;
1598
            /* The arg is just a character, nothing to check */
1599
28
            break;
1600
1601
28
        case SRE_OP_SUCCESS:
1602
0
        case SRE_OP_FAILURE:
1603
            /* Nothing to check; these normally end the matching process */
1604
0
            break;
1605
1606
7
        case SRE_OP_AT:
1607
7
            GET_ARG;
1608
7
            switch (arg) {
1609
4
            case SRE_AT_BEGINNING:
1610
4
            case SRE_AT_BEGINNING_STRING:
1611
4
            case SRE_AT_BEGINNING_LINE:
1612
7
            case SRE_AT_END:
1613
7
            case SRE_AT_END_LINE:
1614
7
            case SRE_AT_END_STRING:
1615
7
            case SRE_AT_BOUNDARY:
1616
7
            case SRE_AT_NON_BOUNDARY:
1617
7
            case SRE_AT_LOC_BOUNDARY:
1618
7
            case SRE_AT_LOC_NON_BOUNDARY:
1619
7
            case SRE_AT_UNI_BOUNDARY:
1620
7
            case SRE_AT_UNI_NON_BOUNDARY:
1621
7
                break;
1622
0
            default:
1623
0
                FAIL;
1624
7
            }
1625
7
            break;
1626
1627
7
        case SRE_OP_ANY:
1628
2
        case SRE_OP_ANY_ALL:
1629
            /* These have no operands */
1630
2
            break;
1631
1632
20
        case SRE_OP_IN:
1633
24
        case SRE_OP_IN_IGNORE:
1634
26
        case SRE_OP_IN_UNI_IGNORE:
1635
26
        case SRE_OP_IN_LOC_IGNORE:
1636
26
            GET_SKIP;
1637
            /* Stop 1 before the end; we check the FAILURE below */
1638
26
            if (!_validate_charset(code, code+skip-2))
1639
0
                FAIL;
1640
26
            if (code[skip-2] != SRE_OP_FAILURE)
1641
0
                FAIL;
1642
26
            code += skip-1;
1643
26
            break;
1644
1645
8
        case SRE_OP_INFO:
1646
8
            {
1647
                /* A minimal info field is
1648
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1649
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1650
                   more follows. */
1651
8
                SRE_CODE flags, i;
1652
8
                SRE_CODE *newcode;
1653
8
                GET_SKIP;
1654
8
                newcode = code+skip-1;
1655
8
                GET_ARG; flags = arg;
1656
8
                GET_ARG;
1657
8
                GET_ARG;
1658
                /* Check that only valid flags are present */
1659
8
                if ((flags & ~(SRE_INFO_PREFIX |
1660
8
                               SRE_INFO_LITERAL |
1661
8
                               SRE_INFO_CHARSET)) != 0)
1662
0
                    FAIL;
1663
                /* PREFIX and CHARSET are mutually exclusive */
1664
8
                if ((flags & SRE_INFO_PREFIX) &&
1665
8
                    (flags & SRE_INFO_CHARSET))
1666
0
                    FAIL;
1667
                /* LITERAL implies PREFIX */
1668
8
                if ((flags & SRE_INFO_LITERAL) &&
1669
8
                    !(flags & SRE_INFO_PREFIX))
1670
0
                    FAIL;
1671
                /* Validate the prefix */
1672
8
                if (flags & SRE_INFO_PREFIX) {
1673
2
                    SRE_CODE prefix_len;
1674
2
                    GET_ARG; prefix_len = arg;
1675
2
                    GET_ARG;
1676
                    /* Here comes the prefix string */
1677
2
                    if (prefix_len > (uintptr_t)(newcode - code))
1678
0
                        FAIL;
1679
2
                    code += prefix_len;
1680
                    /* And here comes the overlap table */
1681
2
                    if (prefix_len > (uintptr_t)(newcode - code))
1682
0
                        FAIL;
1683
                    /* Each overlap value should be < prefix_len */
1684
5
                    for (i = 0; i < prefix_len; i++) {
1685
3
                        if (code[i] >= prefix_len)
1686
0
                            FAIL;
1687
3
                    }
1688
2
                    code += prefix_len;
1689
2
                }
1690
                /* Validate the charset */
1691
8
                if (flags & SRE_INFO_CHARSET) {
1692
0
                    if (!_validate_charset(code, newcode-1))
1693
0
                        FAIL;
1694
0
                    if (newcode[-1] != SRE_OP_FAILURE)
1695
0
                        FAIL;
1696
0
                    code = newcode;
1697
0
                }
1698
8
                else if (code != newcode) {
1699
0
                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
1700
0
                    FAIL;
1701
0
                }
1702
8
            }
1703
8
            break;
1704
1705
8
        case SRE_OP_BRANCH:
1706
8
            {
1707
8
                SRE_CODE *target = NULL;
1708
26
                for (;;) {
1709
26
                    GET_SKIP;
1710
26
                    if (skip == 0)
1711
8
                        break;
1712
                    /* Stop 2 before the end; we check the JUMP below */
1713
18
                    if (!_validate_inner(code, code+skip-3, groups))
1714
0
                        FAIL;
1715
18
                    code += skip-3;
1716
                    /* Check that it ends with a JUMP, and that each JUMP
1717
                       has the same target */
1718
18
                    GET_OP;
1719
18
                    if (op != SRE_OP_JUMP)
1720
0
                        FAIL;
1721
18
                    GET_SKIP;
1722
18
                    if (target == NULL)
1723
8
                        target = code+skip-1;
1724
10
                    else if (code+skip-1 != target)
1725
0
                        FAIL;
1726
18
                }
1727
8
            }
1728
8
            break;
1729
1730
24
        case SRE_OP_REPEAT_ONE:
1731
25
        case SRE_OP_MIN_REPEAT_ONE:
1732
25
            {
1733
25
                SRE_CODE min, max;
1734
25
                GET_SKIP;
1735
25
                GET_ARG; min = arg;
1736
25
                GET_ARG; max = arg;
1737
25
                if (min > max)
1738
0
                    FAIL;
1739
25
                if (max > SRE_MAXREPEAT)
1740
0
                    FAIL;
1741
25
                if (!_validate_inner(code, code+skip-4, groups))
1742
0
                    FAIL;
1743
25
                code += skip-4;
1744
25
                GET_OP;
1745
25
                if (op != SRE_OP_SUCCESS)
1746
0
                    FAIL;
1747
25
            }
1748
25
            break;
1749
1750
25
        case SRE_OP_REPEAT:
1751
6
            {
1752
6
                SRE_CODE min, max;
1753
6
                GET_SKIP;
1754
6
                GET_ARG; min = arg;
1755
6
                GET_ARG; max = arg;
1756
6
                if (min > max)
1757
0
                    FAIL;
1758
6
                if (max > SRE_MAXREPEAT)
1759
0
                    FAIL;
1760
6
                if (!_validate_inner(code, code+skip-3, groups))
1761
0
                    FAIL;
1762
6
                code += skip-3;
1763
6
                GET_OP;
1764
6
                if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1765
0
                    FAIL;
1766
6
            }
1767
6
            break;
1768
1769
6
        case SRE_OP_GROUPREF:
1770
0
        case SRE_OP_GROUPREF_IGNORE:
1771
0
        case SRE_OP_GROUPREF_UNI_IGNORE:
1772
0
        case SRE_OP_GROUPREF_LOC_IGNORE:
1773
0
            GET_ARG;
1774
0
            if (arg >= (size_t)groups)
1775
0
                FAIL;
1776
0
            break;
1777
1778
0
        case SRE_OP_GROUPREF_EXISTS:
1779
            /* The regex syntax for this is: '(?(group)then|else)', where
1780
               'group' is either an integer group number or a group name,
1781
               'then' and 'else' are sub-regexes, and 'else' is optional. */
1782
0
            GET_ARG;
1783
0
            if (arg >= (size_t)groups)
1784
0
                FAIL;
1785
0
            GET_SKIP_ADJ(1);
1786
0
            code--; /* The skip is relative to the first arg! */
1787
            /* There are two possibilities here: if there is both a 'then'
1788
               part and an 'else' part, the generated code looks like:
1789
1790
               GROUPREF_EXISTS
1791
               <group>
1792
               <skipyes>
1793
               ...then part...
1794
               JUMP
1795
               <skipno>
1796
               (<skipyes> jumps here)
1797
               ...else part...
1798
               (<skipno> jumps here)
1799
1800
               If there is only a 'then' part, it looks like:
1801
1802
               GROUPREF_EXISTS
1803
               <group>
1804
               <skip>
1805
               ...then part...
1806
               (<skip> jumps here)
1807
1808
               There is no direct way to decide which it is, and we don't want
1809
               to allow arbitrary jumps anywhere in the code; so we just look
1810
               for a JUMP opcode preceding our skip target.
1811
            */
1812
0
            if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
1813
0
                code[skip-3] == SRE_OP_JUMP)
1814
0
            {
1815
0
                VTRACE(("both then and else parts present\n"));
1816
0
                if (!_validate_inner(code+1, code+skip-3, groups))
1817
0
                    FAIL;
1818
0
                code += skip-2; /* Position after JUMP, at <skipno> */
1819
0
                GET_SKIP;
1820
0
                if (!_validate_inner(code, code+skip-1, groups))
1821
0
                    FAIL;
1822
0
                code += skip-1;
1823
0
            }
1824
0
            else {
1825
0
                VTRACE(("only a then part present\n"));
1826
0
                if (!_validate_inner(code+1, code+skip-1, groups))
1827
0
                    FAIL;
1828
0
                code += skip-1;
1829
0
            }
1830
0
            break;
1831
1832
0
        case SRE_OP_ASSERT:
1833
0
        case SRE_OP_ASSERT_NOT:
1834
0
            GET_SKIP;
1835
0
            GET_ARG; /* 0 for lookahead, width for lookbehind */
1836
0
            code--; /* Back up over arg to simplify math below */
1837
0
            if (arg & 0x80000000)
1838
0
                FAIL; /* Width too large */
1839
            /* Stop 1 before the end; we check the SUCCESS below */
1840
0
            if (!_validate_inner(code+1, code+skip-2, groups))
1841
0
                FAIL;
1842
0
            code += skip-2;
1843
0
            GET_OP;
1844
0
            if (op != SRE_OP_SUCCESS)
1845
0
                FAIL;
1846
0
            break;
1847
1848
0
        default:
1849
0
            FAIL;
1850
1851
138
        }
1852
138
    }
1853
1854
57
    VTRACE(("okay\n"));
1855
57
    return 1;
1856
57
}
1857
1858
static int
1859
_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1860
8
{
1861
8
    if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1862
8
        code >= end || end[-1] != SRE_OP_SUCCESS)
1863
0
        FAIL;
1864
8
    return _validate_inner(code, end-1, groups);
1865
8
}
1866
1867
static int
1868
_validate(PatternObject *self)
1869
8
{
1870
8
    if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1871
0
    {
1872
0
        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1873
0
        return 0;
1874
0
    }
1875
8
    else
1876
8
        VTRACE(("Success!\n"));
1877
8
    return 1;
1878
8
}
1879
1880
/* -------------------------------------------------------------------- */
1881
/* match methods */
1882
1883
static void
1884
match_dealloc(MatchObject* self)
1885
4
{
1886
4
    Py_XDECREF(self->regs);
1887
4
    Py_XDECREF(self->string);
1888
4
    Py_DECREF(self->pattern);
1889
4
    PyObject_DEL(self);
1890
4
}
1891
1892
static PyObject*
1893
match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
1894
0
{
1895
0
    Py_ssize_t length;
1896
0
    int isbytes, charsize;
1897
0
    Py_buffer view;
1898
0
    PyObject *result;
1899
0
    void* ptr;
1900
0
    Py_ssize_t i, j;
1901
1902
0
    assert(0 <= index && index < self->groups);
1903
0
    index *= 2;
1904
1905
0
    if (self->string == Py_None || self->mark[index] < 0) {
1906
        /* return default value if the string or group is undefined */
1907
0
        Py_INCREF(def);
1908
0
        return def;
1909
0
    }
1910
1911
0
    ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
1912
0
    if (ptr == NULL)
1913
0
        return NULL;
1914
1915
0
    i = self->mark[index];
1916
0
    j = self->mark[index+1];
1917
0
    i = Py_MIN(i, length);
1918
0
    j = Py_MIN(j, length);
1919
0
    result = getslice(isbytes, ptr, self->string, i, j);
1920
0
    if (isbytes && view.buf != NULL)
1921
0
        PyBuffer_Release(&view);
1922
0
    return result;
1923
0
}
1924
1925
static Py_ssize_t
1926
match_getindex(MatchObject* self, PyObject* index)
1927
0
{
1928
0
    Py_ssize_t i;
1929
1930
0
    if (index == NULL)
1931
        /* Default value */
1932
0
        return 0;
1933
1934
0
    if (PyIndex_Check(index)) {
1935
0
        i = PyNumber_AsSsize_t(index, NULL);
1936
0
    }
1937
0
    else {
1938
0
        i = -1;
1939
1940
0
        if (self->pattern->groupindex) {
1941
0
            index = PyDict_GetItemWithError(self->pattern->groupindex, index);
1942
0
            if (index && PyLong_Check(index)) {
1943
0
                i = PyLong_AsSsize_t(index);
1944
0
            }
1945
0
        }
1946
0
    }
1947
0
    if (i < 0 || i >= self->groups) {
1948
        /* raise IndexError if we were given a bad group number */
1949
0
        if (!PyErr_Occurred()) {
1950
0
            PyErr_SetString(PyExc_IndexError, "no such group");
1951
0
        }
1952
0
        return -1;
1953
0
    }
1954
1955
0
    return i;
1956
0
}
1957
1958
static PyObject*
1959
match_getslice(MatchObject* self, PyObject* index, PyObject* def)
1960
0
{
1961
0
    Py_ssize_t i = match_getindex(self, index);
1962
1963
0
    if (i < 0) {
1964
0
        return NULL;
1965
0
    }
1966
1967
0
    return match_getslice_by_index(self, i, def);
1968
0
}
1969
1970
/*[clinic input]
1971
_sre.SRE_Match.expand
1972
1973
    template: object
1974
1975
Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1976
[clinic start generated code]*/
1977
1978
static PyObject *
1979
_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1980
/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
1981
0
{
1982
    /* delegate to Python code */
1983
0
    return call(
1984
0
        SRE_PY_MODULE, "_expand",
1985
0
        PyTuple_Pack(3, self->pattern, self, template)
1986
0
        );
1987
0
}
1988
1989
static PyObject*
1990
match_group(MatchObject* self, PyObject* args)
1991
0
{
1992
0
    PyObject* result;
1993
0
    Py_ssize_t i, size;
1994
1995
0
    size = PyTuple_GET_SIZE(args);
1996
1997
0
    switch (size) {
1998
0
    case 0:
1999
0
        result = match_getslice(self, _PyLong_Zero, Py_None);
2000
0
        break;
2001
0
    case 1:
2002
0
        result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2003
0
        break;
2004
0
    default:
2005
        /* fetch multiple items */
2006
0
        result = PyTuple_New(size);
2007
0
        if (!result)
2008
0
            return NULL;
2009
0
        for (i = 0; i < size; i++) {
2010
0
            PyObject* item = match_getslice(
2011
0
                self, PyTuple_GET_ITEM(args, i), Py_None
2012
0
                );
2013
0
            if (!item) {
2014
0
                Py_DECREF(result);
2015
0
                return NULL;
2016
0
            }
2017
0
            PyTuple_SET_ITEM(result, i, item);
2018
0
        }
2019
0
        break;
2020
0
    }
2021
0
    return result;
2022
0
}
2023
2024
static PyObject*
2025
match_getitem(MatchObject* self, PyObject* name)
2026
0
{
2027
0
    return match_getslice(self, name, Py_None);
2028
0
}
2029
2030
/*[clinic input]
2031
_sre.SRE_Match.groups
2032
2033
    default: object = None
2034
        Is used for groups that did not participate in the match.
2035
2036
Return a tuple containing all the subgroups of the match, from 1.
2037
[clinic start generated code]*/
2038
2039
static PyObject *
2040
_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2041
/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2042
0
{
2043
0
    PyObject* result;
2044
0
    Py_ssize_t index;
2045
2046
0
    result = PyTuple_New(self->groups-1);
2047
0
    if (!result)
2048
0
        return NULL;
2049
2050
0
    for (index = 1; index < self->groups; index++) {
2051
0
        PyObject* item;
2052
0
        item = match_getslice_by_index(self, index, default_value);
2053
0
        if (!item) {
2054
0
            Py_DECREF(result);
2055
0
            return NULL;
2056
0
        }
2057
0
        PyTuple_SET_ITEM(result, index-1, item);
2058
0
    }
2059
2060
0
    return result;
2061
0
}
2062
2063
/*[clinic input]
2064
_sre.SRE_Match.groupdict
2065
2066
    default: object = None
2067
        Is used for groups that did not participate in the match.
2068
2069
Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2070
[clinic start generated code]*/
2071
2072
static PyObject *
2073
_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2074
/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2075
0
{
2076
0
    PyObject *result;
2077
0
    PyObject *key;
2078
0
    PyObject *value;
2079
0
    Py_ssize_t pos = 0;
2080
0
    Py_hash_t hash;
2081
2082
0
    result = PyDict_New();
2083
0
    if (!result || !self->pattern->groupindex)
2084
0
        return result;
2085
2086
0
    while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2087
0
        int status;
2088
0
        Py_INCREF(key);
2089
0
        value = match_getslice(self, key, default_value);
2090
0
        if (!value) {
2091
0
            Py_DECREF(key);
2092
0
            goto failed;
2093
0
        }
2094
0
        status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2095
0
        Py_DECREF(value);
2096
0
        Py_DECREF(key);
2097
0
        if (status < 0)
2098
0
            goto failed;
2099
0
    }
2100
2101
0
    return result;
2102
2103
0
failed:
2104
0
    Py_DECREF(result);
2105
0
    return NULL;
2106
0
}
2107
2108
/*[clinic input]
2109
_sre.SRE_Match.start -> Py_ssize_t
2110
2111
    group: object(c_default="NULL") = 0
2112
    /
2113
2114
Return index of the start of the substring matched by group.
2115
[clinic start generated code]*/
2116
2117
static Py_ssize_t
2118
_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2119
/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2120
0
{
2121
0
    Py_ssize_t index = match_getindex(self, group);
2122
2123
0
    if (index < 0) {
2124
0
        return -1;
2125
0
    }
2126
2127
    /* mark is -1 if group is undefined */
2128
0
    return self->mark[index*2];
2129
0
}
2130
2131
/*[clinic input]
2132
_sre.SRE_Match.end -> Py_ssize_t
2133
2134
    group: object(c_default="NULL") = 0
2135
    /
2136
2137
Return index of the end of the substring matched by group.
2138
[clinic start generated code]*/
2139
2140
static Py_ssize_t
2141
_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2142
/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2143
0
{
2144
0
    Py_ssize_t index = match_getindex(self, group);
2145
2146
0
    if (index < 0) {
2147
0
        return -1;
2148
0
    }
2149
2150
    /* mark is -1 if group is undefined */
2151
0
    return self->mark[index*2+1];
2152
0
}
2153
2154
/* Build the 2-tuple (i1, i2) of Python ints.  Returns a new reference,
 * or NULL if any allocation fails.
 */
LOCAL(PyObject*)
_pair(Py_ssize_t i1, Py_ssize_t i2)
{
    PyObject *first;
    PyObject *second;
    PyObject *tuple = PyTuple_New(2);

    if (tuple == NULL)
        return NULL;

    first = PyLong_FromSsize_t(i1);
    if (first == NULL) {
        Py_DECREF(tuple);
        return NULL;
    }
    PyTuple_SET_ITEM(tuple, 0, first);

    second = PyLong_FromSsize_t(i2);
    if (second == NULL) {
        /* releasing the tuple also releases `first` */
        Py_DECREF(tuple);
        return NULL;
    }
    PyTuple_SET_ITEM(tuple, 1, second);

    return tuple;
}
2180
2181
/*[clinic input]
2182
_sre.SRE_Match.span
2183
2184
    group: object(c_default="NULL") = 0
2185
    /
2186
2187
For match object m, return the 2-tuple (m.start(group), m.end(group)).
2188
[clinic start generated code]*/
2189
2190
static PyObject *
2191
_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2192
/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2193
0
{
2194
0
    Py_ssize_t index = match_getindex(self, group);
2195
2196
0
    if (index < 0) {
2197
0
        return NULL;
2198
0
    }
2199
2200
    /* marks are -1 if group is undefined */
2201
0
    return _pair(self->mark[index*2], self->mark[index*2+1]);
2202
0
}
2203
2204
static PyObject*
2205
match_regs(MatchObject* self)
2206
0
{
2207
0
    PyObject* regs;
2208
0
    PyObject* item;
2209
0
    Py_ssize_t index;
2210
2211
0
    regs = PyTuple_New(self->groups);
2212
0
    if (!regs)
2213
0
        return NULL;
2214
2215
0
    for (index = 0; index < self->groups; index++) {
2216
0
        item = _pair(self->mark[index*2], self->mark[index*2+1]);
2217
0
        if (!item) {
2218
0
            Py_DECREF(regs);
2219
0
            return NULL;
2220
0
        }
2221
0
        PyTuple_SET_ITEM(regs, index, item);
2222
0
    }
2223
2224
0
    Py_INCREF(regs);
2225
0
    self->regs = regs;
2226
2227
0
    return regs;
2228
0
}
2229
2230
/*[clinic input]
2231
_sre.SRE_Match.__copy__
2232
2233
[clinic start generated code]*/
2234
2235
static PyObject *
2236
_sre_SRE_Match___copy___impl(MatchObject *self)
2237
/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2238
0
{
2239
0
    Py_INCREF(self);
2240
0
    return (PyObject *)self;
2241
0
}
2242
2243
/*[clinic input]
2244
_sre.SRE_Match.__deepcopy__
2245
2246
    memo: object
2247
    /
2248
2249
[clinic start generated code]*/
2250
2251
static PyObject *
2252
_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2253
/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2254
0
{
2255
0
    Py_INCREF(self);
2256
0
    return (PyObject *)self;
2257
0
}
2258
2259
PyDoc_STRVAR(match_doc,
2260
"The result of re.match() and re.search().\n\
2261
Match objects always have a boolean value of True.");
2262
2263
PyDoc_STRVAR(match_group_doc,
2264
"group([group1, ...]) -> str or tuple.\n\
2265
    Return subgroup(s) of the match by indices or names.\n\
2266
    For 0 returns the entire match.");
2267
2268
static PyObject *
2269
match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2270
0
{
2271
0
    if (self->lastindex >= 0)
2272
0
        return PyLong_FromSsize_t(self->lastindex);
2273
0
    Py_RETURN_NONE;
2274
0
}
2275
2276
static PyObject *
2277
match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2278
0
{
2279
0
    if (self->pattern->indexgroup &&
2280
0
        self->lastindex >= 0 &&
2281
0
        self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2282
0
    {
2283
0
        PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2284
0
                                            self->lastindex);
2285
0
        Py_INCREF(result);
2286
0
        return result;
2287
0
    }
2288
0
    Py_RETURN_NONE;
2289
0
}
2290
2291
static PyObject *
2292
match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2293
0
{
2294
0
    if (self->regs) {
2295
0
        Py_INCREF(self->regs);
2296
0
        return self->regs;
2297
0
    } else
2298
0
        return match_regs(self);
2299
0
}
2300
2301
static PyObject *
2302
match_repr(MatchObject *self)
2303
0
{
2304
0
    PyObject *result;
2305
0
    PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2306
0
    if (group0 == NULL)
2307
0
        return NULL;
2308
0
    result = PyUnicode_FromFormat(
2309
0
            "<%s object; span=(%zd, %zd), match=%.50R>",
2310
0
            Py_TYPE(self)->tp_name,
2311
0
            self->mark[0], self->mark[1], group0);
2312
0
    Py_DECREF(group0);
2313
0
    return result;
2314
0
}
2315
2316
2317
static PyObject*
2318
pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
2319
6
{
2320
    /* create match object (from state object) */
2321
2322
6
    MatchObject* match;
2323
6
    Py_ssize_t i, j;
2324
6
    char* base;
2325
6
    int n;
2326
2327
6
    if (status > 0) {
2328
2329
        /* create match object (with room for extra group marks) */
2330
        /* coverity[ampersand_in_size] */
2331
4
        match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2332
4
                                 2*(pattern->groups+1));
2333
4
        if (!match)
2334
0
            return NULL;
2335
2336
4
        Py_INCREF(pattern);
2337
4
        match->pattern = pattern;
2338
2339
4
        Py_INCREF(state->string);
2340
4
        match->string = state->string;
2341
2342
4
        match->regs = NULL;
2343
4
        match->groups = pattern->groups+1;
2344
2345
        /* fill in group slices */
2346
2347
4
        base = (char*) state->beginning;
2348
4
        n = state->charsize;
2349
2350
4
        match->mark[0] = ((char*) state->start - base) / n;
2351
4
        match->mark[1] = ((char*) state->ptr - base) / n;
2352
2353
10
        for (i = j = 0; i < pattern->groups; i++, j+=2)
2354
6
            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2355
0
                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2356
0
                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2357
0
            } else
2358
6
                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2359
2360
4
        match->pos = state->pos;
2361
4
        match->endpos = state->endpos;
2362
2363
4
        match->lastindex = state->lastindex;
2364
2365
4
        return (PyObject*) match;
2366
2367
4
    } else if (status == 0) {
2368
2369
        /* no match */
2370
2
        Py_RETURN_NONE;
2371
2372
2
    }
2373
2374
    /* internal error */
2375
0
    pattern_error(status);
2376
0
    return NULL;
2377
6
}
2378
2379
2380
/* -------------------------------------------------------------------- */
2381
/* scanner methods (experimental) */
2382
2383
static void
2384
scanner_dealloc(ScannerObject* self)
2385
0
{
2386
0
    state_fini(&self->state);
2387
0
    Py_XDECREF(self->pattern);
2388
0
    PyObject_DEL(self);
2389
0
}
2390
2391
/*[clinic input]
2392
_sre.SRE_Scanner.match
2393
2394
[clinic start generated code]*/
2395
2396
static PyObject *
2397
_sre_SRE_Scanner_match_impl(ScannerObject *self)
2398
/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
2399
0
{
2400
0
    SRE_STATE* state = &self->state;
2401
0
    PyObject* match;
2402
0
    Py_ssize_t status;
2403
2404
0
    if (state->start == NULL)
2405
0
        Py_RETURN_NONE;
2406
2407
0
    state_reset(state);
2408
2409
0
    state->ptr = state->start;
2410
2411
0
    status = sre_match(state, PatternObject_GetCode(self->pattern));
2412
0
    if (PyErr_Occurred())
2413
0
        return NULL;
2414
2415
0
    match = pattern_new_match((PatternObject*) self->pattern,
2416
0
                               state, status);
2417
2418
0
    if (status == 0)
2419
0
        state->start = NULL;
2420
0
    else {
2421
0
        state->must_advance = (state->ptr == state->start);
2422
0
        state->start = state->ptr;
2423
0
    }
2424
2425
0
    return match;
2426
0
}
2427
2428
2429
/*[clinic input]
2430
_sre.SRE_Scanner.search
2431
2432
[clinic start generated code]*/
2433
2434
static PyObject *
2435
_sre_SRE_Scanner_search_impl(ScannerObject *self)
2436
/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
2437
0
{
2438
0
    SRE_STATE* state = &self->state;
2439
0
    PyObject* match;
2440
0
    Py_ssize_t status;
2441
2442
0
    if (state->start == NULL)
2443
0
        Py_RETURN_NONE;
2444
2445
0
    state_reset(state);
2446
2447
0
    state->ptr = state->start;
2448
2449
0
    status = sre_search(state, PatternObject_GetCode(self->pattern));
2450
0
    if (PyErr_Occurred())
2451
0
        return NULL;
2452
2453
0
    match = pattern_new_match((PatternObject*) self->pattern,
2454
0
                               state, status);
2455
2456
0
    if (status == 0)
2457
0
        state->start = NULL;
2458
0
    else {
2459
0
        state->must_advance = (state->ptr == state->start);
2460
0
        state->start = state->ptr;
2461
0
    }
2462
2463
0
    return match;
2464
0
}
2465
2466
static PyObject *
2467
pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
2468
0
{
2469
0
    ScannerObject* scanner;
2470
2471
    /* create scanner object */
2472
0
    scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2473
0
    if (!scanner)
2474
0
        return NULL;
2475
0
    scanner->pattern = NULL;
2476
2477
    /* create search state object */
2478
0
    if (!state_init(&scanner->state, self, string, pos, endpos)) {
2479
0
        Py_DECREF(scanner);
2480
0
        return NULL;
2481
0
    }
2482
2483
0
    Py_INCREF(self);
2484
0
    scanner->pattern = (PyObject*) self;
2485
2486
0
    return (PyObject*) scanner;
2487
0
}
2488
2489
static Py_hash_t
2490
pattern_hash(PatternObject *self)
2491
0
{
2492
0
    Py_hash_t hash, hash2;
2493
2494
0
    hash = PyObject_Hash(self->pattern);
2495
0
    if (hash == -1) {
2496
0
        return -1;
2497
0
    }
2498
2499
0
    hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2500
0
    hash ^= hash2;
2501
2502
0
    hash ^= self->flags;
2503
0
    hash ^= self->isbytes;
2504
0
    hash ^= self->codesize;
2505
2506
0
    if (hash == -1) {
2507
0
        hash = -2;
2508
0
    }
2509
0
    return hash;
2510
0
}
2511
2512
/* tp_richcompare for pattern objects.
 *
 * Only == and != between two exact re.Pattern instances are supported;
 * anything else yields NotImplemented.  Two patterns are equal when
 * flags, isbytes, code size, code contents and pattern source all
 * agree.  Returns NULL if comparing the pattern sources fails.
 */
static PyObject*
pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
{
    PatternObject *left, *right;
    int equal;

    if (op != Py_EQ && op != Py_NE) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    if (lefto == righto) {
        /* a pattern is equal to itself */
        return PyBool_FromLong(op == Py_EQ);
    }

    left = (PatternObject *)lefto;
    right = (PatternObject *)righto;

    /* cheap scalar checks first */
    equal = (left->flags == right->flags
             && left->isbytes == right->isbytes
             && left->codesize == right->codesize);

    if (equal) {
        /* Compare the code and the pattern because the same pattern can
           produce different codes depending on the locale used to compile
           the pattern when the re.LOCALE flag is used. Don't compare
           groups, indexgroup nor groupindex: they are derived from the
           pattern. */
        equal = (memcmp(left->code, right->code,
                        sizeof(left->code[0]) * left->codesize) == 0);
    }

    if (equal) {
        equal = PyObject_RichCompareBool(left->pattern, right->pattern,
                                         Py_EQ);
        if (equal < 0)
            return NULL;
    }

    return PyBool_FromLong(op == Py_NE ? !equal : equal);
}
2557
2558
#include "clinic/_sre.c.h"
2559
2560
static PyMethodDef pattern_methods[] = {
2561
    _SRE_SRE_PATTERN_MATCH_METHODDEF
2562
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2563
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
2564
    _SRE_SRE_PATTERN_SUB_METHODDEF
2565
    _SRE_SRE_PATTERN_SUBN_METHODDEF
2566
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
2567
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
2568
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
2569
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
2570
    _SRE_SRE_PATTERN___COPY___METHODDEF
2571
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2572
    {NULL, NULL}
2573
};
2574
2575
static PyGetSetDef pattern_getset[] = {
2576
    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2577
      "A dictionary mapping group names to group numbers."},
2578
    {NULL}  /* Sentinel */
2579
};
2580
2581
#define PAT_OFF(x) offsetof(PatternObject, x)
2582
static PyMemberDef pattern_members[] = {
2583
    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
2584
     "The pattern string from which the RE object was compiled."},
2585
    {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
2586
     "The regex matching flags."},
2587
    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
2588
     "The number of capturing groups in the pattern."},
2589
    {NULL}  /* Sentinel */
2590
};
2591
2592
static PyTypeObject Pattern_Type = {
2593
    PyVarObject_HEAD_INIT(NULL, 0)
2594
    "re.Pattern",
2595
    sizeof(PatternObject), sizeof(SRE_CODE),
2596
    (destructor)pattern_dealloc,        /* tp_dealloc */
2597
    0,                                  /* tp_vectorcall_offset */
2598
    0,                                  /* tp_getattr */
2599
    0,                                  /* tp_setattr */
2600
    0,                                  /* tp_as_async */
2601
    (reprfunc)pattern_repr,             /* tp_repr */
2602
    0,                                  /* tp_as_number */
2603
    0,                                  /* tp_as_sequence */
2604
    0,                                  /* tp_as_mapping */
2605
    (hashfunc)pattern_hash,             /* tp_hash */
2606
    0,                                  /* tp_call */
2607
    0,                                  /* tp_str */
2608
    0,                                  /* tp_getattro */
2609
    0,                                  /* tp_setattro */
2610
    0,                                  /* tp_as_buffer */
2611
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
2612
    pattern_doc,                        /* tp_doc */
2613
    0,                                  /* tp_traverse */
2614
    0,                                  /* tp_clear */
2615
    pattern_richcompare,                /* tp_richcompare */
2616
    offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
2617
    0,                                  /* tp_iter */
2618
    0,                                  /* tp_iternext */
2619
    pattern_methods,                    /* tp_methods */
2620
    pattern_members,                    /* tp_members */
2621
    pattern_getset,                     /* tp_getset */
2622
};
2623
2624
/* Match objects do not support length or assignment, but do support
2625
   __getitem__. */
2626
static PyMappingMethods match_as_mapping = {
2627
    NULL,
2628
    (binaryfunc)match_getitem,
2629
    NULL
2630
};
2631
2632
static PyMethodDef match_methods[] = {
2633
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2634
    _SRE_SRE_MATCH_START_METHODDEF
2635
    _SRE_SRE_MATCH_END_METHODDEF
2636
    _SRE_SRE_MATCH_SPAN_METHODDEF
2637
    _SRE_SRE_MATCH_GROUPS_METHODDEF
2638
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2639
    _SRE_SRE_MATCH_EXPAND_METHODDEF
2640
    _SRE_SRE_MATCH___COPY___METHODDEF
2641
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2642
    {NULL, NULL}
2643
};
2644
2645
static PyGetSetDef match_getset[] = {
2646
    {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2647
     "The integer index of the last matched capturing group."},
2648
    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2649
     "The name of the last matched capturing group."},
2650
    {"regs",      (getter)match_regs_get,      (setter)NULL},
2651
    {NULL}
2652
};
2653
2654
#define MATCH_OFF(x) offsetof(MatchObject, x)
2655
static PyMemberDef match_members[] = {
2656
    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
2657
     "The string passed to match() or search()."},
2658
    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
2659
     "The regular expression object."},
2660
    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
2661
     "The index into the string at which the RE engine started looking for a match."},
2662
    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
2663
     "The index into the string beyond which the RE engine will not go."},
2664
    {NULL}
2665
};
2666
2667
/* FIXME: implement setattr("string", None) as a special case (to
2668
   detach the associated string, if any) */
2669
2670
static PyTypeObject Match_Type = {
2671
    PyVarObject_HEAD_INIT(NULL,0)
2672
    "re.Match",
2673
    sizeof(MatchObject), sizeof(Py_ssize_t),
2674
    (destructor)match_dealloc,  /* tp_dealloc */
2675
    0,                          /* tp_vectorcall_offset */
2676
    0,                          /* tp_getattr */
2677
    0,                          /* tp_setattr */
2678
    0,                          /* tp_as_async */
2679
    (reprfunc)match_repr,       /* tp_repr */
2680
    0,                          /* tp_as_number */
2681
    0,                          /* tp_as_sequence */
2682
    &match_as_mapping,          /* tp_as_mapping */
2683
    0,                          /* tp_hash */
2684
    0,                          /* tp_call */
2685
    0,                          /* tp_str */
2686
    0,                          /* tp_getattro */
2687
    0,                          /* tp_setattro */
2688
    0,                          /* tp_as_buffer */
2689
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
2690
    match_doc,                  /* tp_doc */
2691
    0,                          /* tp_traverse */
2692
    0,                          /* tp_clear */
2693
    0,                          /* tp_richcompare */
2694
    0,                          /* tp_weaklistoffset */
2695
    0,                          /* tp_iter */
2696
    0,                          /* tp_iternext */
2697
    match_methods,              /* tp_methods */
2698
    match_members,              /* tp_members */
2699
    match_getset,               /* tp_getset */
2700
};
2701
2702
static PyMethodDef scanner_methods[] = {
2703
    _SRE_SRE_SCANNER_MATCH_METHODDEF
2704
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
2705
    {NULL, NULL}
2706
};
2707
2708
#define SCAN_OFF(x) offsetof(ScannerObject, x)
2709
static PyMemberDef scanner_members[] = {
2710
    {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2711
    {NULL}  /* Sentinel */
2712
};
2713
2714
static PyTypeObject Scanner_Type = {
2715
    PyVarObject_HEAD_INIT(NULL, 0)
2716
    "_" SRE_MODULE ".SRE_Scanner",
2717
    sizeof(ScannerObject), 0,
2718
    (destructor)scanner_dealloc,/* tp_dealloc */
2719
    0,                          /* tp_vectorcall_offset */
2720
    0,                          /* tp_getattr */
2721
    0,                          /* tp_setattr */
2722
    0,                          /* tp_as_async */
2723
    0,                          /* tp_repr */
2724
    0,                          /* tp_as_number */
2725
    0,                          /* tp_as_sequence */
2726
    0,                          /* tp_as_mapping */
2727
    0,                          /* tp_hash */
2728
    0,                          /* tp_call */
2729
    0,                          /* tp_str */
2730
    0,                          /* tp_getattro */
2731
    0,                          /* tp_setattro */
2732
    0,                          /* tp_as_buffer */
2733
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
2734
    0,                          /* tp_doc */
2735
    0,                          /* tp_traverse */
2736
    0,                          /* tp_clear */
2737
    0,                          /* tp_richcompare */
2738
    0,                          /* tp_weaklistoffset */
2739
    0,                          /* tp_iter */
2740
    0,                          /* tp_iternext */
2741
    scanner_methods,            /* tp_methods */
2742
    scanner_members,            /* tp_members */
2743
    0,                          /* tp_getset */
2744
};
2745
2746
static PyMethodDef _functions[] = {
2747
    _SRE_COMPILE_METHODDEF
2748
    _SRE_GETCODESIZE_METHODDEF
2749
    _SRE_ASCII_ISCASED_METHODDEF
2750
    _SRE_UNICODE_ISCASED_METHODDEF
2751
    _SRE_ASCII_TOLOWER_METHODDEF
2752
    _SRE_UNICODE_TOLOWER_METHODDEF
2753
    {NULL, NULL}
2754
};
2755
2756
static struct PyModuleDef sremodule = {
2757
        PyModuleDef_HEAD_INIT,
2758
        "_" SRE_MODULE,
2759
        NULL,
2760
        -1,
2761
        _functions,
2762
        NULL,
2763
        NULL,
2764
        NULL,
2765
        NULL
2766
};
2767
2768
PyMODINIT_FUNC PyInit__sre(void)
2769
1
{
2770
1
    PyObject* m;
2771
1
    PyObject* d;
2772
1
    PyObject* x;
2773
2774
    /* Patch object types */
2775
1
    if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2776
1
        PyType_Ready(&Scanner_Type))
2777
0
        return NULL;
2778
2779
1
    m = PyModule_Create(&sremodule);
2780
1
    if (m == NULL)
2781
0
        return NULL;
2782
1
    d = PyModule_GetDict(m);
2783
2784
1
    x = PyLong_FromLong(SRE_MAGIC);
2785
1
    if (x) {
2786
1
        PyDict_SetItemString(d, "MAGIC", x);
2787
1
        Py_DECREF(x);
2788
1
    }
2789
2790
1
    x = PyLong_FromLong(sizeof(SRE_CODE));
2791
1
    if (x) {
2792
1
        PyDict_SetItemString(d, "CODESIZE", x);
2793
1
        Py_DECREF(x);
2794
1
    }
2795
2796
1
    x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2797
1
    if (x) {
2798
1
        PyDict_SetItemString(d, "MAXREPEAT", x);
2799
1
        Py_DECREF(x);
2800
1
    }
2801
2802
1
    x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2803
1
    if (x) {
2804
1
        PyDict_SetItemString(d, "MAXGROUPS", x);
2805
1
        Py_DECREF(x);
2806
1
    }
2807
2808
1
    x = PyUnicode_FromString(copyright);
2809
1
    if (x) {
2810
1
        PyDict_SetItemString(d, "copyright", x);
2811
1
        Py_DECREF(x);
2812
1
    }
2813
1
    return m;
2814
1
}
2815
2816
/* vim:ts=4:sw=4:et
2817
*/