/src/Python-3.8.3/Modules/_sre.c
Line  | Count  | Source  | 
1  |  | /*  | 
2  |  |  * Secret Labs' Regular Expression Engine  | 
3  |  |  *  | 
4  |  |  * regular expression matching engine  | 
5  |  |  *  | 
6  |  |  * partial history:  | 
7  |  |  * 1999-10-24 fl   created (based on existing template matcher code)  | 
8  |  |  * 2000-03-06 fl   first alpha, sort of  | 
9  |  |  * 2000-08-01 fl   fixes for 1.6b1  | 
10  |  |  * 2000-08-07 fl   use PyOS_CheckStack() if available  | 
11  |  |  * 2000-09-20 fl   added expand method  | 
12  |  |  * 2001-03-20 fl   lots of fixes for 2.1b2  | 
13  |  |  * 2001-04-15 fl   export copyright as Python attribute, not global  | 
14  |  |  * 2001-04-28 fl   added __copy__ methods (work in progress)  | 
15  |  |  * 2001-05-14 fl   fixes for 1.5.2 compatibility  | 
16  |  |  * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)  | 
17  |  |  * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)  | 
18  |  |  * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1  | 
19  |  |  * 2001-10-21 fl   added sub/subn primitive  | 
20  |  |  * 2001-10-24 fl   added finditer primitive (for 2.2 only)  | 
21  |  |  * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)  | 
22  |  |  * 2002-11-09 fl   fixed empty sub/subn return type  | 
23  |  |  * 2003-04-18 mvl  fully support 4-byte codes  | 
24  |  |  * 2003-10-17 gn   implemented non recursive scheme  | 
25  |  |  * 2013-02-04 mrab added fullmatch primitive  | 
26  |  |  *  | 
27  |  |  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.  | 
28  |  |  *  | 
29  |  |  * This version of the SRE library can be redistributed under CNRI's  | 
30  |  |  * Python 1.6 license.  For any other use, please contact Secret Labs  | 
31  |  |  * AB (info@pythonware.com).  | 
32  |  |  *  | 
33  |  |  * Portions of this engine have been developed in cooperation with  | 
34  |  |  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and  | 
35  |  |  * other compatibility work.  | 
36  |  |  */  | 
37  |  |  | 
/* Engine version and copyright banner string. */
static const char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40  |  |  | 
41  |  | #define PY_SSIZE_T_CLEAN  | 
42  |  |  | 
43  |  | #include "Python.h"  | 
44  |  | #include "structmember.h" /* offsetof */  | 
45  |  |  | 
46  |  | #include "sre.h"  | 
47  |  |  | 
48  | 32  | #define SRE_CODE_BITS (8 * sizeof(SRE_CODE))  | 
49  |  |  | 
50  |  | #include <ctype.h>  | 
51  |  |  | 
52  |  | /* name of this module, minus the leading underscore */  | 
53  |  | #if !defined(SRE_MODULE)  | 
54  |  | #define SRE_MODULE "sre"  | 
55  |  | #endif  | 
56  |  |  | 
57  | 0  | #define SRE_PY_MODULE "re"  | 
58  |  |  | 
59  |  | /* defining this one enables tracing */  | 
60  |  | #undef VERBOSE  | 
61  |  |  | 
62  |  | /* -------------------------------------------------------------------- */  | 
63  |  |  | 
64  |  | #if defined(_MSC_VER)  | 
65  |  | #pragma optimize("agtw", on) /* doesn't seem to make much difference... */ | 
66  |  | #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */  | 
67  |  | /* fastest possible local call under MSVC */  | 
68  |  | #define LOCAL(type) static __inline type __fastcall  | 
69  |  | #else  | 
70  |  | #define LOCAL(type) static inline type  | 
71  |  | #endif  | 
72  |  |  | 
73  |  | /* error codes */  | 
74  |  | #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */  | 
75  |  | #define SRE_ERROR_STATE -2 /* illegal state */  | 
76  | 0  | #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */  | 
77  | 0  | #define SRE_ERROR_MEMORY -9 /* out of memory */  | 
78  | 0  | #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */  | 
79  |  |  | 
80  |  | #if defined(VERBOSE)  | 
81  |  | #define TRACE(v) printf v  | 
82  |  | #else  | 
83  |  | #define TRACE(v)  | 
84  |  | #endif  | 
85  |  |  | 
86  |  | /* -------------------------------------------------------------------- */  | 
87  |  | /* search engine state */  | 
88  |  |  | 
/* ASCII-only character predicates.  Each one first compares `ch` against an
 * upper bound so that the Py_IS* lookup is only reached for small values. */
#define SRE_IS_DIGIT(ch) ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) <= ' ' && Py_ISSPACE(ch))
/* Only '\n' counts as a line break here. */
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
/* Word characters: alphanumerics plus underscore. */
#define SRE_IS_WORD(ch) ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
97  |  |  | 
/* Lower-case `ch` with Py_TOLOWER when it is below 128; any larger value is
 * returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
102  |  |  | 
/* Locale-specific character predicates.
 *
 * The range test is spelled as a bit mask: for any unsigned c,
 * (c & ~N) == 0 is the same as c < N+1.  Writing it this way avoids
 * compiler warnings when c's type can only hold numbers < N+1.
 * Values outside 0..255 are never handed to isalnum() and count as
 * "not alphanumeric". */
#define SRE_LOC_IS_ALNUM(ch) ((((ch) & ~255) == 0) ? isalnum((ch)) : 0)
/* A locale word character is a locale alphanumeric or an underscore. */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
108  |  |  | 
/* Lower-case `ch` through the C library's tolower() when it is below 256;
 * larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)tolower((int)ch);
}
113  |  |  | 
/* Upper-case `ch` through the C library's toupper() when it is below 256;
 * larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)toupper((int)ch);
}
118  |  |  | 
/* Unicode-specific character predicates: thin aliases for the Py_UNICODE_*
 * classification macros. */

#define SRE_UNI_IS_DIGIT(ch)     Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch)     Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch)     Py_UNICODE_ISALNUM(ch)
/* A Unicode word character is a Unicode alphanumeric or an underscore. */
#define SRE_UNI_IS_WORD(ch)      (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
126  |  |  | 
/* Lower-case `ch` using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
131  |  |  | 
/* Upper-case `ch` using Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
136  |  |  | 
137  |  | LOCAL(int)  | 
138  |  | sre_category(SRE_CODE category, unsigned int ch)  | 
139  | 20  | { | 
140  | 20  |     switch (category) { | 
141  |  |  | 
142  | 0  |     case SRE_CATEGORY_DIGIT:  | 
143  | 0  |         return SRE_IS_DIGIT(ch);  | 
144  | 0  |     case SRE_CATEGORY_NOT_DIGIT:  | 
145  | 0  |         return !SRE_IS_DIGIT(ch);  | 
146  | 0  |     case SRE_CATEGORY_SPACE:  | 
147  | 0  |         return SRE_IS_SPACE(ch);  | 
148  | 0  |     case SRE_CATEGORY_NOT_SPACE:  | 
149  | 0  |         return !SRE_IS_SPACE(ch);  | 
150  | 0  |     case SRE_CATEGORY_WORD:  | 
151  | 0  |         return SRE_IS_WORD(ch);  | 
152  | 0  |     case SRE_CATEGORY_NOT_WORD:  | 
153  | 0  |         return !SRE_IS_WORD(ch);  | 
154  | 0  |     case SRE_CATEGORY_LINEBREAK:  | 
155  | 0  |         return SRE_IS_LINEBREAK(ch);  | 
156  | 0  |     case SRE_CATEGORY_NOT_LINEBREAK:  | 
157  | 0  |         return !SRE_IS_LINEBREAK(ch);  | 
158  |  |  | 
159  | 0  |     case SRE_CATEGORY_LOC_WORD:  | 
160  | 0  |         return SRE_LOC_IS_WORD(ch);  | 
161  | 0  |     case SRE_CATEGORY_LOC_NOT_WORD:  | 
162  | 0  |         return !SRE_LOC_IS_WORD(ch);  | 
163  |  |  | 
164  | 2  |     case SRE_CATEGORY_UNI_DIGIT:  | 
165  | 2  |         return SRE_UNI_IS_DIGIT(ch);  | 
166  | 0  |     case SRE_CATEGORY_UNI_NOT_DIGIT:  | 
167  | 0  |         return !SRE_UNI_IS_DIGIT(ch);  | 
168  | 0  |     case SRE_CATEGORY_UNI_SPACE:  | 
169  | 0  |         return SRE_UNI_IS_SPACE(ch);  | 
170  | 0  |     case SRE_CATEGORY_UNI_NOT_SPACE:  | 
171  | 0  |         return !SRE_UNI_IS_SPACE(ch);  | 
172  | 18  |     case SRE_CATEGORY_UNI_WORD:  | 
173  | 18  |         return SRE_UNI_IS_WORD(ch);  | 
174  | 0  |     case SRE_CATEGORY_UNI_NOT_WORD:  | 
175  | 0  |         return !SRE_UNI_IS_WORD(ch);  | 
176  | 0  |     case SRE_CATEGORY_UNI_LINEBREAK:  | 
177  | 0  |         return SRE_UNI_IS_LINEBREAK(ch);  | 
178  | 0  |     case SRE_CATEGORY_UNI_NOT_LINEBREAK:  | 
179  | 0  |         return !SRE_UNI_IS_LINEBREAK(ch);  | 
180  | 20  |     }  | 
181  | 0  |     return 0;  | 
182  | 20  | }  | 
183  |  |  | 
184  |  | LOCAL(int)  | 
185  |  | char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)  | 
186  | 0  | { | 
187  | 0  |     return ch == pattern  | 
188  | 0  |         || (SRE_CODE) sre_lower_locale(ch) == pattern  | 
189  | 0  |         || (SRE_CODE) sre_upper_locale(ch) == pattern;  | 
190  | 0  | }  | 
191  |  |  | 
192  |  |  | 
193  |  | /* helpers */  | 
194  |  |  | 
195  |  | static void  | 
196  |  | data_stack_dealloc(SRE_STATE* state)  | 
197  | 6  | { | 
198  | 6  |     if (state->data_stack) { | 
199  | 6  |         PyMem_FREE(state->data_stack);  | 
200  | 6  |         state->data_stack = NULL;  | 
201  | 6  |     }  | 
202  | 6  |     state->data_stack_size = state->data_stack_base = 0;  | 
203  | 6  | }  | 
204  |  |  | 
205  |  | static int  | 
206  |  | data_stack_grow(SRE_STATE* state, Py_ssize_t size)  | 
207  | 6  | { | 
208  | 6  |     Py_ssize_t minsize, cursize;  | 
209  | 6  |     minsize = state->data_stack_base+size;  | 
210  | 6  |     cursize = state->data_stack_size;  | 
211  | 6  |     if (cursize < minsize) { | 
212  | 6  |         void* stack;  | 
213  | 6  |         cursize = minsize+minsize/4+1024;  | 
214  | 6  |         TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize)); | 
215  | 6  |         stack = PyMem_REALLOC(state->data_stack, cursize);  | 
216  | 6  |         if (!stack) { | 
217  | 0  |             data_stack_dealloc(state);  | 
218  | 0  |             return SRE_ERROR_MEMORY;  | 
219  | 0  |         }  | 
220  | 6  |         state->data_stack = (char *)stack;  | 
221  | 6  |         state->data_stack_size = cursize;  | 
222  | 6  |     }  | 
223  | 6  |     return 0;  | 
224  | 6  | }  | 
225  |  |  | 
226  |  | /* generate 8-bit version */  | 
227  |  |  | 
228  | 378  | #define SRE_CHAR Py_UCS1  | 
229  |  | #define SIZEOF_SRE_CHAR 1  | 
230  | 168  | #define SRE(F) sre_ucs1_##F  | 
231  |  | #include "sre_lib.h"  | 
232  |  |  | 
233  |  | /* generate 16-bit unicode version */  | 
234  |  |  | 
235  | 0  | #define SRE_CHAR Py_UCS2  | 
236  |  | #define SIZEOF_SRE_CHAR 2  | 
237  | 0  | #define SRE(F) sre_ucs2_##F  | 
238  |  | #include "sre_lib.h"  | 
239  |  |  | 
240  |  | /* generate 32-bit unicode version */  | 
241  |  |  | 
242  | 0  | #define SRE_CHAR Py_UCS4  | 
243  |  | #define SIZEOF_SRE_CHAR 4  | 
244  | 0  | #define SRE(F) sre_ucs4_##F  | 
245  |  | #include "sre_lib.h"  | 
246  |  |  | 
247  |  | /* -------------------------------------------------------------------- */  | 
248  |  | /* factories and destructors */  | 
249  |  |  | 
250  |  | /* see sre.h for object declarations */  | 
251  |  | static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);  | 
252  |  | static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);  | 
253  |  |  | 
254  |  |  | 
255  |  | /*[clinic input]  | 
256  |  | module _sre  | 
257  |  | class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"  | 
258  |  | class _sre.SRE_Match "MatchObject *" "&Match_Type"  | 
259  |  | class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"  | 
260  |  | [clinic start generated code]*/  | 
261  |  | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/  | 
262  |  |  | 
263  |  | static PyTypeObject Pattern_Type;  | 
264  |  | static PyTypeObject Match_Type;  | 
265  |  | static PyTypeObject Scanner_Type;  | 
266  |  |  | 
267  |  | /*[clinic input]  | 
268  |  | _sre.getcodesize -> int  | 
269  |  | [clinic start generated code]*/  | 
270  |  |  | 
271  |  | static int  | 
272  |  | _sre_getcodesize_impl(PyObject *module)  | 
273  |  | /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/  | 
274  | 0  | { | 
275  | 0  |     return sizeof(SRE_CODE);  | 
276  | 0  | }  | 
277  |  |  | 
278  |  | /*[clinic input]  | 
279  |  | _sre.ascii_iscased -> bool  | 
280  |  |  | 
281  |  |     character: int  | 
282  |  |     /  | 
283  |  |  | 
284  |  | [clinic start generated code]*/  | 
285  |  |  | 
286  |  | static int  | 
287  |  | _sre_ascii_iscased_impl(PyObject *module, int character)  | 
288  |  | /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/  | 
289  | 8  | { | 
290  | 8  |     unsigned int ch = (unsigned int)character;  | 
291  | 8  |     return ch < 128 && Py_ISALPHA(ch);  | 
292  | 8  | }  | 
293  |  |  | 
294  |  | /*[clinic input]  | 
295  |  | _sre.unicode_iscased -> bool  | 
296  |  |  | 
297  |  |     character: int  | 
298  |  |     /  | 
299  |  |  | 
300  |  | [clinic start generated code]*/  | 
301  |  |  | 
302  |  | static int  | 
303  |  | _sre_unicode_iscased_impl(PyObject *module, int character)  | 
304  |  | /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/  | 
305  | 36  | { | 
306  | 36  |     unsigned int ch = (unsigned int)character;  | 
307  | 36  |     return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);  | 
308  | 36  | }  | 
309  |  |  | 
310  |  | /*[clinic input]  | 
311  |  | _sre.ascii_tolower -> int  | 
312  |  |  | 
313  |  |     character: int  | 
314  |  |     /  | 
315  |  |  | 
316  |  | [clinic start generated code]*/  | 
317  |  |  | 
318  |  | static int  | 
319  |  | _sre_ascii_tolower_impl(PyObject *module, int character)  | 
320  |  | /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/  | 
321  | 128  | { | 
322  | 128  |     return sre_lower_ascii(character);  | 
323  | 128  | }  | 
324  |  |  | 
325  |  | /*[clinic input]  | 
326  |  | _sre.unicode_tolower -> int  | 
327  |  |  | 
328  |  |     character: int  | 
329  |  |     /  | 
330  |  |  | 
331  |  | [clinic start generated code]*/  | 
332  |  |  | 
333  |  | static int  | 
334  |  | _sre_unicode_tolower_impl(PyObject *module, int character)  | 
335  |  | /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/  | 
336  | 40  | { | 
337  | 40  |     return sre_lower_unicode(character);  | 
338  | 40  | }  | 
339  |  |  | 
340  |  | LOCAL(void)  | 
341  |  | state_reset(SRE_STATE* state)  | 
342  | 0  | { | 
343  |  |     /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */  | 
344  |  |     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/  | 
345  |  | 
  | 
346  | 0  |     state->lastmark = -1;  | 
347  | 0  |     state->lastindex = -1;  | 
348  |  | 
  | 
349  | 0  |     state->repeat = NULL;  | 
350  |  | 
  | 
351  | 0  |     data_stack_dealloc(state);  | 
352  | 0  | }  | 
353  |  |  | 
354  |  | static void*  | 
355  |  | getstring(PyObject* string, Py_ssize_t* p_length,  | 
356  |  |           int* p_isbytes, int* p_charsize,  | 
357  |  |           Py_buffer *view)  | 
358  | 14  | { | 
359  |  |     /* given a python object, return a data pointer, a length (in  | 
360  |  |        characters), and a character size.  return NULL if the object  | 
361  |  |        is not a string (or not compatible) */  | 
362  |  |  | 
363  |  |     /* Unicode objects do not support the buffer API. So, get the data  | 
364  |  |        directly instead. */  | 
365  | 14  |     if (PyUnicode_Check(string)) { | 
366  | 12  |         if (PyUnicode_READY(string) == -1)  | 
367  | 0  |             return NULL;  | 
368  | 12  |         *p_length = PyUnicode_GET_LENGTH(string);  | 
369  | 12  |         *p_charsize = PyUnicode_KIND(string);  | 
370  | 12  |         *p_isbytes = 0;  | 
371  | 12  |         return PyUnicode_DATA(string);  | 
372  | 12  |     }  | 
373  |  |  | 
374  |  |     /* get pointer to byte string buffer */  | 
375  | 2  |     if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) { | 
376  | 0  |         PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");  | 
377  | 0  |         return NULL;  | 
378  | 0  |     }  | 
379  |  |  | 
380  | 2  |     *p_length = view->len;  | 
381  | 2  |     *p_charsize = 1;  | 
382  | 2  |     *p_isbytes = 1;  | 
383  |  |  | 
384  | 2  |     if (view->buf == NULL) { | 
385  | 0  |         PyErr_SetString(PyExc_ValueError, "Buffer is NULL");  | 
386  | 0  |         PyBuffer_Release(view);  | 
387  | 0  |         view->buf = NULL;  | 
388  | 0  |         return NULL;  | 
389  | 0  |     }  | 
390  | 2  |     return view->buf;  | 
391  | 2  | }  | 
392  |  |  | 
393  |  | LOCAL(PyObject*)  | 
394  |  | state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,  | 
395  |  |            Py_ssize_t start, Py_ssize_t end)  | 
396  | 6  | { | 
397  |  |     /* prepare state object */  | 
398  |  |  | 
399  | 6  |     Py_ssize_t length;  | 
400  | 6  |     int isbytes, charsize;  | 
401  | 6  |     void* ptr;  | 
402  |  |  | 
403  | 6  |     memset(state, 0, sizeof(SRE_STATE));  | 
404  |  |  | 
405  | 6  |     state->mark = PyMem_New(void *, pattern->groups * 2);  | 
406  | 6  |     if (!state->mark) { | 
407  | 0  |         PyErr_NoMemory();  | 
408  | 0  |         goto err;  | 
409  | 0  |     }  | 
410  | 6  |     state->lastmark = -1;  | 
411  | 6  |     state->lastindex = -1;  | 
412  |  |  | 
413  | 6  |     state->buffer.buf = NULL;  | 
414  | 6  |     ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);  | 
415  | 6  |     if (!ptr)  | 
416  | 0  |         goto err;  | 
417  |  |  | 
418  | 6  |     if (isbytes && pattern->isbytes == 0) { | 
419  | 0  |         PyErr_SetString(PyExc_TypeError,  | 
420  | 0  |                         "cannot use a string pattern on a bytes-like object");  | 
421  | 0  |         goto err;  | 
422  | 0  |     }  | 
423  | 6  |     if (!isbytes && pattern->isbytes > 0) { | 
424  | 0  |         PyErr_SetString(PyExc_TypeError,  | 
425  | 0  |                         "cannot use a bytes pattern on a string-like object");  | 
426  | 0  |         goto err;  | 
427  | 0  |     }  | 
428  |  |  | 
429  |  |     /* adjust boundaries */  | 
430  | 6  |     if (start < 0)  | 
431  | 0  |         start = 0;  | 
432  | 6  |     else if (start > length)  | 
433  | 0  |         start = length;  | 
434  |  |  | 
435  | 6  |     if (end < 0)  | 
436  | 0  |         end = 0;  | 
437  | 6  |     else if (end > length)  | 
438  | 6  |         end = length;  | 
439  |  |  | 
440  | 6  |     state->isbytes = isbytes;  | 
441  | 6  |     state->charsize = charsize;  | 
442  | 6  |     state->match_all = 0;  | 
443  | 6  |     state->must_advance = 0;  | 
444  |  |  | 
445  | 6  |     state->beginning = ptr;  | 
446  |  |  | 
447  | 6  |     state->start = (void*) ((char*) ptr + start * state->charsize);  | 
448  | 6  |     state->end = (void*) ((char*) ptr + end * state->charsize);  | 
449  |  |  | 
450  | 6  |     Py_INCREF(string);  | 
451  | 6  |     state->string = string;  | 
452  | 6  |     state->pos = start;  | 
453  | 6  |     state->endpos = end;  | 
454  |  |  | 
455  | 6  |     return string;  | 
456  | 0  |   err:  | 
457  | 0  |     PyMem_Del(state->mark);  | 
458  | 0  |     state->mark = NULL;  | 
459  | 0  |     if (state->buffer.buf)  | 
460  | 0  |         PyBuffer_Release(&state->buffer);  | 
461  | 0  |     return NULL;  | 
462  | 6  | }  | 
463  |  |  | 
464  |  | LOCAL(void)  | 
465  |  | state_fini(SRE_STATE* state)  | 
466  | 6  | { | 
467  | 6  |     if (state->buffer.buf)  | 
468  | 1  |         PyBuffer_Release(&state->buffer);  | 
469  | 6  |     Py_XDECREF(state->string);  | 
470  | 6  |     data_stack_dealloc(state);  | 
471  | 6  |     PyMem_Del(state->mark);  | 
472  | 6  |     state->mark = NULL;  | 
473  | 6  | }  | 
474  |  |  | 
/* Convert a pointer into the subject data (`member`) to a character index:
 * the byte distance from state->beginning divided by state->charsize. */
#define STATE_OFFSET(state, member) (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
478  |  |  | 
479  |  | LOCAL(PyObject*)  | 
480  |  | getslice(int isbytes, const void *ptr,  | 
481  |  |          PyObject* string, Py_ssize_t start, Py_ssize_t end)  | 
482  | 0  | { | 
483  | 0  |     if (isbytes) { | 
484  | 0  |         if (PyBytes_CheckExact(string) &&  | 
485  | 0  |             start == 0 && end == PyBytes_GET_SIZE(string)) { | 
486  | 0  |             Py_INCREF(string);  | 
487  | 0  |             return string;  | 
488  | 0  |         }  | 
489  | 0  |         return PyBytes_FromStringAndSize(  | 
490  | 0  |                 (const char *)ptr + start, end - start);  | 
491  | 0  |     }  | 
492  | 0  |     else { | 
493  | 0  |         return PyUnicode_Substring(string, start, end);  | 
494  | 0  |     }  | 
495  | 0  | }  | 
496  |  |  | 
497  |  | LOCAL(PyObject*)  | 
498  |  | state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)  | 
499  | 0  | { | 
500  | 0  |     Py_ssize_t i, j;  | 
501  |  | 
  | 
502  | 0  |     index = (index - 1) * 2;  | 
503  |  | 
  | 
504  | 0  |     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) { | 
505  | 0  |         if (empty)  | 
506  |  |             /* want empty string */  | 
507  | 0  |             i = j = 0;  | 
508  | 0  |         else { | 
509  | 0  |             Py_RETURN_NONE;  | 
510  | 0  |         }  | 
511  | 0  |     } else { | 
512  | 0  |         i = STATE_OFFSET(state, state->mark[index]);  | 
513  | 0  |         j = STATE_OFFSET(state, state->mark[index+1]);  | 
514  | 0  |     }  | 
515  |  |  | 
516  | 0  |     return getslice(state->isbytes, state->beginning, string, i, j);  | 
517  | 0  | }  | 
518  |  |  | 
519  |  | static void  | 
520  |  | pattern_error(Py_ssize_t status)  | 
521  | 0  | { | 
522  | 0  |     switch (status) { | 
523  | 0  |     case SRE_ERROR_RECURSION_LIMIT:  | 
524  |  |         /* This error code seems to be unused. */  | 
525  | 0  |         PyErr_SetString(  | 
526  | 0  |             PyExc_RecursionError,  | 
527  | 0  |             "maximum recursion limit exceeded"  | 
528  | 0  |             );  | 
529  | 0  |         break;  | 
530  | 0  |     case SRE_ERROR_MEMORY:  | 
531  | 0  |         PyErr_NoMemory();  | 
532  | 0  |         break;  | 
533  | 0  |     case SRE_ERROR_INTERRUPTED:  | 
534  |  |     /* An exception has already been raised, so let it fly */  | 
535  | 0  |         break;  | 
536  | 0  |     default:  | 
537  |  |         /* other error codes indicate compiler/engine bugs */  | 
538  | 0  |         PyErr_SetString(  | 
539  | 0  |             PyExc_RuntimeError,  | 
540  | 0  |             "internal error in regular expression engine"  | 
541  | 0  |             );  | 
542  | 0  |     }  | 
543  | 0  | }  | 
544  |  |  | 
545  |  | static void  | 
546  |  | pattern_dealloc(PatternObject* self)  | 
547  | 2  | { | 
548  | 2  |     if (self->weakreflist != NULL)  | 
549  | 0  |         PyObject_ClearWeakRefs((PyObject *) self);  | 
550  | 2  |     Py_XDECREF(self->pattern);  | 
551  | 2  |     Py_XDECREF(self->groupindex);  | 
552  | 2  |     Py_XDECREF(self->indexgroup);  | 
553  | 2  |     PyObject_DEL(self);  | 
554  | 2  | }  | 
555  |  |  | 
556  |  | LOCAL(Py_ssize_t)  | 
557  |  | sre_match(SRE_STATE* state, SRE_CODE* pattern)  | 
558  | 4  | { | 
559  | 4  |     if (state->charsize == 1)  | 
560  | 4  |         return sre_ucs1_match(state, pattern, 1);  | 
561  | 0  |     if (state->charsize == 2)  | 
562  | 0  |         return sre_ucs2_match(state, pattern, 1);  | 
563  | 0  |     assert(state->charsize == 4);  | 
564  | 0  |     return sre_ucs4_match(state, pattern, 1);  | 
565  | 0  | }  | 
566  |  |  | 
567  |  | LOCAL(Py_ssize_t)  | 
568  |  | sre_search(SRE_STATE* state, SRE_CODE* pattern)  | 
569  | 2  | { | 
570  | 2  |     if (state->charsize == 1)  | 
571  | 2  |         return sre_ucs1_search(state, pattern);  | 
572  | 0  |     if (state->charsize == 2)  | 
573  | 0  |         return sre_ucs2_search(state, pattern);  | 
574  | 0  |     assert(state->charsize == 4);  | 
575  | 0  |     return sre_ucs4_search(state, pattern);  | 
576  | 0  | }  | 
577  |  |  | 
578  |  | /*[clinic input]  | 
579  |  | _sre.SRE_Pattern.match  | 
580  |  |  | 
581  |  |     string: object  | 
582  |  |     pos: Py_ssize_t = 0  | 
583  |  |     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize  | 
584  |  |  | 
585  |  | Matches zero or more characters at the beginning of the string.  | 
586  |  | [clinic start generated code]*/  | 
587  |  |  | 
588  |  | static PyObject *  | 
589  |  | _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,  | 
590  |  |                             Py_ssize_t pos, Py_ssize_t endpos)  | 
591  |  | /*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/  | 
592  | 4  | { | 
593  | 4  |     SRE_STATE state;  | 
594  | 4  |     Py_ssize_t status;  | 
595  | 4  |     PyObject *match;  | 
596  |  |  | 
597  | 4  |     if (!state_init(&state, (PatternObject *)self, string, pos, endpos))  | 
598  | 0  |         return NULL;  | 
599  |  |  | 
600  | 4  |     state.ptr = state.start;  | 
601  |  |  | 
602  | 4  |     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); | 
603  |  |  | 
604  | 4  |     status = sre_match(&state, PatternObject_GetCode(self));  | 
605  |  |  | 
606  | 4  |     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); | 
607  | 4  |     if (PyErr_Occurred()) { | 
608  | 0  |         state_fini(&state);  | 
609  | 0  |         return NULL;  | 
610  | 0  |     }  | 
611  |  |  | 
612  | 4  |     match = pattern_new_match(self, &state, status);  | 
613  | 4  |     state_fini(&state);  | 
614  | 4  |     return match;  | 
615  | 4  | }  | 
616  |  |  | 
617  |  | /*[clinic input]  | 
618  |  | _sre.SRE_Pattern.fullmatch  | 
619  |  |  | 
620  |  |     string: object  | 
621  |  |     pos: Py_ssize_t = 0  | 
622  |  |     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize  | 
623  |  |  | 
624  |  | Matches against all of the string.  | 
625  |  | [clinic start generated code]*/  | 
626  |  |  | 
627  |  | static PyObject *  | 
628  |  | _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,  | 
629  |  |                                 Py_ssize_t pos, Py_ssize_t endpos)  | 
630  |  | /*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/  | 
631  | 0  | { | 
632  | 0  |     SRE_STATE state;  | 
633  | 0  |     Py_ssize_t status;  | 
634  | 0  |     PyObject *match;  | 
635  |  | 
  | 
636  | 0  |     if (!state_init(&state, self, string, pos, endpos))  | 
637  | 0  |         return NULL;  | 
638  |  |  | 
639  | 0  |     state.ptr = state.start;  | 
640  |  | 
  | 
641  | 0  |     TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr)); | 
642  |  | 
  | 
643  | 0  |     state.match_all = 1;  | 
644  | 0  |     status = sre_match(&state, PatternObject_GetCode(self));  | 
645  |  | 
  | 
646  | 0  |     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); | 
647  | 0  |     if (PyErr_Occurred()) { | 
648  | 0  |         state_fini(&state);  | 
649  | 0  |         return NULL;  | 
650  | 0  |     }  | 
651  |  |  | 
652  | 0  |     match = pattern_new_match(self, &state, status);  | 
653  | 0  |     state_fini(&state);  | 
654  | 0  |     return match;  | 
655  | 0  | }  | 
656  |  |  | 
657  |  | /*[clinic input]  | 
658  |  | _sre.SRE_Pattern.search  | 
659  |  |  | 
660  |  |     string: object  | 
661  |  |     pos: Py_ssize_t = 0  | 
662  |  |     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize  | 
663  |  |  | 
664  |  | Scan through string looking for a match, and return a corresponding match object instance.  | 
665  |  |  | 
666  |  | Return None if no position in the string matches.  | 
667  |  | [clinic start generated code]*/  | 
668  |  |  | 
669  |  | static PyObject *  | 
670  |  | _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,  | 
671  |  |                              Py_ssize_t pos, Py_ssize_t endpos)  | 
672  |  | /*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/  | 
673  | 2  | { | 
674  | 2  |     SRE_STATE state;  | 
675  | 2  |     Py_ssize_t status;  | 
676  | 2  |     PyObject *match;  | 
677  |  |  | 
678  | 2  |     if (!state_init(&state, self, string, pos, endpos))  | 
679  | 0  |         return NULL;  | 
680  |  |  | 
681  | 2  |     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr)); | 
682  |  |  | 
683  | 2  |     status = sre_search(&state, PatternObject_GetCode(self));  | 
684  |  |  | 
685  | 2  |     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); | 
686  |  |  | 
687  | 2  |     if (PyErr_Occurred()) { | 
688  | 0  |         state_fini(&state);  | 
689  | 0  |         return NULL;  | 
690  | 0  |     }  | 
691  |  |  | 
692  | 2  |     match = pattern_new_match(self, &state, status);  | 
693  | 2  |     state_fini(&state);  | 
694  | 2  |     return match;  | 
695  | 2  | }  | 
696  |  |  | 
697  |  | static PyObject*  | 
698  |  | call(const char* module, const char* function, PyObject* args)  | 
699  | 0  | { | 
700  | 0  |     PyObject* name;  | 
701  | 0  |     PyObject* mod;  | 
702  | 0  |     PyObject* func;  | 
703  | 0  |     PyObject* result;  | 
704  |  | 
  | 
705  | 0  |     if (!args)  | 
706  | 0  |         return NULL;  | 
707  | 0  |     name = PyUnicode_FromString(module);  | 
708  | 0  |     if (!name)  | 
709  | 0  |         return NULL;  | 
710  | 0  |     mod = PyImport_Import(name);  | 
711  | 0  |     Py_DECREF(name);  | 
712  | 0  |     if (!mod)  | 
713  | 0  |         return NULL;  | 
714  | 0  |     func = PyObject_GetAttrString(mod, function);  | 
715  | 0  |     Py_DECREF(mod);  | 
716  | 0  |     if (!func)  | 
717  | 0  |         return NULL;  | 
718  | 0  |     result = PyObject_CallObject(func, args);  | 
719  | 0  |     Py_DECREF(func);  | 
720  | 0  |     Py_DECREF(args);  | 
721  | 0  |     return result;  | 
722  | 0  | }  | 
723  |  |  | 
724  |  | /*[clinic input]  | 
725  |  | _sre.SRE_Pattern.findall  | 
726  |  |  | 
727  |  |     string: object  | 
728  |  |     pos: Py_ssize_t = 0  | 
729  |  |     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize  | 
730  |  |  | 
731  |  | Return a list of all non-overlapping matches of pattern in string.  | 
732  |  | [clinic start generated code]*/  | 
733  |  |  | 
734  |  | static PyObject *  | 
735  |  | _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,  | 
736  |  |                               Py_ssize_t pos, Py_ssize_t endpos)  | 
737  |  | /*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/  | 
738  | 0  | { | 
739  | 0  |     SRE_STATE state;  | 
740  | 0  |     PyObject* list;  | 
741  | 0  |     Py_ssize_t status;  | 
742  | 0  |     Py_ssize_t i, b, e;  | 
743  |  | 
  | 
744  | 0  |     if (!state_init(&state, self, string, pos, endpos))  | 
745  | 0  |         return NULL;  | 
746  |  |  | 
747  | 0  |     list = PyList_New(0);  | 
748  | 0  |     if (!list) { | 
749  | 0  |         state_fini(&state);  | 
750  | 0  |         return NULL;  | 
751  | 0  |     }  | 
752  |  |  | 
753  | 0  |     while (state.start <= state.end) { | 
754  |  | 
  | 
755  | 0  |         PyObject* item;  | 
756  |  | 
  | 
757  | 0  |         state_reset(&state);  | 
758  |  | 
  | 
759  | 0  |         state.ptr = state.start;  | 
760  |  | 
  | 
761  | 0  |         status = sre_search(&state, PatternObject_GetCode(self));  | 
762  | 0  |         if (PyErr_Occurred())  | 
763  | 0  |             goto error;  | 
764  |  |  | 
765  | 0  |         if (status <= 0) { | 
766  | 0  |             if (status == 0)  | 
767  | 0  |                 break;  | 
768  | 0  |             pattern_error(status);  | 
769  | 0  |             goto error;  | 
770  | 0  |         }  | 
771  |  |  | 
772  |  |         /* don't bother to build a match object */  | 
773  | 0  |         switch (self->groups) { | 
774  | 0  |         case 0:  | 
775  | 0  |             b = STATE_OFFSET(&state, state.start);  | 
776  | 0  |             e = STATE_OFFSET(&state, state.ptr);  | 
777  | 0  |             item = getslice(state.isbytes, state.beginning,  | 
778  | 0  |                             string, b, e);  | 
779  | 0  |             if (!item)  | 
780  | 0  |                 goto error;  | 
781  | 0  |             break;  | 
782  | 0  |         case 1:  | 
783  | 0  |             item = state_getslice(&state, 1, string, 1);  | 
784  | 0  |             if (!item)  | 
785  | 0  |                 goto error;  | 
786  | 0  |             break;  | 
787  | 0  |         default:  | 
788  | 0  |             item = PyTuple_New(self->groups);  | 
789  | 0  |             if (!item)  | 
790  | 0  |                 goto error;  | 
791  | 0  |             for (i = 0; i < self->groups; i++) { | 
792  | 0  |                 PyObject* o = state_getslice(&state, i+1, string, 1);  | 
793  | 0  |                 if (!o) { | 
794  | 0  |                     Py_DECREF(item);  | 
795  | 0  |                     goto error;  | 
796  | 0  |                 }  | 
797  | 0  |                 PyTuple_SET_ITEM(item, i, o);  | 
798  | 0  |             }  | 
799  | 0  |             break;  | 
800  | 0  |         }  | 
801  |  |  | 
802  | 0  |         status = PyList_Append(list, item);  | 
803  | 0  |         Py_DECREF(item);  | 
804  | 0  |         if (status < 0)  | 
805  | 0  |             goto error;  | 
806  |  |  | 
807  | 0  |         state.must_advance = (state.ptr == state.start);  | 
808  | 0  |         state.start = state.ptr;  | 
809  | 0  |     }  | 
810  |  |  | 
811  | 0  |     state_fini(&state);  | 
812  | 0  |     return list;  | 
813  |  |  | 
814  | 0  | error:  | 
815  | 0  |     Py_DECREF(list);  | 
816  | 0  |     state_fini(&state);  | 
817  | 0  |     return NULL;  | 
818  |  | 
  | 
819  | 0  | }  | 
820  |  |  | 
821  |  | /*[clinic input]  | 
822  |  | _sre.SRE_Pattern.finditer  | 
823  |  |  | 
824  |  |     string: object  | 
825  |  |     pos: Py_ssize_t = 0  | 
826  |  |     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize  | 
827  |  |  | 
828  |  | Return an iterator over all non-overlapping matches for the RE pattern in string.  | 
829  |  |  | 
830  |  | For each match, the iterator returns a match object.  | 
831  |  | [clinic start generated code]*/  | 
832  |  |  | 
833  |  | static PyObject *  | 
834  |  | _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,  | 
835  |  |                                Py_ssize_t pos, Py_ssize_t endpos)  | 
836  |  | /*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/  | 
837  | 0  | { | 
838  | 0  |     PyObject* scanner;  | 
839  | 0  |     PyObject* search;  | 
840  | 0  |     PyObject* iterator;  | 
841  |  | 
  | 
842  | 0  |     scanner = pattern_scanner(self, string, pos, endpos);  | 
843  | 0  |     if (!scanner)  | 
844  | 0  |         return NULL;  | 
845  |  |  | 
846  | 0  |     search = PyObject_GetAttrString(scanner, "search");  | 
847  | 0  |     Py_DECREF(scanner);  | 
848  | 0  |     if (!search)  | 
849  | 0  |         return NULL;  | 
850  |  |  | 
851  | 0  |     iterator = PyCallIter_New(search, Py_None);  | 
852  | 0  |     Py_DECREF(search);  | 
853  |  | 
  | 
854  | 0  |     return iterator;  | 
855  | 0  | }  | 
856  |  |  | 
857  |  | /*[clinic input]  | 
858  |  | _sre.SRE_Pattern.scanner  | 
859  |  |  | 
860  |  |     string: object  | 
861  |  |     pos: Py_ssize_t = 0  | 
862  |  |     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize  | 
863  |  |  | 
864  |  | [clinic start generated code]*/  | 
865  |  |  | 
866  |  | static PyObject *  | 
867  |  | _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,  | 
868  |  |                               Py_ssize_t pos, Py_ssize_t endpos)  | 
869  |  | /*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/  | 
870  | 0  | { | 
871  | 0  |     return pattern_scanner(self, string, pos, endpos);  | 
872  | 0  | }  | 
873  |  |  | 
874  |  | /*[clinic input]  | 
875  |  | _sre.SRE_Pattern.split  | 
876  |  |  | 
877  |  |     string: object  | 
878  |  |     maxsplit: Py_ssize_t = 0  | 
879  |  |  | 
880  |  | Split string by the occurrences of pattern.  | 
881  |  | [clinic start generated code]*/  | 
882  |  |  | 
883  |  | static PyObject *  | 
884  |  | _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,  | 
885  |  |                             Py_ssize_t maxsplit)  | 
886  |  | /*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/  | 
887  | 0  | { | 
888  | 0  |     SRE_STATE state;  | 
889  | 0  |     PyObject* list;  | 
890  | 0  |     PyObject* item;  | 
891  | 0  |     Py_ssize_t status;  | 
892  | 0  |     Py_ssize_t n;  | 
893  | 0  |     Py_ssize_t i;  | 
894  | 0  |     void* last;  | 
895  |  | 
  | 
896  | 0  |     assert(self->codesize != 0);  | 
897  |  | 
  | 
898  | 0  |     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))  | 
899  | 0  |         return NULL;  | 
900  |  |  | 
901  | 0  |     list = PyList_New(0);  | 
902  | 0  |     if (!list) { | 
903  | 0  |         state_fini(&state);  | 
904  | 0  |         return NULL;  | 
905  | 0  |     }  | 
906  |  |  | 
907  | 0  |     n = 0;  | 
908  | 0  |     last = state.start;  | 
909  |  | 
  | 
910  | 0  |     while (!maxsplit || n < maxsplit) { | 
911  |  | 
  | 
912  | 0  |         state_reset(&state);  | 
913  |  | 
  | 
914  | 0  |         state.ptr = state.start;  | 
915  |  | 
  | 
916  | 0  |         status = sre_search(&state, PatternObject_GetCode(self));  | 
917  | 0  |         if (PyErr_Occurred())  | 
918  | 0  |             goto error;  | 
919  |  |  | 
920  | 0  |         if (status <= 0) { | 
921  | 0  |             if (status == 0)  | 
922  | 0  |                 break;  | 
923  | 0  |             pattern_error(status);  | 
924  | 0  |             goto error;  | 
925  | 0  |         }  | 
926  |  |  | 
927  |  |         /* get segment before this match */  | 
928  | 0  |         item = getslice(state.isbytes, state.beginning,  | 
929  | 0  |             string, STATE_OFFSET(&state, last),  | 
930  | 0  |             STATE_OFFSET(&state, state.start)  | 
931  | 0  |             );  | 
932  | 0  |         if (!item)  | 
933  | 0  |             goto error;  | 
934  | 0  |         status = PyList_Append(list, item);  | 
935  | 0  |         Py_DECREF(item);  | 
936  | 0  |         if (status < 0)  | 
937  | 0  |             goto error;  | 
938  |  |  | 
939  |  |         /* add groups (if any) */  | 
940  | 0  |         for (i = 0; i < self->groups; i++) { | 
941  | 0  |             item = state_getslice(&state, i+1, string, 0);  | 
942  | 0  |             if (!item)  | 
943  | 0  |                 goto error;  | 
944  | 0  |             status = PyList_Append(list, item);  | 
945  | 0  |             Py_DECREF(item);  | 
946  | 0  |             if (status < 0)  | 
947  | 0  |                 goto error;  | 
948  | 0  |         }  | 
949  |  |  | 
950  | 0  |         n = n + 1;  | 
951  | 0  |         state.must_advance = (state.ptr == state.start);  | 
952  | 0  |         last = state.start = state.ptr;  | 
953  |  | 
  | 
954  | 0  |     }  | 
955  |  |  | 
956  |  |     /* get segment following last match (even if empty) */  | 
957  | 0  |     item = getslice(state.isbytes, state.beginning,  | 
958  | 0  |         string, STATE_OFFSET(&state, last), state.endpos  | 
959  | 0  |         );  | 
960  | 0  |     if (!item)  | 
961  | 0  |         goto error;  | 
962  | 0  |     status = PyList_Append(list, item);  | 
963  | 0  |     Py_DECREF(item);  | 
964  | 0  |     if (status < 0)  | 
965  | 0  |         goto error;  | 
966  |  |  | 
967  | 0  |     state_fini(&state);  | 
968  | 0  |     return list;  | 
969  |  |  | 
970  | 0  | error:  | 
971  | 0  |     Py_DECREF(list);  | 
972  | 0  |     state_fini(&state);  | 
973  | 0  |     return NULL;  | 
974  |  | 
  | 
975  | 0  | }  | 
976  |  |  | 
977  |  | static PyObject*  | 
978  |  | pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,  | 
979  |  |              Py_ssize_t count, Py_ssize_t subn)  | 
980  | 0  | { | 
981  | 0  |     SRE_STATE state;  | 
982  | 0  |     PyObject* list;  | 
983  | 0  |     PyObject* joiner;  | 
984  | 0  |     PyObject* item;  | 
985  | 0  |     PyObject* filter;  | 
986  | 0  |     PyObject* match;  | 
987  | 0  |     void* ptr;  | 
988  | 0  |     Py_ssize_t status;  | 
989  | 0  |     Py_ssize_t n;  | 
990  | 0  |     Py_ssize_t i, b, e;  | 
991  | 0  |     int isbytes, charsize;  | 
992  | 0  |     int filter_is_callable;  | 
993  | 0  |     Py_buffer view;  | 
994  |  | 
  | 
995  | 0  |     if (PyCallable_Check(ptemplate)) { | 
996  |  |         /* sub/subn takes either a function or a template */  | 
997  | 0  |         filter = ptemplate;  | 
998  | 0  |         Py_INCREF(filter);  | 
999  | 0  |         filter_is_callable = 1;  | 
1000  | 0  |     } else { | 
1001  |  |         /* if not callable, check if it's a literal string */  | 
1002  | 0  |         int literal;  | 
1003  | 0  |         view.buf = NULL;  | 
1004  | 0  |         ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);  | 
1005  | 0  |         b = charsize;  | 
1006  | 0  |         if (ptr) { | 
1007  | 0  |             if (charsize == 1)  | 
1008  | 0  |                 literal = memchr(ptr, '\\', n) == NULL;  | 
1009  | 0  |             else  | 
1010  | 0  |                 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;  | 
1011  | 0  |         } else { | 
1012  | 0  |             PyErr_Clear();  | 
1013  | 0  |             literal = 0;  | 
1014  | 0  |         }  | 
1015  | 0  |         if (view.buf)  | 
1016  | 0  |             PyBuffer_Release(&view);  | 
1017  | 0  |         if (literal) { | 
1018  | 0  |             filter = ptemplate;  | 
1019  | 0  |             Py_INCREF(filter);  | 
1020  | 0  |             filter_is_callable = 0;  | 
1021  | 0  |         } else { | 
1022  |  |             /* not a literal; hand it over to the template compiler */  | 
1023  | 0  |             filter = call(  | 
1024  | 0  |                 SRE_PY_MODULE, "_subx",  | 
1025  | 0  |                 PyTuple_Pack(2, self, ptemplate)  | 
1026  | 0  |                 );  | 
1027  | 0  |             if (!filter)  | 
1028  | 0  |                 return NULL;  | 
1029  | 0  |             filter_is_callable = PyCallable_Check(filter);  | 
1030  | 0  |         }  | 
1031  | 0  |     }  | 
1032  |  |  | 
1033  | 0  |     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) { | 
1034  | 0  |         Py_DECREF(filter);  | 
1035  | 0  |         return NULL;  | 
1036  | 0  |     }  | 
1037  |  |  | 
1038  | 0  |     list = PyList_New(0);  | 
1039  | 0  |     if (!list) { | 
1040  | 0  |         Py_DECREF(filter);  | 
1041  | 0  |         state_fini(&state);  | 
1042  | 0  |         return NULL;  | 
1043  | 0  |     }  | 
1044  |  |  | 
1045  | 0  |     n = i = 0;  | 
1046  |  | 
  | 
1047  | 0  |     while (!count || n < count) { | 
1048  |  | 
  | 
1049  | 0  |         state_reset(&state);  | 
1050  |  | 
  | 
1051  | 0  |         state.ptr = state.start;  | 
1052  |  | 
  | 
1053  | 0  |         status = sre_search(&state, PatternObject_GetCode(self));  | 
1054  | 0  |         if (PyErr_Occurred())  | 
1055  | 0  |             goto error;  | 
1056  |  |  | 
1057  | 0  |         if (status <= 0) { | 
1058  | 0  |             if (status == 0)  | 
1059  | 0  |                 break;  | 
1060  | 0  |             pattern_error(status);  | 
1061  | 0  |             goto error;  | 
1062  | 0  |         }  | 
1063  |  |  | 
1064  | 0  |         b = STATE_OFFSET(&state, state.start);  | 
1065  | 0  |         e = STATE_OFFSET(&state, state.ptr);  | 
1066  |  | 
  | 
1067  | 0  |         if (i < b) { | 
1068  |  |             /* get segment before this match */  | 
1069  | 0  |             item = getslice(state.isbytes, state.beginning,  | 
1070  | 0  |                 string, i, b);  | 
1071  | 0  |             if (!item)  | 
1072  | 0  |                 goto error;  | 
1073  | 0  |             status = PyList_Append(list, item);  | 
1074  | 0  |             Py_DECREF(item);  | 
1075  | 0  |             if (status < 0)  | 
1076  | 0  |                 goto error;  | 
1077  |  | 
  | 
1078  | 0  |         }  | 
1079  |  |  | 
1080  | 0  |         if (filter_is_callable) { | 
1081  |  |             /* pass match object through filter */  | 
1082  | 0  |             match = pattern_new_match(self, &state, 1);  | 
1083  | 0  |             if (!match)  | 
1084  | 0  |                 goto error;  | 
1085  | 0  |             item = PyObject_CallFunctionObjArgs(filter, match, NULL);  | 
1086  | 0  |             Py_DECREF(match);  | 
1087  | 0  |             if (!item)  | 
1088  | 0  |                 goto error;  | 
1089  | 0  |         } else { | 
1090  |  |             /* filter is literal string */  | 
1091  | 0  |             item = filter;  | 
1092  | 0  |             Py_INCREF(item);  | 
1093  | 0  |         }  | 
1094  |  |  | 
1095  |  |         /* add to list */  | 
1096  | 0  |         if (item != Py_None) { | 
1097  | 0  |             status = PyList_Append(list, item);  | 
1098  | 0  |             Py_DECREF(item);  | 
1099  | 0  |             if (status < 0)  | 
1100  | 0  |                 goto error;  | 
1101  | 0  |         }  | 
1102  |  |  | 
1103  | 0  |         i = e;  | 
1104  | 0  |         n = n + 1;  | 
1105  | 0  |         state.must_advance = (state.ptr == state.start);  | 
1106  | 0  |         state.start = state.ptr;  | 
1107  | 0  |     }  | 
1108  |  |  | 
1109  |  |     /* get segment following last match */  | 
1110  | 0  |     if (i < state.endpos) { | 
1111  | 0  |         item = getslice(state.isbytes, state.beginning,  | 
1112  | 0  |                         string, i, state.endpos);  | 
1113  | 0  |         if (!item)  | 
1114  | 0  |             goto error;  | 
1115  | 0  |         status = PyList_Append(list, item);  | 
1116  | 0  |         Py_DECREF(item);  | 
1117  | 0  |         if (status < 0)  | 
1118  | 0  |             goto error;  | 
1119  | 0  |     }  | 
1120  |  |  | 
1121  | 0  |     state_fini(&state);  | 
1122  |  | 
  | 
1123  | 0  |     Py_DECREF(filter);  | 
1124  |  |  | 
1125  |  |     /* convert list to single string (also removes list) */  | 
1126  | 0  |     joiner = getslice(state.isbytes, state.beginning, string, 0, 0);  | 
1127  | 0  |     if (!joiner) { | 
1128  | 0  |         Py_DECREF(list);  | 
1129  | 0  |         return NULL;  | 
1130  | 0  |     }  | 
1131  | 0  |     if (PyList_GET_SIZE(list) == 0) { | 
1132  | 0  |         Py_DECREF(list);  | 
1133  | 0  |         item = joiner;  | 
1134  | 0  |     }  | 
1135  | 0  |     else { | 
1136  | 0  |         if (state.isbytes)  | 
1137  | 0  |             item = _PyBytes_Join(joiner, list);  | 
1138  | 0  |         else  | 
1139  | 0  |             item = PyUnicode_Join(joiner, list);  | 
1140  | 0  |         Py_DECREF(joiner);  | 
1141  | 0  |         Py_DECREF(list);  | 
1142  | 0  |         if (!item)  | 
1143  | 0  |             return NULL;  | 
1144  | 0  |     }  | 
1145  |  |  | 
1146  | 0  |     if (subn)  | 
1147  | 0  |         return Py_BuildValue("Nn", item, n); | 
1148  |  |  | 
1149  | 0  |     return item;  | 
1150  |  |  | 
1151  | 0  | error:  | 
1152  | 0  |     Py_DECREF(list);  | 
1153  | 0  |     state_fini(&state);  | 
1154  | 0  |     Py_DECREF(filter);  | 
1155  | 0  |     return NULL;  | 
1156  |  | 
  | 
1157  | 0  | }  | 
1158  |  |  | 
1159  |  | /*[clinic input]  | 
1160  |  | _sre.SRE_Pattern.sub  | 
1161  |  |  | 
1162  |  |     repl: object  | 
1163  |  |     string: object  | 
1164  |  |     count: Py_ssize_t = 0  | 
1165  |  |  | 
1166  |  | Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.  | 
1167  |  | [clinic start generated code]*/  | 
1168  |  |  | 
1169  |  | static PyObject *  | 
1170  |  | _sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,  | 
1171  |  |                           PyObject *string, Py_ssize_t count)  | 
1172  |  | /*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/  | 
1173  | 0  | { | 
1174  | 0  |     return pattern_subx(self, repl, string, count, 0);  | 
1175  | 0  | }  | 
1176  |  |  | 
1177  |  | /*[clinic input]  | 
1178  |  | _sre.SRE_Pattern.subn  | 
1179  |  |  | 
1180  |  |     repl: object  | 
1181  |  |     string: object  | 
1182  |  |     count: Py_ssize_t = 0  | 
1183  |  |  | 
1184  |  | Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.  | 
1185  |  | [clinic start generated code]*/  | 
1186  |  |  | 
1187  |  | static PyObject *  | 
1188  |  | _sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,  | 
1189  |  |                            PyObject *string, Py_ssize_t count)  | 
1190  |  | /*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/  | 
1191  | 0  | { | 
1192  | 0  |     return pattern_subx(self, repl, string, count, 1);  | 
1193  | 0  | }  | 
1194  |  |  | 
1195  |  | /*[clinic input]  | 
1196  |  | _sre.SRE_Pattern.__copy__  | 
1197  |  |  | 
1198  |  | [clinic start generated code]*/  | 
1199  |  |  | 
1200  |  | static PyObject *  | 
1201  |  | _sre_SRE_Pattern___copy___impl(PatternObject *self)  | 
1202  |  | /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/  | 
1203  | 0  | { | 
1204  | 0  |     Py_INCREF(self);  | 
1205  | 0  |     return (PyObject *)self;  | 
1206  | 0  | }  | 
1207  |  |  | 
1208  |  | /*[clinic input]  | 
1209  |  | _sre.SRE_Pattern.__deepcopy__  | 
1210  |  |  | 
1211  |  |     memo: object  | 
1212  |  |     /  | 
1213  |  |  | 
1214  |  | [clinic start generated code]*/  | 
1215  |  |  | 
1216  |  | static PyObject *  | 
1217  |  | _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)  | 
1218  |  | /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/  | 
1219  | 0  | { | 
1220  | 0  |     Py_INCREF(self);  | 
1221  | 0  |     return (PyObject *)self;  | 
1222  | 0  | }  | 
1223  |  |  | 
1224  |  | static PyObject *  | 
1225  |  | pattern_repr(PatternObject *obj)  | 
1226  | 0  | { | 
1227  | 0  |     static const struct { | 
1228  | 0  |         const char *name;  | 
1229  | 0  |         int value;  | 
1230  | 0  |     } flag_names[] = { | 
1231  | 0  |         {"re.TEMPLATE", SRE_FLAG_TEMPLATE}, | 
1232  | 0  |         {"re.IGNORECASE", SRE_FLAG_IGNORECASE}, | 
1233  | 0  |         {"re.LOCALE", SRE_FLAG_LOCALE}, | 
1234  | 0  |         {"re.MULTILINE", SRE_FLAG_MULTILINE}, | 
1235  | 0  |         {"re.DOTALL", SRE_FLAG_DOTALL}, | 
1236  | 0  |         {"re.UNICODE", SRE_FLAG_UNICODE}, | 
1237  | 0  |         {"re.VERBOSE", SRE_FLAG_VERBOSE}, | 
1238  | 0  |         {"re.DEBUG", SRE_FLAG_DEBUG}, | 
1239  | 0  |         {"re.ASCII", SRE_FLAG_ASCII}, | 
1240  | 0  |     };  | 
1241  | 0  |     PyObject *result = NULL;  | 
1242  | 0  |     PyObject *flag_items;  | 
1243  | 0  |     size_t i;  | 
1244  | 0  |     int flags = obj->flags;  | 
1245  |  |  | 
1246  |  |     /* Omit re.UNICODE for valid string patterns. */  | 
1247  | 0  |     if (obj->isbytes == 0 &&  | 
1248  | 0  |         (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==  | 
1249  | 0  |          SRE_FLAG_UNICODE)  | 
1250  | 0  |         flags &= ~SRE_FLAG_UNICODE;  | 
1251  |  | 
  | 
1252  | 0  |     flag_items = PyList_New(0);  | 
1253  | 0  |     if (!flag_items)  | 
1254  | 0  |         return NULL;  | 
1255  |  |  | 
1256  | 0  |     for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) { | 
1257  | 0  |         if (flags & flag_names[i].value) { | 
1258  | 0  |             PyObject *item = PyUnicode_FromString(flag_names[i].name);  | 
1259  | 0  |             if (!item)  | 
1260  | 0  |                 goto done;  | 
1261  |  |  | 
1262  | 0  |             if (PyList_Append(flag_items, item) < 0) { | 
1263  | 0  |                 Py_DECREF(item);  | 
1264  | 0  |                 goto done;  | 
1265  | 0  |             }  | 
1266  | 0  |             Py_DECREF(item);  | 
1267  | 0  |             flags &= ~flag_names[i].value;  | 
1268  | 0  |         }  | 
1269  | 0  |     }  | 
1270  | 0  |     if (flags) { | 
1271  | 0  |         PyObject *item = PyUnicode_FromFormat("0x%x", flags); | 
1272  | 0  |         if (!item)  | 
1273  | 0  |             goto done;  | 
1274  |  |  | 
1275  | 0  |         if (PyList_Append(flag_items, item) < 0) { | 
1276  | 0  |             Py_DECREF(item);  | 
1277  | 0  |             goto done;  | 
1278  | 0  |         }  | 
1279  | 0  |         Py_DECREF(item);  | 
1280  | 0  |     }  | 
1281  |  |  | 
1282  | 0  |     if (PyList_Size(flag_items) > 0) { | 
1283  | 0  |         PyObject *flags_result;  | 
1284  | 0  |         PyObject *sep = PyUnicode_FromString("|"); | 
1285  | 0  |         if (!sep)  | 
1286  | 0  |             goto done;  | 
1287  | 0  |         flags_result = PyUnicode_Join(sep, flag_items);  | 
1288  | 0  |         Py_DECREF(sep);  | 
1289  | 0  |         if (!flags_result)  | 
1290  | 0  |             goto done;  | 
1291  | 0  |         result = PyUnicode_FromFormat("re.compile(%.200R, %S)", | 
1292  | 0  |                                       obj->pattern, flags_result);  | 
1293  | 0  |         Py_DECREF(flags_result);  | 
1294  | 0  |     }  | 
1295  | 0  |     else { | 
1296  | 0  |         result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern); | 
1297  | 0  |     }  | 
1298  |  |  | 
1299  | 0  | done:  | 
1300  | 0  |     Py_DECREF(flag_items);  | 
1301  | 0  |     return result;  | 
1302  | 0  | }  | 
1303  |  |  | 
1304  |  | PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");  | 
1305  |  |  | 
1306  |  | /* PatternObject's 'groupindex' method. */  | 
1307  |  | static PyObject *  | 
1308  |  | pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))  | 
1309  | 0  | { | 
1310  | 0  |     if (self->groupindex == NULL)  | 
1311  | 0  |         return PyDict_New();  | 
1312  | 0  |     return PyDictProxy_New(self->groupindex);  | 
1313  | 0  | }  | 
1314  |  |  | 
1315  |  | static int _validate(PatternObject *self); /* Forward */  | 
1316  |  |  | 
1317  |  | /*[clinic input]  | 
1318  |  | _sre.compile  | 
1319  |  |  | 
1320  |  |     pattern: object  | 
1321  |  |     flags: int  | 
1322  |  |     code: object(subclass_of='&PyList_Type')  | 
1323  |  |     groups: Py_ssize_t  | 
1324  |  |     groupindex: object(subclass_of='&PyDict_Type')  | 
1325  |  |     indexgroup: object(subclass_of='&PyTuple_Type')  | 
1326  |  |  | 
1327  |  | [clinic start generated code]*/  | 
1328  |  |  | 
1329  |  | static PyObject *  | 
1330  |  | _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,  | 
1331  |  |                   PyObject *code, Py_ssize_t groups, PyObject *groupindex,  | 
1332  |  |                   PyObject *indexgroup)  | 
1333  |  | /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/  | 
1334  | 8  | { | 
1335  |  |     /* "compile" pattern descriptor to pattern object */  | 
1336  |  |  | 
1337  | 8  |     PatternObject* self;  | 
1338  | 8  |     Py_ssize_t i, n;  | 
1339  |  |  | 
1340  | 8  |     n = PyList_GET_SIZE(code);  | 
1341  |  |     /* coverity[ampersand_in_size] */  | 
1342  | 8  |     self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);  | 
1343  | 8  |     if (!self)  | 
1344  | 0  |         return NULL;  | 
1345  | 8  |     self->weakreflist = NULL;  | 
1346  | 8  |     self->pattern = NULL;  | 
1347  | 8  |     self->groupindex = NULL;  | 
1348  | 8  |     self->indexgroup = NULL;  | 
1349  |  |  | 
1350  | 8  |     self->codesize = n;  | 
1351  |  |  | 
1352  | 792  |     for (i = 0; i < n; i++) { | 
1353  | 784  |         PyObject *o = PyList_GET_ITEM(code, i);  | 
1354  | 784  |         unsigned long value = PyLong_AsUnsignedLong(o);  | 
1355  | 784  |         self->code[i] = (SRE_CODE) value;  | 
1356  | 784  |         if ((unsigned long) self->code[i] != value) { | 
1357  | 0  |             PyErr_SetString(PyExc_OverflowError,  | 
1358  | 0  |                             "regular expression code size limit exceeded");  | 
1359  | 0  |             break;  | 
1360  | 0  |         }  | 
1361  | 784  |     }  | 
1362  |  |  | 
1363  | 8  |     if (PyErr_Occurred()) { | 
1364  | 0  |         Py_DECREF(self);  | 
1365  | 0  |         return NULL;  | 
1366  | 0  |     }  | 
1367  |  |  | 
1368  | 8  |     if (pattern == Py_None) { | 
1369  | 0  |         self->isbytes = -1;  | 
1370  | 0  |     }  | 
1371  | 8  |     else { | 
1372  | 8  |         Py_ssize_t p_length;  | 
1373  | 8  |         int charsize;  | 
1374  | 8  |         Py_buffer view;  | 
1375  | 8  |         view.buf = NULL;  | 
1376  | 8  |         if (!getstring(pattern, &p_length, &self->isbytes,  | 
1377  | 8  |                        &charsize, &view)) { | 
1378  | 0  |             Py_DECREF(self);  | 
1379  | 0  |             return NULL;  | 
1380  | 0  |         }  | 
1381  | 8  |         if (view.buf)  | 
1382  | 1  |             PyBuffer_Release(&view);  | 
1383  | 8  |     }  | 
1384  |  |  | 
1385  | 8  |     Py_INCREF(pattern);  | 
1386  | 8  |     self->pattern = pattern;  | 
1387  |  |  | 
1388  | 8  |     self->flags = flags;  | 
1389  |  |  | 
1390  | 8  |     self->groups = groups;  | 
1391  |  |  | 
1392  | 8  |     if (PyDict_GET_SIZE(groupindex) > 0) { | 
1393  | 1  |         Py_INCREF(groupindex);  | 
1394  | 1  |         self->groupindex = groupindex;  | 
1395  | 1  |         if (PyTuple_GET_SIZE(indexgroup) > 0) { | 
1396  | 1  |             Py_INCREF(indexgroup);  | 
1397  | 1  |             self->indexgroup = indexgroup;  | 
1398  | 1  |         }  | 
1399  | 1  |     }  | 
1400  |  |  | 
1401  | 8  |     if (!_validate(self)) { | 
1402  | 0  |         Py_DECREF(self);  | 
1403  | 0  |         return NULL;  | 
1404  | 0  |     }  | 
1405  |  |  | 
1406  | 8  |     return (PyObject*) self;  | 
1407  | 8  | }  | 
1408  |  |  | 
1409  |  | /* -------------------------------------------------------------------- */  | 
1410  |  | /* Code validation */  | 
1411  |  |  | 
1412  |  | /* To learn more about this code, have a look at the _compile() function in  | 
1413  |  |    Lib/sre_compile.py.  The validation functions below check the code array  | 
1414  |  |    for conformance with the code patterns generated there.  | 
1415  |  |  | 
1416  |  |    The nice thing about the generated code is that it is position-independent:  | 
1417  |  |    all jumps are relative jumps forward.  Also, jumps don't cross each other:  | 
1418  |  |    the target of a later jump is always earlier than the target of an earlier  | 
1419  |  |    jump.  IOW, this is okay:  | 
1420  |  |  | 
1421  |  |    J---------J-------T--------T  | 
1422  |  |     \         \_____/        /  | 
1423  |  |      \______________________/  | 
1424  |  |  | 
1425  |  |    but this is not:  | 
1426  |  |  | 
1427  |  |    J---------J-------T--------T  | 
1428  |  |     \_________\_____/        /  | 
1429  |  |                \____________/  | 
1430  |  |  | 
1431  |  |    It also helps that SRE_CODE is always an unsigned type.  | 
1432  |  | */  | 
1433  |  |  | 
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf argument list, e.g. VTRACE(("%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the enclosing function: `code`
   (current position), `end` (one past the last code word) and the
   destination `op`, `arg` or `skip`.  Each one FAILs when `code` has
   already reached `end`.

   GET_OP and GET_ARG store the current word and advance `code` past it.

   GET_SKIP_ADJ(adj) stores the current word in `skip`, FAILs if
   skip - adj is larger than the number of words from `code` to `end`,
   and only then advances `code` by one.  GET_SKIP is GET_SKIP_ADJ(0). */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1474  |  |  | 
1475  |  | static int  | 
1476  |  | _validate_charset(SRE_CODE *code, SRE_CODE *end)  | 
1477  | 26  | { | 
1478  |  |     /* Some variables are manipulated by the macros above */  | 
1479  | 26  |     SRE_CODE op;  | 
1480  | 26  |     SRE_CODE arg;  | 
1481  | 26  |     SRE_CODE offset;  | 
1482  | 26  |     int i;  | 
1483  |  |  | 
1484  | 59  |     while (code < end) { | 
1485  | 33  |         GET_OP;  | 
1486  | 33  |         switch (op) { | 
1487  |  |  | 
1488  | 0  |         case SRE_OP_NEGATE:  | 
1489  | 0  |             break;  | 
1490  |  |  | 
1491  | 9  |         case SRE_OP_LITERAL:  | 
1492  | 9  |             GET_ARG;  | 
1493  | 9  |             break;  | 
1494  |  |  | 
1495  | 9  |         case SRE_OP_RANGE:  | 
1496  | 4  |         case SRE_OP_RANGE_UNI_IGNORE:  | 
1497  | 4  |             GET_ARG;  | 
1498  | 4  |             GET_ARG;  | 
1499  | 4  |             break;  | 
1500  |  |  | 
1501  | 7  |         case SRE_OP_CHARSET:  | 
1502  | 7  |             offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */  | 
1503  | 7  |             if (offset > (uintptr_t)(end - code))  | 
1504  | 0  |                 FAIL;  | 
1505  | 7  |             code += offset;  | 
1506  | 7  |             break;  | 
1507  |  |  | 
1508  | 2  |         case SRE_OP_BIGCHARSET:  | 
1509  | 2  |             GET_ARG; /* Number of blocks */  | 
1510  | 2  |             offset = 256/sizeof(SRE_CODE); /* 256-byte table */  | 
1511  | 2  |             if (offset > (uintptr_t)(end - code))  | 
1512  | 0  |                 FAIL;  | 
1513  |  |             /* Make sure that each byte points to a valid block */  | 
1514  | 514  |             for (i = 0; i < 256; i++) { | 
1515  | 512  |                 if (((unsigned char *)code)[i] >= arg)  | 
1516  | 0  |                     FAIL;  | 
1517  | 512  |             }  | 
1518  | 2  |             code += offset;  | 
1519  | 2  |             offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */  | 
1520  | 2  |             if (offset > (uintptr_t)(end - code))  | 
1521  | 0  |                 FAIL;  | 
1522  | 2  |             code += offset;  | 
1523  | 2  |             break;  | 
1524  |  |  | 
1525  | 11  |         case SRE_OP_CATEGORY:  | 
1526  | 11  |             GET_ARG;  | 
1527  | 11  |             switch (arg) { | 
1528  | 0  |             case SRE_CATEGORY_DIGIT:  | 
1529  | 0  |             case SRE_CATEGORY_NOT_DIGIT:  | 
1530  | 0  |             case SRE_CATEGORY_SPACE:  | 
1531  | 0  |             case SRE_CATEGORY_NOT_SPACE:  | 
1532  | 1  |             case SRE_CATEGORY_WORD:  | 
1533  | 1  |             case SRE_CATEGORY_NOT_WORD:  | 
1534  | 1  |             case SRE_CATEGORY_LINEBREAK:  | 
1535  | 1  |             case SRE_CATEGORY_NOT_LINEBREAK:  | 
1536  | 1  |             case SRE_CATEGORY_LOC_WORD:  | 
1537  | 1  |             case SRE_CATEGORY_LOC_NOT_WORD:  | 
1538  | 6  |             case SRE_CATEGORY_UNI_DIGIT:  | 
1539  | 6  |             case SRE_CATEGORY_UNI_NOT_DIGIT:  | 
1540  | 6  |             case SRE_CATEGORY_UNI_SPACE:  | 
1541  | 6  |             case SRE_CATEGORY_UNI_NOT_SPACE:  | 
1542  | 11  |             case SRE_CATEGORY_UNI_WORD:  | 
1543  | 11  |             case SRE_CATEGORY_UNI_NOT_WORD:  | 
1544  | 11  |             case SRE_CATEGORY_UNI_LINEBREAK:  | 
1545  | 11  |             case SRE_CATEGORY_UNI_NOT_LINEBREAK:  | 
1546  | 11  |                 break;  | 
1547  | 0  |             default:  | 
1548  | 0  |                 FAIL;  | 
1549  | 11  |             }  | 
1550  | 11  |             break;  | 
1551  |  |  | 
1552  | 11  |         default:  | 
1553  | 0  |             FAIL;  | 
1554  |  |  | 
1555  | 33  |         }  | 
1556  | 33  |     }  | 
1557  |  |  | 
1558  | 26  |     return 1;  | 
1559  | 26  | }  | 
1560  |  |  | 
/* Validate the compiled pattern code in the half-open range [code, end).
 *
 * Walks the code one opcode at a time, checks each opcode's operands, and
 * recurses into the bodies of BRANCH, REPEAT, REPEAT_ONE/MIN_REPEAT_ONE,
 * GROUPREF_EXISTS and ASSERT/ASSERT_NOT.  `groups` bounds the arguments of
 * the MARK and GROUPREF* opcodes.
 *
 * Returns 1 when the whole range was accepted.  Rejection goes through the
 * FAIL macro, defined earlier in this file; callers test the result with
 * `!`, so FAIL presumably returns 0 -- confirm against the macro.
 *
 * GET_OP, GET_ARG, GET_SKIP and GET_SKIP_ADJ are likewise defined earlier;
 * judging from their use here they load the next code word into `op`, `arg`
 * or `skip` and move `code` forward -- confirm against the macros.
 */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
    /* Some variables are manipulated by the macros above: the locals must
       keep these exact names (op, arg, skip) for the macros to work. */
    SRE_CODE op;
    SRE_CODE arg;
    SRE_CODE skip;

    VTRACE(("code=%p, end=%p\n", code, end));

    /* An inverted range can be produced by the "end - N" arithmetic of a
       recursive call; reject it up front. */
    if (code > end)
        FAIL;

    while (code < end) {
        GET_OP;
        switch (op) {

        case SRE_OP_MARK:
            /* We don't check whether marks are properly nested; the
               sre_match() code is robust even if they don't, and the worst
               you can get is nonsensical match results. */
            GET_ARG;
            /* Only the mark number itself is bounded, by 2*groups+1 */
            if (arg > 2 * (size_t)groups + 1) {
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
                FAIL;
            }
            break;

        case SRE_OP_LITERAL:
        case SRE_OP_NOT_LITERAL:
        case SRE_OP_LITERAL_IGNORE:
        case SRE_OP_NOT_LITERAL_IGNORE:
        case SRE_OP_LITERAL_UNI_IGNORE:
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
        case SRE_OP_LITERAL_LOC_IGNORE:
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
            GET_ARG;
            /* The arg is just a character, nothing to check */
            break;

        case SRE_OP_SUCCESS:
        case SRE_OP_FAILURE:
            /* Nothing to check; these normally end the matching process */
            break;

        case SRE_OP_AT:
            GET_ARG;
            /* The operand must be one of the known position codes */
            switch (arg) {
            case SRE_AT_BEGINNING:
            case SRE_AT_BEGINNING_STRING:
            case SRE_AT_BEGINNING_LINE:
            case SRE_AT_END:
            case SRE_AT_END_LINE:
            case SRE_AT_END_STRING:
            case SRE_AT_BOUNDARY:
            case SRE_AT_NON_BOUNDARY:
            case SRE_AT_LOC_BOUNDARY:
            case SRE_AT_LOC_NON_BOUNDARY:
            case SRE_AT_UNI_BOUNDARY:
            case SRE_AT_UNI_NON_BOUNDARY:
                break;
            default:
                FAIL;
            }
            break;

        case SRE_OP_ANY:
        case SRE_OP_ANY_ALL:
            /* These have no operands */
            break;

        case SRE_OP_IN:
        case SRE_OP_IN_IGNORE:
        case SRE_OP_IN_UNI_IGNORE:
        case SRE_OP_IN_LOC_IGNORE:
            GET_SKIP;
            /* Stop 1 before the end; we check the FAILURE below */
            if (!_validate_charset(code, code+skip-2))
                FAIL;
            if (code[skip-2] != SRE_OP_FAILURE)
                FAIL;
            code += skip-1;
            break;

        case SRE_OP_INFO:
            {
                /* A minimal info field is
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
                   more follows. */
                SRE_CODE flags, i;
                SRE_CODE *newcode;
                GET_SKIP;
                /* newcode is where the info block must end */
                newcode = code+skip-1;
                GET_ARG; flags = arg;
                /* min and max are read but not inspected */
                GET_ARG;
                GET_ARG;
                /* Check that only valid flags are present */
                if ((flags & ~(SRE_INFO_PREFIX |
                               SRE_INFO_LITERAL |
                               SRE_INFO_CHARSET)) != 0)
                    FAIL;
                /* PREFIX and CHARSET are mutually exclusive */
                if ((flags & SRE_INFO_PREFIX) &&
                    (flags & SRE_INFO_CHARSET))
                    FAIL;
                /* LITERAL implies PREFIX */
                if ((flags & SRE_INFO_LITERAL) &&
                    !(flags & SRE_INFO_PREFIX))
                    FAIL;
                /* Validate the prefix */
                if (flags & SRE_INFO_PREFIX) {
                    SRE_CODE prefix_len;
                    GET_ARG; prefix_len = arg;
                    /* A second word follows the length; its value is not
                       checked here */
                    GET_ARG;
                    /* Here comes the prefix string */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    code += prefix_len;
                    /* And here comes the overlap table */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    /* Each overlap value should be < prefix_len */
                    for (i = 0; i < prefix_len; i++) {
                        if (code[i] >= prefix_len)
                            FAIL;
                    }
                    code += prefix_len;
                }
                /* Validate the charset */
                if (flags & SRE_INFO_CHARSET) {
                    if (!_validate_charset(code, newcode-1))
                        FAIL;
                    if (newcode[-1] != SRE_OP_FAILURE)
                        FAIL;
                    code = newcode;
                }
                /* Without a charset the block must end exactly at newcode */
                else if (code != newcode) {
                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
                    FAIL;
                }
            }
            break;

        case SRE_OP_BRANCH:
            {
                /* Common jump target of all alternatives; set by the first */
                SRE_CODE *target = NULL;
                for (;;) {
                    GET_SKIP;
                    /* A zero skip ends the list of alternatives */
                    if (skip == 0)
                        break;
                    /* Stop 2 before the end; we check the JUMP below */
                    if (!_validate_inner(code, code+skip-3, groups))
                        FAIL;
                    code += skip-3;
                    /* Check that it ends with a JUMP, and that each JUMP
                       has the same target */
                    GET_OP;
                    if (op != SRE_OP_JUMP)
                        FAIL;
                    GET_SKIP;
                    if (target == NULL)
                        target = code+skip-1;
                    else if (code+skip-1 != target)
                        FAIL;
                }
            }
            break;

        case SRE_OP_REPEAT_ONE:
        case SRE_OP_MIN_REPEAT_ONE:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                /* Validate the repeated item, then require a closing
                   SUCCESS opcode right after it */
                if (!_validate_inner(code, code+skip-4, groups))
                    FAIL;
                code += skip-4;
                GET_OP;
                if (op != SRE_OP_SUCCESS)
                    FAIL;
            }
            break;

        case SRE_OP_REPEAT:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                /* Validate the repeated body, then require MAX_UNTIL or
                   MIN_UNTIL right after it */
                if (!_validate_inner(code, code+skip-3, groups))
                    FAIL;
                code += skip-3;
                GET_OP;
                if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
                    FAIL;
            }
            break;

        case SRE_OP_GROUPREF:
        case SRE_OP_GROUPREF_IGNORE:
        case SRE_OP_GROUPREF_UNI_IGNORE:
        case SRE_OP_GROUPREF_LOC_IGNORE:
            GET_ARG;
            /* The referenced group number must be below `groups` */
            if (arg >= (size_t)groups)
                FAIL;
            break;

        case SRE_OP_GROUPREF_EXISTS:
            /* The regex syntax for this is: '(?(group)then|else)', where
               'group' is either an integer group number or a group name,
               'then' and 'else' are sub-regexes, and 'else' is optional. */
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            GET_SKIP_ADJ(1);
            code--; /* The skip is relative to the first arg! */
            /* There are two possibilities here: if there is both a 'then'
               part and an 'else' part, the generated code looks like:

               GROUPREF_EXISTS
               <group>
               <skipyes>
               ...then part...
               JUMP
               <skipno>
               (<skipyes> jumps here)
               ...else part...
               (<skipno> jumps here)

               If there is only a 'then' part, it looks like:

               GROUPREF_EXISTS
               <group>
               <skip>
               ...then part...
               (<skip> jumps here)

               There is no direct way to decide which it is, and we don't want
               to allow arbitrary jumps anywhere in the code; so we just look
               for a JUMP opcode preceding our skip target.
            */
            if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
                code[skip-3] == SRE_OP_JUMP)
            {
                VTRACE(("both then and else parts present\n"));
                if (!_validate_inner(code+1, code+skip-3, groups))
                    FAIL;
                code += skip-2; /* Position after JUMP, at <skipno> */
                GET_SKIP;
                if (!_validate_inner(code, code+skip-1, groups))
                    FAIL;
                code += skip-1;
            }
            else {
                VTRACE(("only a then part present\n"));
                if (!_validate_inner(code+1, code+skip-1, groups))
                    FAIL;
                code += skip-1;
            }
            break;

        case SRE_OP_ASSERT:
        case SRE_OP_ASSERT_NOT:
            GET_SKIP;
            GET_ARG; /* 0 for lookahead, width for lookbehind */
            code--; /* Back up over arg to simplify math below */
            if (arg & 0x80000000)
                FAIL; /* Width too large */
            /* Stop 1 before the end; we check the SUCCESS below */
            if (!_validate_inner(code+1, code+skip-2, groups))
                FAIL;
            code += skip-2;
            GET_OP;
            if (op != SRE_OP_SUCCESS)
                FAIL;
            break;

        default:
            /* Any opcode not handled above is rejected */
            FAIL;

        }
    }

    VTRACE(("okay\n"));
    return 1;
}
1857  |  |  | 
1858  |  | static int  | 
1859  |  | _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)  | 
1860  | 8  | { | 
1861  | 8  |     if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||  | 
1862  | 8  |         code >= end || end[-1] != SRE_OP_SUCCESS)  | 
1863  | 0  |         FAIL;  | 
1864  | 8  |     return _validate_inner(code, end-1, groups);  | 
1865  | 8  | }  | 
1866  |  |  | 
1867  |  | static int  | 
1868  |  | _validate(PatternObject *self)  | 
1869  | 8  | { | 
1870  | 8  |     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))  | 
1871  | 0  |     { | 
1872  | 0  |         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");  | 
1873  | 0  |         return 0;  | 
1874  | 0  |     }  | 
1875  | 8  |     else  | 
1876  | 8  |         VTRACE(("Success!\n")); | 
1877  | 8  |     return 1;  | 
1878  | 8  | }  | 
1879  |  |  | 
1880  |  | /* -------------------------------------------------------------------- */  | 
1881  |  | /* match methods */  | 
1882  |  |  | 
1883  |  | static void  | 
1884  |  | match_dealloc(MatchObject* self)  | 
1885  | 4  | { | 
1886  | 4  |     Py_XDECREF(self->regs);  | 
1887  | 4  |     Py_XDECREF(self->string);  | 
1888  | 4  |     Py_DECREF(self->pattern);  | 
1889  | 4  |     PyObject_DEL(self);  | 
1890  | 4  | }  | 
1891  |  |  | 
1892  |  | static PyObject*  | 
1893  |  | match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)  | 
1894  | 0  | { | 
1895  | 0  |     Py_ssize_t length;  | 
1896  | 0  |     int isbytes, charsize;  | 
1897  | 0  |     Py_buffer view;  | 
1898  | 0  |     PyObject *result;  | 
1899  | 0  |     void* ptr;  | 
1900  | 0  |     Py_ssize_t i, j;  | 
1901  |  | 
  | 
1902  | 0  |     assert(0 <= index && index < self->groups);  | 
1903  | 0  |     index *= 2;  | 
1904  |  | 
  | 
1905  | 0  |     if (self->string == Py_None || self->mark[index] < 0) { | 
1906  |  |         /* return default value if the string or group is undefined */  | 
1907  | 0  |         Py_INCREF(def);  | 
1908  | 0  |         return def;  | 
1909  | 0  |     }  | 
1910  |  |  | 
1911  | 0  |     ptr = getstring(self->string, &length, &isbytes, &charsize, &view);  | 
1912  | 0  |     if (ptr == NULL)  | 
1913  | 0  |         return NULL;  | 
1914  |  |  | 
1915  | 0  |     i = self->mark[index];  | 
1916  | 0  |     j = self->mark[index+1];  | 
1917  | 0  |     i = Py_MIN(i, length);  | 
1918  | 0  |     j = Py_MIN(j, length);  | 
1919  | 0  |     result = getslice(isbytes, ptr, self->string, i, j);  | 
1920  | 0  |     if (isbytes && view.buf != NULL)  | 
1921  | 0  |         PyBuffer_Release(&view);  | 
1922  | 0  |     return result;  | 
1923  | 0  | }  | 
1924  |  |  | 
1925  |  | static Py_ssize_t  | 
1926  |  | match_getindex(MatchObject* self, PyObject* index)  | 
1927  | 0  | { | 
1928  | 0  |     Py_ssize_t i;  | 
1929  |  | 
  | 
1930  | 0  |     if (index == NULL)  | 
1931  |  |         /* Default value */  | 
1932  | 0  |         return 0;  | 
1933  |  |  | 
1934  | 0  |     if (PyIndex_Check(index)) { | 
1935  | 0  |         i = PyNumber_AsSsize_t(index, NULL);  | 
1936  | 0  |     }  | 
1937  | 0  |     else { | 
1938  | 0  |         i = -1;  | 
1939  |  | 
  | 
1940  | 0  |         if (self->pattern->groupindex) { | 
1941  | 0  |             index = PyDict_GetItemWithError(self->pattern->groupindex, index);  | 
1942  | 0  |             if (index && PyLong_Check(index)) { | 
1943  | 0  |                 i = PyLong_AsSsize_t(index);  | 
1944  | 0  |             }  | 
1945  | 0  |         }  | 
1946  | 0  |     }  | 
1947  | 0  |     if (i < 0 || i >= self->groups) { | 
1948  |  |         /* raise IndexError if we were given a bad group number */  | 
1949  | 0  |         if (!PyErr_Occurred()) { | 
1950  | 0  |             PyErr_SetString(PyExc_IndexError, "no such group");  | 
1951  | 0  |         }  | 
1952  | 0  |         return -1;  | 
1953  | 0  |     }  | 
1954  |  |  | 
1955  | 0  |     return i;  | 
1956  | 0  | }  | 
1957  |  |  | 
1958  |  | static PyObject*  | 
1959  |  | match_getslice(MatchObject* self, PyObject* index, PyObject* def)  | 
1960  | 0  | { | 
1961  | 0  |     Py_ssize_t i = match_getindex(self, index);  | 
1962  |  | 
  | 
1963  | 0  |     if (i < 0) { | 
1964  | 0  |         return NULL;  | 
1965  | 0  |     }  | 
1966  |  |  | 
1967  | 0  |     return match_getslice_by_index(self, i, def);  | 
1968  | 0  | }  | 
1969  |  |  | 
1970  |  | /*[clinic input]  | 
1971  |  | _sre.SRE_Match.expand  | 
1972  |  |  | 
1973  |  |     template: object  | 
1974  |  |  | 
1975  |  | Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.  | 
1976  |  | [clinic start generated code]*/  | 
1977  |  |  | 
1978  |  | static PyObject *  | 
1979  |  | _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)  | 
1980  |  | /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/  | 
1981  | 0  | { | 
1982  |  |     /* delegate to Python code */  | 
1983  | 0  |     return call(  | 
1984  | 0  |         SRE_PY_MODULE, "_expand",  | 
1985  | 0  |         PyTuple_Pack(3, self->pattern, self, template)  | 
1986  | 0  |         );  | 
1987  | 0  | }  | 
1988  |  |  | 
1989  |  | static PyObject*  | 
1990  |  | match_group(MatchObject* self, PyObject* args)  | 
1991  | 0  | { | 
1992  | 0  |     PyObject* result;  | 
1993  | 0  |     Py_ssize_t i, size;  | 
1994  |  | 
  | 
1995  | 0  |     size = PyTuple_GET_SIZE(args);  | 
1996  |  | 
  | 
1997  | 0  |     switch (size) { | 
1998  | 0  |     case 0:  | 
1999  | 0  |         result = match_getslice(self, _PyLong_Zero, Py_None);  | 
2000  | 0  |         break;  | 
2001  | 0  |     case 1:  | 
2002  | 0  |         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);  | 
2003  | 0  |         break;  | 
2004  | 0  |     default:  | 
2005  |  |         /* fetch multiple items */  | 
2006  | 0  |         result = PyTuple_New(size);  | 
2007  | 0  |         if (!result)  | 
2008  | 0  |             return NULL;  | 
2009  | 0  |         for (i = 0; i < size; i++) { | 
2010  | 0  |             PyObject* item = match_getslice(  | 
2011  | 0  |                 self, PyTuple_GET_ITEM(args, i), Py_None  | 
2012  | 0  |                 );  | 
2013  | 0  |             if (!item) { | 
2014  | 0  |                 Py_DECREF(result);  | 
2015  | 0  |                 return NULL;  | 
2016  | 0  |             }  | 
2017  | 0  |             PyTuple_SET_ITEM(result, i, item);  | 
2018  | 0  |         }  | 
2019  | 0  |         break;  | 
2020  | 0  |     }  | 
2021  | 0  |     return result;  | 
2022  | 0  | }  | 
2023  |  |  | 
2024  |  | static PyObject*  | 
2025  |  | match_getitem(MatchObject* self, PyObject* name)  | 
2026  | 0  | { | 
2027  | 0  |     return match_getslice(self, name, Py_None);  | 
2028  | 0  | }  | 
2029  |  |  | 
2030  |  | /*[clinic input]  | 
2031  |  | _sre.SRE_Match.groups  | 
2032  |  |  | 
2033  |  |     default: object = None  | 
2034  |  |         Is used for groups that did not participate in the match.  | 
2035  |  |  | 
2036  |  | Return a tuple containing all the subgroups of the match, from 1.  | 
2037  |  | [clinic start generated code]*/  | 
2038  |  |  | 
2039  |  | static PyObject *  | 
2040  |  | _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)  | 
2041  |  | /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/  | 
2042  | 0  | { | 
2043  | 0  |     PyObject* result;  | 
2044  | 0  |     Py_ssize_t index;  | 
2045  |  | 
  | 
2046  | 0  |     result = PyTuple_New(self->groups-1);  | 
2047  | 0  |     if (!result)  | 
2048  | 0  |         return NULL;  | 
2049  |  |  | 
2050  | 0  |     for (index = 1; index < self->groups; index++) { | 
2051  | 0  |         PyObject* item;  | 
2052  | 0  |         item = match_getslice_by_index(self, index, default_value);  | 
2053  | 0  |         if (!item) { | 
2054  | 0  |             Py_DECREF(result);  | 
2055  | 0  |             return NULL;  | 
2056  | 0  |         }  | 
2057  | 0  |         PyTuple_SET_ITEM(result, index-1, item);  | 
2058  | 0  |     }  | 
2059  |  |  | 
2060  | 0  |     return result;  | 
2061  | 0  | }  | 
2062  |  |  | 
2063  |  | /*[clinic input]  | 
2064  |  | _sre.SRE_Match.groupdict  | 
2065  |  |  | 
2066  |  |     default: object = None  | 
2067  |  |         Is used for groups that did not participate in the match.  | 
2068  |  |  | 
2069  |  | Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.  | 
2070  |  | [clinic start generated code]*/  | 
2071  |  |  | 
2072  |  | static PyObject *  | 
2073  |  | _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)  | 
2074  |  | /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/  | 
2075  | 0  | { | 
2076  | 0  |     PyObject *result;  | 
2077  | 0  |     PyObject *key;  | 
2078  | 0  |     PyObject *value;  | 
2079  | 0  |     Py_ssize_t pos = 0;  | 
2080  | 0  |     Py_hash_t hash;  | 
2081  |  | 
  | 
2082  | 0  |     result = PyDict_New();  | 
2083  | 0  |     if (!result || !self->pattern->groupindex)  | 
2084  | 0  |         return result;  | 
2085  |  |  | 
2086  | 0  |     while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) { | 
2087  | 0  |         int status;  | 
2088  | 0  |         Py_INCREF(key);  | 
2089  | 0  |         value = match_getslice(self, key, default_value);  | 
2090  | 0  |         if (!value) { | 
2091  | 0  |             Py_DECREF(key);  | 
2092  | 0  |             goto failed;  | 
2093  | 0  |         }  | 
2094  | 0  |         status = _PyDict_SetItem_KnownHash(result, key, value, hash);  | 
2095  | 0  |         Py_DECREF(value);  | 
2096  | 0  |         Py_DECREF(key);  | 
2097  | 0  |         if (status < 0)  | 
2098  | 0  |             goto failed;  | 
2099  | 0  |     }  | 
2100  |  |  | 
2101  | 0  |     return result;  | 
2102  |  |  | 
2103  | 0  | failed:  | 
2104  | 0  |     Py_DECREF(result);  | 
2105  | 0  |     return NULL;  | 
2106  | 0  | }  | 
2107  |  |  | 
2108  |  | /*[clinic input]  | 
2109  |  | _sre.SRE_Match.start -> Py_ssize_t  | 
2110  |  |  | 
2111  |  |     group: object(c_default="NULL") = 0  | 
2112  |  |     /  | 
2113  |  |  | 
2114  |  | Return index of the start of the substring matched by group.  | 
2115  |  | [clinic start generated code]*/  | 
2116  |  |  | 
2117  |  | static Py_ssize_t  | 
2118  |  | _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)  | 
2119  |  | /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/  | 
2120  | 0  | { | 
2121  | 0  |     Py_ssize_t index = match_getindex(self, group);  | 
2122  |  | 
  | 
2123  | 0  |     if (index < 0) { | 
2124  | 0  |         return -1;  | 
2125  | 0  |     }  | 
2126  |  |  | 
2127  |  |     /* mark is -1 if group is undefined */  | 
2128  | 0  |     return self->mark[index*2];  | 
2129  | 0  | }  | 
2130  |  |  | 
2131  |  | /*[clinic input]  | 
2132  |  | _sre.SRE_Match.end -> Py_ssize_t  | 
2133  |  |  | 
2134  |  |     group: object(c_default="NULL") = 0  | 
2135  |  |     /  | 
2136  |  |  | 
2137  |  | Return index of the end of the substring matched by group.  | 
2138  |  | [clinic start generated code]*/  | 
2139  |  |  | 
2140  |  | static Py_ssize_t  | 
2141  |  | _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)  | 
2142  |  | /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/  | 
2143  | 0  | { | 
2144  | 0  |     Py_ssize_t index = match_getindex(self, group);  | 
2145  |  | 
  | 
2146  | 0  |     if (index < 0) { | 
2147  | 0  |         return -1;  | 
2148  | 0  |     }  | 
2149  |  |  | 
2150  |  |     /* mark is -1 if group is undefined */  | 
2151  | 0  |     return self->mark[index*2+1];  | 
2152  | 0  | }  | 
2153  |  |  | 
2154  |  | LOCAL(PyObject*)  | 
2155  |  | _pair(Py_ssize_t i1, Py_ssize_t i2)  | 
2156  | 0  | { | 
2157  | 0  |     PyObject* pair;  | 
2158  | 0  |     PyObject* item;  | 
2159  |  | 
  | 
2160  | 0  |     pair = PyTuple_New(2);  | 
2161  | 0  |     if (!pair)  | 
2162  | 0  |         return NULL;  | 
2163  |  |  | 
2164  | 0  |     item = PyLong_FromSsize_t(i1);  | 
2165  | 0  |     if (!item)  | 
2166  | 0  |         goto error;  | 
2167  | 0  |     PyTuple_SET_ITEM(pair, 0, item);  | 
2168  |  | 
  | 
2169  | 0  |     item = PyLong_FromSsize_t(i2);  | 
2170  | 0  |     if (!item)  | 
2171  | 0  |         goto error;  | 
2172  | 0  |     PyTuple_SET_ITEM(pair, 1, item);  | 
2173  |  | 
  | 
2174  | 0  |     return pair;  | 
2175  |  |  | 
2176  | 0  |   error:  | 
2177  | 0  |     Py_DECREF(pair);  | 
2178  | 0  |     return NULL;  | 
2179  | 0  | }  | 
2180  |  |  | 
2181  |  | /*[clinic input]  | 
2182  |  | _sre.SRE_Match.span  | 
2183  |  |  | 
2184  |  |     group: object(c_default="NULL") = 0  | 
2185  |  |     /  | 
2186  |  |  | 
2187  |  | For match object m, return the 2-tuple (m.start(group), m.end(group)).  | 
2188  |  | [clinic start generated code]*/  | 
2189  |  |  | 
2190  |  | static PyObject *  | 
2191  |  | _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)  | 
2192  |  | /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/  | 
2193  | 0  | { | 
2194  | 0  |     Py_ssize_t index = match_getindex(self, group);  | 
2195  |  | 
  | 
2196  | 0  |     if (index < 0) { | 
2197  | 0  |         return NULL;  | 
2198  | 0  |     }  | 
2199  |  |  | 
2200  |  |     /* marks are -1 if group is undefined */  | 
2201  | 0  |     return _pair(self->mark[index*2], self->mark[index*2+1]);  | 
2202  | 0  | }  | 
2203  |  |  | 
2204  |  | static PyObject*  | 
2205  |  | match_regs(MatchObject* self)  | 
2206  | 0  | { | 
2207  | 0  |     PyObject* regs;  | 
2208  | 0  |     PyObject* item;  | 
2209  | 0  |     Py_ssize_t index;  | 
2210  |  | 
  | 
2211  | 0  |     regs = PyTuple_New(self->groups);  | 
2212  | 0  |     if (!regs)  | 
2213  | 0  |         return NULL;  | 
2214  |  |  | 
2215  | 0  |     for (index = 0; index < self->groups; index++) { | 
2216  | 0  |         item = _pair(self->mark[index*2], self->mark[index*2+1]);  | 
2217  | 0  |         if (!item) { | 
2218  | 0  |             Py_DECREF(regs);  | 
2219  | 0  |             return NULL;  | 
2220  | 0  |         }  | 
2221  | 0  |         PyTuple_SET_ITEM(regs, index, item);  | 
2222  | 0  |     }  | 
2223  |  |  | 
2224  | 0  |     Py_INCREF(regs);  | 
2225  | 0  |     self->regs = regs;  | 
2226  |  | 
  | 
2227  | 0  |     return regs;  | 
2228  | 0  | }  | 
2229  |  |  | 
2230  |  | /*[clinic input]  | 
2231  |  | _sre.SRE_Match.__copy__  | 
2232  |  |  | 
2233  |  | [clinic start generated code]*/  | 
2234  |  |  | 
2235  |  | static PyObject *  | 
2236  |  | _sre_SRE_Match___copy___impl(MatchObject *self)  | 
2237  |  | /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/  | 
2238  | 0  | { | 
2239  | 0  |     Py_INCREF(self);  | 
2240  | 0  |     return (PyObject *)self;  | 
2241  | 0  | }  | 
2242  |  |  | 
2243  |  | /*[clinic input]  | 
2244  |  | _sre.SRE_Match.__deepcopy__  | 
2245  |  |  | 
2246  |  |     memo: object  | 
2247  |  |     /  | 
2248  |  |  | 
2249  |  | [clinic start generated code]*/  | 
2250  |  |  | 
2251  |  | static PyObject *  | 
2252  |  | _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)  | 
2253  |  | /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/  | 
2254  | 0  | { | 
2255  | 0  |     Py_INCREF(self);  | 
2256  | 0  |     return (PyObject *)self;  | 
2257  | 0  | }  | 
2258  |  |  | 
2259  |  | PyDoc_STRVAR(match_doc,  | 
2260  |  | "The result of re.match() and re.search().\n\  | 
2261  |  | Match objects always have a boolean value of True.");  | 
2262  |  |  | 
2263  |  | PyDoc_STRVAR(match_group_doc,  | 
2264  |  | "group([group1, ...]) -> str or tuple.\n\  | 
2265  |  |     Return subgroup(s) of the match by indices or names.\n\  | 
2266  |  |     For 0 returns the entire match.");  | 
2267  |  |  | 
2268  |  | static PyObject *  | 
2269  |  | match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))  | 
2270  | 0  | { | 
2271  | 0  |     if (self->lastindex >= 0)  | 
2272  | 0  |         return PyLong_FromSsize_t(self->lastindex);  | 
2273  | 0  |     Py_RETURN_NONE;  | 
2274  | 0  | }  | 
2275  |  |  | 
2276  |  | static PyObject *  | 
2277  |  | match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))  | 
2278  | 0  | { | 
2279  | 0  |     if (self->pattern->indexgroup &&  | 
2280  | 0  |         self->lastindex >= 0 &&  | 
2281  | 0  |         self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))  | 
2282  | 0  |     { | 
2283  | 0  |         PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,  | 
2284  | 0  |                                             self->lastindex);  | 
2285  | 0  |         Py_INCREF(result);  | 
2286  | 0  |         return result;  | 
2287  | 0  |     }  | 
2288  | 0  |     Py_RETURN_NONE;  | 
2289  | 0  | }  | 
2290  |  |  | 
2291  |  | static PyObject *  | 
2292  |  | match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))  | 
2293  | 0  | { | 
2294  | 0  |     if (self->regs) { | 
2295  | 0  |         Py_INCREF(self->regs);  | 
2296  | 0  |         return self->regs;  | 
2297  | 0  |     } else  | 
2298  | 0  |         return match_regs(self);  | 
2299  | 0  | }  | 
2300  |  |  | 
2301  |  | static PyObject *  | 
2302  |  | match_repr(MatchObject *self)  | 
2303  | 0  | { | 
2304  | 0  |     PyObject *result;  | 
2305  | 0  |     PyObject *group0 = match_getslice_by_index(self, 0, Py_None);  | 
2306  | 0  |     if (group0 == NULL)  | 
2307  | 0  |         return NULL;  | 
2308  | 0  |     result = PyUnicode_FromFormat(  | 
2309  | 0  |             "<%s object; span=(%zd, %zd), match=%.50R>",  | 
2310  | 0  |             Py_TYPE(self)->tp_name,  | 
2311  | 0  |             self->mark[0], self->mark[1], group0);  | 
2312  | 0  |     Py_DECREF(group0);  | 
2313  | 0  |     return result;  | 
2314  | 0  | }  | 
2315  |  |  | 
2316  |  |  | 
2317  |  | static PyObject*  | 
2318  |  | pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)  | 
2319  | 6  | { | 
2320  |  |     /* create match object (from state object) */  | 
2321  |  |  | 
2322  | 6  |     MatchObject* match;  | 
2323  | 6  |     Py_ssize_t i, j;  | 
2324  | 6  |     char* base;  | 
2325  | 6  |     int n;  | 
2326  |  |  | 
2327  | 6  |     if (status > 0) { | 
2328  |  |  | 
2329  |  |         /* create match object (with room for extra group marks) */  | 
2330  |  |         /* coverity[ampersand_in_size] */  | 
2331  | 4  |         match = PyObject_NEW_VAR(MatchObject, &Match_Type,  | 
2332  | 4  |                                  2*(pattern->groups+1));  | 
2333  | 4  |         if (!match)  | 
2334  | 0  |             return NULL;  | 
2335  |  |  | 
2336  | 4  |         Py_INCREF(pattern);  | 
2337  | 4  |         match->pattern = pattern;  | 
2338  |  |  | 
2339  | 4  |         Py_INCREF(state->string);  | 
2340  | 4  |         match->string = state->string;  | 
2341  |  |  | 
2342  | 4  |         match->regs = NULL;  | 
2343  | 4  |         match->groups = pattern->groups+1;  | 
2344  |  |  | 
2345  |  |         /* fill in group slices */  | 
2346  |  |  | 
2347  | 4  |         base = (char*) state->beginning;  | 
2348  | 4  |         n = state->charsize;  | 
2349  |  |  | 
2350  | 4  |         match->mark[0] = ((char*) state->start - base) / n;  | 
2351  | 4  |         match->mark[1] = ((char*) state->ptr - base) / n;  | 
2352  |  |  | 
2353  | 10  |         for (i = j = 0; i < pattern->groups; i++, j+=2)  | 
2354  | 6  |             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { | 
2355  | 0  |                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;  | 
2356  | 0  |                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;  | 
2357  | 0  |             } else  | 
2358  | 6  |                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */  | 
2359  |  |  | 
2360  | 4  |         match->pos = state->pos;  | 
2361  | 4  |         match->endpos = state->endpos;  | 
2362  |  |  | 
2363  | 4  |         match->lastindex = state->lastindex;  | 
2364  |  |  | 
2365  | 4  |         return (PyObject*) match;  | 
2366  |  |  | 
2367  | 4  |     } else if (status == 0) { | 
2368  |  |  | 
2369  |  |         /* no match */  | 
2370  | 2  |         Py_RETURN_NONE;  | 
2371  |  |  | 
2372  | 2  |     }  | 
2373  |  |  | 
2374  |  |     /* internal error */  | 
2375  | 0  |     pattern_error(status);  | 
2376  | 0  |     return NULL;  | 
2377  | 6  | }  | 
2378  |  |  | 
2379  |  |  | 
2380  |  | /* -------------------------------------------------------------------- */  | 
2381  |  | /* scanner methods (experimental) */  | 
2382  |  |  | 
2383  |  | static void  | 
2384  |  | scanner_dealloc(ScannerObject* self)  | 
2385  | 0  | { | 
2386  | 0  |     state_fini(&self->state);  | 
2387  | 0  |     Py_XDECREF(self->pattern);  | 
2388  | 0  |     PyObject_DEL(self);  | 
2389  | 0  | }  | 
2390  |  |  | 
2391  |  | /*[clinic input]  | 
2392  |  | _sre.SRE_Scanner.match  | 
2393  |  |  | 
2394  |  | [clinic start generated code]*/  | 
2395  |  |  | 
2396  |  | static PyObject *  | 
2397  |  | _sre_SRE_Scanner_match_impl(ScannerObject *self)  | 
2398  |  | /*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/  | 
2399  | 0  | { | 
2400  | 0  |     SRE_STATE* state = &self->state;  | 
2401  | 0  |     PyObject* match;  | 
2402  | 0  |     Py_ssize_t status;  | 
2403  |  | 
  | 
2404  | 0  |     if (state->start == NULL)  | 
2405  | 0  |         Py_RETURN_NONE;  | 
2406  |  |  | 
2407  | 0  |     state_reset(state);  | 
2408  |  | 
  | 
2409  | 0  |     state->ptr = state->start;  | 
2410  |  | 
  | 
2411  | 0  |     status = sre_match(state, PatternObject_GetCode(self->pattern));  | 
2412  | 0  |     if (PyErr_Occurred())  | 
2413  | 0  |         return NULL;  | 
2414  |  |  | 
2415  | 0  |     match = pattern_new_match((PatternObject*) self->pattern,  | 
2416  | 0  |                                state, status);  | 
2417  |  | 
  | 
2418  | 0  |     if (status == 0)  | 
2419  | 0  |         state->start = NULL;  | 
2420  | 0  |     else { | 
2421  | 0  |         state->must_advance = (state->ptr == state->start);  | 
2422  | 0  |         state->start = state->ptr;  | 
2423  | 0  |     }  | 
2424  |  | 
  | 
2425  | 0  |     return match;  | 
2426  | 0  | }  | 
2427  |  |  | 
2428  |  |  | 
2429  |  | /*[clinic input]  | 
2430  |  | _sre.SRE_Scanner.search  | 
2431  |  |  | 
2432  |  | [clinic start generated code]*/  | 
2433  |  |  | 
2434  |  | static PyObject *  | 
2435  |  | _sre_SRE_Scanner_search_impl(ScannerObject *self)  | 
2436  |  | /*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/  | 
2437  | 0  | { | 
2438  | 0  |     SRE_STATE* state = &self->state;  | 
2439  | 0  |     PyObject* match;  | 
2440  | 0  |     Py_ssize_t status;  | 
2441  |  | 
  | 
2442  | 0  |     if (state->start == NULL)  | 
2443  | 0  |         Py_RETURN_NONE;  | 
2444  |  |  | 
2445  | 0  |     state_reset(state);  | 
2446  |  | 
  | 
2447  | 0  |     state->ptr = state->start;  | 
2448  |  | 
  | 
2449  | 0  |     status = sre_search(state, PatternObject_GetCode(self->pattern));  | 
2450  | 0  |     if (PyErr_Occurred())  | 
2451  | 0  |         return NULL;  | 
2452  |  |  | 
2453  | 0  |     match = pattern_new_match((PatternObject*) self->pattern,  | 
2454  | 0  |                                state, status);  | 
2455  |  | 
  | 
2456  | 0  |     if (status == 0)  | 
2457  | 0  |         state->start = NULL;  | 
2458  | 0  |     else { | 
2459  | 0  |         state->must_advance = (state->ptr == state->start);  | 
2460  | 0  |         state->start = state->ptr;  | 
2461  | 0  |     }  | 
2462  |  | 
  | 
2463  | 0  |     return match;  | 
2464  | 0  | }  | 
2465  |  |  | 
2466  |  | static PyObject *  | 
2467  |  | pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)  | 
2468  | 0  | { | 
2469  | 0  |     ScannerObject* scanner;  | 
2470  |  |  | 
2471  |  |     /* create scanner object */  | 
2472  | 0  |     scanner = PyObject_NEW(ScannerObject, &Scanner_Type);  | 
2473  | 0  |     if (!scanner)  | 
2474  | 0  |         return NULL;  | 
2475  | 0  |     scanner->pattern = NULL;  | 
2476  |  |  | 
2477  |  |     /* create search state object */  | 
2478  | 0  |     if (!state_init(&scanner->state, self, string, pos, endpos)) { | 
2479  | 0  |         Py_DECREF(scanner);  | 
2480  | 0  |         return NULL;  | 
2481  | 0  |     }  | 
2482  |  |  | 
2483  | 0  |     Py_INCREF(self);  | 
2484  | 0  |     scanner->pattern = (PyObject*) self;  | 
2485  |  | 
  | 
2486  | 0  |     return (PyObject*) scanner;  | 
2487  | 0  | }  | 
2488  |  |  | 
2489  |  | static Py_hash_t  | 
2490  |  | pattern_hash(PatternObject *self)  | 
2491  | 0  | { | 
2492  | 0  |     Py_hash_t hash, hash2;  | 
2493  |  | 
  | 
2494  | 0  |     hash = PyObject_Hash(self->pattern);  | 
2495  | 0  |     if (hash == -1) { | 
2496  | 0  |         return -1;  | 
2497  | 0  |     }  | 
2498  |  |  | 
2499  | 0  |     hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);  | 
2500  | 0  |     hash ^= hash2;  | 
2501  |  | 
  | 
2502  | 0  |     hash ^= self->flags;  | 
2503  | 0  |     hash ^= self->isbytes;  | 
2504  | 0  |     hash ^= self->codesize;  | 
2505  |  | 
  | 
2506  | 0  |     if (hash == -1) { | 
2507  | 0  |         hash = -2;  | 
2508  | 0  |     }  | 
2509  | 0  |     return hash;  | 
2510  | 0  | }  | 
2511  |  |  | 
2512  |  | static PyObject*  | 
2513  |  | pattern_richcompare(PyObject *lefto, PyObject *righto, int op)  | 
2514  | 0  | { | 
2515  | 0  |     PatternObject *left, *right;  | 
2516  | 0  |     int cmp;  | 
2517  |  | 
  | 
2518  | 0  |     if (op != Py_EQ && op != Py_NE) { | 
2519  | 0  |         Py_RETURN_NOTIMPLEMENTED;  | 
2520  | 0  |     }  | 
2521  |  |  | 
2522  | 0  |     if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) { | 
2523  | 0  |         Py_RETURN_NOTIMPLEMENTED;  | 
2524  | 0  |     }  | 
2525  |  |  | 
2526  | 0  |     if (lefto == righto) { | 
2527  |  |         /* a pattern is equal to itself */  | 
2528  | 0  |         return PyBool_FromLong(op == Py_EQ);  | 
2529  | 0  |     }  | 
2530  |  |  | 
2531  | 0  |     left = (PatternObject *)lefto;  | 
2532  | 0  |     right = (PatternObject *)righto;  | 
2533  |  | 
  | 
2534  | 0  |     cmp = (left->flags == right->flags  | 
2535  | 0  |            && left->isbytes == right->isbytes  | 
2536  | 0  |            && left->codesize == right->codesize);  | 
2537  | 0  |     if (cmp) { | 
2538  |  |         /* Compare the code and the pattern because the same pattern can  | 
2539  |  |            produce different codes depending on the locale used to compile the  | 
2540  |  |            pattern when the re.LOCALE flag is used. Don't compare groups,  | 
2541  |  |            indexgroup nor groupindex: they are derivated from the pattern. */  | 
2542  | 0  |         cmp = (memcmp(left->code, right->code,  | 
2543  | 0  |                       sizeof(left->code[0]) * left->codesize) == 0);  | 
2544  | 0  |     }  | 
2545  | 0  |     if (cmp) { | 
2546  | 0  |         cmp = PyObject_RichCompareBool(left->pattern, right->pattern,  | 
2547  | 0  |                                        Py_EQ);  | 
2548  | 0  |         if (cmp < 0) { | 
2549  | 0  |             return NULL;  | 
2550  | 0  |         }  | 
2551  | 0  |     }  | 
2552  | 0  |     if (op == Py_NE) { | 
2553  | 0  |         cmp = !cmp;  | 
2554  | 0  |     }  | 
2555  | 0  |     return PyBool_FromLong(cmp);  | 
2556  | 0  | }  | 
2557  |  |  | 
2558  |  | #include "clinic/_sre.c.h"  | 
2559  |  |  | 
2560  |  | static PyMethodDef pattern_methods[] = { | 
2561  |  |     _SRE_SRE_PATTERN_MATCH_METHODDEF  | 
2562  |  |     _SRE_SRE_PATTERN_FULLMATCH_METHODDEF  | 
2563  |  |     _SRE_SRE_PATTERN_SEARCH_METHODDEF  | 
2564  |  |     _SRE_SRE_PATTERN_SUB_METHODDEF  | 
2565  |  |     _SRE_SRE_PATTERN_SUBN_METHODDEF  | 
2566  |  |     _SRE_SRE_PATTERN_FINDALL_METHODDEF  | 
2567  |  |     _SRE_SRE_PATTERN_SPLIT_METHODDEF  | 
2568  |  |     _SRE_SRE_PATTERN_FINDITER_METHODDEF  | 
2569  |  |     _SRE_SRE_PATTERN_SCANNER_METHODDEF  | 
2570  |  |     _SRE_SRE_PATTERN___COPY___METHODDEF  | 
2571  |  |     _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF  | 
2572  |  |     {NULL, NULL} | 
2573  |  | };  | 
2574  |  |  | 
2575  |  | static PyGetSetDef pattern_getset[] = { | 
2576  |  |     {"groupindex", (getter)pattern_groupindex, (setter)NULL, | 
2577  |  |       "A dictionary mapping group names to group numbers."},  | 
2578  |  |     {NULL}  /* Sentinel */ | 
2579  |  | };  | 
2580  |  |  | 
2581  |  | #define PAT_OFF(x) offsetof(PatternObject, x)  | 
2582  |  | static PyMemberDef pattern_members[] = { | 
2583  |  |     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY, | 
2584  |  |      "The pattern string from which the RE object was compiled."},  | 
2585  |  |     {"flags",      T_INT,       PAT_OFF(flags),         READONLY, | 
2586  |  |      "The regex matching flags."},  | 
2587  |  |     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY, | 
2588  |  |      "The number of capturing groups in the pattern."},  | 
2589  |  |     {NULL}  /* Sentinel */ | 
2590  |  | };  | 
2591  |  |  | 
2592  |  | static PyTypeObject Pattern_Type = { | 
2593  |  |     PyVarObject_HEAD_INIT(NULL, 0)  | 
2594  |  |     "re.Pattern",  | 
2595  |  |     sizeof(PatternObject), sizeof(SRE_CODE),  | 
2596  |  |     (destructor)pattern_dealloc,        /* tp_dealloc */  | 
2597  |  |     0,                                  /* tp_vectorcall_offset */  | 
2598  |  |     0,                                  /* tp_getattr */  | 
2599  |  |     0,                                  /* tp_setattr */  | 
2600  |  |     0,                                  /* tp_as_async */  | 
2601  |  |     (reprfunc)pattern_repr,             /* tp_repr */  | 
2602  |  |     0,                                  /* tp_as_number */  | 
2603  |  |     0,                                  /* tp_as_sequence */  | 
2604  |  |     0,                                  /* tp_as_mapping */  | 
2605  |  |     (hashfunc)pattern_hash,             /* tp_hash */  | 
2606  |  |     0,                                  /* tp_call */  | 
2607  |  |     0,                                  /* tp_str */  | 
2608  |  |     0,                                  /* tp_getattro */  | 
2609  |  |     0,                                  /* tp_setattro */  | 
2610  |  |     0,                                  /* tp_as_buffer */  | 
2611  |  |     Py_TPFLAGS_DEFAULT,                 /* tp_flags */  | 
2612  |  |     pattern_doc,                        /* tp_doc */  | 
2613  |  |     0,                                  /* tp_traverse */  | 
2614  |  |     0,                                  /* tp_clear */  | 
2615  |  |     pattern_richcompare,                /* tp_richcompare */  | 
2616  |  |     offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */  | 
2617  |  |     0,                                  /* tp_iter */  | 
2618  |  |     0,                                  /* tp_iternext */  | 
2619  |  |     pattern_methods,                    /* tp_methods */  | 
2620  |  |     pattern_members,                    /* tp_members */  | 
2621  |  |     pattern_getset,                     /* tp_getset */  | 
2622  |  | };  | 
2623  |  |  | 
2624  |  | /* Match objects do not support length or assignment, but do support  | 
2625  |  |    __getitem__. */  | 
2626  |  | static PyMappingMethods match_as_mapping = { | 
2627  |  |     NULL,  | 
2628  |  |     (binaryfunc)match_getitem,  | 
2629  |  |     NULL  | 
2630  |  | };  | 
2631  |  |  | 
2632  |  | static PyMethodDef match_methods[] = { | 
2633  |  |     {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc}, | 
2634  |  |     _SRE_SRE_MATCH_START_METHODDEF  | 
2635  |  |     _SRE_SRE_MATCH_END_METHODDEF  | 
2636  |  |     _SRE_SRE_MATCH_SPAN_METHODDEF  | 
2637  |  |     _SRE_SRE_MATCH_GROUPS_METHODDEF  | 
2638  |  |     _SRE_SRE_MATCH_GROUPDICT_METHODDEF  | 
2639  |  |     _SRE_SRE_MATCH_EXPAND_METHODDEF  | 
2640  |  |     _SRE_SRE_MATCH___COPY___METHODDEF  | 
2641  |  |     _SRE_SRE_MATCH___DEEPCOPY___METHODDEF  | 
2642  |  |     {NULL, NULL} | 
2643  |  | };  | 
2644  |  |  | 
2645  |  | static PyGetSetDef match_getset[] = { | 
2646  |  |     {"lastindex", (getter)match_lastindex_get, (setter)NULL, | 
2647  |  |      "The integer index of the last matched capturing group."},  | 
2648  |  |     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL, | 
2649  |  |      "The name of the last matched capturing group."},  | 
2650  |  |     {"regs",      (getter)match_regs_get,      (setter)NULL}, | 
2651  |  |     {NULL} | 
2652  |  | };  | 
2653  |  |  | 
2654  |  | #define MATCH_OFF(x) offsetof(MatchObject, x)  | 
2655  |  | static PyMemberDef match_members[] = { | 
2656  |  |     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY, | 
2657  |  |      "The string passed to match() or search()."},  | 
2658  |  |     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY, | 
2659  |  |      "The regular expression object."},  | 
2660  |  |     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY, | 
2661  |  |      "The index into the string at which the RE engine started looking for a match."},  | 
2662  |  |     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY, | 
2663  |  |      "The index into the string beyond which the RE engine will not go."},  | 
2664  |  |     {NULL} | 
2665  |  | };  | 
2666  |  |  | 
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2669  |  |  | 
2670  |  | static PyTypeObject Match_Type = { | 
2671  |  |     PyVarObject_HEAD_INIT(NULL,0)  | 
2672  |  |     "re.Match",  | 
2673  |  |     sizeof(MatchObject), sizeof(Py_ssize_t),  | 
2674  |  |     (destructor)match_dealloc,  /* tp_dealloc */  | 
2675  |  |     0,                          /* tp_vectorcall_offset */  | 
2676  |  |     0,                          /* tp_getattr */  | 
2677  |  |     0,                          /* tp_setattr */  | 
2678  |  |     0,                          /* tp_as_async */  | 
2679  |  |     (reprfunc)match_repr,       /* tp_repr */  | 
2680  |  |     0,                          /* tp_as_number */  | 
2681  |  |     0,                          /* tp_as_sequence */  | 
2682  |  |     &match_as_mapping,          /* tp_as_mapping */  | 
2683  |  |     0,                          /* tp_hash */  | 
2684  |  |     0,                          /* tp_call */  | 
2685  |  |     0,                          /* tp_str */  | 
2686  |  |     0,                          /* tp_getattro */  | 
2687  |  |     0,                          /* tp_setattro */  | 
2688  |  |     0,                          /* tp_as_buffer */  | 
2689  |  |     Py_TPFLAGS_DEFAULT,         /* tp_flags */  | 
2690  |  |     match_doc,                  /* tp_doc */  | 
2691  |  |     0,                          /* tp_traverse */  | 
2692  |  |     0,                          /* tp_clear */  | 
2693  |  |     0,                          /* tp_richcompare */  | 
2694  |  |     0,                          /* tp_weaklistoffset */  | 
2695  |  |     0,                          /* tp_iter */  | 
2696  |  |     0,                          /* tp_iternext */  | 
2697  |  |     match_methods,              /* tp_methods */  | 
2698  |  |     match_members,              /* tp_members */  | 
2699  |  |     match_getset,               /* tp_getset */  | 
2700  |  | };  | 
2701  |  |  | 
2702  |  | static PyMethodDef scanner_methods[] = { | 
2703  |  |     _SRE_SRE_SCANNER_MATCH_METHODDEF  | 
2704  |  |     _SRE_SRE_SCANNER_SEARCH_METHODDEF  | 
2705  |  |     {NULL, NULL} | 
2706  |  | };  | 
2707  |  |  | 
2708  |  | #define SCAN_OFF(x) offsetof(ScannerObject, x)  | 
2709  |  | static PyMemberDef scanner_members[] = { | 
2710  |  |     {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY}, | 
2711  |  |     {NULL}  /* Sentinel */ | 
2712  |  | };  | 
2713  |  |  | 
2714  |  | static PyTypeObject Scanner_Type = { | 
2715  |  |     PyVarObject_HEAD_INIT(NULL, 0)  | 
2716  |  |     "_" SRE_MODULE ".SRE_Scanner",  | 
2717  |  |     sizeof(ScannerObject), 0,  | 
2718  |  |     (destructor)scanner_dealloc,/* tp_dealloc */  | 
2719  |  |     0,                          /* tp_vectorcall_offset */  | 
2720  |  |     0,                          /* tp_getattr */  | 
2721  |  |     0,                          /* tp_setattr */  | 
2722  |  |     0,                          /* tp_as_async */  | 
2723  |  |     0,                          /* tp_repr */  | 
2724  |  |     0,                          /* tp_as_number */  | 
2725  |  |     0,                          /* tp_as_sequence */  | 
2726  |  |     0,                          /* tp_as_mapping */  | 
2727  |  |     0,                          /* tp_hash */  | 
2728  |  |     0,                          /* tp_call */  | 
2729  |  |     0,                          /* tp_str */  | 
2730  |  |     0,                          /* tp_getattro */  | 
2731  |  |     0,                          /* tp_setattro */  | 
2732  |  |     0,                          /* tp_as_buffer */  | 
2733  |  |     Py_TPFLAGS_DEFAULT,         /* tp_flags */  | 
2734  |  |     0,                          /* tp_doc */  | 
2735  |  |     0,                          /* tp_traverse */  | 
2736  |  |     0,                          /* tp_clear */  | 
2737  |  |     0,                          /* tp_richcompare */  | 
2738  |  |     0,                          /* tp_weaklistoffset */  | 
2739  |  |     0,                          /* tp_iter */  | 
2740  |  |     0,                          /* tp_iternext */  | 
2741  |  |     scanner_methods,            /* tp_methods */  | 
2742  |  |     scanner_members,            /* tp_members */  | 
2743  |  |     0,                          /* tp_getset */  | 
2744  |  | };  | 
2745  |  |  | 
2746  |  | static PyMethodDef _functions[] = { | 
2747  |  |     _SRE_COMPILE_METHODDEF  | 
2748  |  |     _SRE_GETCODESIZE_METHODDEF  | 
2749  |  |     _SRE_ASCII_ISCASED_METHODDEF  | 
2750  |  |     _SRE_UNICODE_ISCASED_METHODDEF  | 
2751  |  |     _SRE_ASCII_TOLOWER_METHODDEF  | 
2752  |  |     _SRE_UNICODE_TOLOWER_METHODDEF  | 
2753  |  |     {NULL, NULL} | 
2754  |  | };  | 
2755  |  |  | 
2756  |  | static struct PyModuleDef sremodule = { | 
2757  |  |         PyModuleDef_HEAD_INIT,  | 
2758  |  |         "_" SRE_MODULE,  | 
2759  |  |         NULL,  | 
2760  |  |         -1,  | 
2761  |  |         _functions,  | 
2762  |  |         NULL,  | 
2763  |  |         NULL,  | 
2764  |  |         NULL,  | 
2765  |  |         NULL  | 
2766  |  | };  | 
2767  |  |  | 
2768  |  | PyMODINIT_FUNC PyInit__sre(void)  | 
2769  | 1  | { | 
2770  | 1  |     PyObject* m;  | 
2771  | 1  |     PyObject* d;  | 
2772  | 1  |     PyObject* x;  | 
2773  |  |  | 
2774  |  |     /* Patch object types */  | 
2775  | 1  |     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||  | 
2776  | 1  |         PyType_Ready(&Scanner_Type))  | 
2777  | 0  |         return NULL;  | 
2778  |  |  | 
2779  | 1  |     m = PyModule_Create(&sremodule);  | 
2780  | 1  |     if (m == NULL)  | 
2781  | 0  |         return NULL;  | 
2782  | 1  |     d = PyModule_GetDict(m);  | 
2783  |  |  | 
2784  | 1  |     x = PyLong_FromLong(SRE_MAGIC);  | 
2785  | 1  |     if (x) { | 
2786  | 1  |         PyDict_SetItemString(d, "MAGIC", x);  | 
2787  | 1  |         Py_DECREF(x);  | 
2788  | 1  |     }  | 
2789  |  |  | 
2790  | 1  |     x = PyLong_FromLong(sizeof(SRE_CODE));  | 
2791  | 1  |     if (x) { | 
2792  | 1  |         PyDict_SetItemString(d, "CODESIZE", x);  | 
2793  | 1  |         Py_DECREF(x);  | 
2794  | 1  |     }  | 
2795  |  |  | 
2796  | 1  |     x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);  | 
2797  | 1  |     if (x) { | 
2798  | 1  |         PyDict_SetItemString(d, "MAXREPEAT", x);  | 
2799  | 1  |         Py_DECREF(x);  | 
2800  | 1  |     }  | 
2801  |  |  | 
2802  | 1  |     x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);  | 
2803  | 1  |     if (x) { | 
2804  | 1  |         PyDict_SetItemString(d, "MAXGROUPS", x);  | 
2805  | 1  |         Py_DECREF(x);  | 
2806  | 1  |     }  | 
2807  |  |  | 
2808  | 1  |     x = PyUnicode_FromString(copyright);  | 
2809  | 1  |     if (x) { | 
2810  | 1  |         PyDict_SetItemString(d, "copyright", x);  | 
2811  | 1  |         Py_DECREF(x);  | 
2812  | 1  |     }  | 
2813  | 1  |     return m;  | 
2814  | 1  | }  | 
2815  |  |  | 
2816  |  | /* vim:ts=4:sw=4:et  | 
2817  |  | */  |