/src/cpython/Objects/stringlib/unicode_format.h
Line | Count | Source |
1 | | /* |
2 | | unicode_format.h -- implementation of str.format(). |
3 | | */ |
4 | | |
5 | | #include "pycore_complexobject.h" // _PyComplex_FormatAdvancedWriter() |
6 | | #include "pycore_floatobject.h" // _PyFloat_FormatAdvancedWriter() |
7 | | #include "pycore_tuple.h" // _PyTuple_FromPairSteal |
8 | | |
9 | | /************************************************************************/ |
10 | | /*********** Global data structures and forward declarations *********/ |
11 | | /************************************************************************/ |
12 | | |
/*
   A SubString designates the characters str[start:end] of a unicode
   object, i.e. the half-open index range [start, end).

   The str pointer is borrowed: the code filling in a SubString does not
   take a reference of its own.  str may also be NULL, which is how the
   parsing code in this file marks an output that is absent.
*/
typedef struct {
    PyObject *str; /* borrowed reference */
    Py_ssize_t start, end;
} SubString;
21 | | |
22 | | |
/* Which field-numbering mode a format string is using. */
typedef enum {
    ANS_INIT,     /* no numeric or omitted field name seen yet */
    ANS_AUTO,     /* field names are omitted ("{}"): automatic numbering */
    ANS_MANUAL    /* field names are explicit numbers ("{0}") */
} AutoNumberState;   /* Keep track if we're auto-numbering fields */
28 | | |
/* Keeps track of our auto-numbering state, and which number field we're on */
typedef struct {
    AutoNumberState an_state;   /* numbering mode chosen so far */
    int an_field_number;        /* index handed to the next omitted field name */
} AutoNumber;
34 | | |
35 | | |
36 | | /* forward declaration for recursion */ |
37 | | static PyObject * |
38 | | build_string(SubString *input, PyObject *args, PyObject *kwargs, |
39 | | int recursion_depth, AutoNumber *auto_number); |
40 | | |
41 | | |
42 | | |
43 | | /************************************************************************/ |
44 | | /************************** Utility functions ************************/ |
45 | | /************************************************************************/ |
46 | | |
47 | | static void |
48 | | AutoNumber_Init(AutoNumber *auto_number) |
49 | 8.60M | { |
50 | 8.60M | auto_number->an_state = ANS_INIT; |
51 | 8.60M | auto_number->an_field_number = 0; |
52 | 8.60M | } |
53 | | |
54 | | /* fill in a SubString from a pointer and length */ |
55 | | Py_LOCAL_INLINE(void) |
56 | | SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end) |
57 | 163M | { |
58 | 163M | str->str = s; |
59 | 163M | str->start = start; |
60 | 163M | str->end = end; |
61 | 163M | } |
62 | | |
63 | | /* return a new string. if str->str is NULL, return None */ |
64 | | Py_LOCAL_INLINE(PyObject *) |
65 | | SubString_new_object(SubString *str) |
66 | 46.5k | { |
67 | 46.5k | if (str->str == NULL) |
68 | 0 | Py_RETURN_NONE; |
69 | 46.5k | return PyUnicode_Substring(str->str, str->start, str->end); |
70 | 46.5k | } |
71 | | |
72 | | /* return a new string. if str->str is NULL, return a new empty string */ |
73 | | Py_LOCAL_INLINE(PyObject *) |
74 | | SubString_new_object_or_empty(SubString *str) |
75 | 0 | { |
76 | 0 | if (str->str == NULL) { |
77 | 0 | return Py_GetConstant(Py_CONSTANT_EMPTY_STR); |
78 | 0 | } |
79 | 0 | return SubString_new_object(str); |
80 | 0 | } |
81 | | |
82 | | /* Return 1 if an error has been detected switching between automatic |
83 | | field numbering and manual field specification, else return 0. Set |
84 | | ValueError on error. */ |
85 | | static int |
86 | | autonumber_state_error(AutoNumberState state, int field_name_is_empty) |
87 | 16.1M | { |
88 | 16.1M | if (state == ANS_MANUAL) { |
89 | 432 | if (field_name_is_empty) { |
90 | 0 | PyErr_SetString(PyExc_ValueError, "cannot switch from " |
91 | 0 | "manual field specification to " |
92 | 0 | "automatic field numbering"); |
93 | 0 | return 1; |
94 | 0 | } |
95 | 432 | } |
96 | 16.1M | else { |
97 | 16.1M | if (!field_name_is_empty) { |
98 | 0 | PyErr_SetString(PyExc_ValueError, "cannot switch from " |
99 | 0 | "automatic field numbering to " |
100 | 0 | "manual field specification"); |
101 | 0 | return 1; |
102 | 0 | } |
103 | 16.1M | } |
104 | 16.1M | return 0; |
105 | 16.1M | } |
106 | | |
107 | | |
108 | | /************************************************************************/ |
109 | | /*********** Format string parsing -- integers and identifiers *********/ |
110 | | /************************************************************************/ |
111 | | |
112 | | static Py_ssize_t |
113 | | get_integer(const SubString *str) |
114 | 16.2M | { |
115 | 16.2M | Py_ssize_t accumulator = 0; |
116 | 16.2M | Py_ssize_t digitval; |
117 | 16.2M | Py_ssize_t i; |
118 | | |
119 | | /* empty string is an error */ |
120 | 16.2M | if (str->start >= str->end) |
121 | 16.1M | return -1; |
122 | | |
123 | 47.3k | for (i = str->start; i < str->end; i++) { |
124 | 46.9k | digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i)); |
125 | 46.9k | if (digitval < 0) |
126 | 46.5k | return -1; |
127 | | /* |
128 | | Detect possible overflow before it happens: |
129 | | |
130 | | accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if |
131 | | accumulator > (PY_SSIZE_T_MAX - digitval) / 10. |
132 | | */ |
133 | 432 | if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { |
134 | 0 | PyErr_Format(PyExc_ValueError, |
135 | 0 | "Too many decimal digits in format string"); |
136 | 0 | return -1; |
137 | 0 | } |
138 | 432 | accumulator = accumulator * 10 + digitval; |
139 | 432 | } |
140 | 432 | return accumulator; |
141 | 46.9k | } |
142 | | |
143 | | /************************************************************************/ |
144 | | /******** Functions to get field objects and specification strings ******/ |
145 | | /************************************************************************/ |
146 | | |
147 | | /* do the equivalent of obj.name */ |
148 | | static PyObject * |
149 | | getattr(PyObject *obj, SubString *name) |
150 | 4 | { |
151 | 4 | PyObject *newobj; |
152 | 4 | PyObject *str = SubString_new_object(name); |
153 | 4 | if (str == NULL) |
154 | 0 | return NULL; |
155 | 4 | newobj = PyObject_GetAttr(obj, str); |
156 | 4 | Py_DECREF(str); |
157 | 4 | return newobj; |
158 | 4 | } |
159 | | |
160 | | /* do the equivalent of obj[idx], where obj is a sequence */ |
161 | | static PyObject * |
162 | | getitem_sequence(PyObject *obj, Py_ssize_t idx) |
163 | 0 | { |
164 | 0 | return PySequence_GetItem(obj, idx); |
165 | 0 | } |
166 | | |
167 | | /* do the equivalent of obj[idx], where obj is not a sequence */ |
168 | | static PyObject * |
169 | | getitem_idx(PyObject *obj, Py_ssize_t idx) |
170 | 0 | { |
171 | 0 | PyObject *newobj; |
172 | 0 | PyObject *idx_obj = PyLong_FromSsize_t(idx); |
173 | 0 | if (idx_obj == NULL) |
174 | 0 | return NULL; |
175 | 0 | newobj = PyObject_GetItem(obj, idx_obj); |
176 | 0 | Py_DECREF(idx_obj); |
177 | 0 | return newobj; |
178 | 0 | } |
179 | | |
180 | | /* do the equivalent of obj[name] */ |
181 | | static PyObject * |
182 | | getitem_str(PyObject *obj, SubString *name) |
183 | 0 | { |
184 | 0 | PyObject *newobj; |
185 | 0 | PyObject *str = SubString_new_object(name); |
186 | 0 | if (str == NULL) |
187 | 0 | return NULL; |
188 | 0 | newobj = PyObject_GetItem(obj, str); |
189 | 0 | Py_DECREF(str); |
190 | 0 | return newobj; |
191 | 0 | } |
192 | | |
/* Iteration state for walking the ".attr" and "[item]" accessors that
   follow the first component of a field name. */
typedef struct {
    /* the entire string we're parsing.  we assume that someone else
       is managing its lifetime, and that it will exist for the
       lifetime of the iterator.  can be empty */
    SubString str;

    /* index of the next character to examine; it is an index into
       str.str and starts out equal to str.start */
    Py_ssize_t index;
} FieldNameIterator;
202 | | |
203 | | |
204 | | static int |
205 | | FieldNameIterator_init(FieldNameIterator *self, PyObject *s, |
206 | | Py_ssize_t start, Py_ssize_t end) |
207 | 16.2M | { |
208 | 16.2M | SubString_init(&self->str, s, start, end); |
209 | 16.2M | self->index = start; |
210 | 16.2M | return 1; |
211 | 16.2M | } |
212 | | |
213 | | static int |
214 | | _FieldNameIterator_attr(FieldNameIterator *self, SubString *name) |
215 | 4 | { |
216 | 4 | Py_UCS4 c; |
217 | | |
218 | 4 | name->str = self->str.str; |
219 | 4 | name->start = self->index; |
220 | | |
221 | | /* return everything until '.' or '[' */ |
222 | 52 | while (self->index < self->str.end) { |
223 | 48 | c = PyUnicode_READ_CHAR(self->str.str, self->index++); |
224 | 48 | switch (c) { |
225 | 0 | case '[': |
226 | 0 | case '.': |
227 | | /* backup so that we this character will be seen next time */ |
228 | 0 | self->index--; |
229 | 0 | break; |
230 | 48 | default: |
231 | 48 | continue; |
232 | 48 | } |
233 | 0 | break; |
234 | 48 | } |
235 | | /* end of string is okay */ |
236 | 4 | name->end = self->index; |
237 | 4 | return 1; |
238 | 4 | } |
239 | | |
240 | | static int |
241 | | _FieldNameIterator_item(FieldNameIterator *self, SubString *name) |
242 | 0 | { |
243 | 0 | int bracket_seen = 0; |
244 | 0 | Py_UCS4 c; |
245 | |
|
246 | 0 | name->str = self->str.str; |
247 | 0 | name->start = self->index; |
248 | | |
249 | | /* return everything until ']' */ |
250 | 0 | while (self->index < self->str.end) { |
251 | 0 | c = PyUnicode_READ_CHAR(self->str.str, self->index++); |
252 | 0 | switch (c) { |
253 | 0 | case ']': |
254 | 0 | bracket_seen = 1; |
255 | 0 | break; |
256 | 0 | default: |
257 | 0 | continue; |
258 | 0 | } |
259 | 0 | break; |
260 | 0 | } |
261 | | /* make sure we ended with a ']' */ |
262 | 0 | if (!bracket_seen) { |
263 | 0 | PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); |
264 | 0 | return 0; |
265 | 0 | } |
266 | | |
267 | | /* end of string is okay */ |
268 | | /* don't include the ']' */ |
269 | 0 | name->end = self->index-1; |
270 | 0 | return 1; |
271 | 0 | } |
272 | | |
273 | | /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ |
274 | | static int |
275 | | FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, |
276 | | Py_ssize_t *name_idx, SubString *name) |
277 | 16.2M | { |
278 | | /* check at end of input */ |
279 | 16.2M | if (self->index >= self->str.end) |
280 | 16.2M | return 1; |
281 | | |
282 | 4 | switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) { |
283 | 4 | case '.': |
284 | 4 | *is_attribute = 1; |
285 | 4 | if (_FieldNameIterator_attr(self, name) == 0) |
286 | 0 | return 0; |
287 | 4 | *name_idx = -1; |
288 | 4 | break; |
289 | 0 | case '[': |
290 | 0 | *is_attribute = 0; |
291 | 0 | if (_FieldNameIterator_item(self, name) == 0) |
292 | 0 | return 0; |
293 | 0 | *name_idx = get_integer(name); |
294 | 0 | if (*name_idx == -1 && PyErr_Occurred()) |
295 | 0 | return 0; |
296 | 0 | break; |
297 | 0 | default: |
298 | | /* Invalid character follows ']' */ |
299 | 0 | PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may " |
300 | 0 | "follow ']' in format field specifier"); |
301 | 0 | return 0; |
302 | 4 | } |
303 | | |
304 | | /* empty string is an error */ |
305 | 4 | if (name->start == name->end) { |
306 | 0 | PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); |
307 | 0 | return 0; |
308 | 0 | } |
309 | | |
310 | 4 | return 2; |
311 | 4 | } |
312 | | |
313 | | |
314 | | /* input: field_name |
315 | | output: 'first' points to the part before the first '[' or '.' |
316 | | 'first_idx' is -1 if 'first' is not an integer, otherwise |
317 | | it's the value of first converted to an integer |
318 | | 'rest' is an iterator to return the rest |
319 | | */ |
320 | | static int |
321 | | field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first, |
322 | | Py_ssize_t *first_idx, FieldNameIterator *rest, |
323 | | AutoNumber *auto_number) |
324 | 16.2M | { |
325 | 16.2M | Py_UCS4 c; |
326 | 16.2M | Py_ssize_t i = start; |
327 | 16.2M | int field_name_is_empty; |
328 | 16.2M | int using_numeric_index; |
329 | | |
330 | | /* find the part up until the first '.' or '[' */ |
331 | 16.5M | while (i < end) { |
332 | 337k | switch (c = PyUnicode_READ_CHAR(str, i++)) { |
333 | 0 | case '[': |
334 | 4 | case '.': |
335 | | /* backup so that we this character is available to the |
336 | | "rest" iterator */ |
337 | 4 | i--; |
338 | 4 | break; |
339 | 337k | default: |
340 | 337k | continue; |
341 | 337k | } |
342 | 4 | break; |
343 | 337k | } |
344 | | |
345 | | /* set up the return values */ |
346 | 16.2M | SubString_init(first, str, start, i); |
347 | 16.2M | FieldNameIterator_init(rest, str, i, end); |
348 | | |
349 | | /* see if "first" is an integer, in which case it's used as an index */ |
350 | 16.2M | *first_idx = get_integer(first); |
351 | 16.2M | if (*first_idx == -1 && PyErr_Occurred()) |
352 | 0 | return 0; |
353 | | |
354 | 16.2M | field_name_is_empty = first->start >= first->end; |
355 | | |
356 | | /* If the field name is omitted or if we have a numeric index |
357 | | specified, then we're doing numeric indexing into args. */ |
358 | 16.2M | using_numeric_index = field_name_is_empty || *first_idx != -1; |
359 | | |
360 | | /* We always get here exactly one time for each field we're |
361 | | processing. And we get here in field order (counting by left |
362 | | braces). So this is the perfect place to handle automatic field |
363 | | numbering if the field name is omitted. */ |
364 | | |
365 | | /* Check if we need to do the auto-numbering. It's not needed if |
366 | | we're called from string.Format routines, because it's handled |
367 | | in that class by itself. */ |
368 | 16.2M | if (auto_number) { |
369 | | /* Initialize our auto numbering state if this is the first |
370 | | time we're either auto-numbering or manually numbering. */ |
371 | 16.2M | if (auto_number->an_state == ANS_INIT && using_numeric_index) |
372 | 8.59M | auto_number->an_state = field_name_is_empty ? |
373 | 8.59M | ANS_AUTO : ANS_MANUAL; |
374 | | |
375 | | /* Make sure our state is consistent with what we're doing |
376 | | this time through. Only check if we're using a numeric |
377 | | index. */ |
378 | 16.2M | if (using_numeric_index) |
379 | 16.1M | if (autonumber_state_error(auto_number->an_state, |
380 | 16.1M | field_name_is_empty)) |
381 | 0 | return 0; |
382 | | /* Zero length field means we want to do auto-numbering of the |
383 | | fields. */ |
384 | 16.2M | if (field_name_is_empty) |
385 | 16.1M | *first_idx = (auto_number->an_field_number)++; |
386 | 16.2M | } |
387 | | |
388 | 16.2M | return 1; |
389 | 16.2M | } |
390 | | |
391 | | |
392 | | /* |
393 | | get_field_object returns the object inside {}, before the |
394 | | format_spec. It handles getindex and getattr lookups and consumes |
395 | | the entire input string. |
396 | | */ |
397 | | static PyObject * |
398 | | get_field_object(SubString *input, PyObject *args, PyObject *kwargs, |
399 | | AutoNumber *auto_number) |
400 | 16.2M | { |
401 | 16.2M | PyObject *obj = NULL; |
402 | 16.2M | int ok; |
403 | 16.2M | int is_attribute; |
404 | 16.2M | SubString name; |
405 | 16.2M | SubString first; |
406 | 16.2M | Py_ssize_t index; |
407 | 16.2M | FieldNameIterator rest; |
408 | | |
409 | 16.2M | if (!field_name_split(input->str, input->start, input->end, &first, |
410 | 16.2M | &index, &rest, auto_number)) { |
411 | 0 | goto error; |
412 | 0 | } |
413 | | |
414 | 16.2M | if (index == -1) { |
415 | | /* look up in kwargs */ |
416 | 46.5k | PyObject *key = SubString_new_object(&first); |
417 | 46.5k | if (key == NULL) { |
418 | 0 | goto error; |
419 | 0 | } |
420 | 46.5k | if (kwargs == NULL) { |
421 | 0 | PyErr_SetObject(PyExc_KeyError, key); |
422 | 0 | Py_DECREF(key); |
423 | 0 | goto error; |
424 | 0 | } |
425 | | /* Use PyObject_GetItem instead of PyDict_GetItem because this |
426 | | code is no longer just used with kwargs. It might be passed |
427 | | a non-dict when called through format_map. */ |
428 | 46.5k | obj = PyObject_GetItem(kwargs, key); |
429 | 46.5k | Py_DECREF(key); |
430 | 46.5k | if (obj == NULL) { |
431 | 0 | goto error; |
432 | 0 | } |
433 | 46.5k | } |
434 | 16.1M | else { |
435 | | /* If args is NULL, we have a format string with a positional field |
436 | | with only kwargs to retrieve it from. This can only happen when |
437 | | used with format_map(), where positional arguments are not |
438 | | allowed. */ |
439 | 16.1M | if (args == NULL) { |
440 | 0 | PyErr_SetString(PyExc_ValueError, "Format string contains " |
441 | 0 | "positional fields"); |
442 | 0 | goto error; |
443 | 0 | } |
444 | | |
445 | | /* look up in args */ |
446 | 16.1M | obj = PySequence_GetItem(args, index); |
447 | 16.1M | if (obj == NULL) { |
448 | 0 | PyErr_Format(PyExc_IndexError, |
449 | 0 | "Replacement index %zd out of range for positional " |
450 | 0 | "args tuple", |
451 | 0 | index); |
452 | 0 | goto error; |
453 | 0 | } |
454 | 16.1M | } |
455 | | |
456 | | /* iterate over the rest of the field_name */ |
457 | 16.2M | while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, |
458 | 16.2M | &name)) == 2) { |
459 | 4 | PyObject *tmp; |
460 | | |
461 | 4 | if (is_attribute) |
462 | | /* getattr lookup "." */ |
463 | 4 | tmp = getattr(obj, &name); |
464 | 0 | else |
465 | | /* getitem lookup "[]" */ |
466 | 0 | if (index == -1) |
467 | 0 | tmp = getitem_str(obj, &name); |
468 | 0 | else |
469 | 0 | if (PySequence_Check(obj)) |
470 | 0 | tmp = getitem_sequence(obj, index); |
471 | 0 | else |
472 | | /* not a sequence */ |
473 | 0 | tmp = getitem_idx(obj, index); |
474 | 4 | if (tmp == NULL) |
475 | 0 | goto error; |
476 | | |
477 | | /* assign to obj */ |
478 | 4 | Py_SETREF(obj, tmp); |
479 | 4 | } |
480 | | /* end of iterator, this is the non-error case */ |
481 | 16.2M | if (ok == 1) |
482 | 16.2M | return obj; |
483 | 0 | error: |
484 | 0 | Py_XDECREF(obj); |
485 | 0 | return NULL; |
486 | 16.2M | } |
487 | | |
488 | | /************************************************************************/ |
489 | | /***************** Field rendering functions **************************/ |
490 | | /************************************************************************/ |
491 | | |
492 | | /* |
493 | | render_field() is the main function in this section. It takes the |
494 | | field object and field specification string generated by |
495 | | get_field_and_spec, and renders the field into the output string. |
496 | | |
497 | | render_field calls fieldobj.__format__(format_spec) method, and |
498 | | appends to the output. |
499 | | */ |
500 | | static int |
501 | | render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer) |
502 | 16.2M | { |
503 | 16.2M | int ok = 0; |
504 | 16.2M | PyObject *result = NULL; |
505 | 16.2M | PyObject *format_spec_object = NULL; |
506 | 16.2M | int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; |
507 | 16.2M | int err; |
508 | | |
509 | | /* If we know the type exactly, skip the lookup of __format__ and just |
510 | | call the formatter directly. */ |
511 | 16.2M | if (PyUnicode_CheckExact(fieldobj)) |
512 | 15.9M | formatter = _PyUnicode_FormatAdvancedWriter; |
513 | 221k | else if (PyLong_CheckExact(fieldobj)) |
514 | 66.0k | formatter = _PyLong_FormatAdvancedWriter; |
515 | 155k | else if (PyFloat_CheckExact(fieldobj)) |
516 | 0 | formatter = _PyFloat_FormatAdvancedWriter; |
517 | 155k | else if (PyComplex_CheckExact(fieldobj)) |
518 | 0 | formatter = _PyComplex_FormatAdvancedWriter; |
519 | | |
520 | 16.2M | if (formatter) { |
521 | | /* we know exactly which formatter will be called when __format__ is |
522 | | looked up, so call it directly, instead. */ |
523 | 16.0M | err = formatter(writer, fieldobj, format_spec->str, |
524 | 16.0M | format_spec->start, format_spec->end); |
525 | 16.0M | return (err == 0); |
526 | 16.0M | } |
527 | 155k | else { |
528 | | /* We need to create an object out of the pointers we have, because |
529 | | __format__ takes a string/unicode object for format_spec. */ |
530 | 155k | if (format_spec->str) |
531 | 0 | format_spec_object = PyUnicode_Substring(format_spec->str, |
532 | 0 | format_spec->start, |
533 | 0 | format_spec->end); |
534 | 155k | else |
535 | 155k | format_spec_object = Py_GetConstant(Py_CONSTANT_EMPTY_STR); |
536 | 155k | if (format_spec_object == NULL) |
537 | 0 | goto done; |
538 | | |
539 | 155k | result = PyObject_Format(fieldobj, format_spec_object); |
540 | 155k | } |
541 | 155k | if (result == NULL) |
542 | 1 | goto done; |
543 | | |
544 | 155k | if (_PyUnicodeWriter_WriteStr(writer, result) == -1) |
545 | 0 | goto done; |
546 | 155k | ok = 1; |
547 | | |
548 | 155k | done: |
549 | 155k | Py_XDECREF(format_spec_object); |
550 | 155k | Py_XDECREF(result); |
551 | 155k | return ok; |
552 | 155k | } |
553 | | |
/* Parse one replacement field, starting at str->start (which the caller
   has already advanced past the opening '{').

   str is consumed in place: str->start is advanced as characters are
   read, and on success it ends up just past the field's closing '}'.

   Outputs:
     field_name   the text up to the first '!', ':' or '}'
     conversion   the character following '!', or '\0' if there is none
     format_spec  the text between ':' and the matching '}'; its str
                  pointer stays NULL when the field has no ':' part
     *format_spec_needs_expanding
                  set to 1 if the format spec contains a '{'; it is never
                  cleared here, so the caller must initialize it

   Returns 1 on success, 0 with ValueError set on a malformed field. */
static int
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
            int *format_spec_needs_expanding, Py_UCS4 *conversion)
{
    /* Note this function works if the field name is zero length,
       which is good.  Zero length field names are handled later, in
       field_name_split. */

    Py_UCS4 c = 0;

    /* initialize these, as they may be empty */
    *conversion = '\0';
    SubString_init(format_spec, NULL, 0, 0);

    /* Search for the field name.  it's terminated by the end of
       the string, or a '}', ':' or '!' */
    field_name->str = str->str;
    field_name->start = str->start;
    while (str->start < str->end) {
        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
        case '{':
            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
            return 0;
        case '[':
            /* skip ahead to the matching ']' (or to the end of input) so
               that a '}', ':' or '!' inside an item lookup does not
               terminate the field name */
            for (; str->start < str->end; str->start++)
                if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
                    break;
            continue;
        case '}':
        case ':':
        case '!':
            break;
        default:
            continue;
        }
        break;
    }

    /* the field name excludes the last character read; whether that
       character was a valid terminator is checked below via c */
    field_name->end = str->start - 1;
    if (c == '!' || c == ':') {
        Py_ssize_t count;
        /* we have a format specifier and/or a conversion */
        /* don't include the last character */

        /* see if there's a conversion specifier */
        if (c == '!') {
            /* there must be another character present */
            if (str->start >= str->end) {
                PyErr_SetString(PyExc_ValueError,
                                "end of string while looking for conversion "
                                "specifier");
                return 0;
            }
            *conversion = PyUnicode_READ_CHAR(str->str, str->start++);

            /* the conversion character must be followed by '}' (end of
               field) or ':' (a format spec follows) */
            if (str->start < str->end) {
                c = PyUnicode_READ_CHAR(str->str, str->start++);
                if (c == '}')
                    return 1;
                if (c != ':') {
                    PyErr_SetString(PyExc_ValueError,
                                    "expected ':' after conversion specifier");
                    return 0;
                }
            }
        }
        format_spec->str = str->str;
        format_spec->start = str->start;
        /* count is the brace nesting depth; it starts at 1 for the
           field's own opening '{', and the spec ends at the '}' that
           brings it back to 0 */
        count = 1;
        while (str->start < str->end) {
            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
            case '{':
                *format_spec_needs_expanding = 1;
                count++;
                break;
            case '}':
                count--;
                if (count == 0) {
                    format_spec->end = str->start - 1;
                    return 1;
                }
                break;
            default:
                break;
            }
        }

        /* input ended before the braces balanced */
        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
        return 0;
    }
    else if (c != '}') {
        /* the scan above ran off the end without seeing a terminator */
        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
        return 0;
    }

    return 1;
}
651 | | |
652 | | /************************************************************************/ |
653 | | /******* Output string allocation and escape-to-markup processing ******/ |
654 | | /************************************************************************/ |
655 | | |
/* MarkupIterator breaks the string into pieces of either literal
   text, or things inside {} that need to be marked up.  it is
   designed to make it easy to wrap a Python iterator around it, for
   use with the Formatter class */

typedef struct {
    /* the input that has not been consumed yet; str.start advances as
       pieces are returned */
    SubString str;
} MarkupIterator;
664 | | |
665 | | static int |
666 | | MarkupIterator_init(MarkupIterator *self, PyObject *str, |
667 | | Py_ssize_t start, Py_ssize_t end) |
668 | 8.60M | { |
669 | 8.60M | SubString_init(&self->str, str, start, end); |
670 | 8.60M | return 1; |
671 | 8.60M | } |
672 | | |
/* Produce the next piece of the format string.

   returns 0 on error, 1 on non-error termination, and 2 if it got a
   string (or something to be expanded)

   When 2 is returned:
     literal         the literal text preceding the field (may be empty)
     *field_present  1 if a replacement field follows the literal, else 0
     field_name, format_spec, conversion, *format_spec_needs_expanding
                     the parsed parts of that field, as filled in by
                     parse_field(); meaningful only if *field_present

   All outputs are reset on every call, before anything else is done. */
static int
MarkupIterator_next(MarkupIterator *self, SubString *literal,
                    int *field_present, SubString *field_name,
                    SubString *format_spec, Py_UCS4 *conversion,
                    int *format_spec_needs_expanding)
{
    int at_end;
    Py_UCS4 c = 0;
    Py_ssize_t start;
    Py_ssize_t len;
    int markup_follows = 0;

    /* initialize all of the output variables */
    SubString_init(literal, NULL, 0, 0);
    SubString_init(field_name, NULL, 0, 0);
    SubString_init(format_spec, NULL, 0, 0);
    *conversion = '\0';
    *format_spec_needs_expanding = 0;
    *field_present = 0;

    /* No more input, end of iterator.  This is the normal exit
       path. */
    if (self->str.start >= self->str.end)
        return 1;

    start = self->str.start;

    /* First read any literal text.  Read until the end of string, an
       escaped '{' or '}', or an unescaped '{'.  In order to never
       allocate memory and so I can just pass pointers around, if
       there's an escaped '{' or '}' then we'll return the literal
       including the brace, but no format object.  The next time
       through, we'll return the rest of the literal, skipping past
       the second consecutive brace. */
    while (self->str.start < self->str.end) {
        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
        case '{':
        case '}':
            markup_follows = 1;
            break;
        default:
            continue;
        }
        break;
    }

    /* c is now the last character read: a brace if the scan stopped on
       one, otherwise the final character of the input */
    at_end = self->str.start >= self->str.end;
    len = self->str.start - start;

    /* a '}' is only legal as the first half of an escaped "}}" */
    if ((c == '}') && (at_end ||
                       (c != PyUnicode_READ_CHAR(self->str.str,
                                                 self->str.start)))) {
        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
                        "in format string");
        return 0;
    }
    /* a '{' as the very last character can neither open a field nor be
       an escape */
    if (at_end && c == '{') {
        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
                        "in format string");
        return 0;
    }
    if (!at_end) {
        if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
            /* escaped } or {, skip it in the input.  there is no
               markup object following us, just this literal text */
            self->str.start++;
            markup_follows = 0;
        }
        else
            /* an unescaped '{' opening a field: it is not part of the
               literal text */
            len--;
    }

    /* record the literal text */
    literal->str = self->str.str;
    literal->start = start;
    literal->end = start + len;

    if (!markup_follows)
        return 2;

    /* this is markup; parse the field */
    *field_present = 1;
    if (!parse_field(&self->str, field_name, format_spec,
                     format_spec_needs_expanding, conversion))
        return 0;
    return 2;
}
762 | | |
763 | | |
764 | | /* do the !r or !s conversion on obj */ |
765 | | static PyObject * |
766 | | do_conversion(PyObject *obj, Py_UCS4 conversion) |
767 | 529k | { |
768 | | /* XXX in pre-3.0, do we need to convert this to unicode, since it |
769 | | might have returned a string? */ |
770 | 529k | switch (conversion) { |
771 | 529k | case 'r': |
772 | 529k | return PyObject_Repr(obj); |
773 | 0 | case 's': |
774 | 0 | return PyObject_Str(obj); |
775 | 0 | case 'a': |
776 | 0 | return PyObject_ASCII(obj); |
777 | 0 | default: |
778 | 0 | if (conversion > 32 && conversion < 127) { |
779 | | /* It's the ASCII subrange; casting to char is safe |
780 | | (assuming the execution character set is an ASCII |
781 | | superset). */ |
782 | 0 | PyErr_Format(PyExc_ValueError, |
783 | 0 | "Unknown conversion specifier %c", |
784 | 0 | (char)conversion); |
785 | 0 | } else |
786 | 0 | PyErr_Format(PyExc_ValueError, |
787 | 0 | "Unknown conversion specifier \\x%x", |
788 | 0 | (unsigned int)conversion); |
789 | 0 | return NULL; |
790 | 529k | } |
791 | 529k | } |
792 | | |
793 | | /* given: |
794 | | |
795 | | {field_name!conversion:format_spec} |
796 | | |
797 | | compute the result and write it to output. |
798 | | format_spec_needs_expanding is an optimization. if it's false, |
799 | | just output the string directly, otherwise recursively expand the |
800 | | format_spec string. |
801 | | |
802 | | field_name is allowed to be zero length, in which case we |
803 | | are doing auto field numbering. |
804 | | */ |
805 | | |
806 | | static int |
807 | | output_markup(SubString *field_name, SubString *format_spec, |
808 | | int format_spec_needs_expanding, Py_UCS4 conversion, |
809 | | _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs, |
810 | | int recursion_depth, AutoNumber *auto_number) |
811 | 16.2M | { |
812 | 16.2M | PyObject *tmp = NULL; |
813 | 16.2M | PyObject *fieldobj = NULL; |
814 | 16.2M | SubString expanded_format_spec; |
815 | 16.2M | SubString *actual_format_spec; |
816 | 16.2M | int result = 0; |
817 | | |
818 | | /* convert field_name to an object */ |
819 | 16.2M | fieldobj = get_field_object(field_name, args, kwargs, auto_number); |
820 | 16.2M | if (fieldobj == NULL) |
821 | 0 | goto done; |
822 | | |
823 | 16.2M | if (conversion != '\0') { |
824 | 529k | tmp = do_conversion(fieldobj, conversion); |
825 | 529k | if (tmp == NULL) |
826 | 0 | goto done; |
827 | | |
828 | | /* do the assignment, transferring ownership: fieldobj = tmp */ |
829 | 529k | Py_SETREF(fieldobj, tmp); |
830 | 529k | tmp = NULL; |
831 | 529k | } |
832 | | |
833 | | /* if needed, recursively compute the format_spec */ |
834 | 16.2M | if (format_spec_needs_expanding) { |
835 | 0 | tmp = build_string(format_spec, args, kwargs, recursion_depth-1, |
836 | 0 | auto_number); |
837 | 0 | if (tmp == NULL) |
838 | 0 | goto done; |
839 | | |
840 | | /* note that in the case we're expanding the format string, |
841 | | tmp must be kept around until after the call to |
842 | | render_field. */ |
843 | 0 | SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp)); |
844 | 0 | actual_format_spec = &expanded_format_spec; |
845 | 0 | } |
846 | 16.2M | else |
847 | 16.2M | actual_format_spec = format_spec; |
848 | | |
849 | 16.2M | if (render_field(fieldobj, actual_format_spec, writer) == 0) |
850 | 1 | goto done; |
851 | | |
852 | 16.2M | result = 1; |
853 | | |
854 | 16.2M | done: |
855 | 16.2M | Py_XDECREF(fieldobj); |
856 | 16.2M | Py_XDECREF(tmp); |
857 | | |
858 | 16.2M | return result; |
859 | 16.2M | } |
860 | | |
861 | | /* |
862 | | do_markup is the top-level loop for the format() method. It |
863 | | searches through the format string for escapes to markup codes, and |
864 | | calls other functions to move non-markup text to the output, |
865 | | and to perform the markup to the output. |
866 | | */ |
867 | | static int |
868 | | do_markup(SubString *input, PyObject *args, PyObject *kwargs, |
869 | | _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number) |
870 | 8.60M | { |
871 | 8.60M | MarkupIterator iter; |
872 | 8.60M | int format_spec_needs_expanding; |
873 | 8.60M | int result; |
874 | 8.60M | int field_present; |
875 | 8.60M | SubString literal; |
876 | 8.60M | SubString field_name; |
877 | 8.60M | SubString format_spec; |
878 | 8.60M | Py_UCS4 conversion; |
879 | | |
880 | 8.60M | MarkupIterator_init(&iter, input->str, input->start, input->end); |
881 | 32.6M | while ((result = MarkupIterator_next(&iter, &literal, &field_present, |
882 | 32.6M | &field_name, &format_spec, |
883 | 32.6M | &conversion, |
884 | 32.6M | &format_spec_needs_expanding)) == 2) { |
885 | 24.0M | if (literal.end != literal.start) { |
886 | 16.4M | if (!field_present && iter.str.start == iter.str.end) |
887 | 7.80M | writer->overallocate = 0; |
888 | 16.4M | if (_PyUnicodeWriter_WriteSubstring(writer, literal.str, |
889 | 16.4M | literal.start, literal.end) < 0) |
890 | 0 | return 0; |
891 | 16.4M | } |
892 | | |
893 | 24.0M | if (field_present) { |
894 | 16.2M | if (iter.str.start == iter.str.end) |
895 | 800k | writer->overallocate = 0; |
896 | 16.2M | if (!output_markup(&field_name, &format_spec, |
897 | 16.2M | format_spec_needs_expanding, conversion, writer, |
898 | 16.2M | args, kwargs, recursion_depth, auto_number)) |
899 | 1 | return 0; |
900 | 16.2M | } |
901 | 24.0M | } |
902 | 8.60M | return result; |
903 | 8.60M | } |
904 | | |
905 | | |
906 | | /* |
907 | | build_string allocates the output string and then |
908 | | calls do_markup to do the heavy lifting. |
909 | | */ |
910 | | static PyObject * |
911 | | build_string(SubString *input, PyObject *args, PyObject *kwargs, |
912 | | int recursion_depth, AutoNumber *auto_number) |
913 | 8.60M | { |
914 | 8.60M | _PyUnicodeWriter writer; |
915 | | |
916 | | /* check the recursion level */ |
917 | 8.60M | if (recursion_depth <= 0) { |
918 | 0 | PyErr_SetString(PyExc_ValueError, |
919 | 0 | "Max string recursion exceeded"); |
920 | 0 | return NULL; |
921 | 0 | } |
922 | | |
923 | 8.60M | _PyUnicodeWriter_Init(&writer); |
924 | 8.60M | writer.overallocate = 1; |
925 | 8.60M | writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100; |
926 | | |
927 | 8.60M | if (!do_markup(input, args, kwargs, &writer, recursion_depth, |
928 | 8.60M | auto_number)) { |
929 | 1 | _PyUnicodeWriter_Dealloc(&writer); |
930 | 1 | return NULL; |
931 | 1 | } |
932 | | |
933 | 8.60M | return _PyUnicodeWriter_Finish(&writer); |
934 | 8.60M | } |
935 | | |
936 | | /************************************************************************/ |
937 | | /*********** main routine ***********************************************/ |
938 | | /************************************************************************/ |
939 | | |
940 | | /* this is the main entry point */ |
941 | | static PyObject * |
942 | | do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) |
943 | 8.60M | { |
944 | 8.60M | SubString input; |
945 | | |
946 | | /* PEP 3101 says only 2 levels, so that |
947 | | "{0:{1}}".format('abc', 's') # works |
948 | | "{0:{1:{2}}}".format('abc', 's', '') # fails |
949 | | */ |
950 | 8.60M | int recursion_depth = 2; |
951 | | |
952 | 8.60M | AutoNumber auto_number; |
953 | 8.60M | AutoNumber_Init(&auto_number); |
954 | 8.60M | SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self)); |
955 | 8.60M | return build_string(&input, args, kwargs, recursion_depth, &auto_number); |
956 | 8.60M | } |
957 | | |
958 | | static PyObject * |
959 | | do_string_format_map(PyObject *self, PyObject *obj) |
960 | 0 | { |
961 | 0 | return do_string_format(self, NULL, obj); |
962 | 0 | } |
963 | | |
964 | | |
965 | | /************************************************************************/ |
966 | | /*********** formatteriterator ******************************************/ |
967 | | /************************************************************************/ |
968 | | |
969 | | /* This is used to implement string.Formatter.parse(). It exists so |
970 | | Formatter can share code with the built-in str.format() method. |
971 | | It's really just a wrapper around MarkupIterator that is callable |
972 | | from Python. */ |
973 | | |
974 | | typedef struct { |
975 | | PyObject_HEAD |
976 | | PyObject *str; |
977 | | MarkupIterator it_markup; |
978 | | } formatteriterobject; |
979 | | |
980 | | static void |
981 | | formatteriter_dealloc(PyObject *op) |
982 | 0 | { |
983 | 0 | formatteriterobject *it = (formatteriterobject*)op; |
984 | 0 | Py_XDECREF(it->str); |
985 | 0 | PyObject_Free(it); |
986 | 0 | } |
987 | | |
988 | | /* returns a tuple: |
989 | | (literal, field_name, format_spec, conversion) |
990 | | |
991 | | literal is any literal text to output. might be zero length |
992 | | field_name is the string before the ':'. might be None |
993 | | format_spec is the string after the ':'. might be None |
994 | | conversion is either None, or the string after the '!' |
995 | | */ |
996 | | static PyObject * |
997 | | formatteriter_next(PyObject *op) |
998 | 0 | { |
999 | 0 | formatteriterobject *it = (formatteriterobject*)op; |
1000 | 0 | SubString literal; |
1001 | 0 | SubString field_name; |
1002 | 0 | SubString format_spec; |
1003 | 0 | Py_UCS4 conversion; |
1004 | 0 | int format_spec_needs_expanding; |
1005 | 0 | int field_present; |
1006 | 0 | int result = MarkupIterator_next(&it->it_markup, &literal, &field_present, |
1007 | 0 | &field_name, &format_spec, &conversion, |
1008 | 0 | &format_spec_needs_expanding); |
1009 | | |
1010 | | /* all of the SubString objects point into it->str, so no |
1011 | | memory management needs to be done on them */ |
1012 | 0 | assert(0 <= result && result <= 2); |
1013 | 0 | if (result == 0 || result == 1) |
1014 | | /* if 0, error has already been set, if 1, iterator is empty */ |
1015 | 0 | return NULL; |
1016 | 0 | else { |
1017 | 0 | PyObject *literal_str = NULL; |
1018 | 0 | PyObject *field_name_str = NULL; |
1019 | 0 | PyObject *format_spec_str = NULL; |
1020 | 0 | PyObject *conversion_str = NULL; |
1021 | 0 | PyObject *tuple = NULL; |
1022 | |
|
1023 | 0 | literal_str = SubString_new_object(&literal); |
1024 | 0 | if (literal_str == NULL) |
1025 | 0 | goto done; |
1026 | | |
1027 | 0 | field_name_str = SubString_new_object(&field_name); |
1028 | 0 | if (field_name_str == NULL) |
1029 | 0 | goto done; |
1030 | | |
1031 | | /* if a replacement field is present, return a string for |
1032 | | format_spec (even if zero length), else return None */ |
1033 | 0 | format_spec_str = (field_present ? |
1034 | 0 | SubString_new_object_or_empty : |
1035 | 0 | SubString_new_object)(&format_spec); |
1036 | 0 | if (format_spec_str == NULL) |
1037 | 0 | goto done; |
1038 | | |
1039 | | /* if the conversion is not specified, return None; |
1040 | | otherwise create a length-one string containing the conversion |
1041 | | character */ |
1042 | 0 | if (conversion == '\0') { |
1043 | 0 | conversion_str = Py_NewRef(Py_None); |
1044 | 0 | } |
1045 | 0 | else |
1046 | 0 | conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, |
1047 | 0 | &conversion, 1); |
1048 | 0 | if (conversion_str == NULL) |
1049 | 0 | goto done; |
1050 | | |
1051 | 0 | tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, |
1052 | 0 | conversion_str); |
1053 | 0 | done: |
1054 | 0 | Py_XDECREF(literal_str); |
1055 | 0 | Py_XDECREF(field_name_str); |
1056 | 0 | Py_XDECREF(format_spec_str); |
1057 | 0 | Py_XDECREF(conversion_str); |
1058 | 0 | return tuple; |
1059 | 0 | } |
1060 | 0 | } |
1061 | | |
1062 | | static PyMethodDef formatteriter_methods[] = { |
1063 | | {NULL, NULL} /* sentinel */ |
1064 | | }; |
1065 | | |
1066 | | static PyTypeObject PyFormatterIter_Type = { |
1067 | | PyVarObject_HEAD_INIT(&PyType_Type, 0) |
1068 | | "formatteriterator", /* tp_name */ |
1069 | | sizeof(formatteriterobject), /* tp_basicsize */ |
1070 | | 0, /* tp_itemsize */ |
1071 | | /* methods */ |
1072 | | formatteriter_dealloc, /* tp_dealloc */ |
1073 | | 0, /* tp_vectorcall_offset */ |
1074 | | 0, /* tp_getattr */ |
1075 | | 0, /* tp_setattr */ |
1076 | | 0, /* tp_as_async */ |
1077 | | 0, /* tp_repr */ |
1078 | | 0, /* tp_as_number */ |
1079 | | 0, /* tp_as_sequence */ |
1080 | | 0, /* tp_as_mapping */ |
1081 | | 0, /* tp_hash */ |
1082 | | 0, /* tp_call */ |
1083 | | 0, /* tp_str */ |
1084 | | PyObject_GenericGetAttr, /* tp_getattro */ |
1085 | | 0, /* tp_setattro */ |
1086 | | 0, /* tp_as_buffer */ |
1087 | | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
1088 | | 0, /* tp_doc */ |
1089 | | 0, /* tp_traverse */ |
1090 | | 0, /* tp_clear */ |
1091 | | 0, /* tp_richcompare */ |
1092 | | 0, /* tp_weaklistoffset */ |
1093 | | PyObject_SelfIter, /* tp_iter */ |
1094 | | formatteriter_next, /* tp_iternext */ |
1095 | | formatteriter_methods, /* tp_methods */ |
1096 | | 0, |
1097 | | }; |
1098 | | |
1099 | | /* formatter_parser is used to implement |
1100 | | string.Formatter.vformat. It parses a string and returns tuples |
1101 | | describing the parsed elements. It's a wrapper around |
1102 | | stringlib/unicode_format.h's MarkupIterator */ |
1103 | | static PyObject * |
1104 | | formatter_parser(PyObject *Py_UNUSED(module), PyObject *self) |
1105 | 0 | { |
1106 | 0 | formatteriterobject *it; |
1107 | |
|
1108 | 0 | if (!PyUnicode_Check(self)) { |
1109 | 0 | PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); |
1110 | 0 | return NULL; |
1111 | 0 | } |
1112 | | |
1113 | 0 | it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); |
1114 | 0 | if (it == NULL) |
1115 | 0 | return NULL; |
1116 | | |
1117 | | /* take ownership, give the object to the iterator */ |
1118 | 0 | it->str = Py_NewRef(self); |
1119 | | |
1120 | | /* initialize the contained MarkupIterator */ |
1121 | 0 | MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self)); |
1122 | 0 | return (PyObject *)it; |
1123 | 0 | } |
1124 | | |
1125 | | |
1126 | | /************************************************************************/ |
1127 | | /*********** fieldnameiterator ******************************************/ |
1128 | | /************************************************************************/ |
1129 | | |
1130 | | |
1131 | | /* This is used to implement string.Formatter.get_field(). It parses the |
1132 | | field name into attribute and item values. It's a Python-callable |
1133 | | wrapper around FieldNameIterator */ |
1134 | | |
1135 | | typedef struct { |
1136 | | PyObject_HEAD |
1137 | | PyObject *str; |
1138 | | FieldNameIterator it_field; |
1139 | | } fieldnameiterobject; |
1140 | | |
1141 | | static void |
1142 | | fieldnameiter_dealloc(PyObject *op) |
1143 | 0 | { |
1144 | 0 | fieldnameiterobject *it = (fieldnameiterobject*)op; |
1145 | 0 | Py_XDECREF(it->str); |
1146 | 0 | PyObject_Free(it); |
1147 | 0 | } |
1148 | | |
1149 | | /* returns a tuple: |
1150 | | (is_attr, value) |
1151 | | is_attr is true if we used attribute syntax (e.g., '.foo') |
1152 | | false if we used index syntax (e.g., '[foo]') |
1153 | | value is an integer or string |
1154 | | */ |
1155 | | static PyObject * |
1156 | | fieldnameiter_next(PyObject *op) |
1157 | 0 | { |
1158 | 0 | fieldnameiterobject *it = (fieldnameiterobject*)op; |
1159 | 0 | int result; |
1160 | 0 | int is_attr; |
1161 | 0 | Py_ssize_t idx; |
1162 | 0 | SubString name; |
1163 | |
|
1164 | 0 | result = FieldNameIterator_next(&it->it_field, &is_attr, |
1165 | 0 | &idx, &name); |
1166 | 0 | if (result == 0 || result == 1) |
1167 | | /* if 0, error has already been set, if 1, iterator is empty */ |
1168 | 0 | return NULL; |
1169 | 0 | else { |
1170 | 0 | PyObject* result = NULL; |
1171 | 0 | PyObject* is_attr_obj = NULL; |
1172 | 0 | PyObject* obj = NULL; |
1173 | |
|
1174 | 0 | is_attr_obj = PyBool_FromLong(is_attr); |
1175 | 0 | if (is_attr_obj == NULL) |
1176 | 0 | goto error; |
1177 | | |
1178 | | /* either an integer or a string */ |
1179 | 0 | if (idx != -1) |
1180 | 0 | obj = PyLong_FromSsize_t(idx); |
1181 | 0 | else |
1182 | 0 | obj = SubString_new_object(&name); |
1183 | 0 | if (obj == NULL) |
1184 | 0 | goto error; |
1185 | | |
1186 | | /* return a tuple of values */ |
1187 | 0 | return _PyTuple_FromPairSteal(is_attr_obj, obj); |
1188 | | |
1189 | 0 | error: |
1190 | 0 | Py_XDECREF(is_attr_obj); |
1191 | 0 | Py_XDECREF(obj); |
1192 | 0 | return result; |
1193 | 0 | } |
1194 | 0 | } |
1195 | | |
1196 | | static PyMethodDef fieldnameiter_methods[] = { |
1197 | | {NULL, NULL} /* sentinel */ |
1198 | | }; |
1199 | | |
1200 | | static PyTypeObject PyFieldNameIter_Type = { |
1201 | | PyVarObject_HEAD_INIT(&PyType_Type, 0) |
1202 | | "fieldnameiterator", /* tp_name */ |
1203 | | sizeof(fieldnameiterobject), /* tp_basicsize */ |
1204 | | 0, /* tp_itemsize */ |
1205 | | /* methods */ |
1206 | | fieldnameiter_dealloc, /* tp_dealloc */ |
1207 | | 0, /* tp_vectorcall_offset */ |
1208 | | 0, /* tp_getattr */ |
1209 | | 0, /* tp_setattr */ |
1210 | | 0, /* tp_as_async */ |
1211 | | 0, /* tp_repr */ |
1212 | | 0, /* tp_as_number */ |
1213 | | 0, /* tp_as_sequence */ |
1214 | | 0, /* tp_as_mapping */ |
1215 | | 0, /* tp_hash */ |
1216 | | 0, /* tp_call */ |
1217 | | 0, /* tp_str */ |
1218 | | PyObject_GenericGetAttr, /* tp_getattro */ |
1219 | | 0, /* tp_setattro */ |
1220 | | 0, /* tp_as_buffer */ |
1221 | | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
1222 | | 0, /* tp_doc */ |
1223 | | 0, /* tp_traverse */ |
1224 | | 0, /* tp_clear */ |
1225 | | 0, /* tp_richcompare */ |
1226 | | 0, /* tp_weaklistoffset */ |
1227 | | PyObject_SelfIter, /* tp_iter */ |
1228 | | fieldnameiter_next, /* tp_iternext */ |
1229 | | fieldnameiter_methods, /* tp_methods */ |
1230 | | 0}; |
1231 | | |
1232 | | /* formatter_field_name_split is used to implement |
1233 | | string.Formatter.vformat. It takes a PEP 3101 "field name", and |
1234 | | returns a tuple of (first, rest): "first", the part before the |
1235 | | first '.' or '['; and "rest", an iterator for the rest of the field |
1236 | | name. It's a wrapper around stringlib/unicode_format.h's |
1237 | | field_name_split. The iterator it returns is a |
1238 | | FieldNameIterator */ |
1239 | | static PyObject * |
1240 | | formatter_field_name_split(PyObject *Py_UNUSED(module), PyObject *self) |
1241 | 0 | { |
1242 | 0 | SubString first; |
1243 | 0 | Py_ssize_t first_idx; |
1244 | 0 | fieldnameiterobject *it; |
1245 | |
|
1246 | 0 | PyObject *first_obj = NULL; |
1247 | 0 | PyObject *result = NULL; |
1248 | |
|
1249 | 0 | if (!PyUnicode_Check(self)) { |
1250 | 0 | PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); |
1251 | 0 | return NULL; |
1252 | 0 | } |
1253 | | |
1254 | 0 | it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); |
1255 | 0 | if (it == NULL) |
1256 | 0 | return NULL; |
1257 | | |
1258 | | /* take ownership, give the object to the iterator. this is |
1259 | | just to keep the field_name alive */ |
1260 | 0 | it->str = Py_NewRef(self); |
1261 | | |
1262 | | /* Pass in auto_number = NULL. We'll return an empty string for |
1263 | | first_obj in that case. */ |
1264 | 0 | if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self), |
1265 | 0 | &first, &first_idx, &it->it_field, NULL)) |
1266 | 0 | goto error; |
1267 | | |
1268 | | /* first becomes an integer, if possible; else a string */ |
1269 | 0 | if (first_idx != -1) |
1270 | 0 | first_obj = PyLong_FromSsize_t(first_idx); |
1271 | 0 | else |
1272 | | /* convert "first" into a string object */ |
1273 | 0 | first_obj = SubString_new_object(&first); |
1274 | 0 | if (first_obj == NULL) |
1275 | 0 | goto error; |
1276 | | |
1277 | | /* return a tuple of values */ |
1278 | 0 | return _PyTuple_FromPairSteal(first_obj, (PyObject *)it); |
1279 | | |
1280 | 0 | error: |
1281 | 0 | Py_XDECREF(it); |
1282 | 0 | Py_XDECREF(first_obj); |
1283 | 0 | return result; |
1284 | 0 | } |