/src/cpython/Objects/stringlib/unicode_format.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | unicode_format.h -- implementation of str.format(). |
3 | | */ |
4 | | |
5 | | #include "pycore_complexobject.h" // _PyComplex_FormatAdvancedWriter() |
6 | | #include "pycore_floatobject.h" // _PyFloat_FormatAdvancedWriter() |
7 | | |
8 | | /************************************************************************/ |
9 | | /*********** Global data structures and forward declarations *********/ |
10 | | /************************************************************************/ |
11 | | |
12 | | /* |
13 | | A SubString consists of the characters between two string or |
14 | | unicode pointers. |
15 | | */ |
16 | | typedef struct { |
17 | | PyObject *str; /* borrowed reference */ |
18 | | Py_ssize_t start, end; |
19 | | } SubString; |
20 | | |
21 | | |
/* Records which field-numbering mode, if any, a format string has
   committed to so far. */
typedef enum {
    ANS_INIT = 0,       /* no numeric field has been seen yet */
    ANS_AUTO = 1,       /* fields are numbered automatically */
    ANS_MANUAL = 2      /* fields carry explicit numbers */
} AutoNumberState;
27 | | |
28 | | /* Keeps track of our auto-numbering state, and which number field we're on */ |
29 | | typedef struct { |
30 | | AutoNumberState an_state; |
31 | | int an_field_number; |
32 | | } AutoNumber; |
33 | | |
34 | | |
35 | | /* forward declaration for recursion */ |
36 | | static PyObject * |
37 | | build_string(SubString *input, PyObject *args, PyObject *kwargs, |
38 | | int recursion_depth, AutoNumber *auto_number); |
39 | | |
40 | | |
41 | | |
42 | | /************************************************************************/ |
43 | | /************************** Utility functions ************************/ |
44 | | /************************************************************************/ |
45 | | |
46 | | static void |
47 | | AutoNumber_Init(AutoNumber *auto_number) |
48 | 8.79M | { |
49 | 8.79M | auto_number->an_state = ANS_INIT; |
50 | 8.79M | auto_number->an_field_number = 0; |
51 | 8.79M | } |
52 | | |
53 | | /* fill in a SubString from a pointer and length */ |
54 | | Py_LOCAL_INLINE(void) |
55 | | SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end) |
56 | 166M | { |
57 | 166M | str->str = s; |
58 | 166M | str->start = start; |
59 | 166M | str->end = end; |
60 | 166M | } |
61 | | |
62 | | /* return a new string. if str->str is NULL, return None */ |
63 | | Py_LOCAL_INLINE(PyObject *) |
64 | | SubString_new_object(SubString *str) |
65 | 42 | { |
66 | 42 | if (str->str == NULL) |
67 | 0 | Py_RETURN_NONE; |
68 | 42 | return PyUnicode_Substring(str->str, str->start, str->end); |
69 | 42 | } |
70 | | |
71 | | /* return a new string. if str->str is NULL, return a new empty string */ |
72 | | Py_LOCAL_INLINE(PyObject *) |
73 | | SubString_new_object_or_empty(SubString *str) |
74 | 0 | { |
75 | 0 | if (str->str == NULL) { |
76 | 0 | return Py_GetConstant(Py_CONSTANT_EMPTY_STR); |
77 | 0 | } |
78 | 0 | return SubString_new_object(str); |
79 | 0 | } |
80 | | |
81 | | /* Return 1 if an error has been detected switching between automatic |
82 | | field numbering and manual field specification, else return 0. Set |
83 | | ValueError on error. */ |
84 | | static int |
85 | | autonumber_state_error(AutoNumberState state, int field_name_is_empty) |
86 | 16.4M | { |
87 | 16.4M | if (state == ANS_MANUAL) { |
88 | 64 | if (field_name_is_empty) { |
89 | 0 | PyErr_SetString(PyExc_ValueError, "cannot switch from " |
90 | 0 | "manual field specification to " |
91 | 0 | "automatic field numbering"); |
92 | 0 | return 1; |
93 | 0 | } |
94 | 64 | } |
95 | 16.4M | else { |
96 | 16.4M | if (!field_name_is_empty) { |
97 | 0 | PyErr_SetString(PyExc_ValueError, "cannot switch from " |
98 | 0 | "automatic field numbering to " |
99 | 0 | "manual field specification"); |
100 | 0 | return 1; |
101 | 0 | } |
102 | 16.4M | } |
103 | 16.4M | return 0; |
104 | 16.4M | } |
105 | | |
106 | | |
107 | | /************************************************************************/ |
108 | | /*********** Format string parsing -- integers and identifiers *********/ |
109 | | /************************************************************************/ |
110 | | |
111 | | static Py_ssize_t |
112 | | get_integer(const SubString *str) |
113 | 16.4M | { |
114 | 16.4M | Py_ssize_t accumulator = 0; |
115 | 16.4M | Py_ssize_t digitval; |
116 | 16.4M | Py_ssize_t i; |
117 | | |
118 | | /* empty string is an error */ |
119 | 16.4M | if (str->start >= str->end) |
120 | 16.4M | return -1; |
121 | | |
122 | 170 | for (i = str->start; i < str->end; i++) { |
123 | 106 | digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i)); |
124 | 106 | if (digitval < 0) |
125 | 42 | return -1; |
126 | | /* |
127 | | Detect possible overflow before it happens: |
128 | | |
129 | | accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if |
130 | | accumulator > (PY_SSIZE_T_MAX - digitval) / 10. |
131 | | */ |
132 | 64 | if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { |
133 | 0 | PyErr_Format(PyExc_ValueError, |
134 | 0 | "Too many decimal digits in format string"); |
135 | 0 | return -1; |
136 | 0 | } |
137 | 64 | accumulator = accumulator * 10 + digitval; |
138 | 64 | } |
139 | 64 | return accumulator; |
140 | 106 | } |
141 | | |
142 | | /************************************************************************/ |
143 | | /******** Functions to get field objects and specification strings ******/ |
144 | | /************************************************************************/ |
145 | | |
146 | | /* do the equivalent of obj.name */ |
147 | | static PyObject * |
148 | | getattr(PyObject *obj, SubString *name) |
149 | 0 | { |
150 | 0 | PyObject *newobj; |
151 | 0 | PyObject *str = SubString_new_object(name); |
152 | 0 | if (str == NULL) |
153 | 0 | return NULL; |
154 | 0 | newobj = PyObject_GetAttr(obj, str); |
155 | 0 | Py_DECREF(str); |
156 | 0 | return newobj; |
157 | 0 | } |
158 | | |
159 | | /* do the equivalent of obj[idx], where obj is a sequence */ |
160 | | static PyObject * |
161 | | getitem_sequence(PyObject *obj, Py_ssize_t idx) |
162 | 0 | { |
163 | 0 | return PySequence_GetItem(obj, idx); |
164 | 0 | } |
165 | | |
166 | | /* do the equivalent of obj[idx], where obj is not a sequence */ |
167 | | static PyObject * |
168 | | getitem_idx(PyObject *obj, Py_ssize_t idx) |
169 | 0 | { |
170 | 0 | PyObject *newobj; |
171 | 0 | PyObject *idx_obj = PyLong_FromSsize_t(idx); |
172 | 0 | if (idx_obj == NULL) |
173 | 0 | return NULL; |
174 | 0 | newobj = PyObject_GetItem(obj, idx_obj); |
175 | 0 | Py_DECREF(idx_obj); |
176 | 0 | return newobj; |
177 | 0 | } |
178 | | |
179 | | /* do the equivalent of obj[name] */ |
180 | | static PyObject * |
181 | | getitem_str(PyObject *obj, SubString *name) |
182 | 0 | { |
183 | 0 | PyObject *newobj; |
184 | 0 | PyObject *str = SubString_new_object(name); |
185 | 0 | if (str == NULL) |
186 | 0 | return NULL; |
187 | 0 | newobj = PyObject_GetItem(obj, str); |
188 | 0 | Py_DECREF(str); |
189 | 0 | return newobj; |
190 | 0 | } |
191 | | |
192 | | typedef struct { |
193 | | /* the entire string we're parsing. we assume that someone else |
194 | | is managing its lifetime, and that it will exist for the |
195 | | lifetime of the iterator. can be empty */ |
196 | | SubString str; |
197 | | |
198 | | /* index to where we are inside field_name */ |
199 | | Py_ssize_t index; |
200 | | } FieldNameIterator; |
201 | | |
202 | | |
203 | | static int |
204 | | FieldNameIterator_init(FieldNameIterator *self, PyObject *s, |
205 | | Py_ssize_t start, Py_ssize_t end) |
206 | 16.4M | { |
207 | 16.4M | SubString_init(&self->str, s, start, end); |
208 | 16.4M | self->index = start; |
209 | 16.4M | return 1; |
210 | 16.4M | } |
211 | | |
212 | | static int |
213 | | _FieldNameIterator_attr(FieldNameIterator *self, SubString *name) |
214 | 0 | { |
215 | 0 | Py_UCS4 c; |
216 | |
|
217 | 0 | name->str = self->str.str; |
218 | 0 | name->start = self->index; |
219 | | |
220 | | /* return everything until '.' or '[' */ |
221 | 0 | while (self->index < self->str.end) { |
222 | 0 | c = PyUnicode_READ_CHAR(self->str.str, self->index++); |
223 | 0 | switch (c) { |
224 | 0 | case '[': |
225 | 0 | case '.': |
226 | | /* backup so that we this character will be seen next time */ |
227 | 0 | self->index--; |
228 | 0 | break; |
229 | 0 | default: |
230 | 0 | continue; |
231 | 0 | } |
232 | 0 | break; |
233 | 0 | } |
234 | | /* end of string is okay */ |
235 | 0 | name->end = self->index; |
236 | 0 | return 1; |
237 | 0 | } |
238 | | |
239 | | static int |
240 | | _FieldNameIterator_item(FieldNameIterator *self, SubString *name) |
241 | 0 | { |
242 | 0 | int bracket_seen = 0; |
243 | 0 | Py_UCS4 c; |
244 | |
|
245 | 0 | name->str = self->str.str; |
246 | 0 | name->start = self->index; |
247 | | |
248 | | /* return everything until ']' */ |
249 | 0 | while (self->index < self->str.end) { |
250 | 0 | c = PyUnicode_READ_CHAR(self->str.str, self->index++); |
251 | 0 | switch (c) { |
252 | 0 | case ']': |
253 | 0 | bracket_seen = 1; |
254 | 0 | break; |
255 | 0 | default: |
256 | 0 | continue; |
257 | 0 | } |
258 | 0 | break; |
259 | 0 | } |
260 | | /* make sure we ended with a ']' */ |
261 | 0 | if (!bracket_seen) { |
262 | 0 | PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); |
263 | 0 | return 0; |
264 | 0 | } |
265 | | |
266 | | /* end of string is okay */ |
267 | | /* don't include the ']' */ |
268 | 0 | name->end = self->index-1; |
269 | 0 | return 1; |
270 | 0 | } |
271 | | |
272 | | /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ |
273 | | static int |
274 | | FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, |
275 | | Py_ssize_t *name_idx, SubString *name) |
276 | 16.4M | { |
277 | | /* check at end of input */ |
278 | 16.4M | if (self->index >= self->str.end) |
279 | 16.4M | return 1; |
280 | | |
281 | 0 | switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) { |
282 | 0 | case '.': |
283 | 0 | *is_attribute = 1; |
284 | 0 | if (_FieldNameIterator_attr(self, name) == 0) |
285 | 0 | return 0; |
286 | 0 | *name_idx = -1; |
287 | 0 | break; |
288 | 0 | case '[': |
289 | 0 | *is_attribute = 0; |
290 | 0 | if (_FieldNameIterator_item(self, name) == 0) |
291 | 0 | return 0; |
292 | 0 | *name_idx = get_integer(name); |
293 | 0 | if (*name_idx == -1 && PyErr_Occurred()) |
294 | 0 | return 0; |
295 | 0 | break; |
296 | 0 | default: |
297 | | /* Invalid character follows ']' */ |
298 | 0 | PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may " |
299 | 0 | "follow ']' in format field specifier"); |
300 | 0 | return 0; |
301 | 0 | } |
302 | | |
303 | | /* empty string is an error */ |
304 | 0 | if (name->start == name->end) { |
305 | 0 | PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); |
306 | 0 | return 0; |
307 | 0 | } |
308 | | |
309 | 0 | return 2; |
310 | 0 | } |
311 | | |
312 | | |
313 | | /* input: field_name |
314 | | output: 'first' points to the part before the first '[' or '.' |
315 | | 'first_idx' is -1 if 'first' is not an integer, otherwise |
316 | | it's the value of first converted to an integer |
317 | | 'rest' is an iterator to return the rest |
318 | | */ |
319 | | static int |
320 | | field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first, |
321 | | Py_ssize_t *first_idx, FieldNameIterator *rest, |
322 | | AutoNumber *auto_number) |
323 | 16.4M | { |
324 | 16.4M | Py_UCS4 c; |
325 | 16.4M | Py_ssize_t i = start; |
326 | 16.4M | int field_name_is_empty; |
327 | 16.4M | int using_numeric_index; |
328 | | |
329 | | /* find the part up until the first '.' or '[' */ |
330 | 16.4M | while (i < end) { |
331 | 428 | switch (c = PyUnicode_READ_CHAR(str, i++)) { |
332 | 0 | case '[': |
333 | 0 | case '.': |
334 | | /* backup so that we this character is available to the |
335 | | "rest" iterator */ |
336 | 0 | i--; |
337 | 0 | break; |
338 | 428 | default: |
339 | 428 | continue; |
340 | 428 | } |
341 | 0 | break; |
342 | 428 | } |
343 | | |
344 | | /* set up the return values */ |
345 | 16.4M | SubString_init(first, str, start, i); |
346 | 16.4M | FieldNameIterator_init(rest, str, i, end); |
347 | | |
348 | | /* see if "first" is an integer, in which case it's used as an index */ |
349 | 16.4M | *first_idx = get_integer(first); |
350 | 16.4M | if (*first_idx == -1 && PyErr_Occurred()) |
351 | 0 | return 0; |
352 | | |
353 | 16.4M | field_name_is_empty = first->start >= first->end; |
354 | | |
355 | | /* If the field name is omitted or if we have a numeric index |
356 | | specified, then we're doing numeric indexing into args. */ |
357 | 16.4M | using_numeric_index = field_name_is_empty || *first_idx != -1; |
358 | | |
359 | | /* We always get here exactly one time for each field we're |
360 | | processing. And we get here in field order (counting by left |
361 | | braces). So this is the perfect place to handle automatic field |
362 | | numbering if the field name is omitted. */ |
363 | | |
364 | | /* Check if we need to do the auto-numbering. It's not needed if |
365 | | we're called from string.Format routines, because it's handled |
366 | | in that class by itself. */ |
367 | 16.4M | if (auto_number) { |
368 | | /* Initialize our auto numbering state if this is the first |
369 | | time we're either auto-numbering or manually numbering. */ |
370 | 16.4M | if (auto_number->an_state == ANS_INIT && using_numeric_index) |
371 | 8.79M | auto_number->an_state = field_name_is_empty ? |
372 | 8.79M | ANS_AUTO : ANS_MANUAL; |
373 | | |
374 | | /* Make sure our state is consistent with what we're doing |
375 | | this time through. Only check if we're using a numeric |
376 | | index. */ |
377 | 16.4M | if (using_numeric_index) |
378 | 16.4M | if (autonumber_state_error(auto_number->an_state, |
379 | 16.4M | field_name_is_empty)) |
380 | 0 | return 0; |
381 | | /* Zero length field means we want to do auto-numbering of the |
382 | | fields. */ |
383 | 16.4M | if (field_name_is_empty) |
384 | 16.4M | *first_idx = (auto_number->an_field_number)++; |
385 | 16.4M | } |
386 | | |
387 | 16.4M | return 1; |
388 | 16.4M | } |
389 | | |
390 | | |
391 | | /* |
392 | | get_field_object returns the object inside {}, before the |
393 | | format_spec. It handles getindex and getattr lookups and consumes |
394 | | the entire input string. |
395 | | */ |
396 | | static PyObject * |
397 | | get_field_object(SubString *input, PyObject *args, PyObject *kwargs, |
398 | | AutoNumber *auto_number) |
399 | 16.4M | { |
400 | 16.4M | PyObject *obj = NULL; |
401 | 16.4M | int ok; |
402 | 16.4M | int is_attribute; |
403 | 16.4M | SubString name; |
404 | 16.4M | SubString first; |
405 | 16.4M | Py_ssize_t index; |
406 | 16.4M | FieldNameIterator rest; |
407 | | |
408 | 16.4M | if (!field_name_split(input->str, input->start, input->end, &first, |
409 | 16.4M | &index, &rest, auto_number)) { |
410 | 0 | goto error; |
411 | 0 | } |
412 | | |
413 | 16.4M | if (index == -1) { |
414 | | /* look up in kwargs */ |
415 | 42 | PyObject *key = SubString_new_object(&first); |
416 | 42 | if (key == NULL) { |
417 | 0 | goto error; |
418 | 0 | } |
419 | 42 | if (kwargs == NULL) { |
420 | 0 | PyErr_SetObject(PyExc_KeyError, key); |
421 | 0 | Py_DECREF(key); |
422 | 0 | goto error; |
423 | 0 | } |
424 | | /* Use PyObject_GetItem instead of PyDict_GetItem because this |
425 | | code is no longer just used with kwargs. It might be passed |
426 | | a non-dict when called through format_map. */ |
427 | 42 | obj = PyObject_GetItem(kwargs, key); |
428 | 42 | Py_DECREF(key); |
429 | 42 | if (obj == NULL) { |
430 | 0 | goto error; |
431 | 0 | } |
432 | 42 | } |
433 | 16.4M | else { |
434 | | /* If args is NULL, we have a format string with a positional field |
435 | | with only kwargs to retrieve it from. This can only happen when |
436 | | used with format_map(), where positional arguments are not |
437 | | allowed. */ |
438 | 16.4M | if (args == NULL) { |
439 | 0 | PyErr_SetString(PyExc_ValueError, "Format string contains " |
440 | 0 | "positional fields"); |
441 | 0 | goto error; |
442 | 0 | } |
443 | | |
444 | | /* look up in args */ |
445 | 16.4M | obj = PySequence_GetItem(args, index); |
446 | 16.4M | if (obj == NULL) { |
447 | 0 | PyErr_Format(PyExc_IndexError, |
448 | 0 | "Replacement index %zd out of range for positional " |
449 | 0 | "args tuple", |
450 | 0 | index); |
451 | 0 | goto error; |
452 | 0 | } |
453 | 16.4M | } |
454 | | |
455 | | /* iterate over the rest of the field_name */ |
456 | 16.4M | while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, |
457 | 16.4M | &name)) == 2) { |
458 | 0 | PyObject *tmp; |
459 | |
|
460 | 0 | if (is_attribute) |
461 | | /* getattr lookup "." */ |
462 | 0 | tmp = getattr(obj, &name); |
463 | 0 | else |
464 | | /* getitem lookup "[]" */ |
465 | 0 | if (index == -1) |
466 | 0 | tmp = getitem_str(obj, &name); |
467 | 0 | else |
468 | 0 | if (PySequence_Check(obj)) |
469 | 0 | tmp = getitem_sequence(obj, index); |
470 | 0 | else |
471 | | /* not a sequence */ |
472 | 0 | tmp = getitem_idx(obj, index); |
473 | 0 | if (tmp == NULL) |
474 | 0 | goto error; |
475 | | |
476 | | /* assign to obj */ |
477 | 0 | Py_SETREF(obj, tmp); |
478 | 0 | } |
479 | | /* end of iterator, this is the non-error case */ |
480 | 16.4M | if (ok == 1) |
481 | 16.4M | return obj; |
482 | 0 | error: |
483 | 0 | Py_XDECREF(obj); |
484 | 0 | return NULL; |
485 | 16.4M | } |
486 | | |
487 | | /************************************************************************/ |
488 | | /***************** Field rendering functions **************************/ |
489 | | /************************************************************************/ |
490 | | |
491 | | /* |
492 | | render_field() is the main function in this section. It takes the |
493 | | field object and field specification string generated by |
494 | | get_field_and_spec, and renders the field into the output string. |
495 | | |
496 | | render_field calls fieldobj.__format__(format_spec) method, and |
497 | | appends to the output. |
498 | | */ |
499 | | static int |
500 | | render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer) |
501 | 16.4M | { |
502 | 16.4M | int ok = 0; |
503 | 16.4M | PyObject *result = NULL; |
504 | 16.4M | PyObject *format_spec_object = NULL; |
505 | 16.4M | int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; |
506 | 16.4M | int err; |
507 | | |
508 | | /* If we know the type exactly, skip the lookup of __format__ and just |
509 | | call the formatter directly. */ |
510 | 16.4M | if (PyUnicode_CheckExact(fieldobj)) |
511 | 16.2M | formatter = _PyUnicode_FormatAdvancedWriter; |
512 | 288k | else if (PyLong_CheckExact(fieldobj)) |
513 | 79.3k | formatter = _PyLong_FormatAdvancedWriter; |
514 | 208k | else if (PyFloat_CheckExact(fieldobj)) |
515 | 0 | formatter = _PyFloat_FormatAdvancedWriter; |
516 | 208k | else if (PyComplex_CheckExact(fieldobj)) |
517 | 0 | formatter = _PyComplex_FormatAdvancedWriter; |
518 | | |
519 | 16.4M | if (formatter) { |
520 | | /* we know exactly which formatter will be called when __format__ is |
521 | | looked up, so call it directly, instead. */ |
522 | 16.2M | err = formatter(writer, fieldobj, format_spec->str, |
523 | 16.2M | format_spec->start, format_spec->end); |
524 | 16.2M | return (err == 0); |
525 | 16.2M | } |
526 | 208k | else { |
527 | | /* We need to create an object out of the pointers we have, because |
528 | | __format__ takes a string/unicode object for format_spec. */ |
529 | 208k | if (format_spec->str) |
530 | 0 | format_spec_object = PyUnicode_Substring(format_spec->str, |
531 | 0 | format_spec->start, |
532 | 0 | format_spec->end); |
533 | 208k | else |
534 | 208k | format_spec_object = Py_GetConstant(Py_CONSTANT_EMPTY_STR); |
535 | 208k | if (format_spec_object == NULL) |
536 | 0 | goto done; |
537 | | |
538 | 208k | result = PyObject_Format(fieldobj, format_spec_object); |
539 | 208k | } |
540 | 208k | if (result == NULL) |
541 | 2 | goto done; |
542 | | |
543 | 208k | if (_PyUnicodeWriter_WriteStr(writer, result) == -1) |
544 | 0 | goto done; |
545 | 208k | ok = 1; |
546 | | |
547 | 208k | done: |
548 | 208k | Py_XDECREF(format_spec_object); |
549 | 208k | Py_XDECREF(result); |
550 | 208k | return ok; |
551 | 208k | } |
552 | | |
553 | | static int |
554 | | parse_field(SubString *str, SubString *field_name, SubString *format_spec, |
555 | | int *format_spec_needs_expanding, Py_UCS4 *conversion) |
556 | 16.4M | { |
557 | | /* Note this function works if the field name is zero length, |
558 | | which is good. Zero length field names are handled later, in |
559 | | field_name_split. */ |
560 | | |
561 | 16.4M | Py_UCS4 c = 0; |
562 | | |
563 | | /* initialize these, as they may be empty */ |
564 | 16.4M | *conversion = '\0'; |
565 | 16.4M | SubString_init(format_spec, NULL, 0, 0); |
566 | | |
567 | | /* Search for the field name. it's terminated by the end of |
568 | | the string, or a ':' or '!' */ |
569 | 16.4M | field_name->str = str->str; |
570 | 16.4M | field_name->start = str->start; |
571 | 16.4M | while (str->start < str->end) { |
572 | 16.4M | switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { |
573 | 0 | case '{': |
574 | 0 | PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name"); |
575 | 0 | return 0; |
576 | 0 | case '[': |
577 | 0 | for (; str->start < str->end; str->start++) |
578 | 0 | if (PyUnicode_READ_CHAR(str->str, str->start) == ']') |
579 | 0 | break; |
580 | 0 | continue; |
581 | 15.9M | case '}': |
582 | 15.9M | case ':': |
583 | 16.4M | case '!': |
584 | 16.4M | break; |
585 | 428 | default: |
586 | 428 | continue; |
587 | 16.4M | } |
588 | 16.4M | break; |
589 | 16.4M | } |
590 | | |
591 | 16.4M | field_name->end = str->start - 1; |
592 | 16.4M | if (c == '!' || c == ':') { |
593 | 552k | Py_ssize_t count; |
594 | | /* we have a format specifier and/or a conversion */ |
595 | | /* don't include the last character */ |
596 | | |
597 | | /* see if there's a conversion specifier */ |
598 | 552k | if (c == '!') { |
599 | | /* there must be another character present */ |
600 | 552k | if (str->start >= str->end) { |
601 | 0 | PyErr_SetString(PyExc_ValueError, |
602 | 0 | "end of string while looking for conversion " |
603 | 0 | "specifier"); |
604 | 0 | return 0; |
605 | 0 | } |
606 | 552k | *conversion = PyUnicode_READ_CHAR(str->str, str->start++); |
607 | | |
608 | 552k | if (str->start < str->end) { |
609 | 552k | c = PyUnicode_READ_CHAR(str->str, str->start++); |
610 | 552k | if (c == '}') |
611 | 552k | return 1; |
612 | 0 | if (c != ':') { |
613 | 0 | PyErr_SetString(PyExc_ValueError, |
614 | 0 | "expected ':' after conversion specifier"); |
615 | 0 | return 0; |
616 | 0 | } |
617 | 0 | } |
618 | 552k | } |
619 | 64 | format_spec->str = str->str; |
620 | 64 | format_spec->start = str->start; |
621 | 64 | count = 1; |
622 | 256 | while (str->start < str->end) { |
623 | 256 | switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { |
624 | 0 | case '{': |
625 | 0 | *format_spec_needs_expanding = 1; |
626 | 0 | count++; |
627 | 0 | break; |
628 | 64 | case '}': |
629 | 64 | count--; |
630 | 64 | if (count == 0) { |
631 | 64 | format_spec->end = str->start - 1; |
632 | 64 | return 1; |
633 | 64 | } |
634 | 0 | break; |
635 | 192 | default: |
636 | 192 | break; |
637 | 256 | } |
638 | 256 | } |
639 | | |
640 | 0 | PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec"); |
641 | 0 | return 0; |
642 | 64 | } |
643 | 15.9M | else if (c != '}') { |
644 | 0 | PyErr_SetString(PyExc_ValueError, "expected '}' before end of string"); |
645 | 0 | return 0; |
646 | 0 | } |
647 | | |
648 | 15.9M | return 1; |
649 | 16.4M | } |
650 | | |
651 | | /************************************************************************/ |
652 | | /******* Output string allocation and escape-to-markup processing ******/ |
653 | | /************************************************************************/ |
654 | | |
655 | | /* MarkupIterator breaks the string into pieces of either literal |
656 | | text, or things inside {} that need to be marked up. it is |
657 | | designed to make it easy to wrap a Python iterator around it, for |
658 | | use with the Formatter class */ |
659 | | |
660 | | typedef struct { |
661 | | SubString str; |
662 | | } MarkupIterator; |
663 | | |
664 | | static int |
665 | | MarkupIterator_init(MarkupIterator *self, PyObject *str, |
666 | | Py_ssize_t start, Py_ssize_t end) |
667 | 8.79M | { |
668 | 8.79M | SubString_init(&self->str, str, start, end); |
669 | 8.79M | return 1; |
670 | 8.79M | } |
671 | | |
672 | | /* returns 0 on error, 1 on non-error termination, and 2 if it got a |
673 | | string (or something to be expanded) */ |
674 | | static int |
675 | | MarkupIterator_next(MarkupIterator *self, SubString *literal, |
676 | | int *field_present, SubString *field_name, |
677 | | SubString *format_spec, Py_UCS4 *conversion, |
678 | | int *format_spec_needs_expanding) |
679 | 33.2M | { |
680 | 33.2M | int at_end; |
681 | 33.2M | Py_UCS4 c = 0; |
682 | 33.2M | Py_ssize_t start; |
683 | 33.2M | Py_ssize_t len; |
684 | 33.2M | int markup_follows = 0; |
685 | | |
686 | | /* initialize all of the output variables */ |
687 | 33.2M | SubString_init(literal, NULL, 0, 0); |
688 | 33.2M | SubString_init(field_name, NULL, 0, 0); |
689 | 33.2M | SubString_init(format_spec, NULL, 0, 0); |
690 | 33.2M | *conversion = '\0'; |
691 | 33.2M | *format_spec_needs_expanding = 0; |
692 | 33.2M | *field_present = 0; |
693 | | |
694 | | /* No more input, end of iterator. This is the normal exit |
695 | | path. */ |
696 | 33.2M | if (self->str.start >= self->str.end) |
697 | 8.79M | return 1; |
698 | | |
699 | 24.4M | start = self->str.start; |
700 | | |
701 | | /* First read any literal text. Read until the end of string, an |
702 | | escaped '{' or '}', or an unescaped '{'. In order to never |
703 | | allocate memory and so I can just pass pointers around, if |
704 | | there's an escaped '{' or '}' then we'll return the literal |
705 | | including the brace, but no format object. The next time |
706 | | through, we'll return the rest of the literal, skipping past |
707 | | the second consecutive brace. */ |
708 | 76.3M | while (self->str.start < self->str.end) { |
709 | 68.3M | switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { |
710 | 16.4M | case '{': |
711 | 16.4M | case '}': |
712 | 16.4M | markup_follows = 1; |
713 | 16.4M | break; |
714 | 51.9M | default: |
715 | 51.9M | continue; |
716 | 68.3M | } |
717 | 16.4M | break; |
718 | 68.3M | } |
719 | | |
720 | 24.4M | at_end = self->str.start >= self->str.end; |
721 | 24.4M | len = self->str.start - start; |
722 | | |
723 | 24.4M | if ((c == '}') && (at_end || |
724 | 0 | (c != PyUnicode_READ_CHAR(self->str.str, |
725 | 0 | self->str.start)))) { |
726 | 0 | PyErr_SetString(PyExc_ValueError, "Single '}' encountered " |
727 | 0 | "in format string"); |
728 | 0 | return 0; |
729 | 0 | } |
730 | 24.4M | if (at_end && c == '{') { |
731 | 0 | PyErr_SetString(PyExc_ValueError, "Single '{' encountered " |
732 | 0 | "in format string"); |
733 | 0 | return 0; |
734 | 0 | } |
735 | 24.4M | if (!at_end) { |
736 | 16.4M | if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) { |
737 | | /* escaped } or {, skip it in the input. there is no |
738 | | markup object following us, just this literal text */ |
739 | 0 | self->str.start++; |
740 | 0 | markup_follows = 0; |
741 | 0 | } |
742 | 16.4M | else |
743 | 16.4M | len--; |
744 | 16.4M | } |
745 | | |
746 | | /* record the literal text */ |
747 | 24.4M | literal->str = self->str.str; |
748 | 24.4M | literal->start = start; |
749 | 24.4M | literal->end = start + len; |
750 | | |
751 | 24.4M | if (!markup_follows) |
752 | 7.97M | return 2; |
753 | | |
754 | | /* this is markup; parse the field */ |
755 | 16.4M | *field_present = 1; |
756 | 16.4M | if (!parse_field(&self->str, field_name, format_spec, |
757 | 16.4M | format_spec_needs_expanding, conversion)) |
758 | 0 | return 0; |
759 | 16.4M | return 2; |
760 | 16.4M | } |
761 | | |
762 | | |
763 | | /* do the !r or !s conversion on obj */ |
764 | | static PyObject * |
765 | | do_conversion(PyObject *obj, Py_UCS4 conversion) |
766 | 552k | { |
767 | | /* XXX in pre-3.0, do we need to convert this to unicode, since it |
768 | | might have returned a string? */ |
769 | 552k | switch (conversion) { |
770 | 552k | case 'r': |
771 | 552k | return PyObject_Repr(obj); |
772 | 0 | case 's': |
773 | 0 | return PyObject_Str(obj); |
774 | 0 | case 'a': |
775 | 0 | return PyObject_ASCII(obj); |
776 | 0 | default: |
777 | 0 | if (conversion > 32 && conversion < 127) { |
778 | | /* It's the ASCII subrange; casting to char is safe |
779 | | (assuming the execution character set is an ASCII |
780 | | superset). */ |
781 | 0 | PyErr_Format(PyExc_ValueError, |
782 | 0 | "Unknown conversion specifier %c", |
783 | 0 | (char)conversion); |
784 | 0 | } else |
785 | 0 | PyErr_Format(PyExc_ValueError, |
786 | 0 | "Unknown conversion specifier \\x%x", |
787 | 0 | (unsigned int)conversion); |
788 | 0 | return NULL; |
789 | 552k | } |
790 | 552k | } |
791 | | |
792 | | /* given: |
793 | | |
794 | | {field_name!conversion:format_spec} |
795 | | |
796 | | compute the result and write it to output. |
797 | | format_spec_needs_expanding is an optimization. if it's false, |
798 | | just output the string directly, otherwise recursively expand the |
799 | | format_spec string. |
800 | | |
801 | | field_name is allowed to be zero length, in which case we |
802 | | are doing auto field numbering. |
803 | | */ |
804 | | |
805 | | static int |
806 | | output_markup(SubString *field_name, SubString *format_spec, |
807 | | int format_spec_needs_expanding, Py_UCS4 conversion, |
808 | | _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs, |
809 | | int recursion_depth, AutoNumber *auto_number) |
810 | 16.4M | { |
811 | 16.4M | PyObject *tmp = NULL; |
812 | 16.4M | PyObject *fieldobj = NULL; |
813 | 16.4M | SubString expanded_format_spec; |
814 | 16.4M | SubString *actual_format_spec; |
815 | 16.4M | int result = 0; |
816 | | |
817 | | /* convert field_name to an object */ |
818 | 16.4M | fieldobj = get_field_object(field_name, args, kwargs, auto_number); |
819 | 16.4M | if (fieldobj == NULL) |
820 | 0 | goto done; |
821 | | |
822 | 16.4M | if (conversion != '\0') { |
823 | 552k | tmp = do_conversion(fieldobj, conversion); |
824 | 552k | if (tmp == NULL) |
825 | 0 | goto done; |
826 | | |
827 | | /* do the assignment, transferring ownership: fieldobj = tmp */ |
828 | 552k | Py_SETREF(fieldobj, tmp); |
829 | 552k | tmp = NULL; |
830 | 552k | } |
831 | | |
832 | | /* if needed, recursively compute the format_spec */ |
833 | 16.4M | if (format_spec_needs_expanding) { |
834 | 0 | tmp = build_string(format_spec, args, kwargs, recursion_depth-1, |
835 | 0 | auto_number); |
836 | 0 | if (tmp == NULL) |
837 | 0 | goto done; |
838 | | |
839 | | /* note that in the case we're expanding the format string, |
840 | | tmp must be kept around until after the call to |
841 | | render_field. */ |
842 | 0 | SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp)); |
843 | 0 | actual_format_spec = &expanded_format_spec; |
844 | 0 | } |
845 | 16.4M | else |
846 | 16.4M | actual_format_spec = format_spec; |
847 | | |
848 | 16.4M | if (render_field(fieldobj, actual_format_spec, writer) == 0) |
849 | 2 | goto done; |
850 | | |
851 | 16.4M | result = 1; |
852 | | |
853 | 16.4M | done: |
854 | 16.4M | Py_XDECREF(fieldobj); |
855 | 16.4M | Py_XDECREF(tmp); |
856 | | |
857 | 16.4M | return result; |
858 | 16.4M | } |
859 | | |
860 | | /* |
861 | | do_markup is the top-level loop for the format() method. It |
862 | | searches through the format string for escapes to markup codes, and |
863 | | calls other functions to move non-markup text to the output, |
864 | | and to perform the markup to the output. |
865 | | */ |
866 | | static int |
867 | | do_markup(SubString *input, PyObject *args, PyObject *kwargs, |
868 | | _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number) |
869 | 8.79M | { |
870 | 8.79M | MarkupIterator iter; |
871 | 8.79M | int format_spec_needs_expanding; |
872 | 8.79M | int result; |
873 | 8.79M | int field_present; |
874 | 8.79M | SubString literal; |
875 | 8.79M | SubString field_name; |
876 | 8.79M | SubString format_spec; |
877 | 8.79M | Py_UCS4 conversion; |
878 | | |
879 | 8.79M | MarkupIterator_init(&iter, input->str, input->start, input->end); |
880 | 33.2M | while ((result = MarkupIterator_next(&iter, &literal, &field_present, |
881 | 33.2M | &field_name, &format_spec, |
882 | 33.2M | &conversion, |
883 | 33.2M | &format_spec_needs_expanding)) == 2) { |
884 | 24.4M | if (literal.end != literal.start) { |
885 | 16.8M | if (!field_present && iter.str.start == iter.str.end) |
886 | 7.97M | writer->overallocate = 0; |
887 | 16.8M | if (_PyUnicodeWriter_WriteSubstring(writer, literal.str, |
888 | 16.8M | literal.start, literal.end) < 0) |
889 | 0 | return 0; |
890 | 16.8M | } |
891 | | |
892 | 24.4M | if (field_present) { |
893 | 16.4M | if (iter.str.start == iter.str.end) |
894 | 815k | writer->overallocate = 0; |
895 | 16.4M | if (!output_markup(&field_name, &format_spec, |
896 | 16.4M | format_spec_needs_expanding, conversion, writer, |
897 | 16.4M | args, kwargs, recursion_depth, auto_number)) |
898 | 2 | return 0; |
899 | 16.4M | } |
900 | 24.4M | } |
901 | 8.79M | return result; |
902 | 8.79M | } |
903 | | |
904 | | |
905 | | /* |
906 | | build_string allocates the output string and then |
907 | | calls do_markup to do the heavy lifting. |
908 | | */ |
909 | | static PyObject * |
910 | | build_string(SubString *input, PyObject *args, PyObject *kwargs, |
911 | | int recursion_depth, AutoNumber *auto_number) |
912 | 8.79M | { |
913 | 8.79M | _PyUnicodeWriter writer; |
914 | | |
915 | | /* check the recursion level */ |
916 | 8.79M | if (recursion_depth <= 0) { |
917 | 0 | PyErr_SetString(PyExc_ValueError, |
918 | 0 | "Max string recursion exceeded"); |
919 | 0 | return NULL; |
920 | 0 | } |
921 | | |
922 | 8.79M | _PyUnicodeWriter_Init(&writer); |
923 | 8.79M | writer.overallocate = 1; |
924 | 8.79M | writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100; |
925 | | |
926 | 8.79M | if (!do_markup(input, args, kwargs, &writer, recursion_depth, |
927 | 8.79M | auto_number)) { |
928 | 2 | _PyUnicodeWriter_Dealloc(&writer); |
929 | 2 | return NULL; |
930 | 2 | } |
931 | | |
932 | 8.79M | return _PyUnicodeWriter_Finish(&writer); |
933 | 8.79M | } |
934 | | |
935 | | /************************************************************************/ |
936 | | /*********** main routine ***********************************************/ |
937 | | /************************************************************************/ |
938 | | |
939 | | /* this is the main entry point */ |
940 | | static PyObject * |
941 | | do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) |
942 | 8.79M | { |
943 | 8.79M | SubString input; |
944 | | |
945 | | /* PEP 3101 says only 2 levels, so that |
946 | | "{0:{1}}".format('abc', 's') # works |
947 | | "{0:{1:{2}}}".format('abc', 's', '') # fails |
948 | | */ |
949 | 8.79M | int recursion_depth = 2; |
950 | | |
951 | 8.79M | AutoNumber auto_number; |
952 | 8.79M | AutoNumber_Init(&auto_number); |
953 | 8.79M | SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self)); |
954 | 8.79M | return build_string(&input, args, kwargs, recursion_depth, &auto_number); |
955 | 8.79M | } |
956 | | |
957 | | static PyObject * |
958 | | do_string_format_map(PyObject *self, PyObject *obj) |
959 | 0 | { |
960 | 0 | return do_string_format(self, NULL, obj); |
961 | 0 | } |
962 | | |
963 | | |
964 | | /************************************************************************/ |
965 | | /*********** formatteriterator ******************************************/ |
966 | | /************************************************************************/ |
967 | | |
968 | | /* This is used to implement string.Formatter.vparse(). It exists so |
969 | | Formatter can share code with the built in unicode.format() method. |
970 | | It's really just a wrapper around MarkupIterator that is callable |
971 | | from Python. */ |
972 | | |
973 | | typedef struct { |
974 | | PyObject_HEAD |
975 | | PyObject *str; |
976 | | MarkupIterator it_markup; |
977 | | } formatteriterobject; |
978 | | |
979 | | static void |
980 | | formatteriter_dealloc(PyObject *op) |
981 | 0 | { |
982 | 0 | formatteriterobject *it = (formatteriterobject*)op; |
983 | 0 | Py_XDECREF(it->str); |
984 | 0 | PyObject_Free(it); |
985 | 0 | } |
986 | | |
987 | | /* returns a tuple: |
988 | | (literal, field_name, format_spec, conversion) |
989 | | |
990 | | literal is any literal text to output. might be zero length |
991 | | field_name is the string before the ':'. might be None |
992 | | format_spec is the string after the ':'. mibht be None |
993 | | conversion is either None, or the string after the '!' |
994 | | */ |
995 | | static PyObject * |
996 | | formatteriter_next(PyObject *op) |
997 | 0 | { |
998 | 0 | formatteriterobject *it = (formatteriterobject*)op; |
999 | 0 | SubString literal; |
1000 | 0 | SubString field_name; |
1001 | 0 | SubString format_spec; |
1002 | 0 | Py_UCS4 conversion; |
1003 | 0 | int format_spec_needs_expanding; |
1004 | 0 | int field_present; |
1005 | 0 | int result = MarkupIterator_next(&it->it_markup, &literal, &field_present, |
1006 | 0 | &field_name, &format_spec, &conversion, |
1007 | 0 | &format_spec_needs_expanding); |
1008 | | |
1009 | | /* all of the SubString objects point into it->str, so no |
1010 | | memory management needs to be done on them */ |
1011 | 0 | assert(0 <= result && result <= 2); |
1012 | 0 | if (result == 0 || result == 1) |
1013 | | /* if 0, error has already been set, if 1, iterator is empty */ |
1014 | 0 | return NULL; |
1015 | 0 | else { |
1016 | 0 | PyObject *literal_str = NULL; |
1017 | 0 | PyObject *field_name_str = NULL; |
1018 | 0 | PyObject *format_spec_str = NULL; |
1019 | 0 | PyObject *conversion_str = NULL; |
1020 | 0 | PyObject *tuple = NULL; |
1021 | |
|
1022 | 0 | literal_str = SubString_new_object(&literal); |
1023 | 0 | if (literal_str == NULL) |
1024 | 0 | goto done; |
1025 | | |
1026 | 0 | field_name_str = SubString_new_object(&field_name); |
1027 | 0 | if (field_name_str == NULL) |
1028 | 0 | goto done; |
1029 | | |
1030 | | /* if field_name is non-zero length, return a string for |
1031 | | format_spec (even if zero length), else return None */ |
1032 | 0 | format_spec_str = (field_present ? |
1033 | 0 | SubString_new_object_or_empty : |
1034 | 0 | SubString_new_object)(&format_spec); |
1035 | 0 | if (format_spec_str == NULL) |
1036 | 0 | goto done; |
1037 | | |
1038 | | /* if the conversion is not specified, return a None, |
1039 | | otherwise create a one length string with the conversion |
1040 | | character */ |
1041 | 0 | if (conversion == '\0') { |
1042 | 0 | conversion_str = Py_NewRef(Py_None); |
1043 | 0 | } |
1044 | 0 | else |
1045 | 0 | conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, |
1046 | 0 | &conversion, 1); |
1047 | 0 | if (conversion_str == NULL) |
1048 | 0 | goto done; |
1049 | | |
1050 | 0 | tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, |
1051 | 0 | conversion_str); |
1052 | 0 | done: |
1053 | 0 | Py_XDECREF(literal_str); |
1054 | 0 | Py_XDECREF(field_name_str); |
1055 | 0 | Py_XDECREF(format_spec_str); |
1056 | 0 | Py_XDECREF(conversion_str); |
1057 | 0 | return tuple; |
1058 | 0 | } |
1059 | 0 | } |
1060 | | |
1061 | | static PyMethodDef formatteriter_methods[] = { |
1062 | | {NULL, NULL} /* sentinel */ |
1063 | | }; |
1064 | | |
1065 | | static PyTypeObject PyFormatterIter_Type = { |
1066 | | PyVarObject_HEAD_INIT(&PyType_Type, 0) |
1067 | | "formatteriterator", /* tp_name */ |
1068 | | sizeof(formatteriterobject), /* tp_basicsize */ |
1069 | | 0, /* tp_itemsize */ |
1070 | | /* methods */ |
1071 | | formatteriter_dealloc, /* tp_dealloc */ |
1072 | | 0, /* tp_vectorcall_offset */ |
1073 | | 0, /* tp_getattr */ |
1074 | | 0, /* tp_setattr */ |
1075 | | 0, /* tp_as_async */ |
1076 | | 0, /* tp_repr */ |
1077 | | 0, /* tp_as_number */ |
1078 | | 0, /* tp_as_sequence */ |
1079 | | 0, /* tp_as_mapping */ |
1080 | | 0, /* tp_hash */ |
1081 | | 0, /* tp_call */ |
1082 | | 0, /* tp_str */ |
1083 | | PyObject_GenericGetAttr, /* tp_getattro */ |
1084 | | 0, /* tp_setattro */ |
1085 | | 0, /* tp_as_buffer */ |
1086 | | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
1087 | | 0, /* tp_doc */ |
1088 | | 0, /* tp_traverse */ |
1089 | | 0, /* tp_clear */ |
1090 | | 0, /* tp_richcompare */ |
1091 | | 0, /* tp_weaklistoffset */ |
1092 | | PyObject_SelfIter, /* tp_iter */ |
1093 | | formatteriter_next, /* tp_iternext */ |
1094 | | formatteriter_methods, /* tp_methods */ |
1095 | | 0, |
1096 | | }; |
1097 | | |
1098 | | /* unicode_formatter_parser is used to implement |
1099 | | string.Formatter.vformat. it parses a string and returns tuples |
1100 | | describing the parsed elements. It's a wrapper around |
1101 | | stringlib/string_format.h's MarkupIterator */ |
1102 | | static PyObject * |
1103 | | formatter_parser(PyObject *Py_UNUSED(module), PyObject *self) |
1104 | 0 | { |
1105 | 0 | formatteriterobject *it; |
1106 | |
|
1107 | 0 | if (!PyUnicode_Check(self)) { |
1108 | 0 | PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); |
1109 | 0 | return NULL; |
1110 | 0 | } |
1111 | | |
1112 | 0 | it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); |
1113 | 0 | if (it == NULL) |
1114 | 0 | return NULL; |
1115 | | |
1116 | | /* take ownership, give the object to the iterator */ |
1117 | 0 | it->str = Py_NewRef(self); |
1118 | | |
1119 | | /* initialize the contained MarkupIterator */ |
1120 | 0 | MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self)); |
1121 | 0 | return (PyObject *)it; |
1122 | 0 | } |
1123 | | |
1124 | | |
1125 | | /************************************************************************/ |
1126 | | /*********** fieldnameiterator ******************************************/ |
1127 | | /************************************************************************/ |
1128 | | |
1129 | | |
1130 | | /* This is used to implement string.Formatter.vparse(). It parses the |
1131 | | field name into attribute and item values. It's a Python-callable |
1132 | | wrapper around FieldNameIterator */ |
1133 | | |
1134 | | typedef struct { |
1135 | | PyObject_HEAD |
1136 | | PyObject *str; |
1137 | | FieldNameIterator it_field; |
1138 | | } fieldnameiterobject; |
1139 | | |
1140 | | static void |
1141 | | fieldnameiter_dealloc(PyObject *op) |
1142 | 0 | { |
1143 | 0 | fieldnameiterobject *it = (fieldnameiterobject*)op; |
1144 | 0 | Py_XDECREF(it->str); |
1145 | 0 | PyObject_Free(it); |
1146 | 0 | } |
1147 | | |
1148 | | /* returns a tuple: |
1149 | | (is_attr, value) |
1150 | | is_attr is true if we used attribute syntax (e.g., '.foo') |
1151 | | false if we used index syntax (e.g., '[foo]') |
1152 | | value is an integer or string |
1153 | | */ |
1154 | | static PyObject * |
1155 | | fieldnameiter_next(PyObject *op) |
1156 | 0 | { |
1157 | 0 | fieldnameiterobject *it = (fieldnameiterobject*)op; |
1158 | 0 | int result; |
1159 | 0 | int is_attr; |
1160 | 0 | Py_ssize_t idx; |
1161 | 0 | SubString name; |
1162 | |
|
1163 | 0 | result = FieldNameIterator_next(&it->it_field, &is_attr, |
1164 | 0 | &idx, &name); |
1165 | 0 | if (result == 0 || result == 1) |
1166 | | /* if 0, error has already been set, if 1, iterator is empty */ |
1167 | 0 | return NULL; |
1168 | 0 | else { |
1169 | 0 | PyObject* result = NULL; |
1170 | 0 | PyObject* is_attr_obj = NULL; |
1171 | 0 | PyObject* obj = NULL; |
1172 | |
|
1173 | 0 | is_attr_obj = PyBool_FromLong(is_attr); |
1174 | 0 | if (is_attr_obj == NULL) |
1175 | 0 | goto done; |
1176 | | |
1177 | | /* either an integer or a string */ |
1178 | 0 | if (idx != -1) |
1179 | 0 | obj = PyLong_FromSsize_t(idx); |
1180 | 0 | else |
1181 | 0 | obj = SubString_new_object(&name); |
1182 | 0 | if (obj == NULL) |
1183 | 0 | goto done; |
1184 | | |
1185 | | /* return a tuple of values */ |
1186 | 0 | result = PyTuple_Pack(2, is_attr_obj, obj); |
1187 | |
|
1188 | 0 | done: |
1189 | 0 | Py_XDECREF(is_attr_obj); |
1190 | 0 | Py_XDECREF(obj); |
1191 | 0 | return result; |
1192 | 0 | } |
1193 | 0 | } |
1194 | | |
1195 | | static PyMethodDef fieldnameiter_methods[] = { |
1196 | | {NULL, NULL} /* sentinel */ |
1197 | | }; |
1198 | | |
1199 | | static PyTypeObject PyFieldNameIter_Type = { |
1200 | | PyVarObject_HEAD_INIT(&PyType_Type, 0) |
1201 | | "fieldnameiterator", /* tp_name */ |
1202 | | sizeof(fieldnameiterobject), /* tp_basicsize */ |
1203 | | 0, /* tp_itemsize */ |
1204 | | /* methods */ |
1205 | | fieldnameiter_dealloc, /* tp_dealloc */ |
1206 | | 0, /* tp_vectorcall_offset */ |
1207 | | 0, /* tp_getattr */ |
1208 | | 0, /* tp_setattr */ |
1209 | | 0, /* tp_as_async */ |
1210 | | 0, /* tp_repr */ |
1211 | | 0, /* tp_as_number */ |
1212 | | 0, /* tp_as_sequence */ |
1213 | | 0, /* tp_as_mapping */ |
1214 | | 0, /* tp_hash */ |
1215 | | 0, /* tp_call */ |
1216 | | 0, /* tp_str */ |
1217 | | PyObject_GenericGetAttr, /* tp_getattro */ |
1218 | | 0, /* tp_setattro */ |
1219 | | 0, /* tp_as_buffer */ |
1220 | | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
1221 | | 0, /* tp_doc */ |
1222 | | 0, /* tp_traverse */ |
1223 | | 0, /* tp_clear */ |
1224 | | 0, /* tp_richcompare */ |
1225 | | 0, /* tp_weaklistoffset */ |
1226 | | PyObject_SelfIter, /* tp_iter */ |
1227 | | fieldnameiter_next, /* tp_iternext */ |
1228 | | fieldnameiter_methods, /* tp_methods */ |
1229 | | 0}; |
1230 | | |
1231 | | /* unicode_formatter_field_name_split is used to implement |
1232 | | string.Formatter.vformat. it takes a PEP 3101 "field name", and |
1233 | | returns a tuple of (first, rest): "first", the part before the |
1234 | | first '.' or '['; and "rest", an iterator for the rest of the field |
1235 | | name. it's a wrapper around stringlib/string_format.h's |
1236 | | field_name_split. The iterator it returns is a |
1237 | | FieldNameIterator */ |
1238 | | static PyObject * |
1239 | | formatter_field_name_split(PyObject *Py_UNUSED(module), PyObject *self) |
1240 | 0 | { |
1241 | 0 | SubString first; |
1242 | 0 | Py_ssize_t first_idx; |
1243 | 0 | fieldnameiterobject *it; |
1244 | |
|
1245 | 0 | PyObject *first_obj = NULL; |
1246 | 0 | PyObject *result = NULL; |
1247 | |
|
1248 | 0 | if (!PyUnicode_Check(self)) { |
1249 | 0 | PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); |
1250 | 0 | return NULL; |
1251 | 0 | } |
1252 | | |
1253 | 0 | it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); |
1254 | 0 | if (it == NULL) |
1255 | 0 | return NULL; |
1256 | | |
1257 | | /* take ownership, give the object to the iterator. this is |
1258 | | just to keep the field_name alive */ |
1259 | 0 | it->str = Py_NewRef(self); |
1260 | | |
1261 | | /* Pass in auto_number = NULL. We'll return an empty string for |
1262 | | first_obj in that case. */ |
1263 | 0 | if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self), |
1264 | 0 | &first, &first_idx, &it->it_field, NULL)) |
1265 | 0 | goto done; |
1266 | | |
1267 | | /* first becomes an integer, if possible; else a string */ |
1268 | 0 | if (first_idx != -1) |
1269 | 0 | first_obj = PyLong_FromSsize_t(first_idx); |
1270 | 0 | else |
1271 | | /* convert "first" into a string object */ |
1272 | 0 | first_obj = SubString_new_object(&first); |
1273 | 0 | if (first_obj == NULL) |
1274 | 0 | goto done; |
1275 | | |
1276 | | /* return a tuple of values */ |
1277 | 0 | result = PyTuple_Pack(2, first_obj, it); |
1278 | |
|
1279 | 0 | done: |
1280 | 0 | Py_XDECREF(it); |
1281 | 0 | Py_XDECREF(first_obj); |
1282 | 0 | return result; |
1283 | 0 | } |