/src/Python-3.8.3/Objects/stringlib/unicode_format.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | unicode_format.h -- implementation of str.format(). |
3 | | */ |
4 | | |
5 | | /************************************************************************/ |
6 | | /*********** Global data structures and forward declarations *********/ |
7 | | /************************************************************************/ |
8 | | |
9 | | /* |
10 | | A SubString consists of the characters between two string or |
11 | | unicode pointers. |
12 | | */ |
13 | | typedef struct { |
14 | | PyObject *str; /* borrowed reference */ |
15 | | Py_ssize_t start, end; |
16 | | } SubString; |
17 | | |
18 | | |
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT = 0,       /* no numbered field has been seen yet */
    ANS_AUTO = 1,       /* fields are numbered automatically ("{}") */
    ANS_MANUAL = 2      /* fields carry explicit numbers ("{0}") */
} AutoNumberState;

/* Auto-numbering bookkeeping shared by all fields of one format call:
   the numbering mode in effect and the index the next automatically
   numbered field will receive. */
typedef struct {
    AutoNumberState an_state;
    int an_field_number;
} AutoNumber;
30 | | |
31 | | |
32 | | /* forward declaration for recursion */ |
33 | | static PyObject * |
34 | | build_string(SubString *input, PyObject *args, PyObject *kwargs, |
35 | | int recursion_depth, AutoNumber *auto_number); |
36 | | |
37 | | |
38 | | |
39 | | /************************************************************************/ |
40 | | /************************** Utility functions ************************/ |
41 | | /************************************************************************/ |
42 | | |
43 | | static void |
44 | | AutoNumber_Init(AutoNumber *auto_number) |
45 | 105 | { |
46 | 105 | auto_number->an_state = ANS_INIT; |
47 | 105 | auto_number->an_field_number = 0; |
48 | 105 | } |
49 | | |
50 | | /* fill in a SubString from a pointer and length */ |
51 | | Py_LOCAL_INLINE(void) |
52 | | SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end) |
53 | 1.74k | { |
54 | 1.74k | str->str = s; |
55 | 1.74k | str->start = start; |
56 | 1.74k | str->end = end; |
57 | 1.74k | } |
58 | | |
59 | | /* return a new string. if str->str is NULL, return None */ |
60 | | Py_LOCAL_INLINE(PyObject *) |
61 | | SubString_new_object(SubString *str) |
62 | 0 | { |
63 | 0 | if (str->str == NULL) |
64 | 0 | Py_RETURN_NONE; |
65 | 0 | return PyUnicode_Substring(str->str, str->start, str->end); |
66 | 0 | } |
67 | | |
68 | | /* return a new string. if str->str is NULL, return a new empty string */ |
69 | | Py_LOCAL_INLINE(PyObject *) |
70 | | SubString_new_object_or_empty(SubString *str) |
71 | 0 | { |
72 | 0 | if (str->str == NULL) { |
73 | 0 | return PyUnicode_New(0, 0); |
74 | 0 | } |
75 | 0 | return SubString_new_object(str); |
76 | 0 | } |
77 | | |
78 | | /* Return 1 if an error has been detected switching between automatic |
79 | | field numbering and manual field specification, else return 0. Set |
80 | | ValueError on error. */ |
81 | | static int |
82 | | autonumber_state_error(AutoNumberState state, int field_name_is_empty) |
83 | 175 | { |
84 | 175 | if (state == ANS_MANUAL) { |
85 | 0 | if (field_name_is_empty) { |
86 | 0 | PyErr_SetString(PyExc_ValueError, "cannot switch from " |
87 | 0 | "manual field specification to " |
88 | 0 | "automatic field numbering"); |
89 | 0 | return 1; |
90 | 0 | } |
91 | 0 | } |
92 | 175 | else { |
93 | 175 | if (!field_name_is_empty) { |
94 | 0 | PyErr_SetString(PyExc_ValueError, "cannot switch from " |
95 | 0 | "automatic field numbering to " |
96 | 0 | "manual field specification"); |
97 | 0 | return 1; |
98 | 0 | } |
99 | 175 | } |
100 | 175 | return 0; |
101 | 175 | } |
102 | | |
103 | | |
104 | | /************************************************************************/ |
105 | | /*********** Format string parsing -- integers and identifiers *********/ |
106 | | /************************************************************************/ |
107 | | |
108 | | static Py_ssize_t |
109 | | get_integer(const SubString *str) |
110 | 175 | { |
111 | 175 | Py_ssize_t accumulator = 0; |
112 | 175 | Py_ssize_t digitval; |
113 | 175 | Py_ssize_t i; |
114 | | |
115 | | /* empty string is an error */ |
116 | 175 | if (str->start >= str->end) |
117 | 175 | return -1; |
118 | | |
119 | 0 | for (i = str->start; i < str->end; i++) { |
120 | 0 | digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i)); |
121 | 0 | if (digitval < 0) |
122 | 0 | return -1; |
123 | | /* |
124 | | Detect possible overflow before it happens: |
125 | | |
126 | | accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if |
127 | | accumulator > (PY_SSIZE_T_MAX - digitval) / 10. |
128 | | */ |
129 | 0 | if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { |
130 | 0 | PyErr_Format(PyExc_ValueError, |
131 | 0 | "Too many decimal digits in format string"); |
132 | 0 | return -1; |
133 | 0 | } |
134 | 0 | accumulator = accumulator * 10 + digitval; |
135 | 0 | } |
136 | 0 | return accumulator; |
137 | 0 | } |
138 | | |
139 | | /************************************************************************/ |
140 | | /******** Functions to get field objects and specification strings ******/ |
141 | | /************************************************************************/ |
142 | | |
143 | | /* do the equivalent of obj.name */ |
144 | | static PyObject * |
145 | | getattr(PyObject *obj, SubString *name) |
146 | 0 | { |
147 | 0 | PyObject *newobj; |
148 | 0 | PyObject *str = SubString_new_object(name); |
149 | 0 | if (str == NULL) |
150 | 0 | return NULL; |
151 | 0 | newobj = PyObject_GetAttr(obj, str); |
152 | 0 | Py_DECREF(str); |
153 | 0 | return newobj; |
154 | 0 | } |
155 | | |
156 | | /* do the equivalent of obj[idx], where obj is a sequence */ |
157 | | static PyObject * |
158 | | getitem_sequence(PyObject *obj, Py_ssize_t idx) |
159 | 0 | { |
160 | 0 | return PySequence_GetItem(obj, idx); |
161 | 0 | } |
162 | | |
163 | | /* do the equivalent of obj[idx], where obj is not a sequence */ |
164 | | static PyObject * |
165 | | getitem_idx(PyObject *obj, Py_ssize_t idx) |
166 | 0 | { |
167 | 0 | PyObject *newobj; |
168 | 0 | PyObject *idx_obj = PyLong_FromSsize_t(idx); |
169 | 0 | if (idx_obj == NULL) |
170 | 0 | return NULL; |
171 | 0 | newobj = PyObject_GetItem(obj, idx_obj); |
172 | 0 | Py_DECREF(idx_obj); |
173 | 0 | return newobj; |
174 | 0 | } |
175 | | |
176 | | /* do the equivalent of obj[name] */ |
177 | | static PyObject * |
178 | | getitem_str(PyObject *obj, SubString *name) |
179 | 0 | { |
180 | 0 | PyObject *newobj; |
181 | 0 | PyObject *str = SubString_new_object(name); |
182 | 0 | if (str == NULL) |
183 | 0 | return NULL; |
184 | 0 | newobj = PyObject_GetItem(obj, str); |
185 | 0 | Py_DECREF(str); |
186 | 0 | return newobj; |
187 | 0 | } |
188 | | |
189 | | typedef struct { |
190 | | /* the entire string we're parsing. we assume that someone else |
191 | | is managing its lifetime, and that it will exist for the |
192 | | lifetime of the iterator. can be empty */ |
193 | | SubString str; |
194 | | |
195 | | /* index to where we are inside field_name */ |
196 | | Py_ssize_t index; |
197 | | } FieldNameIterator; |
198 | | |
199 | | |
200 | | static int |
201 | | FieldNameIterator_init(FieldNameIterator *self, PyObject *s, |
202 | | Py_ssize_t start, Py_ssize_t end) |
203 | 175 | { |
204 | 175 | SubString_init(&self->str, s, start, end); |
205 | 175 | self->index = start; |
206 | 175 | return 1; |
207 | 175 | } |
208 | | |
209 | | static int |
210 | | _FieldNameIterator_attr(FieldNameIterator *self, SubString *name) |
211 | 0 | { |
212 | 0 | Py_UCS4 c; |
213 | |
|
214 | 0 | name->str = self->str.str; |
215 | 0 | name->start = self->index; |
216 | | |
217 | | /* return everything until '.' or '[' */ |
218 | 0 | while (self->index < self->str.end) { |
219 | 0 | c = PyUnicode_READ_CHAR(self->str.str, self->index++); |
220 | 0 | switch (c) { |
221 | 0 | case '[': |
222 | 0 | case '.': |
223 | | /* backup so that we this character will be seen next time */ |
224 | 0 | self->index--; |
225 | 0 | break; |
226 | 0 | default: |
227 | 0 | continue; |
228 | 0 | } |
229 | 0 | break; |
230 | 0 | } |
231 | | /* end of string is okay */ |
232 | 0 | name->end = self->index; |
233 | 0 | return 1; |
234 | 0 | } |
235 | | |
236 | | static int |
237 | | _FieldNameIterator_item(FieldNameIterator *self, SubString *name) |
238 | 0 | { |
239 | 0 | int bracket_seen = 0; |
240 | 0 | Py_UCS4 c; |
241 | |
|
242 | 0 | name->str = self->str.str; |
243 | 0 | name->start = self->index; |
244 | | |
245 | | /* return everything until ']' */ |
246 | 0 | while (self->index < self->str.end) { |
247 | 0 | c = PyUnicode_READ_CHAR(self->str.str, self->index++); |
248 | 0 | switch (c) { |
249 | 0 | case ']': |
250 | 0 | bracket_seen = 1; |
251 | 0 | break; |
252 | 0 | default: |
253 | 0 | continue; |
254 | 0 | } |
255 | 0 | break; |
256 | 0 | } |
257 | | /* make sure we ended with a ']' */ |
258 | 0 | if (!bracket_seen) { |
259 | 0 | PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); |
260 | 0 | return 0; |
261 | 0 | } |
262 | | |
263 | | /* end of string is okay */ |
264 | | /* don't include the ']' */ |
265 | 0 | name->end = self->index-1; |
266 | 0 | return 1; |
267 | 0 | } |
268 | | |
269 | | /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ |
270 | | static int |
271 | | FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, |
272 | | Py_ssize_t *name_idx, SubString *name) |
273 | 175 | { |
274 | | /* check at end of input */ |
275 | 175 | if (self->index >= self->str.end) |
276 | 175 | return 1; |
277 | | |
278 | 0 | switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) { |
279 | 0 | case '.': |
280 | 0 | *is_attribute = 1; |
281 | 0 | if (_FieldNameIterator_attr(self, name) == 0) |
282 | 0 | return 0; |
283 | 0 | *name_idx = -1; |
284 | 0 | break; |
285 | 0 | case '[': |
286 | 0 | *is_attribute = 0; |
287 | 0 | if (_FieldNameIterator_item(self, name) == 0) |
288 | 0 | return 0; |
289 | 0 | *name_idx = get_integer(name); |
290 | 0 | if (*name_idx == -1 && PyErr_Occurred()) |
291 | 0 | return 0; |
292 | 0 | break; |
293 | 0 | default: |
294 | | /* Invalid character follows ']' */ |
295 | 0 | PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may " |
296 | 0 | "follow ']' in format field specifier"); |
297 | 0 | return 0; |
298 | 0 | } |
299 | | |
300 | | /* empty string is an error */ |
301 | 0 | if (name->start == name->end) { |
302 | 0 | PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); |
303 | 0 | return 0; |
304 | 0 | } |
305 | | |
306 | 0 | return 2; |
307 | 0 | } |
308 | | |
309 | | |
310 | | /* input: field_name |
311 | | output: 'first' points to the part before the first '[' or '.' |
312 | | 'first_idx' is -1 if 'first' is not an integer, otherwise |
313 | | it's the value of first converted to an integer |
314 | | 'rest' is an iterator to return the rest |
315 | | */ |
316 | | static int |
317 | | field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first, |
318 | | Py_ssize_t *first_idx, FieldNameIterator *rest, |
319 | | AutoNumber *auto_number) |
320 | 175 | { |
321 | 175 | Py_UCS4 c; |
322 | 175 | Py_ssize_t i = start; |
323 | 175 | int field_name_is_empty; |
324 | 175 | int using_numeric_index; |
325 | | |
326 | | /* find the part up until the first '.' or '[' */ |
327 | 175 | while (i < end) { |
328 | 0 | switch (c = PyUnicode_READ_CHAR(str, i++)) { |
329 | 0 | case '[': |
330 | 0 | case '.': |
331 | | /* backup so that we this character is available to the |
332 | | "rest" iterator */ |
333 | 0 | i--; |
334 | 0 | break; |
335 | 0 | default: |
336 | 0 | continue; |
337 | 0 | } |
338 | 0 | break; |
339 | 0 | } |
340 | | |
341 | | /* set up the return values */ |
342 | 175 | SubString_init(first, str, start, i); |
343 | 175 | FieldNameIterator_init(rest, str, i, end); |
344 | | |
345 | | /* see if "first" is an integer, in which case it's used as an index */ |
346 | 175 | *first_idx = get_integer(first); |
347 | 175 | if (*first_idx == -1 && PyErr_Occurred()) |
348 | 0 | return 0; |
349 | | |
350 | 175 | field_name_is_empty = first->start >= first->end; |
351 | | |
352 | | /* If the field name is omitted or if we have a numeric index |
353 | | specified, then we're doing numeric indexing into args. */ |
354 | 175 | using_numeric_index = field_name_is_empty || *first_idx != -1; |
355 | | |
356 | | /* We always get here exactly one time for each field we're |
357 | | processing. And we get here in field order (counting by left |
358 | | braces). So this is the perfect place to handle automatic field |
359 | | numbering if the field name is omitted. */ |
360 | | |
361 | | /* Check if we need to do the auto-numbering. It's not needed if |
362 | | we're called from string.Format routines, because it's handled |
363 | | in that class by itself. */ |
364 | 175 | if (auto_number) { |
365 | | /* Initialize our auto numbering state if this is the first |
366 | | time we're either auto-numbering or manually numbering. */ |
367 | 175 | if (auto_number->an_state == ANS_INIT && using_numeric_index) |
368 | 105 | auto_number->an_state = field_name_is_empty ? |
369 | 105 | ANS_AUTO : ANS_MANUAL; |
370 | | |
371 | | /* Make sure our state is consistent with what we're doing |
372 | | this time through. Only check if we're using a numeric |
373 | | index. */ |
374 | 175 | if (using_numeric_index) |
375 | 175 | if (autonumber_state_error(auto_number->an_state, |
376 | 175 | field_name_is_empty)) |
377 | 0 | return 0; |
378 | | /* Zero length field means we want to do auto-numbering of the |
379 | | fields. */ |
380 | 175 | if (field_name_is_empty) |
381 | 175 | *first_idx = (auto_number->an_field_number)++; |
382 | 175 | } |
383 | | |
384 | 175 | return 1; |
385 | 175 | } |
386 | | |
387 | | |
388 | | /* |
389 | | get_field_object returns the object inside {}, before the |
390 | | format_spec. It handles getindex and getattr lookups and consumes |
391 | | the entire input string. |
392 | | */ |
393 | | static PyObject * |
394 | | get_field_object(SubString *input, PyObject *args, PyObject *kwargs, |
395 | | AutoNumber *auto_number) |
396 | 175 | { |
397 | 175 | PyObject *obj = NULL; |
398 | 175 | int ok; |
399 | 175 | int is_attribute; |
400 | 175 | SubString name; |
401 | 175 | SubString first; |
402 | 175 | Py_ssize_t index; |
403 | 175 | FieldNameIterator rest; |
404 | | |
405 | 175 | if (!field_name_split(input->str, input->start, input->end, &first, |
406 | 175 | &index, &rest, auto_number)) { |
407 | 0 | goto error; |
408 | 0 | } |
409 | | |
410 | 175 | if (index == -1) { |
411 | | /* look up in kwargs */ |
412 | 0 | PyObject *key = SubString_new_object(&first); |
413 | 0 | if (key == NULL) { |
414 | 0 | goto error; |
415 | 0 | } |
416 | 0 | if (kwargs == NULL) { |
417 | 0 | PyErr_SetObject(PyExc_KeyError, key); |
418 | 0 | Py_DECREF(key); |
419 | 0 | goto error; |
420 | 0 | } |
421 | | /* Use PyObject_GetItem instead of PyDict_GetItem because this |
422 | | code is no longer just used with kwargs. It might be passed |
423 | | a non-dict when called through format_map. */ |
424 | 0 | obj = PyObject_GetItem(kwargs, key); |
425 | 0 | Py_DECREF(key); |
426 | 0 | if (obj == NULL) { |
427 | 0 | goto error; |
428 | 0 | } |
429 | 0 | } |
430 | 175 | else { |
431 | | /* If args is NULL, we have a format string with a positional field |
432 | | with only kwargs to retrieve it from. This can only happen when |
433 | | used with format_map(), where positional arguments are not |
434 | | allowed. */ |
435 | 175 | if (args == NULL) { |
436 | 0 | PyErr_SetString(PyExc_ValueError, "Format string contains " |
437 | 0 | "positional fields"); |
438 | 0 | goto error; |
439 | 0 | } |
440 | | |
441 | | /* look up in args */ |
442 | 175 | obj = PySequence_GetItem(args, index); |
443 | 175 | if (obj == NULL) { |
444 | 0 | PyErr_Format(PyExc_IndexError, |
445 | 0 | "Replacement index %zd out of range for positional " |
446 | 0 | "args tuple", |
447 | 0 | index); |
448 | 0 | goto error; |
449 | 0 | } |
450 | 175 | } |
451 | | |
452 | | /* iterate over the rest of the field_name */ |
453 | 175 | while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, |
454 | 175 | &name)) == 2) { |
455 | 0 | PyObject *tmp; |
456 | |
|
457 | 0 | if (is_attribute) |
458 | | /* getattr lookup "." */ |
459 | 0 | tmp = getattr(obj, &name); |
460 | 0 | else |
461 | | /* getitem lookup "[]" */ |
462 | 0 | if (index == -1) |
463 | 0 | tmp = getitem_str(obj, &name); |
464 | 0 | else |
465 | 0 | if (PySequence_Check(obj)) |
466 | 0 | tmp = getitem_sequence(obj, index); |
467 | 0 | else |
468 | | /* not a sequence */ |
469 | 0 | tmp = getitem_idx(obj, index); |
470 | 0 | if (tmp == NULL) |
471 | 0 | goto error; |
472 | | |
473 | | /* assign to obj */ |
474 | 0 | Py_DECREF(obj); |
475 | 0 | obj = tmp; |
476 | 0 | } |
477 | | /* end of iterator, this is the non-error case */ |
478 | 175 | if (ok == 1) |
479 | 175 | return obj; |
480 | 0 | error: |
481 | 0 | Py_XDECREF(obj); |
482 | 0 | return NULL; |
483 | 175 | } |
484 | | |
485 | | /************************************************************************/ |
486 | | /***************** Field rendering functions **************************/ |
487 | | /************************************************************************/ |
488 | | |
489 | | /* |
490 | | render_field() is the main function in this section. It takes the |
491 | | field object and field specification string generated by |
492 | | get_field_and_spec, and renders the field into the output string. |
493 | | |
494 | | render_field calls fieldobj.__format__(format_spec) method, and |
495 | | appends to the output. |
496 | | */ |
497 | | static int |
498 | | render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer) |
499 | 175 | { |
500 | 175 | int ok = 0; |
501 | 175 | PyObject *result = NULL; |
502 | 175 | PyObject *format_spec_object = NULL; |
503 | 175 | int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; |
504 | 175 | int err; |
505 | | |
506 | | /* If we know the type exactly, skip the lookup of __format__ and just |
507 | | call the formatter directly. */ |
508 | 175 | if (PyUnicode_CheckExact(fieldobj)) |
509 | 147 | formatter = _PyUnicode_FormatAdvancedWriter; |
510 | 28 | else if (PyLong_CheckExact(fieldobj)) |
511 | 28 | formatter = _PyLong_FormatAdvancedWriter; |
512 | 0 | else if (PyFloat_CheckExact(fieldobj)) |
513 | 0 | formatter = _PyFloat_FormatAdvancedWriter; |
514 | 0 | else if (PyComplex_CheckExact(fieldobj)) |
515 | 0 | formatter = _PyComplex_FormatAdvancedWriter; |
516 | | |
517 | 175 | if (formatter) { |
518 | | /* we know exactly which formatter will be called when __format__ is |
519 | | looked up, so call it directly, instead. */ |
520 | 175 | err = formatter(writer, fieldobj, format_spec->str, |
521 | 175 | format_spec->start, format_spec->end); |
522 | 175 | return (err == 0); |
523 | 175 | } |
524 | 0 | else { |
525 | | /* We need to create an object out of the pointers we have, because |
526 | | __format__ takes a string/unicode object for format_spec. */ |
527 | 0 | if (format_spec->str) |
528 | 0 | format_spec_object = PyUnicode_Substring(format_spec->str, |
529 | 0 | format_spec->start, |
530 | 0 | format_spec->end); |
531 | 0 | else |
532 | 0 | format_spec_object = PyUnicode_New(0, 0); |
533 | 0 | if (format_spec_object == NULL) |
534 | 0 | goto done; |
535 | | |
536 | 0 | result = PyObject_Format(fieldobj, format_spec_object); |
537 | 0 | } |
538 | 0 | if (result == NULL) |
539 | 0 | goto done; |
540 | | |
541 | 0 | if (_PyUnicodeWriter_WriteStr(writer, result) == -1) |
542 | 0 | goto done; |
543 | 0 | ok = 1; |
544 | |
|
545 | 0 | done: |
546 | 0 | Py_XDECREF(format_spec_object); |
547 | 0 | Py_XDECREF(result); |
548 | 0 | return ok; |
549 | 0 | } |
550 | | |
551 | | static int |
552 | | parse_field(SubString *str, SubString *field_name, SubString *format_spec, |
553 | | int *format_spec_needs_expanding, Py_UCS4 *conversion) |
554 | 175 | { |
555 | | /* Note this function works if the field name is zero length, |
556 | | which is good. Zero length field names are handled later, in |
557 | | field_name_split. */ |
558 | | |
559 | 175 | Py_UCS4 c = 0; |
560 | | |
561 | | /* initialize these, as they may be empty */ |
562 | 175 | *conversion = '\0'; |
563 | 175 | SubString_init(format_spec, NULL, 0, 0); |
564 | | |
565 | | /* Search for the field name. it's terminated by the end of |
566 | | the string, or a ':' or '!' */ |
567 | 175 | field_name->str = str->str; |
568 | 175 | field_name->start = str->start; |
569 | 175 | while (str->start < str->end) { |
570 | 175 | switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { |
571 | 0 | case '{': |
572 | 0 | PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name"); |
573 | 0 | return 0; |
574 | 0 | case '[': |
575 | 0 | for (; str->start < str->end; str->start++) |
576 | 0 | if (PyUnicode_READ_CHAR(str->str, str->start) == ']') |
577 | 0 | break; |
578 | 0 | continue; |
579 | 140 | case '}': |
580 | 140 | case ':': |
581 | 175 | case '!': |
582 | 175 | break; |
583 | 0 | default: |
584 | 0 | continue; |
585 | 175 | } |
586 | 175 | break; |
587 | 175 | } |
588 | | |
589 | 175 | field_name->end = str->start - 1; |
590 | 175 | if (c == '!' || c == ':') { |
591 | 35 | Py_ssize_t count; |
592 | | /* we have a format specifier and/or a conversion */ |
593 | | /* don't include the last character */ |
594 | | |
595 | | /* see if there's a conversion specifier */ |
596 | 35 | if (c == '!') { |
597 | | /* there must be another character present */ |
598 | 35 | if (str->start >= str->end) { |
599 | 0 | PyErr_SetString(PyExc_ValueError, |
600 | 0 | "end of string while looking for conversion " |
601 | 0 | "specifier"); |
602 | 0 | return 0; |
603 | 0 | } |
604 | 35 | *conversion = PyUnicode_READ_CHAR(str->str, str->start++); |
605 | | |
606 | 35 | if (str->start < str->end) { |
607 | 35 | c = PyUnicode_READ_CHAR(str->str, str->start++); |
608 | 35 | if (c == '}') |
609 | 35 | return 1; |
610 | 0 | if (c != ':') { |
611 | 0 | PyErr_SetString(PyExc_ValueError, |
612 | 0 | "expected ':' after conversion specifier"); |
613 | 0 | return 0; |
614 | 0 | } |
615 | 0 | } |
616 | 35 | } |
617 | 0 | format_spec->str = str->str; |
618 | 0 | format_spec->start = str->start; |
619 | 0 | count = 1; |
620 | 0 | while (str->start < str->end) { |
621 | 0 | switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { |
622 | 0 | case '{': |
623 | 0 | *format_spec_needs_expanding = 1; |
624 | 0 | count++; |
625 | 0 | break; |
626 | 0 | case '}': |
627 | 0 | count--; |
628 | 0 | if (count == 0) { |
629 | 0 | format_spec->end = str->start - 1; |
630 | 0 | return 1; |
631 | 0 | } |
632 | 0 | break; |
633 | 0 | default: |
634 | 0 | break; |
635 | 0 | } |
636 | 0 | } |
637 | | |
638 | 0 | PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec"); |
639 | 0 | return 0; |
640 | 0 | } |
641 | 140 | else if (c != '}') { |
642 | 0 | PyErr_SetString(PyExc_ValueError, "expected '}' before end of string"); |
643 | 0 | return 0; |
644 | 0 | } |
645 | | |
646 | 140 | return 1; |
647 | 175 | } |
648 | | |
649 | | /************************************************************************/ |
650 | | /******* Output string allocation and escape-to-markup processing ******/ |
651 | | /************************************************************************/ |
652 | | |
653 | | /* MarkupIterator breaks the string into pieces of either literal |
654 | | text, or things inside {} that need to be marked up. it is |
655 | | designed to make it easy to wrap a Python iterator around it, for |
656 | | use with the Formatter class */ |
657 | | |
658 | | typedef struct { |
659 | | SubString str; |
660 | | } MarkupIterator; |
661 | | |
662 | | static int |
663 | | MarkupIterator_init(MarkupIterator *self, PyObject *str, |
664 | | Py_ssize_t start, Py_ssize_t end) |
665 | 105 | { |
666 | 105 | SubString_init(&self->str, str, start, end); |
667 | 105 | return 1; |
668 | 105 | } |
669 | | |
670 | | /* returns 0 on error, 1 on non-error termination, and 2 if it got a |
671 | | string (or something to be expanded) */ |
672 | | static int |
673 | | MarkupIterator_next(MarkupIterator *self, SubString *literal, |
674 | | int *field_present, SubString *field_name, |
675 | | SubString *format_spec, Py_UCS4 *conversion, |
676 | | int *format_spec_needs_expanding) |
677 | 336 | { |
678 | 336 | int at_end; |
679 | 336 | Py_UCS4 c = 0; |
680 | 336 | Py_ssize_t start; |
681 | 336 | Py_ssize_t len; |
682 | 336 | int markup_follows = 0; |
683 | | |
684 | | /* initialize all of the output variables */ |
685 | 336 | SubString_init(literal, NULL, 0, 0); |
686 | 336 | SubString_init(field_name, NULL, 0, 0); |
687 | 336 | SubString_init(format_spec, NULL, 0, 0); |
688 | 336 | *conversion = '\0'; |
689 | 336 | *format_spec_needs_expanding = 0; |
690 | 336 | *field_present = 0; |
691 | | |
692 | | /* No more input, end of iterator. This is the normal exit |
693 | | path. */ |
694 | 336 | if (self->str.start >= self->str.end) |
695 | 105 | return 1; |
696 | | |
697 | 231 | start = self->str.start; |
698 | | |
699 | | /* First read any literal text. Read until the end of string, an |
700 | | escaped '{' or '}', or an unescaped '{'. In order to never |
701 | | allocate memory and so I can just pass pointers around, if |
702 | | there's an escaped '{' or '}' then we'll return the literal |
703 | | including the brace, but no format object. The next time |
704 | | through, we'll return the rest of the literal, skipping past |
705 | | the second consecutive brace. */ |
706 | 1.56k | while (self->str.start < self->str.end) { |
707 | 1.50k | switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { |
708 | 175 | case '{': |
709 | 175 | case '}': |
710 | 175 | markup_follows = 1; |
711 | 175 | break; |
712 | 1.33k | default: |
713 | 1.33k | continue; |
714 | 1.50k | } |
715 | 175 | break; |
716 | 1.50k | } |
717 | | |
718 | 231 | at_end = self->str.start >= self->str.end; |
719 | 231 | len = self->str.start - start; |
720 | | |
721 | 231 | if ((c == '}') && (at_end || |
722 | 0 | (c != PyUnicode_READ_CHAR(self->str.str, |
723 | 0 | self->str.start)))) { |
724 | 0 | PyErr_SetString(PyExc_ValueError, "Single '}' encountered " |
725 | 0 | "in format string"); |
726 | 0 | return 0; |
727 | 0 | } |
728 | 231 | if (at_end && c == '{') { |
729 | 0 | PyErr_SetString(PyExc_ValueError, "Single '{' encountered " |
730 | 0 | "in format string"); |
731 | 0 | return 0; |
732 | 0 | } |
733 | 231 | if (!at_end) { |
734 | 175 | if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) { |
735 | | /* escaped } or {, skip it in the input. there is no |
736 | | markup object following us, just this literal text */ |
737 | 0 | self->str.start++; |
738 | 0 | markup_follows = 0; |
739 | 0 | } |
740 | 175 | else |
741 | 175 | len--; |
742 | 175 | } |
743 | | |
744 | | /* record the literal text */ |
745 | 231 | literal->str = self->str.str; |
746 | 231 | literal->start = start; |
747 | 231 | literal->end = start + len; |
748 | | |
749 | 231 | if (!markup_follows) |
750 | 56 | return 2; |
751 | | |
752 | | /* this is markup; parse the field */ |
753 | 175 | *field_present = 1; |
754 | 175 | if (!parse_field(&self->str, field_name, format_spec, |
755 | 175 | format_spec_needs_expanding, conversion)) |
756 | 0 | return 0; |
757 | 175 | return 2; |
758 | 175 | } |
759 | | |
760 | | |
761 | | /* do the !r or !s conversion on obj */ |
762 | | static PyObject * |
763 | | do_conversion(PyObject *obj, Py_UCS4 conversion) |
764 | 35 | { |
765 | | /* XXX in pre-3.0, do we need to convert this to unicode, since it |
766 | | might have returned a string? */ |
767 | 35 | switch (conversion) { |
768 | 35 | case 'r': |
769 | 35 | return PyObject_Repr(obj); |
770 | 0 | case 's': |
771 | 0 | return PyObject_Str(obj); |
772 | 0 | case 'a': |
773 | 0 | return PyObject_ASCII(obj); |
774 | 0 | default: |
775 | 0 | if (conversion > 32 && conversion < 127) { |
776 | | /* It's the ASCII subrange; casting to char is safe |
777 | | (assuming the execution character set is an ASCII |
778 | | superset). */ |
779 | 0 | PyErr_Format(PyExc_ValueError, |
780 | 0 | "Unknown conversion specifier %c", |
781 | 0 | (char)conversion); |
782 | 0 | } else |
783 | 0 | PyErr_Format(PyExc_ValueError, |
784 | 0 | "Unknown conversion specifier \\x%x", |
785 | 0 | (unsigned int)conversion); |
786 | 0 | return NULL; |
787 | 35 | } |
788 | 35 | } |
789 | | |
790 | | /* given: |
791 | | |
792 | | {field_name!conversion:format_spec} |
793 | | |
794 | | compute the result and write it to output. |
795 | | format_spec_needs_expanding is an optimization. if it's false, |
796 | | just output the string directly, otherwise recursively expand the |
797 | | format_spec string. |
798 | | |
799 | | field_name is allowed to be zero length, in which case we |
800 | | are doing auto field numbering. |
801 | | */ |
802 | | |
803 | | static int |
804 | | output_markup(SubString *field_name, SubString *format_spec, |
805 | | int format_spec_needs_expanding, Py_UCS4 conversion, |
806 | | _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs, |
807 | | int recursion_depth, AutoNumber *auto_number) |
808 | 175 | { |
809 | 175 | PyObject *tmp = NULL; |
810 | 175 | PyObject *fieldobj = NULL; |
811 | 175 | SubString expanded_format_spec; |
812 | 175 | SubString *actual_format_spec; |
813 | 175 | int result = 0; |
814 | | |
815 | | /* convert field_name to an object */ |
816 | 175 | fieldobj = get_field_object(field_name, args, kwargs, auto_number); |
817 | 175 | if (fieldobj == NULL) |
818 | 0 | goto done; |
819 | | |
820 | 175 | if (conversion != '\0') { |
821 | 35 | tmp = do_conversion(fieldobj, conversion); |
822 | 35 | if (tmp == NULL || PyUnicode_READY(tmp) == -1) |
823 | 0 | goto done; |
824 | | |
825 | | /* do the assignment, transferring ownership: fieldobj = tmp */ |
826 | 35 | Py_DECREF(fieldobj); |
827 | 35 | fieldobj = tmp; |
828 | 35 | tmp = NULL; |
829 | 35 | } |
830 | | |
831 | | /* if needed, recurively compute the format_spec */ |
832 | 175 | if (format_spec_needs_expanding) { |
833 | 0 | tmp = build_string(format_spec, args, kwargs, recursion_depth-1, |
834 | 0 | auto_number); |
835 | 0 | if (tmp == NULL || PyUnicode_READY(tmp) == -1) |
836 | 0 | goto done; |
837 | | |
838 | | /* note that in the case we're expanding the format string, |
839 | | tmp must be kept around until after the call to |
840 | | render_field. */ |
841 | 0 | SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp)); |
842 | 0 | actual_format_spec = &expanded_format_spec; |
843 | 0 | } |
844 | 175 | else |
845 | 175 | actual_format_spec = format_spec; |
846 | | |
847 | 175 | if (render_field(fieldobj, actual_format_spec, writer) == 0) |
848 | 0 | goto done; |
849 | | |
850 | 175 | result = 1; |
851 | | |
852 | 175 | done: |
853 | 175 | Py_XDECREF(fieldobj); |
854 | 175 | Py_XDECREF(tmp); |
855 | | |
856 | 175 | return result; |
857 | 175 | } |
858 | | |
859 | | /* |
860 | | do_markup is the top-level loop for the format() method. It |
861 | | searches through the format string for escapes to markup codes, and |
862 | | calls other functions to move non-markup text to the output, |
863 | | and to perform the markup to the output. |
864 | | */ |
865 | | static int |
866 | | do_markup(SubString *input, PyObject *args, PyObject *kwargs, |
867 | | _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number) |
868 | 105 | { |
869 | 105 | MarkupIterator iter; |
870 | 105 | int format_spec_needs_expanding; |
871 | 105 | int result; |
872 | 105 | int field_present; |
873 | 105 | SubString literal; |
874 | 105 | SubString field_name; |
875 | 105 | SubString format_spec; |
876 | 105 | Py_UCS4 conversion; |
877 | | |
878 | 105 | MarkupIterator_init(&iter, input->str, input->start, input->end); |
879 | 336 | while ((result = MarkupIterator_next(&iter, &literal, &field_present, |
880 | 336 | &field_name, &format_spec, |
881 | 336 | &conversion, |
882 | 336 | &format_spec_needs_expanding)) == 2) { |
883 | 231 | if (literal.end != literal.start) { |
884 | 217 | if (!field_present && iter.str.start == iter.str.end) |
885 | 56 | writer->overallocate = 0; |
886 | 217 | if (_PyUnicodeWriter_WriteSubstring(writer, literal.str, |
887 | 217 | literal.start, literal.end) < 0) |
888 | 0 | return 0; |
889 | 217 | } |
890 | | |
891 | 231 | if (field_present) { |
892 | 175 | if (iter.str.start == iter.str.end) |
893 | 49 | writer->overallocate = 0; |
894 | 175 | if (!output_markup(&field_name, &format_spec, |
895 | 175 | format_spec_needs_expanding, conversion, writer, |
896 | 175 | args, kwargs, recursion_depth, auto_number)) |
897 | 0 | return 0; |
898 | 175 | } |
899 | 231 | } |
900 | 105 | return result; |
901 | 105 | } |
902 | | |
903 | | |
904 | | /* |
905 | | build_string allocates the output string and then |
906 | | calls do_markup to do the heavy lifting. |
907 | | */ |
908 | | static PyObject * |
909 | | build_string(SubString *input, PyObject *args, PyObject *kwargs, |
910 | | int recursion_depth, AutoNumber *auto_number) |
911 | 105 | { |
912 | 105 | _PyUnicodeWriter writer; |
913 | | |
914 | | /* check the recursion level */ |
915 | 105 | if (recursion_depth <= 0) { |
916 | 0 | PyErr_SetString(PyExc_ValueError, |
917 | 0 | "Max string recursion exceeded"); |
918 | 0 | return NULL; |
919 | 0 | } |
920 | | |
921 | 105 | _PyUnicodeWriter_Init(&writer); |
922 | 105 | writer.overallocate = 1; |
923 | 105 | writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100; |
924 | | |
925 | 105 | if (!do_markup(input, args, kwargs, &writer, recursion_depth, |
926 | 105 | auto_number)) { |
927 | 0 | _PyUnicodeWriter_Dealloc(&writer); |
928 | 0 | return NULL; |
929 | 0 | } |
930 | | |
931 | 105 | return _PyUnicodeWriter_Finish(&writer); |
932 | 105 | } |
933 | | |
934 | | /************************************************************************/ |
935 | | /*********** main routine ***********************************************/ |
936 | | /************************************************************************/ |
937 | | |
938 | | /* this is the main entry point */ |
939 | | static PyObject * |
940 | | do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) |
941 | 105 | { |
942 | 105 | SubString input; |
943 | | |
944 | | /* PEP 3101 says only 2 levels, so that |
945 | | "{0:{1}}".format('abc', 's') # works |
946 | | "{0:{1:{2}}}".format('abc', 's', '') # fails |
947 | | */ |
948 | 105 | int recursion_depth = 2; |
949 | | |
950 | 105 | AutoNumber auto_number; |
951 | | |
952 | 105 | if (PyUnicode_READY(self) == -1) |
953 | 0 | return NULL; |
954 | | |
955 | 105 | AutoNumber_Init(&auto_number); |
956 | 105 | SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self)); |
957 | 105 | return build_string(&input, args, kwargs, recursion_depth, &auto_number); |
958 | 105 | } |
959 | | |
960 | | static PyObject * |
961 | | do_string_format_map(PyObject *self, PyObject *obj) |
962 | 0 | { |
963 | 0 | return do_string_format(self, NULL, obj); |
964 | 0 | } |
965 | | |
966 | | |
967 | | /************************************************************************/ |
968 | | /*********** formatteriterator ******************************************/ |
969 | | /************************************************************************/ |
970 | | |
971 | | /* This is used to implement string.Formatter.parse(). It exists so |
972 | | Formatter can share code with the built-in str.format() method. |
973 | | It's really just a wrapper around MarkupIterator that is callable |
974 | | from Python. */ |
975 | | |
976 | | typedef struct { |
977 | | PyObject_HEAD |
978 | | PyObject *str; |
979 | | MarkupIterator it_markup; |
980 | | } formatteriterobject; |
981 | | |
982 | | static void |
983 | | formatteriter_dealloc(formatteriterobject *it) |
984 | 0 | { |
985 | 0 | Py_XDECREF(it->str); |
986 | 0 | PyObject_FREE(it); |
987 | 0 | } |
988 | | |
989 | | /* returns a tuple: |
990 | | (literal, field_name, format_spec, conversion) |
991 | | |
992 | | literal is any literal text to output. might be zero length |
993 | | field_name is the string before the ':'. might be None |
994 | | format_spec is the string after the ':'. might be None |
995 | | conversion is either None, or the string after the '!' |
996 | | */ |
997 | | static PyObject * |
998 | | formatteriter_next(formatteriterobject *it) |
999 | 0 | { |
1000 | 0 | SubString literal; |
1001 | 0 | SubString field_name; |
1002 | 0 | SubString format_spec; |
1003 | 0 | Py_UCS4 conversion; |
1004 | 0 | int format_spec_needs_expanding; |
1005 | 0 | int field_present; |
1006 | 0 | int result = MarkupIterator_next(&it->it_markup, &literal, &field_present, |
1007 | 0 | &field_name, &format_spec, &conversion, |
1008 | 0 | &format_spec_needs_expanding); |
1009 | | |
1010 | | /* all of the SubString objects point into it->str, so no |
1011 | | memory management needs to be done on them */ |
1012 | 0 | assert(0 <= result && result <= 2); |
1013 | 0 | if (result == 0 || result == 1) |
1014 | | /* if 0, error has already been set, if 1, iterator is empty */ |
1015 | 0 | return NULL; |
1016 | 0 | else { |
1017 | 0 | PyObject *literal_str = NULL; |
1018 | 0 | PyObject *field_name_str = NULL; |
1019 | 0 | PyObject *format_spec_str = NULL; |
1020 | 0 | PyObject *conversion_str = NULL; |
1021 | 0 | PyObject *tuple = NULL; |
1022 | |
|
1023 | 0 | literal_str = SubString_new_object(&literal); |
1024 | 0 | if (literal_str == NULL) |
1025 | 0 | goto done; |
1026 | | |
1027 | 0 | field_name_str = SubString_new_object(&field_name); |
1028 | 0 | if (field_name_str == NULL) |
1029 | 0 | goto done; |
1030 | | |
1031 | | /* if a replacement field is present, return a string for |
1032 | | format_spec (even if zero length), else return None */ |
1033 | 0 | format_spec_str = (field_present ? |
1034 | 0 | SubString_new_object_or_empty : |
1035 | 0 | SubString_new_object)(&format_spec); |
1036 | 0 | if (format_spec_str == NULL) |
1037 | 0 | goto done; |
1038 | | |
1039 | | /* if the conversion is not specified, return a None, |
1040 | | otherwise create a one length string with the conversion |
1041 | | character */ |
1042 | 0 | if (conversion == '\0') { |
1043 | 0 | conversion_str = Py_None; |
1044 | 0 | Py_INCREF(conversion_str); |
1045 | 0 | } |
1046 | 0 | else |
1047 | 0 | conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, |
1048 | 0 | &conversion, 1); |
1049 | 0 | if (conversion_str == NULL) |
1050 | 0 | goto done; |
1051 | | |
1052 | 0 | tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, |
1053 | 0 | conversion_str); |
1054 | 0 | done: |
1055 | 0 | Py_XDECREF(literal_str); |
1056 | 0 | Py_XDECREF(field_name_str); |
1057 | 0 | Py_XDECREF(format_spec_str); |
1058 | 0 | Py_XDECREF(conversion_str); |
1059 | 0 | return tuple; |
1060 | 0 | } |
1061 | 0 | } |
1062 | | |
1063 | | static PyMethodDef formatteriter_methods[] = { |
1064 | | {NULL, NULL} /* sentinel */ |
1065 | | }; |
1066 | | |
1067 | | static PyTypeObject PyFormatterIter_Type = { |
1068 | | PyVarObject_HEAD_INIT(&PyType_Type, 0) |
1069 | | "formatteriterator", /* tp_name */ |
1070 | | sizeof(formatteriterobject), /* tp_basicsize */ |
1071 | | 0, /* tp_itemsize */ |
1072 | | /* methods */ |
1073 | | (destructor)formatteriter_dealloc, /* tp_dealloc */ |
1074 | | 0, /* tp_vectorcall_offset */ |
1075 | | 0, /* tp_getattr */ |
1076 | | 0, /* tp_setattr */ |
1077 | | 0, /* tp_as_async */ |
1078 | | 0, /* tp_repr */ |
1079 | | 0, /* tp_as_number */ |
1080 | | 0, /* tp_as_sequence */ |
1081 | | 0, /* tp_as_mapping */ |
1082 | | 0, /* tp_hash */ |
1083 | | 0, /* tp_call */ |
1084 | | 0, /* tp_str */ |
1085 | | PyObject_GenericGetAttr, /* tp_getattro */ |
1086 | | 0, /* tp_setattro */ |
1087 | | 0, /* tp_as_buffer */ |
1088 | | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
1089 | | 0, /* tp_doc */ |
1090 | | 0, /* tp_traverse */ |
1091 | | 0, /* tp_clear */ |
1092 | | 0, /* tp_richcompare */ |
1093 | | 0, /* tp_weaklistoffset */ |
1094 | | PyObject_SelfIter, /* tp_iter */ |
1095 | | (iternextfunc)formatteriter_next, /* tp_iternext */ |
1096 | | formatteriter_methods, /* tp_methods */ |
1097 | | 0, |
1098 | | }; |
1099 | | |
1100 | | /* formatter_parser is used to implement |
1101 | | string.Formatter.vformat. It parses a string and returns an iterator |
1102 | | yielding tuples describing the parsed elements. It's a wrapper around |
1103 | | stringlib/unicode_format.h's MarkupIterator */ |
1104 | | static PyObject * |
1105 | | formatter_parser(PyObject *ignored, PyObject *self) |
1106 | 0 | { |
1107 | 0 | formatteriterobject *it; |
1108 | |
|
1109 | 0 | if (!PyUnicode_Check(self)) { |
1110 | 0 | PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); |
1111 | 0 | return NULL; |
1112 | 0 | } |
1113 | | |
1114 | 0 | if (PyUnicode_READY(self) == -1) |
1115 | 0 | return NULL; |
1116 | | |
1117 | 0 | it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); |
1118 | 0 | if (it == NULL) |
1119 | 0 | return NULL; |
1120 | | |
1121 | | /* take ownership, give the object to the iterator */ |
1122 | 0 | Py_INCREF(self); |
1123 | 0 | it->str = self; |
1124 | | |
1125 | | /* initialize the contained MarkupIterator */ |
1126 | 0 | MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self)); |
1127 | 0 | return (PyObject *)it; |
1128 | 0 | } |
1129 | | |
1130 | | |
1131 | | /************************************************************************/ |
1132 | | /*********** fieldnameiterator ******************************************/ |
1133 | | /************************************************************************/ |
1134 | | |
1135 | | |
1136 | | /* This is used to implement string.Formatter.get_field(). It parses the |
1137 | | field name into attribute and item values. It's a Python-callable |
1138 | | wrapper around FieldNameIterator */ |
1139 | | |
1140 | | typedef struct { |
1141 | | PyObject_HEAD |
1142 | | PyObject *str; |
1143 | | FieldNameIterator it_field; |
1144 | | } fieldnameiterobject; |
1145 | | |
1146 | | static void |
1147 | | fieldnameiter_dealloc(fieldnameiterobject *it) |
1148 | 0 | { |
1149 | 0 | Py_XDECREF(it->str); |
1150 | 0 | PyObject_FREE(it); |
1151 | 0 | } |
1152 | | |
1153 | | /* returns a tuple: |
1154 | | (is_attr, value) |
1155 | | is_attr is true if we used attribute syntax (e.g., '.foo') |
1156 | | false if we used index syntax (e.g., '[foo]') |
1157 | | value is an integer or string |
1158 | | */ |
1159 | | static PyObject * |
1160 | | fieldnameiter_next(fieldnameiterobject *it) |
1161 | 0 | { |
1162 | 0 | int result; |
1163 | 0 | int is_attr; |
1164 | 0 | Py_ssize_t idx; |
1165 | 0 | SubString name; |
1166 | |
|
1167 | 0 | result = FieldNameIterator_next(&it->it_field, &is_attr, |
1168 | 0 | &idx, &name); |
1169 | 0 | if (result == 0 || result == 1) |
1170 | | /* if 0, error has already been set, if 1, iterator is empty */ |
1171 | 0 | return NULL; |
1172 | 0 | else { |
1173 | 0 | PyObject* result = NULL; |
1174 | 0 | PyObject* is_attr_obj = NULL; |
1175 | 0 | PyObject* obj = NULL; |
1176 | |
|
1177 | 0 | is_attr_obj = PyBool_FromLong(is_attr); |
1178 | 0 | if (is_attr_obj == NULL) |
1179 | 0 | goto done; |
1180 | | |
1181 | | /* either an integer or a string */ |
1182 | 0 | if (idx != -1) |
1183 | 0 | obj = PyLong_FromSsize_t(idx); |
1184 | 0 | else |
1185 | 0 | obj = SubString_new_object(&name); |
1186 | 0 | if (obj == NULL) |
1187 | 0 | goto done; |
1188 | | |
1189 | | /* return a tuple of values */ |
1190 | 0 | result = PyTuple_Pack(2, is_attr_obj, obj); |
1191 | |
|
1192 | 0 | done: |
1193 | 0 | Py_XDECREF(is_attr_obj); |
1194 | 0 | Py_XDECREF(obj); |
1195 | 0 | return result; |
1196 | 0 | } |
1197 | 0 | } |
1198 | | |
1199 | | static PyMethodDef fieldnameiter_methods[] = { |
1200 | | {NULL, NULL} /* sentinel */ |
1201 | | }; |
1202 | | |
1203 | | static PyTypeObject PyFieldNameIter_Type = { |
1204 | | PyVarObject_HEAD_INIT(&PyType_Type, 0) |
1205 | | "fieldnameiterator", /* tp_name */ |
1206 | | sizeof(fieldnameiterobject), /* tp_basicsize */ |
1207 | | 0, /* tp_itemsize */ |
1208 | | /* methods */ |
1209 | | (destructor)fieldnameiter_dealloc, /* tp_dealloc */ |
1210 | | 0, /* tp_vectorcall_offset */ |
1211 | | 0, /* tp_getattr */ |
1212 | | 0, /* tp_setattr */ |
1213 | | 0, /* tp_as_async */ |
1214 | | 0, /* tp_repr */ |
1215 | | 0, /* tp_as_number */ |
1216 | | 0, /* tp_as_sequence */ |
1217 | | 0, /* tp_as_mapping */ |
1218 | | 0, /* tp_hash */ |
1219 | | 0, /* tp_call */ |
1220 | | 0, /* tp_str */ |
1221 | | PyObject_GenericGetAttr, /* tp_getattro */ |
1222 | | 0, /* tp_setattro */ |
1223 | | 0, /* tp_as_buffer */ |
1224 | | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
1225 | | 0, /* tp_doc */ |
1226 | | 0, /* tp_traverse */ |
1227 | | 0, /* tp_clear */ |
1228 | | 0, /* tp_richcompare */ |
1229 | | 0, /* tp_weaklistoffset */ |
1230 | | PyObject_SelfIter, /* tp_iter */ |
1231 | | (iternextfunc)fieldnameiter_next, /* tp_iternext */ |
1232 | | fieldnameiter_methods, /* tp_methods */ |
1233 | | 0}; |
1234 | | |
1235 | | /* formatter_field_name_split is used to implement |
1236 | | string.Formatter.vformat. It takes a PEP 3101 "field name", and |
1237 | | returns a tuple of (first, rest): "first", the part before the |
1238 | | first '.' or '['; and "rest", an iterator for the rest of the field |
1239 | | name. It's a wrapper around stringlib/unicode_format.h's |
1240 | | field_name_split. The iterator it returns is a |
1241 | | fieldnameiterator wrapping a FieldNameIterator */ |
1242 | | static PyObject * |
1243 | | formatter_field_name_split(PyObject *ignored, PyObject *self) |
1244 | 0 | { |
1245 | 0 | SubString first; |
1246 | 0 | Py_ssize_t first_idx; |
1247 | 0 | fieldnameiterobject *it; |
1248 | |
|
1249 | 0 | PyObject *first_obj = NULL; |
1250 | 0 | PyObject *result = NULL; |
1251 | |
|
1252 | 0 | if (!PyUnicode_Check(self)) { |
1253 | 0 | PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); |
1254 | 0 | return NULL; |
1255 | 0 | } |
1256 | | |
1257 | 0 | if (PyUnicode_READY(self) == -1) |
1258 | 0 | return NULL; |
1259 | | |
1260 | 0 | it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); |
1261 | 0 | if (it == NULL) |
1262 | 0 | return NULL; |
1263 | | |
1264 | | /* take ownership, give the object to the iterator. this is |
1265 | | just to keep the field_name alive */ |
1266 | 0 | Py_INCREF(self); |
1267 | 0 | it->str = self; |
1268 | | |
1269 | | /* Pass in auto_number = NULL. We'll return an empty string for |
1270 | | first_obj in that case. */ |
1271 | 0 | if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self), |
1272 | 0 | &first, &first_idx, &it->it_field, NULL)) |
1273 | 0 | goto done; |
1274 | | |
1275 | | /* first becomes an integer, if possible; else a string */ |
1276 | 0 | if (first_idx != -1) |
1277 | 0 | first_obj = PyLong_FromSsize_t(first_idx); |
1278 | 0 | else |
1279 | | /* convert "first" into a string object */ |
1280 | 0 | first_obj = SubString_new_object(&first); |
1281 | 0 | if (first_obj == NULL) |
1282 | 0 | goto done; |
1283 | | |
1284 | | /* return a tuple of values */ |
1285 | 0 | result = PyTuple_Pack(2, first_obj, it); |
1286 | |
|
1287 | 0 | done: |
1288 | 0 | Py_XDECREF(it); |
1289 | 0 | Py_XDECREF(first_obj); |
1290 | 0 | return result; |
1291 | 0 | } |