Coverage Report

Created: 2025-07-12 07:00

/src/cpython3/Objects/stringlib/unicode_format.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
    unicode_format.h -- implementation of str.format().
3
*/
4
5
#include "pycore_complexobject.h" // _PyComplex_FormatAdvancedWriter()
6
#include "pycore_floatobject.h"   // _PyFloat_FormatAdvancedWriter()
7
8
/************************************************************************/
9
/***********   Global data structures and forward declarations  *********/
10
/************************************************************************/
11
12
/*
13
   A SubString consists of the characters between two string or
14
   unicode pointers.
15
*/
16
typedef struct {
17
    PyObject *str; /* borrowed reference */
18
    Py_ssize_t start, end;
19
} SubString;
20
21
22
/* Which field-numbering mode the format string has committed to so far. */
typedef enum {
    ANS_INIT = 0,   /* no numbered or empty field name seen yet */
    ANS_AUTO,       /* empty field names: numbering is automatic */
    ANS_MANUAL      /* explicit numeric field names */
} AutoNumberState;
27
28
/* Keeps track of our auto-numbering state, and which number field we're on */
29
typedef struct {
30
    AutoNumberState an_state;
31
    int an_field_number;
32
} AutoNumber;
33
34
35
/* forward declaration for recursion */
36
static PyObject *
37
build_string(SubString *input, PyObject *args, PyObject *kwargs,
38
             int recursion_depth, AutoNumber *auto_number);
39
40
41
42
/************************************************************************/
43
/**************************  Utility  functions  ************************/
44
/************************************************************************/
45
46
static void
47
AutoNumber_Init(AutoNumber *auto_number)
48
32
{
49
32
    auto_number->an_state = ANS_INIT;
50
32
    auto_number->an_field_number = 0;
51
32
}
52
53
/* fill in a SubString from a pointer and length */
54
Py_LOCAL_INLINE(void)
55
SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
56
352
{
57
352
    str->str = s;
58
352
    str->start = start;
59
352
    str->end = end;
60
352
}
61
62
/* return a new string.  if str->str is NULL, return None */
63
Py_LOCAL_INLINE(PyObject *)
64
SubString_new_object(SubString *str)
65
0
{
66
0
    if (str->str == NULL)
67
0
        Py_RETURN_NONE;
68
0
    return PyUnicode_Substring(str->str, str->start, str->end);
69
0
}
70
71
/* return a new string.  if str->str is NULL, return a new empty string */
72
Py_LOCAL_INLINE(PyObject *)
73
SubString_new_object_or_empty(SubString *str)
74
0
{
75
0
    if (str->str == NULL) {
76
0
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
77
0
    }
78
0
    return SubString_new_object(str);
79
0
}
80
81
/* Return 1 if an error has been detected switching between automatic
82
   field numbering and manual field specification, else return 0. Set
83
   ValueError on error. */
84
static int
85
autonumber_state_error(AutoNumberState state, int field_name_is_empty)
86
32
{
87
32
    if (state == ANS_MANUAL) {
88
32
        if (field_name_is_empty) {
89
0
            PyErr_SetString(PyExc_ValueError, "cannot switch from "
90
0
                            "manual field specification to "
91
0
                            "automatic field numbering");
92
0
            return 1;
93
0
        }
94
32
    }
95
0
    else {
96
0
        if (!field_name_is_empty) {
97
0
            PyErr_SetString(PyExc_ValueError, "cannot switch from "
98
0
                            "automatic field numbering to "
99
0
                            "manual field specification");
100
0
            return 1;
101
0
        }
102
0
    }
103
32
    return 0;
104
32
}
105
106
107
/************************************************************************/
108
/***********  Format string parsing -- integers and identifiers *********/
109
/************************************************************************/
110
111
static Py_ssize_t
112
get_integer(const SubString *str)
113
32
{
114
32
    Py_ssize_t accumulator = 0;
115
32
    Py_ssize_t digitval;
116
32
    Py_ssize_t i;
117
118
    /* empty string is an error */
119
32
    if (str->start >= str->end)
120
0
        return -1;
121
122
64
    for (i = str->start; i < str->end; i++) {
123
32
        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
124
32
        if (digitval < 0)
125
0
            return -1;
126
        /*
127
           Detect possible overflow before it happens:
128
129
              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
130
              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
131
        */
132
32
        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
133
0
            PyErr_Format(PyExc_ValueError,
134
0
                         "Too many decimal digits in format string");
135
0
            return -1;
136
0
        }
137
32
        accumulator = accumulator * 10 + digitval;
138
32
    }
139
32
    return accumulator;
140
32
}
141
142
/************************************************************************/
143
/******** Functions to get field objects and specification strings ******/
144
/************************************************************************/
145
146
/* do the equivalent of obj.name */
147
static PyObject *
148
getattr(PyObject *obj, SubString *name)
149
0
{
150
0
    PyObject *newobj;
151
0
    PyObject *str = SubString_new_object(name);
152
0
    if (str == NULL)
153
0
        return NULL;
154
0
    newobj = PyObject_GetAttr(obj, str);
155
0
    Py_DECREF(str);
156
0
    return newobj;
157
0
}
158
159
/* do the equivalent of obj[idx], where obj is a sequence */
160
static PyObject *
161
getitem_sequence(PyObject *obj, Py_ssize_t idx)
162
0
{
163
0
    return PySequence_GetItem(obj, idx);
164
0
}
165
166
/* do the equivalent of obj[idx], where obj is not a sequence */
167
static PyObject *
168
getitem_idx(PyObject *obj, Py_ssize_t idx)
169
0
{
170
0
    PyObject *newobj;
171
0
    PyObject *idx_obj = PyLong_FromSsize_t(idx);
172
0
    if (idx_obj == NULL)
173
0
        return NULL;
174
0
    newobj = PyObject_GetItem(obj, idx_obj);
175
0
    Py_DECREF(idx_obj);
176
0
    return newobj;
177
0
}
178
179
/* do the equivalent of obj[name] */
180
static PyObject *
181
getitem_str(PyObject *obj, SubString *name)
182
0
{
183
0
    PyObject *newobj;
184
0
    PyObject *str = SubString_new_object(name);
185
0
    if (str == NULL)
186
0
        return NULL;
187
0
    newobj = PyObject_GetItem(obj, str);
188
0
    Py_DECREF(str);
189
0
    return newobj;
190
0
}
191
192
typedef struct {
193
    /* the entire string we're parsing.  we assume that someone else
194
       is managing its lifetime, and that it will exist for the
195
       lifetime of the iterator.  can be empty */
196
    SubString str;
197
198
    /* index to where we are inside field_name */
199
    Py_ssize_t index;
200
} FieldNameIterator;
201
202
203
static int
204
FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
205
                       Py_ssize_t start, Py_ssize_t end)
206
32
{
207
32
    SubString_init(&self->str, s, start, end);
208
32
    self->index = start;
209
32
    return 1;
210
32
}
211
212
static int
213
_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
214
0
{
215
0
    Py_UCS4 c;
216
217
0
    name->str = self->str.str;
218
0
    name->start = self->index;
219
220
    /* return everything until '.' or '[' */
221
0
    while (self->index < self->str.end) {
222
0
        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
223
0
        switch (c) {
224
0
        case '[':
225
0
        case '.':
226
            /* backup so that we this character will be seen next time */
227
0
            self->index--;
228
0
            break;
229
0
        default:
230
0
            continue;
231
0
        }
232
0
        break;
233
0
    }
234
    /* end of string is okay */
235
0
    name->end = self->index;
236
0
    return 1;
237
0
}
238
239
static int
240
_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
241
0
{
242
0
    int bracket_seen = 0;
243
0
    Py_UCS4 c;
244
245
0
    name->str = self->str.str;
246
0
    name->start = self->index;
247
248
    /* return everything until ']' */
249
0
    while (self->index < self->str.end) {
250
0
        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
251
0
        switch (c) {
252
0
        case ']':
253
0
            bracket_seen = 1;
254
0
            break;
255
0
        default:
256
0
            continue;
257
0
        }
258
0
        break;
259
0
    }
260
    /* make sure we ended with a ']' */
261
0
    if (!bracket_seen) {
262
0
        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
263
0
        return 0;
264
0
    }
265
266
    /* end of string is okay */
267
    /* don't include the ']' */
268
0
    name->end = self->index-1;
269
0
    return 1;
270
0
}
271
272
/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
273
static int
274
FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
275
                       Py_ssize_t *name_idx, SubString *name)
276
32
{
277
    /* check at end of input */
278
32
    if (self->index >= self->str.end)
279
32
        return 1;
280
281
0
    switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
282
0
    case '.':
283
0
        *is_attribute = 1;
284
0
        if (_FieldNameIterator_attr(self, name) == 0)
285
0
            return 0;
286
0
        *name_idx = -1;
287
0
        break;
288
0
    case '[':
289
0
        *is_attribute = 0;
290
0
        if (_FieldNameIterator_item(self, name) == 0)
291
0
            return 0;
292
0
        *name_idx = get_integer(name);
293
0
        if (*name_idx == -1 && PyErr_Occurred())
294
0
            return 0;
295
0
        break;
296
0
    default:
297
        /* Invalid character follows ']' */
298
0
        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
299
0
                        "follow ']' in format field specifier");
300
0
        return 0;
301
0
    }
302
303
    /* empty string is an error */
304
0
    if (name->start == name->end) {
305
0
        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
306
0
        return 0;
307
0
    }
308
309
0
    return 2;
310
0
}
311
312
313
/* input: field_name
314
   output: 'first' points to the part before the first '[' or '.'
315
           'first_idx' is -1 if 'first' is not an integer, otherwise
316
                       it's the value of first converted to an integer
317
           'rest' is an iterator to return the rest
318
*/
319
static int
320
field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
321
                 Py_ssize_t *first_idx, FieldNameIterator *rest,
322
                 AutoNumber *auto_number)
323
32
{
324
32
    Py_UCS4 c;
325
32
    Py_ssize_t i = start;
326
32
    int field_name_is_empty;
327
32
    int using_numeric_index;
328
329
    /* find the part up until the first '.' or '[' */
330
64
    while (i < end) {
331
32
        switch (c = PyUnicode_READ_CHAR(str, i++)) {
332
0
        case '[':
333
0
        case '.':
334
            /* backup so that we this character is available to the
335
               "rest" iterator */
336
0
            i--;
337
0
            break;
338
32
        default:
339
32
            continue;
340
32
        }
341
0
        break;
342
32
    }
343
344
    /* set up the return values */
345
32
    SubString_init(first, str, start, i);
346
32
    FieldNameIterator_init(rest, str, i, end);
347
348
    /* see if "first" is an integer, in which case it's used as an index */
349
32
    *first_idx = get_integer(first);
350
32
    if (*first_idx == -1 && PyErr_Occurred())
351
0
        return 0;
352
353
32
    field_name_is_empty = first->start >= first->end;
354
355
    /* If the field name is omitted or if we have a numeric index
356
       specified, then we're doing numeric indexing into args. */
357
32
    using_numeric_index = field_name_is_empty || *first_idx != -1;
358
359
    /* We always get here exactly one time for each field we're
360
       processing. And we get here in field order (counting by left
361
       braces). So this is the perfect place to handle automatic field
362
       numbering if the field name is omitted. */
363
364
    /* Check if we need to do the auto-numbering. It's not needed if
365
       we're called from string.Format routines, because it's handled
366
       in that class by itself. */
367
32
    if (auto_number) {
368
        /* Initialize our auto numbering state if this is the first
369
           time we're either auto-numbering or manually numbering. */
370
32
        if (auto_number->an_state == ANS_INIT && using_numeric_index)
371
32
            auto_number->an_state = field_name_is_empty ?
372
32
                ANS_AUTO : ANS_MANUAL;
373
374
        /* Make sure our state is consistent with what we're doing
375
           this time through. Only check if we're using a numeric
376
           index. */
377
32
        if (using_numeric_index)
378
32
            if (autonumber_state_error(auto_number->an_state,
379
32
                                       field_name_is_empty))
380
0
                return 0;
381
        /* Zero length field means we want to do auto-numbering of the
382
           fields. */
383
32
        if (field_name_is_empty)
384
0
            *first_idx = (auto_number->an_field_number)++;
385
32
    }
386
387
32
    return 1;
388
32
}
389
390
391
/*
392
    get_field_object returns the object inside {}, before the
393
    format_spec.  It handles getindex and getattr lookups and consumes
394
    the entire input string.
395
*/
396
static PyObject *
397
get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
398
                 AutoNumber *auto_number)
399
32
{
400
32
    PyObject *obj = NULL;
401
32
    int ok;
402
32
    int is_attribute;
403
32
    SubString name;
404
32
    SubString first;
405
32
    Py_ssize_t index;
406
32
    FieldNameIterator rest;
407
408
32
    if (!field_name_split(input->str, input->start, input->end, &first,
409
32
                          &index, &rest, auto_number)) {
410
0
        goto error;
411
0
    }
412
413
32
    if (index == -1) {
414
        /* look up in kwargs */
415
0
        PyObject *key = SubString_new_object(&first);
416
0
        if (key == NULL) {
417
0
            goto error;
418
0
        }
419
0
        if (kwargs == NULL) {
420
0
            PyErr_SetObject(PyExc_KeyError, key);
421
0
            Py_DECREF(key);
422
0
            goto error;
423
0
        }
424
        /* Use PyObject_GetItem instead of PyDict_GetItem because this
425
           code is no longer just used with kwargs. It might be passed
426
           a non-dict when called through format_map. */
427
0
        obj = PyObject_GetItem(kwargs, key);
428
0
        Py_DECREF(key);
429
0
        if (obj == NULL) {
430
0
            goto error;
431
0
        }
432
0
    }
433
32
    else {
434
        /* If args is NULL, we have a format string with a positional field
435
           with only kwargs to retrieve it from. This can only happen when
436
           used with format_map(), where positional arguments are not
437
           allowed. */
438
32
        if (args == NULL) {
439
0
            PyErr_SetString(PyExc_ValueError, "Format string contains "
440
0
                            "positional fields");
441
0
            goto error;
442
0
        }
443
444
        /* look up in args */
445
32
        obj = PySequence_GetItem(args, index);
446
32
        if (obj == NULL) {
447
0
            PyErr_Format(PyExc_IndexError,
448
0
                         "Replacement index %zd out of range for positional "
449
0
                         "args tuple",
450
0
                         index);
451
0
             goto error;
452
0
        }
453
32
    }
454
455
    /* iterate over the rest of the field_name */
456
32
    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
457
32
                                        &name)) == 2) {
458
0
        PyObject *tmp;
459
460
0
        if (is_attribute)
461
            /* getattr lookup "." */
462
0
            tmp = getattr(obj, &name);
463
0
        else
464
            /* getitem lookup "[]" */
465
0
            if (index == -1)
466
0
                tmp = getitem_str(obj, &name);
467
0
            else
468
0
                if (PySequence_Check(obj))
469
0
                    tmp = getitem_sequence(obj, index);
470
0
                else
471
                    /* not a sequence */
472
0
                    tmp = getitem_idx(obj, index);
473
0
        if (tmp == NULL)
474
0
            goto error;
475
476
        /* assign to obj */
477
0
        Py_SETREF(obj, tmp);
478
0
    }
479
    /* end of iterator, this is the non-error case */
480
32
    if (ok == 1)
481
32
        return obj;
482
0
error:
483
0
    Py_XDECREF(obj);
484
0
    return NULL;
485
32
}
486
487
/************************************************************************/
488
/*****************  Field rendering functions  **************************/
489
/************************************************************************/
490
491
/*
492
    render_field() is the main function in this section.  It takes the
493
    field object and field specification string generated by
494
    get_field_and_spec, and renders the field into the output string.
495
496
    render_field calls fieldobj.__format__(format_spec) method, and
497
    appends to the output.
498
*/
499
static int
500
render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
501
32
{
502
32
    int ok = 0;
503
32
    PyObject *result = NULL;
504
32
    PyObject *format_spec_object = NULL;
505
32
    int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
506
32
    int err;
507
508
    /* If we know the type exactly, skip the lookup of __format__ and just
509
       call the formatter directly. */
510
32
    if (PyUnicode_CheckExact(fieldobj))
511
0
        formatter = _PyUnicode_FormatAdvancedWriter;
512
32
    else if (PyLong_CheckExact(fieldobj))
513
32
        formatter = _PyLong_FormatAdvancedWriter;
514
0
    else if (PyFloat_CheckExact(fieldobj))
515
0
        formatter = _PyFloat_FormatAdvancedWriter;
516
0
    else if (PyComplex_CheckExact(fieldobj))
517
0
        formatter = _PyComplex_FormatAdvancedWriter;
518
519
32
    if (formatter) {
520
        /* we know exactly which formatter will be called when __format__ is
521
           looked up, so call it directly, instead. */
522
32
        err = formatter(writer, fieldobj, format_spec->str,
523
32
                        format_spec->start, format_spec->end);
524
32
        return (err == 0);
525
32
    }
526
0
    else {
527
        /* We need to create an object out of the pointers we have, because
528
           __format__ takes a string/unicode object for format_spec. */
529
0
        if (format_spec->str)
530
0
            format_spec_object = PyUnicode_Substring(format_spec->str,
531
0
                                                     format_spec->start,
532
0
                                                     format_spec->end);
533
0
        else
534
0
            format_spec_object = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
535
0
        if (format_spec_object == NULL)
536
0
            goto done;
537
538
0
        result = PyObject_Format(fieldobj, format_spec_object);
539
0
    }
540
0
    if (result == NULL)
541
0
        goto done;
542
543
0
    if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
544
0
        goto done;
545
0
    ok = 1;
546
547
0
done:
548
0
    Py_XDECREF(format_spec_object);
549
0
    Py_XDECREF(result);
550
0
    return ok;
551
0
}
552
553
static int
554
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
555
            int *format_spec_needs_expanding, Py_UCS4 *conversion)
556
32
{
557
    /* Note this function works if the field name is zero length,
558
       which is good.  Zero length field names are handled later, in
559
       field_name_split. */
560
561
32
    Py_UCS4 c = 0;
562
563
    /* initialize these, as they may be empty */
564
32
    *conversion = '\0';
565
32
    SubString_init(format_spec, NULL, 0, 0);
566
567
    /* Search for the field name.  it's terminated by the end of
568
       the string, or a ':' or '!' */
569
32
    field_name->str = str->str;
570
32
    field_name->start = str->start;
571
64
    while (str->start < str->end) {
572
64
        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
573
0
        case '{':
574
0
            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
575
0
            return 0;
576
0
        case '[':
577
0
            for (; str->start < str->end; str->start++)
578
0
                if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
579
0
                    break;
580
0
            continue;
581
0
        case '}':
582
32
        case ':':
583
32
        case '!':
584
32
            break;
585
32
        default:
586
32
            continue;
587
64
        }
588
32
        break;
589
64
    }
590
591
32
    field_name->end = str->start - 1;
592
32
    if (c == '!' || c == ':') {
593
32
        Py_ssize_t count;
594
        /* we have a format specifier and/or a conversion */
595
        /* don't include the last character */
596
597
        /* see if there's a conversion specifier */
598
32
        if (c == '!') {
599
            /* there must be another character present */
600
0
            if (str->start >= str->end) {
601
0
                PyErr_SetString(PyExc_ValueError,
602
0
                                "end of string while looking for conversion "
603
0
                                "specifier");
604
0
                return 0;
605
0
            }
606
0
            *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
607
608
0
            if (str->start < str->end) {
609
0
                c = PyUnicode_READ_CHAR(str->str, str->start++);
610
0
                if (c == '}')
611
0
                    return 1;
612
0
                if (c != ':') {
613
0
                    PyErr_SetString(PyExc_ValueError,
614
0
                                    "expected ':' after conversion specifier");
615
0
                    return 0;
616
0
                }
617
0
            }
618
0
        }
619
32
        format_spec->str = str->str;
620
32
        format_spec->start = str->start;
621
32
        count = 1;
622
128
        while (str->start < str->end) {
623
128
            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
624
0
            case '{':
625
0
                *format_spec_needs_expanding = 1;
626
0
                count++;
627
0
                break;
628
32
            case '}':
629
32
                count--;
630
32
                if (count == 0) {
631
32
                    format_spec->end = str->start - 1;
632
32
                    return 1;
633
32
                }
634
0
                break;
635
96
            default:
636
96
                break;
637
128
            }
638
128
        }
639
640
0
        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
641
0
        return 0;
642
32
    }
643
0
    else if (c != '}') {
644
0
        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
645
0
        return 0;
646
0
    }
647
648
0
    return 1;
649
32
}
650
651
/************************************************************************/
652
/******* Output string allocation and escape-to-markup processing  ******/
653
/************************************************************************/
654
655
/* MarkupIterator breaks the string into pieces of either literal
656
   text, or things inside {} that need to be marked up.  it is
657
   designed to make it easy to wrap a Python iterator around it, for
658
   use with the Formatter class */
659
660
typedef struct {
661
    SubString str;
662
} MarkupIterator;
663
664
static int
665
MarkupIterator_init(MarkupIterator *self, PyObject *str,
666
                    Py_ssize_t start, Py_ssize_t end)
667
32
{
668
32
    SubString_init(&self->str, str, start, end);
669
32
    return 1;
670
32
}
671
672
/* returns 0 on error, 1 on non-error termination, and 2 if it got a
673
   string (or something to be expanded) */
674
static int
675
MarkupIterator_next(MarkupIterator *self, SubString *literal,
676
                    int *field_present, SubString *field_name,
677
                    SubString *format_spec, Py_UCS4 *conversion,
678
                    int *format_spec_needs_expanding)
679
64
{
680
64
    int at_end;
681
64
    Py_UCS4 c = 0;
682
64
    Py_ssize_t start;
683
64
    Py_ssize_t len;
684
64
    int markup_follows = 0;
685
686
    /* initialize all of the output variables */
687
64
    SubString_init(literal, NULL, 0, 0);
688
64
    SubString_init(field_name, NULL, 0, 0);
689
64
    SubString_init(format_spec, NULL, 0, 0);
690
64
    *conversion = '\0';
691
64
    *format_spec_needs_expanding = 0;
692
64
    *field_present = 0;
693
694
    /* No more input, end of iterator.  This is the normal exit
695
       path. */
696
64
    if (self->str.start >= self->str.end)
697
32
        return 1;
698
699
32
    start = self->str.start;
700
701
    /* First read any literal text. Read until the end of string, an
702
       escaped '{' or '}', or an unescaped '{'.  In order to never
703
       allocate memory and so I can just pass pointers around, if
704
       there's an escaped '{' or '}' then we'll return the literal
705
       including the brace, but no format object.  The next time
706
       through, we'll return the rest of the literal, skipping past
707
       the second consecutive brace. */
708
96
    while (self->str.start < self->str.end) {
709
96
        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
710
32
        case '{':
711
32
        case '}':
712
32
            markup_follows = 1;
713
32
            break;
714
64
        default:
715
64
            continue;
716
96
        }
717
32
        break;
718
96
    }
719
720
32
    at_end = self->str.start >= self->str.end;
721
32
    len = self->str.start - start;
722
723
32
    if ((c == '}') && (at_end ||
724
0
                       (c != PyUnicode_READ_CHAR(self->str.str,
725
0
                                                 self->str.start)))) {
726
0
        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
727
0
                        "in format string");
728
0
        return 0;
729
0
    }
730
32
    if (at_end && c == '{') {
731
0
        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
732
0
                        "in format string");
733
0
        return 0;
734
0
    }
735
32
    if (!at_end) {
736
32
        if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
737
            /* escaped } or {, skip it in the input.  there is no
738
               markup object following us, just this literal text */
739
0
            self->str.start++;
740
0
            markup_follows = 0;
741
0
        }
742
32
        else
743
32
            len--;
744
32
    }
745
746
    /* record the literal text */
747
32
    literal->str = self->str.str;
748
32
    literal->start = start;
749
32
    literal->end = start + len;
750
751
32
    if (!markup_follows)
752
0
        return 2;
753
754
    /* this is markup; parse the field */
755
32
    *field_present = 1;
756
32
    if (!parse_field(&self->str, field_name, format_spec,
757
32
                     format_spec_needs_expanding, conversion))
758
0
        return 0;
759
32
    return 2;
760
32
}
761
762
763
/* do the !r or !s conversion on obj */
764
static PyObject *
765
do_conversion(PyObject *obj, Py_UCS4 conversion)
766
0
{
767
    /* XXX in pre-3.0, do we need to convert this to unicode, since it
768
       might have returned a string? */
769
0
    switch (conversion) {
770
0
    case 'r':
771
0
        return PyObject_Repr(obj);
772
0
    case 's':
773
0
        return PyObject_Str(obj);
774
0
    case 'a':
775
0
        return PyObject_ASCII(obj);
776
0
    default:
777
0
        if (conversion > 32 && conversion < 127) {
778
                /* It's the ASCII subrange; casting to char is safe
779
                   (assuming the execution character set is an ASCII
780
                   superset). */
781
0
                PyErr_Format(PyExc_ValueError,
782
0
                     "Unknown conversion specifier %c",
783
0
                     (char)conversion);
784
0
        } else
785
0
                PyErr_Format(PyExc_ValueError,
786
0
                     "Unknown conversion specifier \\x%x",
787
0
                     (unsigned int)conversion);
788
0
        return NULL;
789
0
    }
790
0
}
791
792
/* given:
793
794
   {field_name!conversion:format_spec}
795
796
   compute the result and write it to output.
797
   format_spec_needs_expanding is an optimization.  if it's false,
798
   just output the string directly, otherwise recursively expand the
799
   format_spec string.
800
801
   field_name is allowed to be zero length, in which case we
802
   are doing auto field numbering.
803
*/
804
805
static int
806
output_markup(SubString *field_name, SubString *format_spec,
807
              int format_spec_needs_expanding, Py_UCS4 conversion,
808
              _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
809
              int recursion_depth, AutoNumber *auto_number)
810
32
{
811
32
    PyObject *tmp = NULL;
812
32
    PyObject *fieldobj = NULL;
813
32
    SubString expanded_format_spec;
814
32
    SubString *actual_format_spec;
815
32
    int result = 0;
816
817
    /* convert field_name to an object */
818
32
    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
819
32
    if (fieldobj == NULL)
820
0
        goto done;
821
822
32
    if (conversion != '\0') {
823
0
        tmp = do_conversion(fieldobj, conversion);
824
0
        if (tmp == NULL)
825
0
            goto done;
826
827
        /* do the assignment, transferring ownership: fieldobj = tmp */
828
0
        Py_SETREF(fieldobj, tmp);
829
0
        tmp = NULL;
830
0
    }
831
832
    /* if needed, recursively compute the format_spec */
833
32
    if (format_spec_needs_expanding) {
834
0
        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
835
0
                           auto_number);
836
0
        if (tmp == NULL)
837
0
            goto done;
838
839
        /* note that in the case we're expanding the format string,
840
           tmp must be kept around until after the call to
841
           render_field. */
842
0
        SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
843
0
        actual_format_spec = &expanded_format_spec;
844
0
    }
845
32
    else
846
32
        actual_format_spec = format_spec;
847
848
32
    if (render_field(fieldobj, actual_format_spec, writer) == 0)
849
0
        goto done;
850
851
32
    result = 1;
852
853
32
done:
854
32
    Py_XDECREF(fieldobj);
855
32
    Py_XDECREF(tmp);
856
857
32
    return result;
858
32
}
859
860
/*
861
    do_markup is the top-level loop for the format() method.  It
862
    searches through the format string for escapes to markup codes, and
863
    calls other functions to move non-markup text to the output,
864
    and to perform the markup to the output.
865
*/
866
static int
867
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
868
          _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
869
32
{
870
32
    MarkupIterator iter;
871
32
    int format_spec_needs_expanding;
872
32
    int result;
873
32
    int field_present;
874
32
    SubString literal;
875
32
    SubString field_name;
876
32
    SubString format_spec;
877
32
    Py_UCS4 conversion;
878
879
32
    MarkupIterator_init(&iter, input->str, input->start, input->end);
880
64
    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
881
64
                                         &field_name, &format_spec,
882
64
                                         &conversion,
883
64
                                         &format_spec_needs_expanding)) == 2) {
884
32
        if (literal.end != literal.start) {
885
32
            if (!field_present && iter.str.start == iter.str.end)
886
0
                writer->overallocate = 0;
887
32
            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
888
32
                                                literal.start, literal.end) < 0)
889
0
                return 0;
890
32
        }
891
892
32
        if (field_present) {
893
32
            if (iter.str.start == iter.str.end)
894
32
                writer->overallocate = 0;
895
32
            if (!output_markup(&field_name, &format_spec,
896
32
                               format_spec_needs_expanding, conversion, writer,
897
32
                               args, kwargs, recursion_depth, auto_number))
898
0
                return 0;
899
32
        }
900
32
    }
901
32
    return result;
902
32
}
903
904
905
/*
906
    build_string allocates the output string and then
907
    calls do_markup to do the heavy lifting.
908
*/
909
static PyObject *
910
build_string(SubString *input, PyObject *args, PyObject *kwargs,
911
             int recursion_depth, AutoNumber *auto_number)
912
32
{
913
32
    _PyUnicodeWriter writer;
914
915
    /* check the recursion level */
916
32
    if (recursion_depth <= 0) {
917
0
        PyErr_SetString(PyExc_ValueError,
918
0
                        "Max string recursion exceeded");
919
0
        return NULL;
920
0
    }
921
922
32
    _PyUnicodeWriter_Init(&writer);
923
32
    writer.overallocate = 1;
924
32
    writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
925
926
32
    if (!do_markup(input, args, kwargs, &writer, recursion_depth,
927
32
                   auto_number)) {
928
0
        _PyUnicodeWriter_Dealloc(&writer);
929
0
        return NULL;
930
0
    }
931
932
32
    return _PyUnicodeWriter_Finish(&writer);
933
32
}
934
935
/************************************************************************/
936
/*********** main routine ***********************************************/
937
/************************************************************************/
938
939
/* this is the main entry point */
940
static PyObject *
941
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
942
32
{
943
32
    SubString input;
944
945
    /* PEP 3101 says only 2 levels, so that
946
       "{0:{1}}".format('abc', 's')            # works
947
       "{0:{1:{2}}}".format('abc', 's', '')    # fails
948
    */
949
32
    int recursion_depth = 2;
950
951
32
    AutoNumber auto_number;
952
32
    AutoNumber_Init(&auto_number);
953
32
    SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
954
32
    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
955
32
}
956
957
/* Format `self` with `obj` supplying the named fields: `obj` is
   handed to do_string_format() as kwargs, and no positional arguments
   are passed (args is NULL). */
static PyObject *
do_string_format_map(PyObject *self, PyObject *obj)
{
    PyObject *formatted = do_string_format(self, NULL, obj);
    return formatted;
}
962
963
964
/************************************************************************/
965
/*********** formatteriterator ******************************************/
966
/************************************************************************/
967
968
/* This is used to implement string.Formatter.vparse().  It exists so
969
   Formatter can share code with the built in unicode.format() method.
970
   It's really just a wrapper around MarkupIterator that is callable
971
   from Python. */
972
973
typedef struct {
974
    PyObject_HEAD
975
    PyObject *str;
976
    MarkupIterator it_markup;
977
} formatteriterobject;
978
979
static void
980
formatteriter_dealloc(PyObject *op)
981
0
{
982
0
    formatteriterobject *it = (formatteriterobject*)op;
983
0
    Py_XDECREF(it->str);
984
0
    PyObject_Free(it);
985
0
}
986
987
/* returns a tuple:
988
   (literal, field_name, format_spec, conversion)
989
990
   literal is any literal text to output.  might be zero length
991
   field_name is the string before the ':'.  might be None
992
   format_spec is the string after the ':'.  mibht be None
993
   conversion is either None, or the string after the '!'
994
*/
995
static PyObject *
996
formatteriter_next(PyObject *op)
997
0
{
998
0
    formatteriterobject *it = (formatteriterobject*)op;
999
0
    SubString literal;
1000
0
    SubString field_name;
1001
0
    SubString format_spec;
1002
0
    Py_UCS4 conversion;
1003
0
    int format_spec_needs_expanding;
1004
0
    int field_present;
1005
0
    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1006
0
                                     &field_name, &format_spec, &conversion,
1007
0
                                     &format_spec_needs_expanding);
1008
1009
    /* all of the SubString objects point into it->str, so no
1010
       memory management needs to be done on them */
1011
0
    assert(0 <= result && result <= 2);
1012
0
    if (result == 0 || result == 1)
1013
        /* if 0, error has already been set, if 1, iterator is empty */
1014
0
        return NULL;
1015
0
    else {
1016
0
        PyObject *literal_str = NULL;
1017
0
        PyObject *field_name_str = NULL;
1018
0
        PyObject *format_spec_str = NULL;
1019
0
        PyObject *conversion_str = NULL;
1020
0
        PyObject *tuple = NULL;
1021
1022
0
        literal_str = SubString_new_object(&literal);
1023
0
        if (literal_str == NULL)
1024
0
            goto done;
1025
1026
0
        field_name_str = SubString_new_object(&field_name);
1027
0
        if (field_name_str == NULL)
1028
0
            goto done;
1029
1030
        /* if field_name is non-zero length, return a string for
1031
           format_spec (even if zero length), else return None */
1032
0
        format_spec_str = (field_present ?
1033
0
                           SubString_new_object_or_empty :
1034
0
                           SubString_new_object)(&format_spec);
1035
0
        if (format_spec_str == NULL)
1036
0
            goto done;
1037
1038
        /* if the conversion is not specified, return a None,
1039
           otherwise create a one length string with the conversion
1040
           character */
1041
0
        if (conversion == '\0') {
1042
0
            conversion_str = Py_NewRef(Py_None);
1043
0
        }
1044
0
        else
1045
0
            conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1046
0
                                                       &conversion, 1);
1047
0
        if (conversion_str == NULL)
1048
0
            goto done;
1049
1050
0
        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1051
0
                             conversion_str);
1052
0
    done:
1053
0
        Py_XDECREF(literal_str);
1054
0
        Py_XDECREF(field_name_str);
1055
0
        Py_XDECREF(format_spec_str);
1056
0
        Py_XDECREF(conversion_str);
1057
0
        return tuple;
1058
0
    }
1059
0
}
1060
1061
static PyMethodDef formatteriter_methods[] = {
1062
    {NULL,              NULL}           /* sentinel */
1063
};
1064
1065
static PyTypeObject PyFormatterIter_Type = {
1066
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1067
    "formatteriterator",                /* tp_name */
1068
    sizeof(formatteriterobject),        /* tp_basicsize */
1069
    0,                                  /* tp_itemsize */
1070
    /* methods */
1071
    formatteriter_dealloc,              /* tp_dealloc */
1072
    0,                                  /* tp_vectorcall_offset */
1073
    0,                                  /* tp_getattr */
1074
    0,                                  /* tp_setattr */
1075
    0,                                  /* tp_as_async */
1076
    0,                                  /* tp_repr */
1077
    0,                                  /* tp_as_number */
1078
    0,                                  /* tp_as_sequence */
1079
    0,                                  /* tp_as_mapping */
1080
    0,                                  /* tp_hash */
1081
    0,                                  /* tp_call */
1082
    0,                                  /* tp_str */
1083
    PyObject_GenericGetAttr,            /* tp_getattro */
1084
    0,                                  /* tp_setattro */
1085
    0,                                  /* tp_as_buffer */
1086
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1087
    0,                                  /* tp_doc */
1088
    0,                                  /* tp_traverse */
1089
    0,                                  /* tp_clear */
1090
    0,                                  /* tp_richcompare */
1091
    0,                                  /* tp_weaklistoffset */
1092
    PyObject_SelfIter,                  /* tp_iter */
1093
    formatteriter_next,                 /* tp_iternext */
1094
    formatteriter_methods,              /* tp_methods */
1095
    0,
1096
};
1097
1098
/* unicode_formatter_parser is used to implement
1099
   string.Formatter.vformat.  it parses a string and returns tuples
1100
   describing the parsed elements.  It's a wrapper around
1101
   stringlib/string_format.h's MarkupIterator */
1102
static PyObject *
1103
formatter_parser(PyObject *Py_UNUSED(module), PyObject *self)
1104
0
{
1105
0
    formatteriterobject *it;
1106
1107
0
    if (!PyUnicode_Check(self)) {
1108
0
        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1109
0
        return NULL;
1110
0
    }
1111
1112
0
    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1113
0
    if (it == NULL)
1114
0
        return NULL;
1115
1116
    /* take ownership, give the object to the iterator */
1117
0
    it->str = Py_NewRef(self);
1118
1119
    /* initialize the contained MarkupIterator */
1120
0
    MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1121
0
    return (PyObject *)it;
1122
0
}
1123
1124
1125
/************************************************************************/
1126
/*********** fieldnameiterator ******************************************/
1127
/************************************************************************/
1128
1129
1130
/* This is used to implement string.Formatter.vparse().  It parses the
1131
   field name into attribute and item values.  It's a Python-callable
1132
   wrapper around FieldNameIterator */
1133
1134
typedef struct {
1135
    PyObject_HEAD
1136
    PyObject *str;
1137
    FieldNameIterator it_field;
1138
} fieldnameiterobject;
1139
1140
static void
1141
fieldnameiter_dealloc(PyObject *op)
1142
0
{
1143
0
    fieldnameiterobject *it = (fieldnameiterobject*)op;
1144
0
    Py_XDECREF(it->str);
1145
0
    PyObject_Free(it);
1146
0
}
1147
1148
/* returns a tuple:
1149
   (is_attr, value)
1150
   is_attr is true if we used attribute syntax (e.g., '.foo')
1151
              false if we used index syntax (e.g., '[foo]')
1152
   value is an integer or string
1153
*/
1154
static PyObject *
1155
fieldnameiter_next(PyObject *op)
1156
0
{
1157
0
    fieldnameiterobject *it = (fieldnameiterobject*)op;
1158
0
    int result;
1159
0
    int is_attr;
1160
0
    Py_ssize_t idx;
1161
0
    SubString name;
1162
1163
0
    result = FieldNameIterator_next(&it->it_field, &is_attr,
1164
0
                                    &idx, &name);
1165
0
    if (result == 0 || result == 1)
1166
        /* if 0, error has already been set, if 1, iterator is empty */
1167
0
        return NULL;
1168
0
    else {
1169
0
        PyObject* result = NULL;
1170
0
        PyObject* is_attr_obj = NULL;
1171
0
        PyObject* obj = NULL;
1172
1173
0
        is_attr_obj = PyBool_FromLong(is_attr);
1174
0
        if (is_attr_obj == NULL)
1175
0
            goto done;
1176
1177
        /* either an integer or a string */
1178
0
        if (idx != -1)
1179
0
            obj = PyLong_FromSsize_t(idx);
1180
0
        else
1181
0
            obj = SubString_new_object(&name);
1182
0
        if (obj == NULL)
1183
0
            goto done;
1184
1185
        /* return a tuple of values */
1186
0
        result = PyTuple_Pack(2, is_attr_obj, obj);
1187
1188
0
    done:
1189
0
        Py_XDECREF(is_attr_obj);
1190
0
        Py_XDECREF(obj);
1191
0
        return result;
1192
0
    }
1193
0
}
1194
1195
static PyMethodDef fieldnameiter_methods[] = {
1196
    {NULL,              NULL}           /* sentinel */
1197
};
1198
1199
static PyTypeObject PyFieldNameIter_Type = {
1200
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1201
    "fieldnameiterator",                /* tp_name */
1202
    sizeof(fieldnameiterobject),        /* tp_basicsize */
1203
    0,                                  /* tp_itemsize */
1204
    /* methods */
1205
    fieldnameiter_dealloc,              /* tp_dealloc */
1206
    0,                                  /* tp_vectorcall_offset */
1207
    0,                                  /* tp_getattr */
1208
    0,                                  /* tp_setattr */
1209
    0,                                  /* tp_as_async */
1210
    0,                                  /* tp_repr */
1211
    0,                                  /* tp_as_number */
1212
    0,                                  /* tp_as_sequence */
1213
    0,                                  /* tp_as_mapping */
1214
    0,                                  /* tp_hash */
1215
    0,                                  /* tp_call */
1216
    0,                                  /* tp_str */
1217
    PyObject_GenericGetAttr,            /* tp_getattro */
1218
    0,                                  /* tp_setattro */
1219
    0,                                  /* tp_as_buffer */
1220
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1221
    0,                                  /* tp_doc */
1222
    0,                                  /* tp_traverse */
1223
    0,                                  /* tp_clear */
1224
    0,                                  /* tp_richcompare */
1225
    0,                                  /* tp_weaklistoffset */
1226
    PyObject_SelfIter,                  /* tp_iter */
1227
    fieldnameiter_next,                 /* tp_iternext */
1228
    fieldnameiter_methods,              /* tp_methods */
1229
    0};
1230
1231
/* unicode_formatter_field_name_split is used to implement
1232
   string.Formatter.vformat.  it takes a PEP 3101 "field name", and
1233
   returns a tuple of (first, rest): "first", the part before the
1234
   first '.' or '['; and "rest", an iterator for the rest of the field
1235
   name.  it's a wrapper around stringlib/string_format.h's
1236
   field_name_split.  The iterator it returns is a
1237
   FieldNameIterator */
1238
static PyObject *
1239
formatter_field_name_split(PyObject *Py_UNUSED(module), PyObject *self)
1240
0
{
1241
0
    SubString first;
1242
0
    Py_ssize_t first_idx;
1243
0
    fieldnameiterobject *it;
1244
1245
0
    PyObject *first_obj = NULL;
1246
0
    PyObject *result = NULL;
1247
1248
0
    if (!PyUnicode_Check(self)) {
1249
0
        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1250
0
        return NULL;
1251
0
    }
1252
1253
0
    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1254
0
    if (it == NULL)
1255
0
        return NULL;
1256
1257
    /* take ownership, give the object to the iterator.  this is
1258
       just to keep the field_name alive */
1259
0
    it->str = Py_NewRef(self);
1260
1261
    /* Pass in auto_number = NULL. We'll return an empty string for
1262
       first_obj in that case. */
1263
0
    if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1264
0
                          &first, &first_idx, &it->it_field, NULL))
1265
0
        goto done;
1266
1267
    /* first becomes an integer, if possible; else a string */
1268
0
    if (first_idx != -1)
1269
0
        first_obj = PyLong_FromSsize_t(first_idx);
1270
0
    else
1271
        /* convert "first" into a string object */
1272
0
        first_obj = SubString_new_object(&first);
1273
0
    if (first_obj == NULL)
1274
0
        goto done;
1275
1276
    /* return a tuple of values */
1277
0
    result = PyTuple_Pack(2, first_obj, it);
1278
1279
0
done:
1280
0
    Py_XDECREF(it);
1281
0
    Py_XDECREF(first_obj);
1282
0
    return result;
1283
0
}