Coverage Report

Created: 2026-06-14 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Python-3.8.3/Objects/stringlib/unicode_format.h
Line
Count
Source
1
/*
2
    unicode_format.h -- implementation of str.format().
3
*/
4
5
/************************************************************************/
6
/***********   Global data structures and forward declarations  *********/
7
/************************************************************************/
8
9
/*
10
   A SubString consists of the characters between two string or
11
   unicode pointers.
12
*/
13
typedef struct {
14
    PyObject *str; /* borrowed reference */
15
    Py_ssize_t start, end;
16
} SubString;
17
18
19
/* Keep track of whether we're auto-numbering fields. */
typedef enum {
    ANS_INIT,       /* no numeric or empty field name has been seen yet */
    ANS_AUTO,       /* field names are omitted and numbered automatically */
    ANS_MANUAL      /* field names carry explicit numeric indexes */
} AutoNumberState;
24
25
/* Keeps track of our auto-numbering state, and which number field we're on */
26
typedef struct {
27
    AutoNumberState an_state;
28
    int an_field_number;
29
} AutoNumber;
30
31
32
/* forward declaration for recursion */
33
static PyObject *
34
build_string(SubString *input, PyObject *args, PyObject *kwargs,
35
             int recursion_depth, AutoNumber *auto_number);
36
37
38
39
/************************************************************************/
40
/**************************  Utility  functions  ************************/
41
/************************************************************************/
42
43
static void
44
AutoNumber_Init(AutoNumber *auto_number)
45
103
{
46
103
    auto_number->an_state = ANS_INIT;
47
103
    auto_number->an_field_number = 0;
48
103
}
49
50
/* fill in a SubString from a pointer and length */
51
Py_LOCAL_INLINE(void)
52
SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
53
1.71k
{
54
1.71k
    str->str = s;
55
1.71k
    str->start = start;
56
1.71k
    str->end = end;
57
1.71k
}
58
59
/* return a new string.  if str->str is NULL, return None */
60
Py_LOCAL_INLINE(PyObject *)
61
SubString_new_object(SubString *str)
62
0
{
63
0
    if (str->str == NULL)
64
0
        Py_RETURN_NONE;
65
0
    return PyUnicode_Substring(str->str, str->start, str->end);
66
0
}
67
68
/* return a new string.  if str->str is NULL, return a new empty string */
69
Py_LOCAL_INLINE(PyObject *)
70
SubString_new_object_or_empty(SubString *str)
71
0
{
72
0
    if (str->str == NULL) {
73
0
        return PyUnicode_New(0, 0);
74
0
    }
75
0
    return SubString_new_object(str);
76
0
}
77
78
/* Return 1 if an error has been detected switching between automatic
79
   field numbering and manual field specification, else return 0. Set
80
   ValueError on error. */
81
static int
82
autonumber_state_error(AutoNumberState state, int field_name_is_empty)
83
172
{
84
172
    if (state == ANS_MANUAL) {
85
0
        if (field_name_is_empty) {
86
0
            PyErr_SetString(PyExc_ValueError, "cannot switch from "
87
0
                            "manual field specification to "
88
0
                            "automatic field numbering");
89
0
            return 1;
90
0
        }
91
0
    }
92
172
    else {
93
172
        if (!field_name_is_empty) {
94
0
            PyErr_SetString(PyExc_ValueError, "cannot switch from "
95
0
                            "automatic field numbering to "
96
0
                            "manual field specification");
97
0
            return 1;
98
0
        }
99
172
    }
100
172
    return 0;
101
172
}
102
103
104
/************************************************************************/
105
/***********  Format string parsing -- integers and identifiers *********/
106
/************************************************************************/
107
108
static Py_ssize_t
109
get_integer(const SubString *str)
110
172
{
111
172
    Py_ssize_t accumulator = 0;
112
172
    Py_ssize_t digitval;
113
172
    Py_ssize_t i;
114
115
    /* empty string is an error */
116
172
    if (str->start >= str->end)
117
172
        return -1;
118
119
0
    for (i = str->start; i < str->end; i++) {
120
0
        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
121
0
        if (digitval < 0)
122
0
            return -1;
123
        /*
124
           Detect possible overflow before it happens:
125
126
              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
127
              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
128
        */
129
0
        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
130
0
            PyErr_Format(PyExc_ValueError,
131
0
                         "Too many decimal digits in format string");
132
0
            return -1;
133
0
        }
134
0
        accumulator = accumulator * 10 + digitval;
135
0
    }
136
0
    return accumulator;
137
0
}
138
139
/************************************************************************/
140
/******** Functions to get field objects and specification strings ******/
141
/************************************************************************/
142
143
/* do the equivalent of obj.name */
144
static PyObject *
145
getattr(PyObject *obj, SubString *name)
146
0
{
147
0
    PyObject *newobj;
148
0
    PyObject *str = SubString_new_object(name);
149
0
    if (str == NULL)
150
0
        return NULL;
151
0
    newobj = PyObject_GetAttr(obj, str);
152
0
    Py_DECREF(str);
153
0
    return newobj;
154
0
}
155
156
/* do the equivalent of obj[idx], where obj is a sequence */
157
static PyObject *
158
getitem_sequence(PyObject *obj, Py_ssize_t idx)
159
0
{
160
0
    return PySequence_GetItem(obj, idx);
161
0
}
162
163
/* do the equivalent of obj[idx], where obj is not a sequence */
164
static PyObject *
165
getitem_idx(PyObject *obj, Py_ssize_t idx)
166
0
{
167
0
    PyObject *newobj;
168
0
    PyObject *idx_obj = PyLong_FromSsize_t(idx);
169
0
    if (idx_obj == NULL)
170
0
        return NULL;
171
0
    newobj = PyObject_GetItem(obj, idx_obj);
172
0
    Py_DECREF(idx_obj);
173
0
    return newobj;
174
0
}
175
176
/* do the equivalent of obj[name] */
177
static PyObject *
178
getitem_str(PyObject *obj, SubString *name)
179
0
{
180
0
    PyObject *newobj;
181
0
    PyObject *str = SubString_new_object(name);
182
0
    if (str == NULL)
183
0
        return NULL;
184
0
    newobj = PyObject_GetItem(obj, str);
185
0
    Py_DECREF(str);
186
0
    return newobj;
187
0
}
188
189
typedef struct {
190
    /* the entire string we're parsing.  we assume that someone else
191
       is managing its lifetime, and that it will exist for the
192
       lifetime of the iterator.  can be empty */
193
    SubString str;
194
195
    /* index to where we are inside field_name */
196
    Py_ssize_t index;
197
} FieldNameIterator;
198
199
200
static int
201
FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
202
                       Py_ssize_t start, Py_ssize_t end)
203
172
{
204
172
    SubString_init(&self->str, s, start, end);
205
172
    self->index = start;
206
172
    return 1;
207
172
}
208
209
static int
210
_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
211
0
{
212
0
    Py_UCS4 c;
213
214
0
    name->str = self->str.str;
215
0
    name->start = self->index;
216
217
    /* return everything until '.' or '[' */
218
0
    while (self->index < self->str.end) {
219
0
        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
220
0
        switch (c) {
221
0
        case '[':
222
0
        case '.':
223
            /* backup so that we this character will be seen next time */
224
0
            self->index--;
225
0
            break;
226
0
        default:
227
0
            continue;
228
0
        }
229
0
        break;
230
0
    }
231
    /* end of string is okay */
232
0
    name->end = self->index;
233
0
    return 1;
234
0
}
235
236
static int
237
_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
238
0
{
239
0
    int bracket_seen = 0;
240
0
    Py_UCS4 c;
241
242
0
    name->str = self->str.str;
243
0
    name->start = self->index;
244
245
    /* return everything until ']' */
246
0
    while (self->index < self->str.end) {
247
0
        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
248
0
        switch (c) {
249
0
        case ']':
250
0
            bracket_seen = 1;
251
0
            break;
252
0
        default:
253
0
            continue;
254
0
        }
255
0
        break;
256
0
    }
257
    /* make sure we ended with a ']' */
258
0
    if (!bracket_seen) {
259
0
        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
260
0
        return 0;
261
0
    }
262
263
    /* end of string is okay */
264
    /* don't include the ']' */
265
0
    name->end = self->index-1;
266
0
    return 1;
267
0
}
268
269
/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
270
static int
271
FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
272
                       Py_ssize_t *name_idx, SubString *name)
273
172
{
274
    /* check at end of input */
275
172
    if (self->index >= self->str.end)
276
172
        return 1;
277
278
0
    switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
279
0
    case '.':
280
0
        *is_attribute = 1;
281
0
        if (_FieldNameIterator_attr(self, name) == 0)
282
0
            return 0;
283
0
        *name_idx = -1;
284
0
        break;
285
0
    case '[':
286
0
        *is_attribute = 0;
287
0
        if (_FieldNameIterator_item(self, name) == 0)
288
0
            return 0;
289
0
        *name_idx = get_integer(name);
290
0
        if (*name_idx == -1 && PyErr_Occurred())
291
0
            return 0;
292
0
        break;
293
0
    default:
294
        /* Invalid character follows ']' */
295
0
        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
296
0
                        "follow ']' in format field specifier");
297
0
        return 0;
298
0
    }
299
300
    /* empty string is an error */
301
0
    if (name->start == name->end) {
302
0
        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
303
0
        return 0;
304
0
    }
305
306
0
    return 2;
307
0
}
308
309
310
/* input: field_name
311
   output: 'first' points to the part before the first '[' or '.'
312
           'first_idx' is -1 if 'first' is not an integer, otherwise
313
                       it's the value of first converted to an integer
314
           'rest' is an iterator to return the rest
315
*/
316
static int
317
field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
318
                 Py_ssize_t *first_idx, FieldNameIterator *rest,
319
                 AutoNumber *auto_number)
320
172
{
321
172
    Py_UCS4 c;
322
172
    Py_ssize_t i = start;
323
172
    int field_name_is_empty;
324
172
    int using_numeric_index;
325
326
    /* find the part up until the first '.' or '[' */
327
172
    while (i < end) {
328
0
        switch (c = PyUnicode_READ_CHAR(str, i++)) {
329
0
        case '[':
330
0
        case '.':
331
            /* backup so that we this character is available to the
332
               "rest" iterator */
333
0
            i--;
334
0
            break;
335
0
        default:
336
0
            continue;
337
0
        }
338
0
        break;
339
0
    }
340
341
    /* set up the return values */
342
172
    SubString_init(first, str, start, i);
343
172
    FieldNameIterator_init(rest, str, i, end);
344
345
    /* see if "first" is an integer, in which case it's used as an index */
346
172
    *first_idx = get_integer(first);
347
172
    if (*first_idx == -1 && PyErr_Occurred())
348
0
        return 0;
349
350
172
    field_name_is_empty = first->start >= first->end;
351
352
    /* If the field name is omitted or if we have a numeric index
353
       specified, then we're doing numeric indexing into args. */
354
172
    using_numeric_index = field_name_is_empty || *first_idx != -1;
355
356
    /* We always get here exactly one time for each field we're
357
       processing. And we get here in field order (counting by left
358
       braces). So this is the perfect place to handle automatic field
359
       numbering if the field name is omitted. */
360
361
    /* Check if we need to do the auto-numbering. It's not needed if
362
       we're called from string.Format routines, because it's handled
363
       in that class by itself. */
364
172
    if (auto_number) {
365
        /* Initialize our auto numbering state if this is the first
366
           time we're either auto-numbering or manually numbering. */
367
172
        if (auto_number->an_state == ANS_INIT && using_numeric_index)
368
103
            auto_number->an_state = field_name_is_empty ?
369
103
                ANS_AUTO : ANS_MANUAL;
370
371
        /* Make sure our state is consistent with what we're doing
372
           this time through. Only check if we're using a numeric
373
           index. */
374
172
        if (using_numeric_index)
375
172
            if (autonumber_state_error(auto_number->an_state,
376
172
                                       field_name_is_empty))
377
0
                return 0;
378
        /* Zero length field means we want to do auto-numbering of the
379
           fields. */
380
172
        if (field_name_is_empty)
381
172
            *first_idx = (auto_number->an_field_number)++;
382
172
    }
383
384
172
    return 1;
385
172
}
386
387
388
/*
389
    get_field_object returns the object inside {}, before the
390
    format_spec.  It handles getindex and getattr lookups and consumes
391
    the entire input string.
392
*/
393
static PyObject *
394
get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
395
                 AutoNumber *auto_number)
396
172
{
397
172
    PyObject *obj = NULL;
398
172
    int ok;
399
172
    int is_attribute;
400
172
    SubString name;
401
172
    SubString first;
402
172
    Py_ssize_t index;
403
172
    FieldNameIterator rest;
404
405
172
    if (!field_name_split(input->str, input->start, input->end, &first,
406
172
                          &index, &rest, auto_number)) {
407
0
        goto error;
408
0
    }
409
410
172
    if (index == -1) {
411
        /* look up in kwargs */
412
0
        PyObject *key = SubString_new_object(&first);
413
0
        if (key == NULL) {
414
0
            goto error;
415
0
        }
416
0
        if (kwargs == NULL) {
417
0
            PyErr_SetObject(PyExc_KeyError, key);
418
0
            Py_DECREF(key);
419
0
            goto error;
420
0
        }
421
        /* Use PyObject_GetItem instead of PyDict_GetItem because this
422
           code is no longer just used with kwargs. It might be passed
423
           a non-dict when called through format_map. */
424
0
        obj = PyObject_GetItem(kwargs, key);
425
0
        Py_DECREF(key);
426
0
        if (obj == NULL) {
427
0
            goto error;
428
0
        }
429
0
    }
430
172
    else {
431
        /* If args is NULL, we have a format string with a positional field
432
           with only kwargs to retrieve it from. This can only happen when
433
           used with format_map(), where positional arguments are not
434
           allowed. */
435
172
        if (args == NULL) {
436
0
            PyErr_SetString(PyExc_ValueError, "Format string contains "
437
0
                            "positional fields");
438
0
            goto error;
439
0
        }
440
441
        /* look up in args */
442
172
        obj = PySequence_GetItem(args, index);
443
172
        if (obj == NULL) {
444
0
            PyErr_Format(PyExc_IndexError,
445
0
                         "Replacement index %zd out of range for positional "
446
0
                         "args tuple",
447
0
                         index);
448
0
             goto error;
449
0
        }
450
172
    }
451
452
    /* iterate over the rest of the field_name */
453
172
    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
454
172
                                        &name)) == 2) {
455
0
        PyObject *tmp;
456
457
0
        if (is_attribute)
458
            /* getattr lookup "." */
459
0
            tmp = getattr(obj, &name);
460
0
        else
461
            /* getitem lookup "[]" */
462
0
            if (index == -1)
463
0
                tmp = getitem_str(obj, &name);
464
0
            else
465
0
                if (PySequence_Check(obj))
466
0
                    tmp = getitem_sequence(obj, index);
467
0
                else
468
                    /* not a sequence */
469
0
                    tmp = getitem_idx(obj, index);
470
0
        if (tmp == NULL)
471
0
            goto error;
472
473
        /* assign to obj */
474
0
        Py_DECREF(obj);
475
0
        obj = tmp;
476
0
    }
477
    /* end of iterator, this is the non-error case */
478
172
    if (ok == 1)
479
172
        return obj;
480
0
error:
481
0
    Py_XDECREF(obj);
482
0
    return NULL;
483
172
}
484
485
/************************************************************************/
486
/*****************  Field rendering functions  **************************/
487
/************************************************************************/
488
489
/*
490
    render_field() is the main function in this section.  It takes the
491
    field object and field specification string generated by
492
    get_field_and_spec, and renders the field into the output string.
493
494
    render_field calls fieldobj.__format__(format_spec) method, and
495
    appends to the output.
496
*/
497
static int
498
render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
499
172
{
500
172
    int ok = 0;
501
172
    PyObject *result = NULL;
502
172
    PyObject *format_spec_object = NULL;
503
172
    int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
504
172
    int err;
505
506
    /* If we know the type exactly, skip the lookup of __format__ and just
507
       call the formatter directly. */
508
172
    if (PyUnicode_CheckExact(fieldobj))
509
144
        formatter = _PyUnicode_FormatAdvancedWriter;
510
28
    else if (PyLong_CheckExact(fieldobj))
511
28
        formatter = _PyLong_FormatAdvancedWriter;
512
0
    else if (PyFloat_CheckExact(fieldobj))
513
0
        formatter = _PyFloat_FormatAdvancedWriter;
514
0
    else if (PyComplex_CheckExact(fieldobj))
515
0
        formatter = _PyComplex_FormatAdvancedWriter;
516
517
172
    if (formatter) {
518
        /* we know exactly which formatter will be called when __format__ is
519
           looked up, so call it directly, instead. */
520
172
        err = formatter(writer, fieldobj, format_spec->str,
521
172
                        format_spec->start, format_spec->end);
522
172
        return (err == 0);
523
172
    }
524
0
    else {
525
        /* We need to create an object out of the pointers we have, because
526
           __format__ takes a string/unicode object for format_spec. */
527
0
        if (format_spec->str)
528
0
            format_spec_object = PyUnicode_Substring(format_spec->str,
529
0
                                                     format_spec->start,
530
0
                                                     format_spec->end);
531
0
        else
532
0
            format_spec_object = PyUnicode_New(0, 0);
533
0
        if (format_spec_object == NULL)
534
0
            goto done;
535
536
0
        result = PyObject_Format(fieldobj, format_spec_object);
537
0
    }
538
0
    if (result == NULL)
539
0
        goto done;
540
541
0
    if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
542
0
        goto done;
543
0
    ok = 1;
544
545
0
done:
546
0
    Py_XDECREF(format_spec_object);
547
0
    Py_XDECREF(result);
548
0
    return ok;
549
0
}
550
551
static int
552
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
553
            int *format_spec_needs_expanding, Py_UCS4 *conversion)
554
172
{
555
    /* Note this function works if the field name is zero length,
556
       which is good.  Zero length field names are handled later, in
557
       field_name_split. */
558
559
172
    Py_UCS4 c = 0;
560
561
    /* initialize these, as they may be empty */
562
172
    *conversion = '\0';
563
172
    SubString_init(format_spec, NULL, 0, 0);
564
565
    /* Search for the field name.  it's terminated by the end of
566
       the string, or a ':' or '!' */
567
172
    field_name->str = str->str;
568
172
    field_name->start = str->start;
569
172
    while (str->start < str->end) {
570
172
        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
571
0
        case '{':
572
0
            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
573
0
            return 0;
574
0
        case '[':
575
0
            for (; str->start < str->end; str->start++)
576
0
                if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
577
0
                    break;
578
0
            continue;
579
138
        case '}':
580
138
        case ':':
581
172
        case '!':
582
172
            break;
583
0
        default:
584
0
            continue;
585
172
        }
586
172
        break;
587
172
    }
588
589
172
    field_name->end = str->start - 1;
590
172
    if (c == '!' || c == ':') {
591
34
        Py_ssize_t count;
592
        /* we have a format specifier and/or a conversion */
593
        /* don't include the last character */
594
595
        /* see if there's a conversion specifier */
596
34
        if (c == '!') {
597
            /* there must be another character present */
598
34
            if (str->start >= str->end) {
599
0
                PyErr_SetString(PyExc_ValueError,
600
0
                                "end of string while looking for conversion "
601
0
                                "specifier");
602
0
                return 0;
603
0
            }
604
34
            *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
605
606
34
            if (str->start < str->end) {
607
34
                c = PyUnicode_READ_CHAR(str->str, str->start++);
608
34
                if (c == '}')
609
34
                    return 1;
610
0
                if (c != ':') {
611
0
                    PyErr_SetString(PyExc_ValueError,
612
0
                                    "expected ':' after conversion specifier");
613
0
                    return 0;
614
0
                }
615
0
            }
616
34
        }
617
0
        format_spec->str = str->str;
618
0
        format_spec->start = str->start;
619
0
        count = 1;
620
0
        while (str->start < str->end) {
621
0
            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
622
0
            case '{':
623
0
                *format_spec_needs_expanding = 1;
624
0
                count++;
625
0
                break;
626
0
            case '}':
627
0
                count--;
628
0
                if (count == 0) {
629
0
                    format_spec->end = str->start - 1;
630
0
                    return 1;
631
0
                }
632
0
                break;
633
0
            default:
634
0
                break;
635
0
            }
636
0
        }
637
638
0
        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
639
0
        return 0;
640
0
    }
641
138
    else if (c != '}') {
642
0
        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
643
0
        return 0;
644
0
    }
645
646
138
    return 1;
647
172
}
648
649
/************************************************************************/
650
/******* Output string allocation and escape-to-markup processing  ******/
651
/************************************************************************/
652
653
/* MarkupIterator breaks the string into pieces of either literal
654
   text, or things inside {} that need to be marked up.  it is
655
   designed to make it easy to wrap a Python iterator around it, for
656
   use with the Formatter class */
657
658
typedef struct {
659
    SubString str;
660
} MarkupIterator;
661
662
static int
663
MarkupIterator_init(MarkupIterator *self, PyObject *str,
664
                    Py_ssize_t start, Py_ssize_t end)
665
103
{
666
103
    SubString_init(&self->str, str, start, end);
667
103
    return 1;
668
103
}
669
670
/* returns 0 on error, 1 on non-error termination, and 2 if it got a
671
   string (or something to be expanded) */
672
static int
673
MarkupIterator_next(MarkupIterator *self, SubString *literal,
674
                    int *field_present, SubString *field_name,
675
                    SubString *format_spec, Py_UCS4 *conversion,
676
                    int *format_spec_needs_expanding)
677
331
{
678
331
    int at_end;
679
331
    Py_UCS4 c = 0;
680
331
    Py_ssize_t start;
681
331
    Py_ssize_t len;
682
331
    int markup_follows = 0;
683
684
    /* initialize all of the output variables */
685
331
    SubString_init(literal, NULL, 0, 0);
686
331
    SubString_init(field_name, NULL, 0, 0);
687
331
    SubString_init(format_spec, NULL, 0, 0);
688
331
    *conversion = '\0';
689
331
    *format_spec_needs_expanding = 0;
690
331
    *field_present = 0;
691
692
    /* No more input, end of iterator.  This is the normal exit
693
       path. */
694
331
    if (self->str.start >= self->str.end)
695
103
        return 1;
696
697
228
    start = self->str.start;
698
699
    /* First read any literal text. Read until the end of string, an
700
       escaped '{' or '}', or an unescaped '{'.  In order to never
701
       allocate memory and so I can just pass pointers around, if
702
       there's an escaped '{' or '}' then we'll return the literal
703
       including the brace, but no format object.  The next time
704
       through, we'll return the rest of the literal, skipping past
705
       the second consecutive brace. */
706
1.54k
    while (self->str.start < self->str.end) {
707
1.48k
        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
708
172
        case '{':
709
172
        case '}':
710
172
            markup_follows = 1;
711
172
            break;
712
1.31k
        default:
713
1.31k
            continue;
714
1.48k
        }
715
172
        break;
716
1.48k
    }
717
718
228
    at_end = self->str.start >= self->str.end;
719
228
    len = self->str.start - start;
720
721
228
    if ((c == '}') && (at_end ||
722
0
                       (c != PyUnicode_READ_CHAR(self->str.str,
723
0
                                                 self->str.start)))) {
724
0
        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
725
0
                        "in format string");
726
0
        return 0;
727
0
    }
728
228
    if (at_end && c == '{') {
729
0
        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
730
0
                        "in format string");
731
0
        return 0;
732
0
    }
733
228
    if (!at_end) {
734
172
        if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
735
            /* escaped } or {, skip it in the input.  there is no
736
               markup object following us, just this literal text */
737
0
            self->str.start++;
738
0
            markup_follows = 0;
739
0
        }
740
172
        else
741
172
            len--;
742
172
    }
743
744
    /* record the literal text */
745
228
    literal->str = self->str.str;
746
228
    literal->start = start;
747
228
    literal->end = start + len;
748
749
228
    if (!markup_follows)
750
56
        return 2;
751
752
    /* this is markup; parse the field */
753
172
    *field_present = 1;
754
172
    if (!parse_field(&self->str, field_name, format_spec,
755
172
                     format_spec_needs_expanding, conversion))
756
0
        return 0;
757
172
    return 2;
758
172
}
759
760
761
/* do the !r or !s conversion on obj */
762
static PyObject *
763
do_conversion(PyObject *obj, Py_UCS4 conversion)
764
34
{
765
    /* XXX in pre-3.0, do we need to convert this to unicode, since it
766
       might have returned a string? */
767
34
    switch (conversion) {
768
34
    case 'r':
769
34
        return PyObject_Repr(obj);
770
0
    case 's':
771
0
        return PyObject_Str(obj);
772
0
    case 'a':
773
0
        return PyObject_ASCII(obj);
774
0
    default:
775
0
        if (conversion > 32 && conversion < 127) {
776
                /* It's the ASCII subrange; casting to char is safe
777
                   (assuming the execution character set is an ASCII
778
                   superset). */
779
0
                PyErr_Format(PyExc_ValueError,
780
0
                     "Unknown conversion specifier %c",
781
0
                     (char)conversion);
782
0
        } else
783
0
                PyErr_Format(PyExc_ValueError,
784
0
                     "Unknown conversion specifier \\x%x",
785
0
                     (unsigned int)conversion);
786
0
        return NULL;
787
34
    }
788
34
}
789
790
/* given:
791
792
   {field_name!conversion:format_spec}
793
794
   compute the result and write it to output.
795
   format_spec_needs_expanding is an optimization.  if it's false,
796
   just output the string directly, otherwise recursively expand the
797
   format_spec string.
798
799
   field_name is allowed to be zero length, in which case we
800
   are doing auto field numbering.
801
*/
802
803
static int
804
output_markup(SubString *field_name, SubString *format_spec,
805
              int format_spec_needs_expanding, Py_UCS4 conversion,
806
              _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
807
              int recursion_depth, AutoNumber *auto_number)
808
172
{
809
172
    PyObject *tmp = NULL;
810
172
    PyObject *fieldobj = NULL;
811
172
    SubString expanded_format_spec;
812
172
    SubString *actual_format_spec;
813
172
    int result = 0;
814
815
    /* convert field_name to an object */
816
172
    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
817
172
    if (fieldobj == NULL)
818
0
        goto done;
819
820
172
    if (conversion != '\0') {
821
34
        tmp = do_conversion(fieldobj, conversion);
822
34
        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
823
0
            goto done;
824
825
        /* do the assignment, transferring ownership: fieldobj = tmp */
826
34
        Py_DECREF(fieldobj);
827
34
        fieldobj = tmp;
828
34
        tmp = NULL;
829
34
    }
830
831
    /* if needed, recurively compute the format_spec */
832
172
    if (format_spec_needs_expanding) {
833
0
        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
834
0
                           auto_number);
835
0
        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
836
0
            goto done;
837
838
        /* note that in the case we're expanding the format string,
839
           tmp must be kept around until after the call to
840
           render_field. */
841
0
        SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
842
0
        actual_format_spec = &expanded_format_spec;
843
0
    }
844
172
    else
845
172
        actual_format_spec = format_spec;
846
847
172
    if (render_field(fieldobj, actual_format_spec, writer) == 0)
848
0
        goto done;
849
850
172
    result = 1;
851
852
172
done:
853
172
    Py_XDECREF(fieldobj);
854
172
    Py_XDECREF(tmp);
855
856
172
    return result;
857
172
}
858
859
/*
860
    do_markup is the top-level loop for the format() method.  It
861
    searches through the format string for escapes to markup codes, and
862
    calls other functions to move non-markup text to the output,
863
    and to perform the markup to the output.
864
*/
865
static int
866
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
867
          _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
868
103
{
869
103
    MarkupIterator iter;
870
103
    int format_spec_needs_expanding;
871
103
    int result;
872
103
    int field_present;
873
103
    SubString literal;
874
103
    SubString field_name;
875
103
    SubString format_spec;
876
103
    Py_UCS4 conversion;
877
878
103
    MarkupIterator_init(&iter, input->str, input->start, input->end);
879
331
    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
880
331
                                         &field_name, &format_spec,
881
331
                                         &conversion,
882
331
                                         &format_spec_needs_expanding)) == 2) {
883
228
        if (literal.end != literal.start) {
884
215
            if (!field_present && iter.str.start == iter.str.end)
885
56
                writer->overallocate = 0;
886
215
            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
887
215
                                                literal.start, literal.end) < 0)
888
0
                return 0;
889
215
        }
890
891
228
        if (field_present) {
892
172
            if (iter.str.start == iter.str.end)
893
47
                writer->overallocate = 0;
894
172
            if (!output_markup(&field_name, &format_spec,
895
172
                               format_spec_needs_expanding, conversion, writer,
896
172
                               args, kwargs, recursion_depth, auto_number))
897
0
                return 0;
898
172
        }
899
228
    }
900
103
    return result;
901
103
}
902
903
904
/*
905
    build_string allocates the output string and then
906
    calls do_markup to do the heavy lifting.
907
*/
908
static PyObject *
909
build_string(SubString *input, PyObject *args, PyObject *kwargs,
910
             int recursion_depth, AutoNumber *auto_number)
911
103
{
912
103
    _PyUnicodeWriter writer;
913
914
    /* check the recursion level */
915
103
    if (recursion_depth <= 0) {
916
0
        PyErr_SetString(PyExc_ValueError,
917
0
                        "Max string recursion exceeded");
918
0
        return NULL;
919
0
    }
920
921
103
    _PyUnicodeWriter_Init(&writer);
922
103
    writer.overallocate = 1;
923
103
    writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
924
925
103
    if (!do_markup(input, args, kwargs, &writer, recursion_depth,
926
103
                   auto_number)) {
927
0
        _PyUnicodeWriter_Dealloc(&writer);
928
0
        return NULL;
929
0
    }
930
931
103
    return _PyUnicodeWriter_Finish(&writer);
932
103
}
933
934
/************************************************************************/
935
/*********** main routine ***********************************************/
936
/************************************************************************/
937
938
/* this is the main entry point */
939
static PyObject *
940
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
941
103
{
942
103
    SubString input;
943
944
    /* PEP 3101 says only 2 levels, so that
945
       "{0:{1}}".format('abc', 's')            # works
946
       "{0:{1:{2}}}".format('abc', 's', '')    # fails
947
    */
948
103
    int recursion_depth = 2;
949
950
103
    AutoNumber auto_number;
951
952
103
    if (PyUnicode_READY(self) == -1)
953
0
        return NULL;
954
955
103
    AutoNumber_Init(&auto_number);
956
103
    SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
957
103
    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
958
103
}
959
960
/* Format `self` with `obj` as the only source of field values: no
   positional arguments are supplied and `obj` is passed in the kwargs
   position of do_string_format(). */
static PyObject *
do_string_format_map(PyObject *self, PyObject *obj)
{
    PyObject *no_positional_args = NULL;

    return do_string_format(self, no_positional_args, obj);
}
965
966
967
/************************************************************************/
968
/*********** formatteriterator ******************************************/
969
/************************************************************************/
970
971
/* This is used to implement string.Formatter.parse().  It exists so
972
   Formatter can share code with the built in unicode.format() method.
973
   It's really just a wrapper around MarkupIterator that is callable
974
   from Python. */
975
976
/* Instance layout of the "formatteriterator" type. */
typedef struct {
977
    PyObject_HEAD
978
    PyObject *str;              /* owned reference to the parsed string; set in formatter_parser, released in formatteriter_dealloc */
979
    MarkupIterator it_markup;   /* parse state; initialised over str in formatter_parser, advanced by formatteriter_next */
980
} formatteriterobject;
981
982
static void
983
formatteriter_dealloc(formatteriterobject *it)
984
0
{
985
0
    Py_XDECREF(it->str);
986
0
    PyObject_FREE(it);
987
0
}
988
989
/* returns a tuple:
990
   (literal, field_name, format_spec, conversion)
991
992
   literal is any literal text to output.  might be zero length
993
   field_name is the string before the ':'.  might be None
994
   format_spec is the string after the ':'.  mibht be None
995
   conversion is either None, or the string after the '!'
996
*/
997
static PyObject *
998
formatteriter_next(formatteriterobject *it)
999
0
{
1000
0
    SubString literal;
1001
0
    SubString field_name;
1002
0
    SubString format_spec;
1003
0
    Py_UCS4 conversion;
1004
0
    int format_spec_needs_expanding;
1005
0
    int field_present;
1006
0
    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1007
0
                                     &field_name, &format_spec, &conversion,
1008
0
                                     &format_spec_needs_expanding);
1009
1010
    /* all of the SubString objects point into it->str, so no
1011
       memory management needs to be done on them */
1012
0
    assert(0 <= result && result <= 2);
1013
0
    if (result == 0 || result == 1)
1014
        /* if 0, error has already been set, if 1, iterator is empty */
1015
0
        return NULL;
1016
0
    else {
1017
0
        PyObject *literal_str = NULL;
1018
0
        PyObject *field_name_str = NULL;
1019
0
        PyObject *format_spec_str = NULL;
1020
0
        PyObject *conversion_str = NULL;
1021
0
        PyObject *tuple = NULL;
1022
1023
0
        literal_str = SubString_new_object(&literal);
1024
0
        if (literal_str == NULL)
1025
0
            goto done;
1026
1027
0
        field_name_str = SubString_new_object(&field_name);
1028
0
        if (field_name_str == NULL)
1029
0
            goto done;
1030
1031
        /* if field_name is non-zero length, return a string for
1032
           format_spec (even if zero length), else return None */
1033
0
        format_spec_str = (field_present ?
1034
0
                           SubString_new_object_or_empty :
1035
0
                           SubString_new_object)(&format_spec);
1036
0
        if (format_spec_str == NULL)
1037
0
            goto done;
1038
1039
        /* if the conversion is not specified, return a None,
1040
           otherwise create a one length string with the conversion
1041
           character */
1042
0
        if (conversion == '\0') {
1043
0
            conversion_str = Py_None;
1044
0
            Py_INCREF(conversion_str);
1045
0
        }
1046
0
        else
1047
0
            conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1048
0
                                                       &conversion, 1);
1049
0
        if (conversion_str == NULL)
1050
0
            goto done;
1051
1052
0
        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1053
0
                             conversion_str);
1054
0
    done:
1055
0
        Py_XDECREF(literal_str);
1056
0
        Py_XDECREF(field_name_str);
1057
0
        Py_XDECREF(format_spec_str);
1058
0
        Py_XDECREF(conversion_str);
1059
0
        return tuple;
1060
0
    }
1061
0
}
1062
1063
/* Method table for formatteriterator objects; it holds nothing but the
   terminating sentinel entry. */
static PyMethodDef formatteriter_methods[] = {
1064
    {NULL,              NULL}           /* sentinel */
1065
};
1066
1067
static PyTypeObject PyFormatterIter_Type = {
1068
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1069
    "formatteriterator",                /* tp_name */
1070
    sizeof(formatteriterobject),        /* tp_basicsize */
1071
    0,                                  /* tp_itemsize */
1072
    /* methods */
1073
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
1074
    0,                                  /* tp_vectorcall_offset */
1075
    0,                                  /* tp_getattr */
1076
    0,                                  /* tp_setattr */
1077
    0,                                  /* tp_as_async */
1078
    0,                                  /* tp_repr */
1079
    0,                                  /* tp_as_number */
1080
    0,                                  /* tp_as_sequence */
1081
    0,                                  /* tp_as_mapping */
1082
    0,                                  /* tp_hash */
1083
    0,                                  /* tp_call */
1084
    0,                                  /* tp_str */
1085
    PyObject_GenericGetAttr,            /* tp_getattro */
1086
    0,                                  /* tp_setattro */
1087
    0,                                  /* tp_as_buffer */
1088
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1089
    0,                                  /* tp_doc */
1090
    0,                                  /* tp_traverse */
1091
    0,                                  /* tp_clear */
1092
    0,                                  /* tp_richcompare */
1093
    0,                                  /* tp_weaklistoffset */
1094
    PyObject_SelfIter,                  /* tp_iter */
1095
    (iternextfunc)formatteriter_next,   /* tp_iternext */
1096
    formatteriter_methods,              /* tp_methods */
1097
    0,
1098
};
1099
1100
/* unicode_formatter_parser is used to implement
1101
   string.Formatter.vformat.  it parses a string and returns tuples
1102
   describing the parsed elements.  It's a wrapper around
1103
   stringlib/string_format.h's MarkupIterator */
1104
static PyObject *
1105
formatter_parser(PyObject *ignored, PyObject *self)
1106
0
{
1107
0
    formatteriterobject *it;
1108
1109
0
    if (!PyUnicode_Check(self)) {
1110
0
        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1111
0
        return NULL;
1112
0
    }
1113
1114
0
    if (PyUnicode_READY(self) == -1)
1115
0
        return NULL;
1116
1117
0
    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1118
0
    if (it == NULL)
1119
0
        return NULL;
1120
1121
    /* take ownership, give the object to the iterator */
1122
0
    Py_INCREF(self);
1123
0
    it->str = self;
1124
1125
    /* initialize the contained MarkupIterator */
1126
0
    MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1127
0
    return (PyObject *)it;
1128
0
}
1129
1130
1131
/************************************************************************/
1132
/*********** fieldnameiterator ******************************************/
1133
/************************************************************************/
1134
1135
1136
/* This is used to implement string.Formatter.get_field().  It parses the
1137
   field name into attribute and item values.  It's a Python-callable
1138
   wrapper around FieldNameIterator */
1139
1140
/* Instance layout of the "fieldnameiterator" type. */
typedef struct {
1141
    PyObject_HEAD
1142
    PyObject *str;              /* owned reference to the field-name string; set in formatter_field_name_split, released in fieldnameiter_dealloc */
1143
    FieldNameIterator it_field; /* parse state; filled in by field_name_split(), advanced by fieldnameiter_next */
1144
} fieldnameiterobject;
1145
1146
static void
1147
fieldnameiter_dealloc(fieldnameiterobject *it)
1148
0
{
1149
0
    Py_XDECREF(it->str);
1150
0
    PyObject_FREE(it);
1151
0
}
1152
1153
/* returns a tuple:
1154
   (is_attr, value)
1155
   is_attr is true if we used attribute syntax (e.g., '.foo')
1156
              false if we used index syntax (e.g., '[foo]')
1157
   value is an integer or string
1158
*/
1159
static PyObject *
1160
fieldnameiter_next(fieldnameiterobject *it)
1161
0
{
1162
0
    int result;
1163
0
    int is_attr;
1164
0
    Py_ssize_t idx;
1165
0
    SubString name;
1166
1167
0
    result = FieldNameIterator_next(&it->it_field, &is_attr,
1168
0
                                    &idx, &name);
1169
0
    if (result == 0 || result == 1)
1170
        /* if 0, error has already been set, if 1, iterator is empty */
1171
0
        return NULL;
1172
0
    else {
1173
0
        PyObject* result = NULL;
1174
0
        PyObject* is_attr_obj = NULL;
1175
0
        PyObject* obj = NULL;
1176
1177
0
        is_attr_obj = PyBool_FromLong(is_attr);
1178
0
        if (is_attr_obj == NULL)
1179
0
            goto done;
1180
1181
        /* either an integer or a string */
1182
0
        if (idx != -1)
1183
0
            obj = PyLong_FromSsize_t(idx);
1184
0
        else
1185
0
            obj = SubString_new_object(&name);
1186
0
        if (obj == NULL)
1187
0
            goto done;
1188
1189
        /* return a tuple of values */
1190
0
        result = PyTuple_Pack(2, is_attr_obj, obj);
1191
1192
0
    done:
1193
0
        Py_XDECREF(is_attr_obj);
1194
0
        Py_XDECREF(obj);
1195
0
        return result;
1196
0
    }
1197
0
}
1198
1199
/* Method table for fieldnameiterator objects; it holds nothing but the
   terminating sentinel entry. */
static PyMethodDef fieldnameiter_methods[] = {
1200
    {NULL,              NULL}           /* sentinel */
1201
};
1202
1203
static PyTypeObject PyFieldNameIter_Type = {
1204
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1205
    "fieldnameiterator",                /* tp_name */
1206
    sizeof(fieldnameiterobject),        /* tp_basicsize */
1207
    0,                                  /* tp_itemsize */
1208
    /* methods */
1209
    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
1210
    0,                                  /* tp_vectorcall_offset */
1211
    0,                                  /* tp_getattr */
1212
    0,                                  /* tp_setattr */
1213
    0,                                  /* tp_as_async */
1214
    0,                                  /* tp_repr */
1215
    0,                                  /* tp_as_number */
1216
    0,                                  /* tp_as_sequence */
1217
    0,                                  /* tp_as_mapping */
1218
    0,                                  /* tp_hash */
1219
    0,                                  /* tp_call */
1220
    0,                                  /* tp_str */
1221
    PyObject_GenericGetAttr,            /* tp_getattro */
1222
    0,                                  /* tp_setattro */
1223
    0,                                  /* tp_as_buffer */
1224
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1225
    0,                                  /* tp_doc */
1226
    0,                                  /* tp_traverse */
1227
    0,                                  /* tp_clear */
1228
    0,                                  /* tp_richcompare */
1229
    0,                                  /* tp_weaklistoffset */
1230
    PyObject_SelfIter,                  /* tp_iter */
1231
    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
1232
    fieldnameiter_methods,              /* tp_methods */
1233
    0};
1234
1235
/* unicode_formatter_field_name_split is used to implement
1236
   string.Formatter.vformat.  it takes a PEP 3101 "field name", and
1237
   returns a tuple of (first, rest): "first", the part before the
1238
   first '.' or '['; and "rest", an iterator for the rest of the field
1239
   name.  it's a wrapper around stringlib/string_format.h's
1240
   field_name_split.  The iterator it returns is a
1241
   FieldNameIterator */
1242
static PyObject *
1243
formatter_field_name_split(PyObject *ignored, PyObject *self)
1244
0
{
1245
0
    SubString first;
1246
0
    Py_ssize_t first_idx;
1247
0
    fieldnameiterobject *it;
1248
1249
0
    PyObject *first_obj = NULL;
1250
0
    PyObject *result = NULL;
1251
1252
0
    if (!PyUnicode_Check(self)) {
1253
0
        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1254
0
        return NULL;
1255
0
    }
1256
1257
0
    if (PyUnicode_READY(self) == -1)
1258
0
        return NULL;
1259
1260
0
    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1261
0
    if (it == NULL)
1262
0
        return NULL;
1263
1264
    /* take ownership, give the object to the iterator.  this is
1265
       just to keep the field_name alive */
1266
0
    Py_INCREF(self);
1267
0
    it->str = self;
1268
1269
    /* Pass in auto_number = NULL. We'll return an empty string for
1270
       first_obj in that case. */
1271
0
    if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1272
0
                          &first, &first_idx, &it->it_field, NULL))
1273
0
        goto done;
1274
1275
    /* first becomes an integer, if possible; else a string */
1276
0
    if (first_idx != -1)
1277
0
        first_obj = PyLong_FromSsize_t(first_idx);
1278
0
    else
1279
        /* convert "first" into a string object */
1280
0
        first_obj = SubString_new_object(&first);
1281
0
    if (first_obj == NULL)
1282
0
        goto done;
1283
1284
    /* return a tuple of values */
1285
0
    result = PyTuple_Pack(2, first_obj, it);
1286
1287
0
done:
1288
0
    Py_XDECREF(it);
1289
0
    Py_XDECREF(first_obj);
1290
0
    return result;
1291
0
}