Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Objects/stringlib/unicode_format.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
    unicode_format.h -- implementation of str.format().
3
*/
4
5
/************************************************************************/
6
/***********   Global data structures and forward declarations  *********/
7
/************************************************************************/
8
9
/*
10
   A SubString consists of the characters between two string or
11
   unicode pointers.
12
*/
13
typedef struct {
14
    PyObject *str; /* borrowed reference */
15
    Py_ssize_t start, end;
16
} SubString;
17
18
19
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT = 0,   /* initial state, set by AutoNumber_Init() */
    ANS_AUTO = 1,   /* fields are being numbered automatically */
    ANS_MANUAL = 2  /* fields carry explicit numbers */
} AutoNumberState;
24
25
/* Keeps track of our auto-numbering state, and which number field we're on */
26
typedef struct {
27
    AutoNumberState an_state;
28
    int an_field_number;
29
} AutoNumber;
30
31
32
/* forward declaration for recursion */
33
static PyObject *
34
build_string(SubString *input, PyObject *args, PyObject *kwargs,
35
             int recursion_depth, AutoNumber *auto_number);
36
37
38
39
/************************************************************************/
40
/**************************  Utility  functions  ************************/
41
/************************************************************************/
42
43
static void
44
AutoNumber_Init(AutoNumber *auto_number)
45
105
{
46
105
    auto_number->an_state = ANS_INIT;
47
105
    auto_number->an_field_number = 0;
48
105
}
49
50
/* fill in a SubString from a pointer and length */
51
Py_LOCAL_INLINE(void)
52
SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
53
1.74k
{
54
1.74k
    str->str = s;
55
1.74k
    str->start = start;
56
1.74k
    str->end = end;
57
1.74k
}
58
59
/* return a new string.  if str->str is NULL, return None */
60
Py_LOCAL_INLINE(PyObject *)
61
SubString_new_object(SubString *str)
62
0
{
63
0
    if (str->str == NULL)
64
0
        Py_RETURN_NONE;
65
0
    return PyUnicode_Substring(str->str, str->start, str->end);
66
0
}
67
68
/* return a new string.  if str->str is NULL, return a new empty string */
69
Py_LOCAL_INLINE(PyObject *)
70
SubString_new_object_or_empty(SubString *str)
71
0
{
72
0
    if (str->str == NULL) {
73
0
        return PyUnicode_New(0, 0);
74
0
    }
75
0
    return SubString_new_object(str);
76
0
}
77
78
/* Return 1 if an error has been detected switching between automatic
79
   field numbering and manual field specification, else return 0. Set
80
   ValueError on error. */
81
static int
82
autonumber_state_error(AutoNumberState state, int field_name_is_empty)
83
175
{
84
175
    if (state == ANS_MANUAL) {
85
0
        if (field_name_is_empty) {
86
0
            PyErr_SetString(PyExc_ValueError, "cannot switch from "
87
0
                            "manual field specification to "
88
0
                            "automatic field numbering");
89
0
            return 1;
90
0
        }
91
0
    }
92
175
    else {
93
175
        if (!field_name_is_empty) {
94
0
            PyErr_SetString(PyExc_ValueError, "cannot switch from "
95
0
                            "automatic field numbering to "
96
0
                            "manual field specification");
97
0
            return 1;
98
0
        }
99
175
    }
100
175
    return 0;
101
175
}
102
103
104
/************************************************************************/
105
/***********  Format string parsing -- integers and identifiers *********/
106
/************************************************************************/
107
108
static Py_ssize_t
109
get_integer(const SubString *str)
110
175
{
111
175
    Py_ssize_t accumulator = 0;
112
175
    Py_ssize_t digitval;
113
175
    Py_ssize_t i;
114
115
    /* empty string is an error */
116
175
    if (str->start >= str->end)
117
175
        return -1;
118
119
0
    for (i = str->start; i < str->end; i++) {
120
0
        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
121
0
        if (digitval < 0)
122
0
            return -1;
123
        /*
124
           Detect possible overflow before it happens:
125
126
              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
127
              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
128
        */
129
0
        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
130
0
            PyErr_Format(PyExc_ValueError,
131
0
                         "Too many decimal digits in format string");
132
0
            return -1;
133
0
        }
134
0
        accumulator = accumulator * 10 + digitval;
135
0
    }
136
0
    return accumulator;
137
0
}
138
139
/************************************************************************/
140
/******** Functions to get field objects and specification strings ******/
141
/************************************************************************/
142
143
/* do the equivalent of obj.name */
144
static PyObject *
145
getattr(PyObject *obj, SubString *name)
146
0
{
147
0
    PyObject *newobj;
148
0
    PyObject *str = SubString_new_object(name);
149
0
    if (str == NULL)
150
0
        return NULL;
151
0
    newobj = PyObject_GetAttr(obj, str);
152
0
    Py_DECREF(str);
153
0
    return newobj;
154
0
}
155
156
/* do the equivalent of obj[idx], where obj is a sequence */
157
static PyObject *
158
getitem_sequence(PyObject *obj, Py_ssize_t idx)
159
0
{
160
0
    return PySequence_GetItem(obj, idx);
161
0
}
162
163
/* do the equivalent of obj[idx], where obj is not a sequence */
164
static PyObject *
165
getitem_idx(PyObject *obj, Py_ssize_t idx)
166
0
{
167
0
    PyObject *newobj;
168
0
    PyObject *idx_obj = PyLong_FromSsize_t(idx);
169
0
    if (idx_obj == NULL)
170
0
        return NULL;
171
0
    newobj = PyObject_GetItem(obj, idx_obj);
172
0
    Py_DECREF(idx_obj);
173
0
    return newobj;
174
0
}
175
176
/* do the equivalent of obj[name] */
177
static PyObject *
178
getitem_str(PyObject *obj, SubString *name)
179
0
{
180
0
    PyObject *newobj;
181
0
    PyObject *str = SubString_new_object(name);
182
0
    if (str == NULL)
183
0
        return NULL;
184
0
    newobj = PyObject_GetItem(obj, str);
185
0
    Py_DECREF(str);
186
0
    return newobj;
187
0
}
188
189
typedef struct {
190
    /* the entire string we're parsing.  we assume that someone else
191
       is managing its lifetime, and that it will exist for the
192
       lifetime of the iterator.  can be empty */
193
    SubString str;
194
195
    /* index to where we are inside field_name */
196
    Py_ssize_t index;
197
} FieldNameIterator;
198
199
200
static int
201
FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
202
                       Py_ssize_t start, Py_ssize_t end)
203
175
{
204
175
    SubString_init(&self->str, s, start, end);
205
175
    self->index = start;
206
175
    return 1;
207
175
}
208
209
static int
210
_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
211
0
{
212
0
    Py_UCS4 c;
213
214
0
    name->str = self->str.str;
215
0
    name->start = self->index;
216
217
    /* return everything until '.' or '[' */
218
0
    while (self->index < self->str.end) {
219
0
        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
220
0
        switch (c) {
221
0
        case '[':
222
0
        case '.':
223
            /* backup so that we this character will be seen next time */
224
0
            self->index--;
225
0
            break;
226
0
        default:
227
0
            continue;
228
0
        }
229
0
        break;
230
0
    }
231
    /* end of string is okay */
232
0
    name->end = self->index;
233
0
    return 1;
234
0
}
235
236
static int
237
_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
238
0
{
239
0
    int bracket_seen = 0;
240
0
    Py_UCS4 c;
241
242
0
    name->str = self->str.str;
243
0
    name->start = self->index;
244
245
    /* return everything until ']' */
246
0
    while (self->index < self->str.end) {
247
0
        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
248
0
        switch (c) {
249
0
        case ']':
250
0
            bracket_seen = 1;
251
0
            break;
252
0
        default:
253
0
            continue;
254
0
        }
255
0
        break;
256
0
    }
257
    /* make sure we ended with a ']' */
258
0
    if (!bracket_seen) {
259
0
        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
260
0
        return 0;
261
0
    }
262
263
    /* end of string is okay */
264
    /* don't include the ']' */
265
0
    name->end = self->index-1;
266
0
    return 1;
267
0
}
268
269
/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
270
static int
271
FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
272
                       Py_ssize_t *name_idx, SubString *name)
273
175
{
274
    /* check at end of input */
275
175
    if (self->index >= self->str.end)
276
175
        return 1;
277
278
0
    switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
279
0
    case '.':
280
0
        *is_attribute = 1;
281
0
        if (_FieldNameIterator_attr(self, name) == 0)
282
0
            return 0;
283
0
        *name_idx = -1;
284
0
        break;
285
0
    case '[':
286
0
        *is_attribute = 0;
287
0
        if (_FieldNameIterator_item(self, name) == 0)
288
0
            return 0;
289
0
        *name_idx = get_integer(name);
290
0
        if (*name_idx == -1 && PyErr_Occurred())
291
0
            return 0;
292
0
        break;
293
0
    default:
294
        /* Invalid character follows ']' */
295
0
        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
296
0
                        "follow ']' in format field specifier");
297
0
        return 0;
298
0
    }
299
300
    /* empty string is an error */
301
0
    if (name->start == name->end) {
302
0
        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
303
0
        return 0;
304
0
    }
305
306
0
    return 2;
307
0
}
308
309
310
/* input: field_name
311
   output: 'first' points to the part before the first '[' or '.'
312
           'first_idx' is -1 if 'first' is not an integer, otherwise
313
                       it's the value of first converted to an integer
314
           'rest' is an iterator to return the rest
315
*/
316
static int
317
field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
318
                 Py_ssize_t *first_idx, FieldNameIterator *rest,
319
                 AutoNumber *auto_number)
320
175
{
321
175
    Py_UCS4 c;
322
175
    Py_ssize_t i = start;
323
175
    int field_name_is_empty;
324
175
    int using_numeric_index;
325
326
    /* find the part up until the first '.' or '[' */
327
175
    while (i < end) {
328
0
        switch (c = PyUnicode_READ_CHAR(str, i++)) {
329
0
        case '[':
330
0
        case '.':
331
            /* backup so that we this character is available to the
332
               "rest" iterator */
333
0
            i--;
334
0
            break;
335
0
        default:
336
0
            continue;
337
0
        }
338
0
        break;
339
0
    }
340
341
    /* set up the return values */
342
175
    SubString_init(first, str, start, i);
343
175
    FieldNameIterator_init(rest, str, i, end);
344
345
    /* see if "first" is an integer, in which case it's used as an index */
346
175
    *first_idx = get_integer(first);
347
175
    if (*first_idx == -1 && PyErr_Occurred())
348
0
        return 0;
349
350
175
    field_name_is_empty = first->start >= first->end;
351
352
    /* If the field name is omitted or if we have a numeric index
353
       specified, then we're doing numeric indexing into args. */
354
175
    using_numeric_index = field_name_is_empty || *first_idx != -1;
355
356
    /* We always get here exactly one time for each field we're
357
       processing. And we get here in field order (counting by left
358
       braces). So this is the perfect place to handle automatic field
359
       numbering if the field name is omitted. */
360
361
    /* Check if we need to do the auto-numbering. It's not needed if
362
       we're called from string.Format routines, because it's handled
363
       in that class by itself. */
364
175
    if (auto_number) {
365
        /* Initialize our auto numbering state if this is the first
366
           time we're either auto-numbering or manually numbering. */
367
175
        if (auto_number->an_state == ANS_INIT && using_numeric_index)
368
105
            auto_number->an_state = field_name_is_empty ?
369
105
                ANS_AUTO : ANS_MANUAL;
370
371
        /* Make sure our state is consistent with what we're doing
372
           this time through. Only check if we're using a numeric
373
           index. */
374
175
        if (using_numeric_index)
375
175
            if (autonumber_state_error(auto_number->an_state,
376
175
                                       field_name_is_empty))
377
0
                return 0;
378
        /* Zero length field means we want to do auto-numbering of the
379
           fields. */
380
175
        if (field_name_is_empty)
381
175
            *first_idx = (auto_number->an_field_number)++;
382
175
    }
383
384
175
    return 1;
385
175
}
386
387
388
/*
389
    get_field_object returns the object inside {}, before the
390
    format_spec.  It handles getindex and getattr lookups and consumes
391
    the entire input string.
392
*/
393
static PyObject *
394
get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
395
                 AutoNumber *auto_number)
396
175
{
397
175
    PyObject *obj = NULL;
398
175
    int ok;
399
175
    int is_attribute;
400
175
    SubString name;
401
175
    SubString first;
402
175
    Py_ssize_t index;
403
175
    FieldNameIterator rest;
404
405
175
    if (!field_name_split(input->str, input->start, input->end, &first,
406
175
                          &index, &rest, auto_number)) {
407
0
        goto error;
408
0
    }
409
410
175
    if (index == -1) {
411
        /* look up in kwargs */
412
0
        PyObject *key = SubString_new_object(&first);
413
0
        if (key == NULL) {
414
0
            goto error;
415
0
        }
416
0
        if (kwargs == NULL) {
417
0
            PyErr_SetObject(PyExc_KeyError, key);
418
0
            Py_DECREF(key);
419
0
            goto error;
420
0
        }
421
        /* Use PyObject_GetItem instead of PyDict_GetItem because this
422
           code is no longer just used with kwargs. It might be passed
423
           a non-dict when called through format_map. */
424
0
        obj = PyObject_GetItem(kwargs, key);
425
0
        Py_DECREF(key);
426
0
        if (obj == NULL) {
427
0
            goto error;
428
0
        }
429
0
    }
430
175
    else {
431
        /* If args is NULL, we have a format string with a positional field
432
           with only kwargs to retrieve it from. This can only happen when
433
           used with format_map(), where positional arguments are not
434
           allowed. */
435
175
        if (args == NULL) {
436
0
            PyErr_SetString(PyExc_ValueError, "Format string contains "
437
0
                            "positional fields");
438
0
            goto error;
439
0
        }
440
441
        /* look up in args */
442
175
        obj = PySequence_GetItem(args, index);
443
175
        if (obj == NULL) {
444
0
            PyErr_Format(PyExc_IndexError,
445
0
                         "Replacement index %zd out of range for positional "
446
0
                         "args tuple",
447
0
                         index);
448
0
             goto error;
449
0
        }
450
175
    }
451
452
    /* iterate over the rest of the field_name */
453
175
    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
454
175
                                        &name)) == 2) {
455
0
        PyObject *tmp;
456
457
0
        if (is_attribute)
458
            /* getattr lookup "." */
459
0
            tmp = getattr(obj, &name);
460
0
        else
461
            /* getitem lookup "[]" */
462
0
            if (index == -1)
463
0
                tmp = getitem_str(obj, &name);
464
0
            else
465
0
                if (PySequence_Check(obj))
466
0
                    tmp = getitem_sequence(obj, index);
467
0
                else
468
                    /* not a sequence */
469
0
                    tmp = getitem_idx(obj, index);
470
0
        if (tmp == NULL)
471
0
            goto error;
472
473
        /* assign to obj */
474
0
        Py_DECREF(obj);
475
0
        obj = tmp;
476
0
    }
477
    /* end of iterator, this is the non-error case */
478
175
    if (ok == 1)
479
175
        return obj;
480
0
error:
481
0
    Py_XDECREF(obj);
482
0
    return NULL;
483
175
}
484
485
/************************************************************************/
486
/*****************  Field rendering functions  **************************/
487
/************************************************************************/
488
489
/*
490
    render_field() is the main function in this section.  It takes the
491
    field object and field specification string generated by
492
    get_field_and_spec, and renders the field into the output string.
493
494
    render_field calls fieldobj.__format__(format_spec) method, and
495
    appends to the output.
496
*/
497
static int
498
render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
499
175
{
500
175
    int ok = 0;
501
175
    PyObject *result = NULL;
502
175
    PyObject *format_spec_object = NULL;
503
175
    int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
504
175
    int err;
505
506
    /* If we know the type exactly, skip the lookup of __format__ and just
507
       call the formatter directly. */
508
175
    if (PyUnicode_CheckExact(fieldobj))
509
147
        formatter = _PyUnicode_FormatAdvancedWriter;
510
28
    else if (PyLong_CheckExact(fieldobj))
511
28
        formatter = _PyLong_FormatAdvancedWriter;
512
0
    else if (PyFloat_CheckExact(fieldobj))
513
0
        formatter = _PyFloat_FormatAdvancedWriter;
514
0
    else if (PyComplex_CheckExact(fieldobj))
515
0
        formatter = _PyComplex_FormatAdvancedWriter;
516
517
175
    if (formatter) {
518
        /* we know exactly which formatter will be called when __format__ is
519
           looked up, so call it directly, instead. */
520
175
        err = formatter(writer, fieldobj, format_spec->str,
521
175
                        format_spec->start, format_spec->end);
522
175
        return (err == 0);
523
175
    }
524
0
    else {
525
        /* We need to create an object out of the pointers we have, because
526
           __format__ takes a string/unicode object for format_spec. */
527
0
        if (format_spec->str)
528
0
            format_spec_object = PyUnicode_Substring(format_spec->str,
529
0
                                                     format_spec->start,
530
0
                                                     format_spec->end);
531
0
        else
532
0
            format_spec_object = PyUnicode_New(0, 0);
533
0
        if (format_spec_object == NULL)
534
0
            goto done;
535
536
0
        result = PyObject_Format(fieldobj, format_spec_object);
537
0
    }
538
0
    if (result == NULL)
539
0
        goto done;
540
541
0
    if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
542
0
        goto done;
543
0
    ok = 1;
544
545
0
done:
546
0
    Py_XDECREF(format_spec_object);
547
0
    Py_XDECREF(result);
548
0
    return ok;
549
0
}
550
551
static int
552
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
553
            int *format_spec_needs_expanding, Py_UCS4 *conversion)
554
175
{
555
    /* Note this function works if the field name is zero length,
556
       which is good.  Zero length field names are handled later, in
557
       field_name_split. */
558
559
175
    Py_UCS4 c = 0;
560
561
    /* initialize these, as they may be empty */
562
175
    *conversion = '\0';
563
175
    SubString_init(format_spec, NULL, 0, 0);
564
565
    /* Search for the field name.  it's terminated by the end of
566
       the string, or a ':' or '!' */
567
175
    field_name->str = str->str;
568
175
    field_name->start = str->start;
569
175
    while (str->start < str->end) {
570
175
        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
571
0
        case '{':
572
0
            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
573
0
            return 0;
574
0
        case '[':
575
0
            for (; str->start < str->end; str->start++)
576
0
                if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
577
0
                    break;
578
0
            continue;
579
140
        case '}':
580
140
        case ':':
581
175
        case '!':
582
175
            break;
583
0
        default:
584
0
            continue;
585
175
        }
586
175
        break;
587
175
    }
588
589
175
    field_name->end = str->start - 1;
590
175
    if (c == '!' || c == ':') {
591
35
        Py_ssize_t count;
592
        /* we have a format specifier and/or a conversion */
593
        /* don't include the last character */
594
595
        /* see if there's a conversion specifier */
596
35
        if (c == '!') {
597
            /* there must be another character present */
598
35
            if (str->start >= str->end) {
599
0
                PyErr_SetString(PyExc_ValueError,
600
0
                                "end of string while looking for conversion "
601
0
                                "specifier");
602
0
                return 0;
603
0
            }
604
35
            *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
605
606
35
            if (str->start < str->end) {
607
35
                c = PyUnicode_READ_CHAR(str->str, str->start++);
608
35
                if (c == '}')
609
35
                    return 1;
610
0
                if (c != ':') {
611
0
                    PyErr_SetString(PyExc_ValueError,
612
0
                                    "expected ':' after conversion specifier");
613
0
                    return 0;
614
0
                }
615
0
            }
616
35
        }
617
0
        format_spec->str = str->str;
618
0
        format_spec->start = str->start;
619
0
        count = 1;
620
0
        while (str->start < str->end) {
621
0
            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
622
0
            case '{':
623
0
                *format_spec_needs_expanding = 1;
624
0
                count++;
625
0
                break;
626
0
            case '}':
627
0
                count--;
628
0
                if (count == 0) {
629
0
                    format_spec->end = str->start - 1;
630
0
                    return 1;
631
0
                }
632
0
                break;
633
0
            default:
634
0
                break;
635
0
            }
636
0
        }
637
638
0
        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
639
0
        return 0;
640
0
    }
641
140
    else if (c != '}') {
642
0
        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
643
0
        return 0;
644
0
    }
645
646
140
    return 1;
647
175
}
648
649
/************************************************************************/
650
/******* Output string allocation and escape-to-markup processing  ******/
651
/************************************************************************/
652
653
/* MarkupIterator breaks the string into pieces of either literal
654
   text, or things inside {} that need to be marked up.  it is
655
   designed to make it easy to wrap a Python iterator around it, for
656
   use with the Formatter class */
657
658
typedef struct {
659
    SubString str;
660
} MarkupIterator;
661
662
static int
663
MarkupIterator_init(MarkupIterator *self, PyObject *str,
664
                    Py_ssize_t start, Py_ssize_t end)
665
105
{
666
105
    SubString_init(&self->str, str, start, end);
667
105
    return 1;
668
105
}
669
670
/* returns 0 on error, 1 on non-error termination, and 2 if it got a
671
   string (or something to be expanded) */
672
static int
673
MarkupIterator_next(MarkupIterator *self, SubString *literal,
674
                    int *field_present, SubString *field_name,
675
                    SubString *format_spec, Py_UCS4 *conversion,
676
                    int *format_spec_needs_expanding)
677
336
{
678
336
    int at_end;
679
336
    Py_UCS4 c = 0;
680
336
    Py_ssize_t start;
681
336
    Py_ssize_t len;
682
336
    int markup_follows = 0;
683
684
    /* initialize all of the output variables */
685
336
    SubString_init(literal, NULL, 0, 0);
686
336
    SubString_init(field_name, NULL, 0, 0);
687
336
    SubString_init(format_spec, NULL, 0, 0);
688
336
    *conversion = '\0';
689
336
    *format_spec_needs_expanding = 0;
690
336
    *field_present = 0;
691
692
    /* No more input, end of iterator.  This is the normal exit
693
       path. */
694
336
    if (self->str.start >= self->str.end)
695
105
        return 1;
696
697
231
    start = self->str.start;
698
699
    /* First read any literal text. Read until the end of string, an
700
       escaped '{' or '}', or an unescaped '{'.  In order to never
701
       allocate memory and so I can just pass pointers around, if
702
       there's an escaped '{' or '}' then we'll return the literal
703
       including the brace, but no format object.  The next time
704
       through, we'll return the rest of the literal, skipping past
705
       the second consecutive brace. */
706
1.56k
    while (self->str.start < self->str.end) {
707
1.50k
        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
708
175
        case '{':
709
175
        case '}':
710
175
            markup_follows = 1;
711
175
            break;
712
1.33k
        default:
713
1.33k
            continue;
714
1.50k
        }
715
175
        break;
716
1.50k
    }
717
718
231
    at_end = self->str.start >= self->str.end;
719
231
    len = self->str.start - start;
720
721
231
    if ((c == '}') && (at_end ||
722
0
                       (c != PyUnicode_READ_CHAR(self->str.str,
723
0
                                                 self->str.start)))) {
724
0
        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
725
0
                        "in format string");
726
0
        return 0;
727
0
    }
728
231
    if (at_end && c == '{') {
729
0
        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
730
0
                        "in format string");
731
0
        return 0;
732
0
    }
733
231
    if (!at_end) {
734
175
        if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
735
            /* escaped } or {, skip it in the input.  there is no
736
               markup object following us, just this literal text */
737
0
            self->str.start++;
738
0
            markup_follows = 0;
739
0
        }
740
175
        else
741
175
            len--;
742
175
    }
743
744
    /* record the literal text */
745
231
    literal->str = self->str.str;
746
231
    literal->start = start;
747
231
    literal->end = start + len;
748
749
231
    if (!markup_follows)
750
56
        return 2;
751
752
    /* this is markup; parse the field */
753
175
    *field_present = 1;
754
175
    if (!parse_field(&self->str, field_name, format_spec,
755
175
                     format_spec_needs_expanding, conversion))
756
0
        return 0;
757
175
    return 2;
758
175
}
759
760
761
/* do the !r or !s conversion on obj */
762
static PyObject *
763
do_conversion(PyObject *obj, Py_UCS4 conversion)
764
35
{
765
    /* XXX in pre-3.0, do we need to convert this to unicode, since it
766
       might have returned a string? */
767
35
    switch (conversion) {
768
35
    case 'r':
769
35
        return PyObject_Repr(obj);
770
0
    case 's':
771
0
        return PyObject_Str(obj);
772
0
    case 'a':
773
0
        return PyObject_ASCII(obj);
774
0
    default:
775
0
        if (conversion > 32 && conversion < 127) {
776
                /* It's the ASCII subrange; casting to char is safe
777
                   (assuming the execution character set is an ASCII
778
                   superset). */
779
0
                PyErr_Format(PyExc_ValueError,
780
0
                     "Unknown conversion specifier %c",
781
0
                     (char)conversion);
782
0
        } else
783
0
                PyErr_Format(PyExc_ValueError,
784
0
                     "Unknown conversion specifier \\x%x",
785
0
                     (unsigned int)conversion);
786
0
        return NULL;
787
35
    }
788
35
}
789
790
/* given:
791
792
   {field_name!conversion:format_spec}
793
794
   compute the result and write it to output.
795
   format_spec_needs_expanding is an optimization.  if it's false,
796
   just output the string directly, otherwise recursively expand the
797
   format_spec string.
798
799
   field_name is allowed to be zero length, in which case we
800
   are doing auto field numbering.
801
*/
802
803
static int
804
output_markup(SubString *field_name, SubString *format_spec,
805
              int format_spec_needs_expanding, Py_UCS4 conversion,
806
              _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
807
              int recursion_depth, AutoNumber *auto_number)
808
175
{
809
175
    PyObject *tmp = NULL;
810
175
    PyObject *fieldobj = NULL;
811
175
    SubString expanded_format_spec;
812
175
    SubString *actual_format_spec;
813
175
    int result = 0;
814
815
    /* convert field_name to an object */
816
175
    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
817
175
    if (fieldobj == NULL)
818
0
        goto done;
819
820
175
    if (conversion != '\0') {
821
35
        tmp = do_conversion(fieldobj, conversion);
822
35
        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
823
0
            goto done;
824
825
        /* do the assignment, transferring ownership: fieldobj = tmp */
826
35
        Py_DECREF(fieldobj);
827
35
        fieldobj = tmp;
828
35
        tmp = NULL;
829
35
    }
830
831
    /* if needed, recurively compute the format_spec */
832
175
    if (format_spec_needs_expanding) {
833
0
        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
834
0
                           auto_number);
835
0
        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
836
0
            goto done;
837
838
        /* note that in the case we're expanding the format string,
839
           tmp must be kept around until after the call to
840
           render_field. */
841
0
        SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
842
0
        actual_format_spec = &expanded_format_spec;
843
0
    }
844
175
    else
845
175
        actual_format_spec = format_spec;
846
847
175
    if (render_field(fieldobj, actual_format_spec, writer) == 0)
848
0
        goto done;
849
850
175
    result = 1;
851
852
175
done:
853
175
    Py_XDECREF(fieldobj);
854
175
    Py_XDECREF(tmp);
855
856
175
    return result;
857
175
}
858
859
/*
860
    do_markup is the top-level loop for the format() method.  It
861
    searches through the format string for escapes to markup codes, and
862
    calls other functions to move non-markup text to the output,
863
    and to perform the markup to the output.
864
*/
865
/* Walks `input` with a MarkupIterator.  For every step that yields 2,
   any non-empty literal is copied to `writer` and, when a field is
   present, output_markup() is called for it.
   Returns 0 as soon as a write or output_markup() fails; otherwise
   returns the last value MarkupIterator_next() produced once it stops
   returning 2 (build_string treats 0 as failure). */
static int
866
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
867
          _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
868
105
{
869
105
    MarkupIterator iter;
870
105
    int format_spec_needs_expanding;
871
105
    int result;
872
105
    int field_present;
873
105
    SubString literal;
874
105
    SubString field_name;
875
105
    SubString format_spec;
876
105
    Py_UCS4 conversion;
877
878
105
    MarkupIterator_init(&iter, input->str, input->start, input->end);
879
336
    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
880
336
                                         &field_name, &format_spec,
881
336
                                         &conversion,
882
336
                                         &format_spec_needs_expanding)) == 2) {
883
231
        if (literal.end != literal.start) {
884
217
            /* NOTE(review): iter.str is presumably the not-yet-parsed
               remainder of the input; when it is empty and no field
               follows, this literal is the final write, so
               overallocation is switched off first. */
            if (!field_present && iter.str.start == iter.str.end)
885
56
                writer->overallocate = 0;
886
217
            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
887
217
                                                literal.start, literal.end) < 0)
888
0
                return 0;
889
217
        }
890
891
231
        if (field_present) {
892
175
            /* same check as above, applied before the field is written */
            if (iter.str.start == iter.str.end)
893
49
                writer->overallocate = 0;
894
175
            if (!output_markup(&field_name, &format_spec,
895
175
                               format_spec_needs_expanding, conversion, writer,
896
175
                               args, kwargs, recursion_depth, auto_number))
897
0
                return 0;
898
175
        }
899
231
    }
900
105
    return result;
901
105
}
902
903
904
/*
905
    build_string allocates the output string and then
906
    calls do_markup to do the heavy lifting.
907
*/
908
/* Formats `input` into a new string object.
   Returns the result of _PyUnicodeWriter_Finish() on success.
   Returns NULL with ValueError set when recursion_depth <= 0, and NULL
   (after deallocating the writer) when do_markup() reports failure. */
static PyObject *
909
build_string(SubString *input, PyObject *args, PyObject *kwargs,
910
             int recursion_depth, AutoNumber *auto_number)
911
105
{
912
105
    _PyUnicodeWriter writer;
913
914
    /* check the recursion level */
915
105
    if (recursion_depth <= 0) {
916
0
        PyErr_SetString(PyExc_ValueError,
917
0
                        "Max string recursion exceeded");
918
0
        return NULL;
919
0
    }
920
921
105
    _PyUnicodeWriter_Init(&writer);
922
105
    writer.overallocate = 1;
923
105
    /* size hint: length of the whole underlying string object (not just
       the start..end range of `input`) plus 100 */
    writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
924
925
105
    if (!do_markup(input, args, kwargs, &writer, recursion_depth,
926
105
                   auto_number)) {
927
0
        _PyUnicodeWriter_Dealloc(&writer);
928
0
        return NULL;
929
0
    }
930
931
105
    return _PyUnicodeWriter_Finish(&writer);
932
105
}
933
934
/************************************************************************/
935
/*********** main routine ***********************************************/
936
/************************************************************************/
937
938
/* this is the main entry point */
939
/* Formats `self` using `args` and `kwargs`.
   Builds a SubString covering all of `self`, starts with a fresh
   AutoNumber state, and hands off to build_string() with a recursion
   depth of 2.  Returns NULL if PyUnicode_READY(self) fails, otherwise
   whatever build_string() returns. */
static PyObject *
940
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
941
105
{
942
105
    SubString input;
943
944
    /* PEP 3101 says only 2 levels, so that
945
       "{0:{1}}".format('abc', 's')            # works
946
       "{0:{1:{2}}}".format('abc', 's', '')    # fails
947
    */
948
105
    int recursion_depth = 2;
949
950
105
    AutoNumber auto_number;
951
952
105
    if (PyUnicode_READY(self) == -1)
953
0
        return NULL;
954
955
105
    AutoNumber_Init(&auto_number);
956
105
    SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
957
105
    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
958
105
}
959
960
/* Thin wrapper: calls do_string_format() with args == NULL and `obj`
   passed in the kwargs position. */
static PyObject *
961
do_string_format_map(PyObject *self, PyObject *obj)
962
0
{
963
0
    return do_string_format(self, NULL, obj);
964
0
}
965
966
967
/************************************************************************/
968
/*********** formatteriterator ******************************************/
969
/************************************************************************/
970
971
/* This is used to implement string.Formatter.vparse().  It exists so
972
   Formatter can share code with the built in unicode.format() method.
973
   It's really just a wrapper around MarkupIterator that is callable
974
   from Python. */
975
976
/* Python object wrapping a MarkupIterator. */
typedef struct {
977
    PyObject_HEAD
978
    /* reference to the string being parsed; set (with Py_INCREF) by
       formatter_parser and released in formatteriter_dealloc */
    PyObject *str;
979
    /* iterator state, initialized over `str` by formatter_parser */
    MarkupIterator it_markup;
980
} formatteriterobject;
981
982
/* Deallocator: drops the reference to the parsed string (if any) and
   frees the iterator object itself. */
static void
983
formatteriter_dealloc(formatteriterobject *it)
984
0
{
985
0
    Py_XDECREF(it->str);
986
0
    PyObject_FREE(it);
987
0
}
988
989
/* returns a tuple:
990
   (literal, field_name, format_spec, conversion)
991
992
   literal is any literal text to output.  might be zero length
993
   field_name is the string before the ':'.  might be None
994
   format_spec is the string after the ':'.  might be None
995
   conversion is either None, or the string after the '!'
996
*/
997
/* Advances the wrapped MarkupIterator by one step and packs what it
   produced into a new 4-tuple.
   Returns NULL when MarkupIterator_next() yields 0 or 1, and also when
   creating any of the tuple members or the tuple itself fails. */
static PyObject *
998
formatteriter_next(formatteriterobject *it)
999
0
{
1000
0
    SubString literal;
1001
0
    SubString field_name;
1002
0
    SubString format_spec;
1003
0
    Py_UCS4 conversion;
1004
0
    int format_spec_needs_expanding;
1005
0
    int field_present;
1006
0
    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1007
0
                                     &field_name, &format_spec, &conversion,
1008
0
                                     &format_spec_needs_expanding);
1009
1010
    /* all of the SubString objects point into it->str, so no
1011
       memory management needs to be done on them */
1012
0
    assert(0 <= result && result <= 2);
1013
0
    if (result == 0 || result == 1)
1014
        /* if 0, error has already been set, if 1, iterator is empty */
1015
0
        return NULL;
1016
0
    else {
1017
0
        /* all start as NULL so the shared cleanup at `done` can
           Py_XDECREF them regardless of where a failure occurred */
        PyObject *literal_str = NULL;
1018
0
        PyObject *field_name_str = NULL;
1019
0
        PyObject *format_spec_str = NULL;
1020
0
        PyObject *conversion_str = NULL;
1021
0
        PyObject *tuple = NULL;
1022
1023
0
        literal_str = SubString_new_object(&literal);
1024
0
        if (literal_str == NULL)
1025
0
            goto done;
1026
1027
0
        field_name_str = SubString_new_object(&field_name);
1028
0
        if (field_name_str == NULL)
1029
0
            goto done;
1030
1031
        /* if field_name is non-zero length, return a string for
1032
           format_spec (even if zero length), else return None */
1033
0
        /* the constructor function is selected by field_present and
           then called once with &format_spec */
        format_spec_str = (field_present ?
1034
0
                           SubString_new_object_or_empty :
1035
0
                           SubString_new_object)(&format_spec);
1036
0
        if (format_spec_str == NULL)
1037
0
            goto done;
1038
1039
        /* if the conversion is not specified, return a None,
1040
           otherwise create a one length string with the conversion
1041
           character */
1042
0
        if (conversion == '\0') {
1043
0
            conversion_str = Py_None;
1044
0
            Py_INCREF(conversion_str);
1045
0
        }
1046
0
        else
1047
0
            conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1048
0
                                                       &conversion, 1);
1049
0
        if (conversion_str == NULL)
1050
0
            goto done;
1051
1052
0
        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1053
0
                             conversion_str);
1054
0
    done:
1055
0
        /* reached on success too: PyTuple_Pack does not consume the
           references passed to it, so ours are released here */
        Py_XDECREF(literal_str);
1056
0
        Py_XDECREF(field_name_str);
1057
0
        Py_XDECREF(format_spec_str);
1058
0
        Py_XDECREF(conversion_str);
1059
0
        return tuple;
1060
0
    }
1061
0
}
1062
1063
/* Method table for formatteriterator: it contains only the sentinel
   entry, i.e. the type defines no methods of its own. */
static PyMethodDef formatteriter_methods[] = {
1064
    {NULL,              NULL}           /* sentinel */
1065
};
1066
1067
/* Type object for "formatteriterator".  Only the deallocator, generic
   attribute lookup, the iteration slots (tp_iter / tp_iternext) and the
   (empty) method table are filled in; every other listed slot is 0. */
static PyTypeObject PyFormatterIter_Type = {
1068
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1069
    "formatteriterator",                /* tp_name */
1070
    sizeof(formatteriterobject),        /* tp_basicsize */
1071
    0,                                  /* tp_itemsize */
1072
    /* methods */
1073
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
1074
    0,                                  /* tp_vectorcall_offset */
1075
    0,                                  /* tp_getattr */
1076
    0,                                  /* tp_setattr */
1077
    0,                                  /* tp_as_async */
1078
    0,                                  /* tp_repr */
1079
    0,                                  /* tp_as_number */
1080
    0,                                  /* tp_as_sequence */
1081
    0,                                  /* tp_as_mapping */
1082
    0,                                  /* tp_hash */
1083
    0,                                  /* tp_call */
1084
    0,                                  /* tp_str */
1085
    PyObject_GenericGetAttr,            /* tp_getattro */
1086
    0,                                  /* tp_setattro */
1087
    0,                                  /* tp_as_buffer */
1088
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1089
    0,                                  /* tp_doc */
1090
    0,                                  /* tp_traverse */
1091
    0,                                  /* tp_clear */
1092
    0,                                  /* tp_richcompare */
1093
    0,                                  /* tp_weaklistoffset */
1094
    PyObject_SelfIter,                  /* tp_iter */
1095
    (iternextfunc)formatteriter_next,   /* tp_iternext */
1096
    formatteriter_methods,              /* tp_methods */
1097
    0,                                  /* tp_members (slot following tp_methods) */
1098
};
1099
1100
/* unicode_formatter_parser is used to implement
1101
   string.Formatter.vformat.  it parses a string and returns tuples
1102
   describing the parsed elements.  It's a wrapper around
1103
   stringlib/string_format.h's MarkupIterator */
1104
/* Creates a formatteriterator over the str object `self`.
   `ignored` is not used.
   Returns a new iterator object that holds its own reference to `self`.
   Returns NULL with TypeError set if `self` is not a str, and NULL if
   PyUnicode_READY() or the object allocation fails. */
static PyObject *
1105
formatter_parser(PyObject *ignored, PyObject *self)
1106
0
{
1107
0
    formatteriterobject *it;
1108
1109
0
    if (!PyUnicode_Check(self)) {
1110
0
        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1111
0
        return NULL;
1112
0
    }
1113
1114
0
    if (PyUnicode_READY(self) == -1)
1115
0
        return NULL;
1116
1117
0
    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1118
0
    if (it == NULL)
1119
0
        return NULL;
1120
1121
    /* take ownership, give the object to the iterator */
1122
0
    Py_INCREF(self);
1123
0
    it->str = self;
1124
1125
    /* initialize the contained MarkupIterator */
1126
0
    /* the iterator covers the whole string: 0 .. PyUnicode_GET_LENGTH(self) */
    MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1127
0
    return (PyObject *)it;
1128
0
}
1129
1130
1131
/************************************************************************/
1132
/*********** fieldnameiterator ******************************************/
1133
/************************************************************************/
1134
1135
1136
/* This is used to implement string.Formatter.vparse().  It parses the
1137
   field name into attribute and item values.  It's a Python-callable
1138
   wrapper around FieldNameIterator */
1139
1140
/* Python object wrapping a FieldNameIterator. */
typedef struct {
1141
    PyObject_HEAD
1142
    /* reference to the field-name string; set (with Py_INCREF) by
       formatter_field_name_split and released in fieldnameiter_dealloc */
    PyObject *str;
1143
    /* iterator state, filled in by field_name_split() */
    FieldNameIterator it_field;
1144
} fieldnameiterobject;
1145
1146
/* Deallocator: drops the reference to the field-name string (if any)
   and frees the iterator object itself. */
static void
1147
fieldnameiter_dealloc(fieldnameiterobject *it)
1148
0
{
1149
0
    Py_XDECREF(it->str);
1150
0
    PyObject_FREE(it);
1151
0
}
1152
1153
/* returns a tuple:
1154
   (is_attr, value)
1155
   is_attr is true if we used attribute syntax (e.g., '.foo')
1156
              false if we used index syntax (e.g., '[foo]')
1157
   value is an integer or string
1158
*/
1159
/* Advances the wrapped FieldNameIterator by one step and returns a new
   2-tuple (is_attr, value).
   Returns NULL when FieldNameIterator_next() yields 0 or 1, and also
   when creating either tuple member or the tuple itself fails. */
static PyObject *
1160
fieldnameiter_next(fieldnameiterobject *it)
1161
0
{
1162
0
    int result;
1163
0
    int is_attr;
1164
0
    Py_ssize_t idx;
1165
0
    SubString name;
1166
1167
0
    result = FieldNameIterator_next(&it->it_field, &is_attr,
1168
0
                                    &idx, &name);
1169
0
    if (result == 0 || result == 1)
1170
        /* if 0, error has already been set, if 1, iterator is empty */
1171
0
        return NULL;
1172
0
    else {
1173
0
        /* NOTE: this PyObject *result shadows the int `result` declared
           at function scope; inside this block `result` is the tuple */
        PyObject* result = NULL;
1174
0
        PyObject* is_attr_obj = NULL;
1175
0
        PyObject* obj = NULL;
1176
1177
0
        is_attr_obj = PyBool_FromLong(is_attr);
1178
0
        if (is_attr_obj == NULL)
1179
0
            goto done;
1180
1181
        /* either an integer or a string */
1182
0
        /* idx == -1 selects the string form built from `name` */
        if (idx != -1)
1183
0
            obj = PyLong_FromSsize_t(idx);
1184
0
        else
1185
0
            obj = SubString_new_object(&name);
1186
0
        if (obj == NULL)
1187
0
            goto done;
1188
1189
        /* return a tuple of values */
1190
0
        result = PyTuple_Pack(2, is_attr_obj, obj);
1191
1192
0
    done:
1193
0
        /* reached on success too: the local references are released
           after PyTuple_Pack, which does not consume them */
        Py_XDECREF(is_attr_obj);
1194
0
        Py_XDECREF(obj);
1195
0
        return result;
1196
0
    }
1197
0
}
1198
1199
/* Method table for fieldnameiterator: it contains only the sentinel
   entry, i.e. the type defines no methods of its own. */
static PyMethodDef fieldnameiter_methods[] = {
1200
    {NULL,              NULL}           /* sentinel */
1201
};
1202
1203
/* Type object for "fieldnameiterator".  Only the deallocator, generic
   attribute lookup, the iteration slots (tp_iter / tp_iternext) and the
   (empty) method table are filled in; every other listed slot is 0. */
static PyTypeObject PyFieldNameIter_Type = {
1204
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1205
    "fieldnameiterator",                /* tp_name */
1206
    sizeof(fieldnameiterobject),        /* tp_basicsize */
1207
    0,                                  /* tp_itemsize */
1208
    /* methods */
1209
    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
1210
    0,                                  /* tp_vectorcall_offset */
1211
    0,                                  /* tp_getattr */
1212
    0,                                  /* tp_setattr */
1213
    0,                                  /* tp_as_async */
1214
    0,                                  /* tp_repr */
1215
    0,                                  /* tp_as_number */
1216
    0,                                  /* tp_as_sequence */
1217
    0,                                  /* tp_as_mapping */
1218
    0,                                  /* tp_hash */
1219
    0,                                  /* tp_call */
1220
    0,                                  /* tp_str */
1221
    PyObject_GenericGetAttr,            /* tp_getattro */
1222
    0,                                  /* tp_setattro */
1223
    0,                                  /* tp_as_buffer */
1224
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1225
    0,                                  /* tp_doc */
1226
    0,                                  /* tp_traverse */
1227
    0,                                  /* tp_clear */
1228
    0,                                  /* tp_richcompare */
1229
    0,                                  /* tp_weaklistoffset */
1230
    PyObject_SelfIter,                  /* tp_iter */
1231
    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
1232
    fieldnameiter_methods,              /* tp_methods */
1233
    /* tp_members (slot following tp_methods) */
    0};
1234
1235
/* unicode_formatter_field_name_split is used to implement
1236
   string.Formatter.vformat.  it takes a PEP 3101 "field name", and
1237
   returns a tuple of (first, rest): "first", the part before the
1238
   first '.' or '['; and "rest", an iterator for the rest of the field
1239
   name.  it's a wrapper around stringlib/string_format.h's
1240
   field_name_split.  The iterator it returns is a
1241
   FieldNameIterator */
1242
/* Splits the field name `self` and returns a new 2-tuple
   (first, iterator), where `first` is an int or a str and `iterator`
   is a fieldnameiterator over the rest of the name.
   `ignored` is not used.
   Returns NULL with TypeError set if `self` is not a str, and NULL if
   PyUnicode_READY(), the allocation, field_name_split() or any object
   creation fails. */
static PyObject *
1243
formatter_field_name_split(PyObject *ignored, PyObject *self)
1244
0
{
1245
0
    SubString first;
1246
0
    Py_ssize_t first_idx;
1247
0
    fieldnameiterobject *it;
1248
1249
0
    PyObject *first_obj = NULL;
1250
0
    PyObject *result = NULL;
1251
1252
0
    if (!PyUnicode_Check(self)) {
1253
0
        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1254
0
        return NULL;
1255
0
    }
1256
1257
0
    if (PyUnicode_READY(self) == -1)
1258
0
        return NULL;
1259
1260
0
    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1261
0
    if (it == NULL)
1262
0
        return NULL;
1263
1264
    /* take ownership, give the object to the iterator.  this is
1265
       just to keep the field_name alive */
1266
0
    Py_INCREF(self);
1267
0
    it->str = self;
1268
1269
    /* Pass in auto_number = NULL. We'll return an empty string for
1270
       first_obj in that case. */
1271
0
    if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1272
0
                          &first, &first_idx, &it->it_field, NULL))
1273
0
        goto done;
1274
1275
    /* first becomes an integer, if possible; else a string */
1276
0
    if (first_idx != -1)
1277
0
        first_obj = PyLong_FromSsize_t(first_idx);
1278
0
    else
1279
        /* convert "first" into a string object */
1280
0
        first_obj = SubString_new_object(&first);
1281
0
    if (first_obj == NULL)
1282
0
        goto done;
1283
1284
    /* return a tuple of values */
1285
0
    result = PyTuple_Pack(2, first_obj, it);
1286
1287
0
done:
1288
0
    /* reached on success and on failure: our own references to `it`
       and `first_obj` are always released here.  it->str was set
       before any jump to `done`, so the iterator's deallocator sees a
       valid pointer. */
    Py_XDECREF(it);
1289
0
    Py_XDECREF(first_obj);
1290
0
    return result;
1291
0
}