Coverage Report

Created: 2025-10-12 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/stringlib/unicode_format.h
Line
Count
Source
1
/*
2
    unicode_format.h -- implementation of str.format().
3
*/
4
5
#include "pycore_complexobject.h" // _PyComplex_FormatAdvancedWriter()
6
#include "pycore_floatobject.h"   // _PyFloat_FormatAdvancedWriter()
7
8
/************************************************************************/
9
/***********   Global data structures and forward declarations  *********/
10
/************************************************************************/
11
12
/*
13
   A SubString consists of the characters between two string or
14
   unicode pointers.
15
*/
16
typedef struct {
17
    PyObject *str; /* borrowed reference */
18
    Py_ssize_t start, end;
19
} SubString;
20
21
22
/* Keep track if we're auto-numbering fields.  The numeric values are
   spelled out; they match the implicit numbering of the enumerators. */
typedef enum {
    ANS_INIT = 0,       /* no numbered field seen yet */
    ANS_AUTO = 1,       /* fields are numbered automatically ("{}") */
    ANS_MANUAL = 2      /* fields carry explicit indices ("{0}") */
} AutoNumberState;
27
28
/* Keeps track of our auto-numbering state, and which number field we're on */
29
typedef struct {
30
    AutoNumberState an_state;
31
    int an_field_number;
32
} AutoNumber;
33
34
35
/* forward declaration for recursion */
36
static PyObject *
37
build_string(SubString *input, PyObject *args, PyObject *kwargs,
38
             int recursion_depth, AutoNumber *auto_number);
39
40
41
42
/************************************************************************/
43
/**************************  Utility  functions  ************************/
44
/************************************************************************/
45
46
static void
47
AutoNumber_Init(AutoNumber *auto_number)
48
9.90M
{
49
9.90M
    auto_number->an_state = ANS_INIT;
50
9.90M
    auto_number->an_field_number = 0;
51
9.90M
}
52
53
/* fill in a SubString from a pointer and length */
54
Py_LOCAL_INLINE(void)
55
SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
56
188M
{
57
188M
    str->str = s;
58
188M
    str->start = start;
59
188M
    str->end = end;
60
188M
}
61
62
/* return a new string.  if str->str is NULL, return None */
63
Py_LOCAL_INLINE(PyObject *)
64
SubString_new_object(SubString *str)
65
42
{
66
42
    if (str->str == NULL)
67
0
        Py_RETURN_NONE;
68
42
    return PyUnicode_Substring(str->str, str->start, str->end);
69
42
}
70
71
/* return a new string.  if str->str is NULL, return a new empty string */
72
Py_LOCAL_INLINE(PyObject *)
73
SubString_new_object_or_empty(SubString *str)
74
0
{
75
0
    if (str->str == NULL) {
76
0
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
77
0
    }
78
0
    return SubString_new_object(str);
79
0
}
80
81
/* Return 1 if an error has been detected switching between automatic
82
   field numbering and manual field specification, else return 0. Set
83
   ValueError on error. */
84
static int
85
autonumber_state_error(AutoNumberState state, int field_name_is_empty)
86
18.5M
{
87
18.5M
    if (state == ANS_MANUAL) {
88
64
        if (field_name_is_empty) {
89
0
            PyErr_SetString(PyExc_ValueError, "cannot switch from "
90
0
                            "manual field specification to "
91
0
                            "automatic field numbering");
92
0
            return 1;
93
0
        }
94
64
    }
95
18.5M
    else {
96
18.5M
        if (!field_name_is_empty) {
97
0
            PyErr_SetString(PyExc_ValueError, "cannot switch from "
98
0
                            "automatic field numbering to "
99
0
                            "manual field specification");
100
0
            return 1;
101
0
        }
102
18.5M
    }
103
18.5M
    return 0;
104
18.5M
}
105
106
107
/************************************************************************/
108
/***********  Format string parsing -- integers and identifiers *********/
109
/************************************************************************/
110
111
static Py_ssize_t
112
get_integer(const SubString *str)
113
18.5M
{
114
18.5M
    Py_ssize_t accumulator = 0;
115
18.5M
    Py_ssize_t digitval;
116
18.5M
    Py_ssize_t i;
117
118
    /* empty string is an error */
119
18.5M
    if (str->start >= str->end)
120
18.5M
        return -1;
121
122
170
    for (i = str->start; i < str->end; i++) {
123
106
        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
124
106
        if (digitval < 0)
125
42
            return -1;
126
        /*
127
           Detect possible overflow before it happens:
128
129
              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
130
              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
131
        */
132
64
        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
133
0
            PyErr_Format(PyExc_ValueError,
134
0
                         "Too many decimal digits in format string");
135
0
            return -1;
136
0
        }
137
64
        accumulator = accumulator * 10 + digitval;
138
64
    }
139
64
    return accumulator;
140
106
}
141
142
/************************************************************************/
143
/******** Functions to get field objects and specification strings ******/
144
/************************************************************************/
145
146
/* do the equivalent of obj.name */
147
static PyObject *
148
getattr(PyObject *obj, SubString *name)
149
0
{
150
0
    PyObject *newobj;
151
0
    PyObject *str = SubString_new_object(name);
152
0
    if (str == NULL)
153
0
        return NULL;
154
0
    newobj = PyObject_GetAttr(obj, str);
155
0
    Py_DECREF(str);
156
0
    return newobj;
157
0
}
158
159
/* do the equivalent of obj[idx], where obj is a sequence */
160
static PyObject *
161
getitem_sequence(PyObject *obj, Py_ssize_t idx)
162
0
{
163
0
    return PySequence_GetItem(obj, idx);
164
0
}
165
166
/* do the equivalent of obj[idx], where obj is not a sequence */
167
static PyObject *
168
getitem_idx(PyObject *obj, Py_ssize_t idx)
169
0
{
170
0
    PyObject *newobj;
171
0
    PyObject *idx_obj = PyLong_FromSsize_t(idx);
172
0
    if (idx_obj == NULL)
173
0
        return NULL;
174
0
    newobj = PyObject_GetItem(obj, idx_obj);
175
0
    Py_DECREF(idx_obj);
176
0
    return newobj;
177
0
}
178
179
/* do the equivalent of obj[name] */
180
static PyObject *
181
getitem_str(PyObject *obj, SubString *name)
182
0
{
183
0
    PyObject *newobj;
184
0
    PyObject *str = SubString_new_object(name);
185
0
    if (str == NULL)
186
0
        return NULL;
187
0
    newobj = PyObject_GetItem(obj, str);
188
0
    Py_DECREF(str);
189
0
    return newobj;
190
0
}
191
192
typedef struct {
193
    /* the entire string we're parsing.  we assume that someone else
194
       is managing its lifetime, and that it will exist for the
195
       lifetime of the iterator.  can be empty */
196
    SubString str;
197
198
    /* index to where we are inside field_name */
199
    Py_ssize_t index;
200
} FieldNameIterator;
201
202
203
static int
204
FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
205
                       Py_ssize_t start, Py_ssize_t end)
206
18.5M
{
207
18.5M
    SubString_init(&self->str, s, start, end);
208
18.5M
    self->index = start;
209
18.5M
    return 1;
210
18.5M
}
211
212
static int
213
_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
214
0
{
215
0
    Py_UCS4 c;
216
217
0
    name->str = self->str.str;
218
0
    name->start = self->index;
219
220
    /* return everything until '.' or '[' */
221
0
    while (self->index < self->str.end) {
222
0
        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
223
0
        switch (c) {
224
0
        case '[':
225
0
        case '.':
226
            /* backup so that we this character will be seen next time */
227
0
            self->index--;
228
0
            break;
229
0
        default:
230
0
            continue;
231
0
        }
232
0
        break;
233
0
    }
234
    /* end of string is okay */
235
0
    name->end = self->index;
236
0
    return 1;
237
0
}
238
239
static int
240
_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
241
0
{
242
0
    int bracket_seen = 0;
243
0
    Py_UCS4 c;
244
245
0
    name->str = self->str.str;
246
0
    name->start = self->index;
247
248
    /* return everything until ']' */
249
0
    while (self->index < self->str.end) {
250
0
        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
251
0
        switch (c) {
252
0
        case ']':
253
0
            bracket_seen = 1;
254
0
            break;
255
0
        default:
256
0
            continue;
257
0
        }
258
0
        break;
259
0
    }
260
    /* make sure we ended with a ']' */
261
0
    if (!bracket_seen) {
262
0
        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
263
0
        return 0;
264
0
    }
265
266
    /* end of string is okay */
267
    /* don't include the ']' */
268
0
    name->end = self->index-1;
269
0
    return 1;
270
0
}
271
272
/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
273
static int
274
FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
275
                       Py_ssize_t *name_idx, SubString *name)
276
18.5M
{
277
    /* check at end of input */
278
18.5M
    if (self->index >= self->str.end)
279
18.5M
        return 1;
280
281
0
    switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
282
0
    case '.':
283
0
        *is_attribute = 1;
284
0
        if (_FieldNameIterator_attr(self, name) == 0)
285
0
            return 0;
286
0
        *name_idx = -1;
287
0
        break;
288
0
    case '[':
289
0
        *is_attribute = 0;
290
0
        if (_FieldNameIterator_item(self, name) == 0)
291
0
            return 0;
292
0
        *name_idx = get_integer(name);
293
0
        if (*name_idx == -1 && PyErr_Occurred())
294
0
            return 0;
295
0
        break;
296
0
    default:
297
        /* Invalid character follows ']' */
298
0
        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
299
0
                        "follow ']' in format field specifier");
300
0
        return 0;
301
0
    }
302
303
    /* empty string is an error */
304
0
    if (name->start == name->end) {
305
0
        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
306
0
        return 0;
307
0
    }
308
309
0
    return 2;
310
0
}
311
312
313
/* input: field_name
314
   output: 'first' points to the part before the first '[' or '.'
315
           'first_idx' is -1 if 'first' is not an integer, otherwise
316
                       it's the value of first converted to an integer
317
           'rest' is an iterator to return the rest
318
*/
319
static int
320
field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
321
                 Py_ssize_t *first_idx, FieldNameIterator *rest,
322
                 AutoNumber *auto_number)
323
18.5M
{
324
18.5M
    Py_UCS4 c;
325
18.5M
    Py_ssize_t i = start;
326
18.5M
    int field_name_is_empty;
327
18.5M
    int using_numeric_index;
328
329
    /* find the part up until the first '.' or '[' */
330
18.5M
    while (i < end) {
331
428
        switch (c = PyUnicode_READ_CHAR(str, i++)) {
332
0
        case '[':
333
0
        case '.':
334
            /* backup so that we this character is available to the
335
               "rest" iterator */
336
0
            i--;
337
0
            break;
338
428
        default:
339
428
            continue;
340
428
        }
341
0
        break;
342
428
    }
343
344
    /* set up the return values */
345
18.5M
    SubString_init(first, str, start, i);
346
18.5M
    FieldNameIterator_init(rest, str, i, end);
347
348
    /* see if "first" is an integer, in which case it's used as an index */
349
18.5M
    *first_idx = get_integer(first);
350
18.5M
    if (*first_idx == -1 && PyErr_Occurred())
351
0
        return 0;
352
353
18.5M
    field_name_is_empty = first->start >= first->end;
354
355
    /* If the field name is omitted or if we have a numeric index
356
       specified, then we're doing numeric indexing into args. */
357
18.5M
    using_numeric_index = field_name_is_empty || *first_idx != -1;
358
359
    /* We always get here exactly one time for each field we're
360
       processing. And we get here in field order (counting by left
361
       braces). So this is the perfect place to handle automatic field
362
       numbering if the field name is omitted. */
363
364
    /* Check if we need to do the auto-numbering. It's not needed if
365
       we're called from string.Format routines, because it's handled
366
       in that class by itself. */
367
18.5M
    if (auto_number) {
368
        /* Initialize our auto numbering state if this is the first
369
           time we're either auto-numbering or manually numbering. */
370
18.5M
        if (auto_number->an_state == ANS_INIT && using_numeric_index)
371
9.90M
            auto_number->an_state = field_name_is_empty ?
372
9.90M
                ANS_AUTO : ANS_MANUAL;
373
374
        /* Make sure our state is consistent with what we're doing
375
           this time through. Only check if we're using a numeric
376
           index. */
377
18.5M
        if (using_numeric_index)
378
18.5M
            if (autonumber_state_error(auto_number->an_state,
379
18.5M
                                       field_name_is_empty))
380
0
                return 0;
381
        /* Zero length field means we want to do auto-numbering of the
382
           fields. */
383
18.5M
        if (field_name_is_empty)
384
18.5M
            *first_idx = (auto_number->an_field_number)++;
385
18.5M
    }
386
387
18.5M
    return 1;
388
18.5M
}
389
390
391
/*
392
    get_field_object returns the object inside {}, before the
393
    format_spec.  It handles getindex and getattr lookups and consumes
394
    the entire input string.
395
*/
396
static PyObject *
397
get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
398
                 AutoNumber *auto_number)
399
18.5M
{
400
18.5M
    PyObject *obj = NULL;
401
18.5M
    int ok;
402
18.5M
    int is_attribute;
403
18.5M
    SubString name;
404
18.5M
    SubString first;
405
18.5M
    Py_ssize_t index;
406
18.5M
    FieldNameIterator rest;
407
408
18.5M
    if (!field_name_split(input->str, input->start, input->end, &first,
409
18.5M
                          &index, &rest, auto_number)) {
410
0
        goto error;
411
0
    }
412
413
18.5M
    if (index == -1) {
414
        /* look up in kwargs */
415
42
        PyObject *key = SubString_new_object(&first);
416
42
        if (key == NULL) {
417
0
            goto error;
418
0
        }
419
42
        if (kwargs == NULL) {
420
0
            PyErr_SetObject(PyExc_KeyError, key);
421
0
            Py_DECREF(key);
422
0
            goto error;
423
0
        }
424
        /* Use PyObject_GetItem instead of PyDict_GetItem because this
425
           code is no longer just used with kwargs. It might be passed
426
           a non-dict when called through format_map. */
427
42
        obj = PyObject_GetItem(kwargs, key);
428
42
        Py_DECREF(key);
429
42
        if (obj == NULL) {
430
0
            goto error;
431
0
        }
432
42
    }
433
18.5M
    else {
434
        /* If args is NULL, we have a format string with a positional field
435
           with only kwargs to retrieve it from. This can only happen when
436
           used with format_map(), where positional arguments are not
437
           allowed. */
438
18.5M
        if (args == NULL) {
439
0
            PyErr_SetString(PyExc_ValueError, "Format string contains "
440
0
                            "positional fields");
441
0
            goto error;
442
0
        }
443
444
        /* look up in args */
445
18.5M
        obj = PySequence_GetItem(args, index);
446
18.5M
        if (obj == NULL) {
447
0
            PyErr_Format(PyExc_IndexError,
448
0
                         "Replacement index %zd out of range for positional "
449
0
                         "args tuple",
450
0
                         index);
451
0
             goto error;
452
0
        }
453
18.5M
    }
454
455
    /* iterate over the rest of the field_name */
456
18.5M
    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
457
18.5M
                                        &name)) == 2) {
458
0
        PyObject *tmp;
459
460
0
        if (is_attribute)
461
            /* getattr lookup "." */
462
0
            tmp = getattr(obj, &name);
463
0
        else
464
            /* getitem lookup "[]" */
465
0
            if (index == -1)
466
0
                tmp = getitem_str(obj, &name);
467
0
            else
468
0
                if (PySequence_Check(obj))
469
0
                    tmp = getitem_sequence(obj, index);
470
0
                else
471
                    /* not a sequence */
472
0
                    tmp = getitem_idx(obj, index);
473
0
        if (tmp == NULL)
474
0
            goto error;
475
476
        /* assign to obj */
477
0
        Py_SETREF(obj, tmp);
478
0
    }
479
    /* end of iterator, this is the non-error case */
480
18.5M
    if (ok == 1)
481
18.5M
        return obj;
482
0
error:
483
0
    Py_XDECREF(obj);
484
0
    return NULL;
485
18.5M
}
486
487
/************************************************************************/
488
/*****************  Field rendering functions  **************************/
489
/************************************************************************/
490
491
/*
492
    render_field() is the main function in this section.  It takes the
493
    field object and field specification string generated by
494
    get_field_and_spec, and renders the field into the output string.
495
496
    render_field calls fieldobj.__format__(format_spec) method, and
497
    appends to the output.
498
*/
499
static int
500
render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
501
18.5M
{
502
18.5M
    int ok = 0;
503
18.5M
    PyObject *result = NULL;
504
18.5M
    PyObject *format_spec_object = NULL;
505
18.5M
    int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
506
18.5M
    int err;
507
508
    /* If we know the type exactly, skip the lookup of __format__ and just
509
       call the formatter directly. */
510
18.5M
    if (PyUnicode_CheckExact(fieldobj))
511
18.2M
        formatter = _PyUnicode_FormatAdvancedWriter;
512
308k
    else if (PyLong_CheckExact(fieldobj))
513
76.9k
        formatter = _PyLong_FormatAdvancedWriter;
514
231k
    else if (PyFloat_CheckExact(fieldobj))
515
0
        formatter = _PyFloat_FormatAdvancedWriter;
516
231k
    else if (PyComplex_CheckExact(fieldobj))
517
0
        formatter = _PyComplex_FormatAdvancedWriter;
518
519
18.5M
    if (formatter) {
520
        /* we know exactly which formatter will be called when __format__ is
521
           looked up, so call it directly, instead. */
522
18.3M
        err = formatter(writer, fieldobj, format_spec->str,
523
18.3M
                        format_spec->start, format_spec->end);
524
18.3M
        return (err == 0);
525
18.3M
    }
526
231k
    else {
527
        /* We need to create an object out of the pointers we have, because
528
           __format__ takes a string/unicode object for format_spec. */
529
231k
        if (format_spec->str)
530
0
            format_spec_object = PyUnicode_Substring(format_spec->str,
531
0
                                                     format_spec->start,
532
0
                                                     format_spec->end);
533
231k
        else
534
231k
            format_spec_object = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
535
231k
        if (format_spec_object == NULL)
536
0
            goto done;
537
538
231k
        result = PyObject_Format(fieldobj, format_spec_object);
539
231k
    }
540
231k
    if (result == NULL)
541
2
        goto done;
542
543
231k
    if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
544
0
        goto done;
545
231k
    ok = 1;
546
547
231k
done:
548
231k
    Py_XDECREF(format_spec_object);
549
231k
    Py_XDECREF(result);
550
231k
    return ok;
551
231k
}
552
553
static int
554
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
555
            int *format_spec_needs_expanding, Py_UCS4 *conversion)
556
18.5M
{
557
    /* Note this function works if the field name is zero length,
558
       which is good.  Zero length field names are handled later, in
559
       field_name_split. */
560
561
18.5M
    Py_UCS4 c = 0;
562
563
    /* initialize these, as they may be empty */
564
18.5M
    *conversion = '\0';
565
18.5M
    SubString_init(format_spec, NULL, 0, 0);
566
567
    /* Search for the field name.  it's terminated by the end of
568
       the string, or a ':' or '!' */
569
18.5M
    field_name->str = str->str;
570
18.5M
    field_name->start = str->start;
571
18.5M
    while (str->start < str->end) {
572
18.5M
        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
573
0
        case '{':
574
0
            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
575
0
            return 0;
576
0
        case '[':
577
0
            for (; str->start < str->end; str->start++)
578
0
                if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
579
0
                    break;
580
0
            continue;
581
17.9M
        case '}':
582
17.9M
        case ':':
583
18.5M
        case '!':
584
18.5M
            break;
585
428
        default:
586
428
            continue;
587
18.5M
        }
588
18.5M
        break;
589
18.5M
    }
590
591
18.5M
    field_name->end = str->start - 1;
592
18.5M
    if (c == '!' || c == ':') {
593
638k
        Py_ssize_t count;
594
        /* we have a format specifier and/or a conversion */
595
        /* don't include the last character */
596
597
        /* see if there's a conversion specifier */
598
638k
        if (c == '!') {
599
            /* there must be another character present */
600
638k
            if (str->start >= str->end) {
601
0
                PyErr_SetString(PyExc_ValueError,
602
0
                                "end of string while looking for conversion "
603
0
                                "specifier");
604
0
                return 0;
605
0
            }
606
638k
            *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
607
608
638k
            if (str->start < str->end) {
609
638k
                c = PyUnicode_READ_CHAR(str->str, str->start++);
610
638k
                if (c == '}')
611
638k
                    return 1;
612
0
                if (c != ':') {
613
0
                    PyErr_SetString(PyExc_ValueError,
614
0
                                    "expected ':' after conversion specifier");
615
0
                    return 0;
616
0
                }
617
0
            }
618
638k
        }
619
64
        format_spec->str = str->str;
620
64
        format_spec->start = str->start;
621
64
        count = 1;
622
256
        while (str->start < str->end) {
623
256
            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
624
0
            case '{':
625
0
                *format_spec_needs_expanding = 1;
626
0
                count++;
627
0
                break;
628
64
            case '}':
629
64
                count--;
630
64
                if (count == 0) {
631
64
                    format_spec->end = str->start - 1;
632
64
                    return 1;
633
64
                }
634
0
                break;
635
192
            default:
636
192
                break;
637
256
            }
638
256
        }
639
640
0
        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
641
0
        return 0;
642
64
    }
643
17.9M
    else if (c != '}') {
644
0
        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
645
0
        return 0;
646
0
    }
647
648
17.9M
    return 1;
649
18.5M
}
650
651
/************************************************************************/
652
/******* Output string allocation and escape-to-markup processing  ******/
653
/************************************************************************/
654
655
/* MarkupIterator breaks the string into pieces of either literal
656
   text, or things inside {} that need to be marked up.  it is
657
   designed to make it easy to wrap a Python iterator around it, for
658
   use with the Formatter class */
659
660
typedef struct {
661
    SubString str;
662
} MarkupIterator;
663
664
static int
665
MarkupIterator_init(MarkupIterator *self, PyObject *str,
666
                    Py_ssize_t start, Py_ssize_t end)
667
9.90M
{
668
9.90M
    SubString_init(&self->str, str, start, end);
669
9.90M
    return 1;
670
9.90M
}
671
672
/* returns 0 on error, 1 on non-error termination, and 2 if it got a
673
   string (or something to be expanded) */
674
static int
675
MarkupIterator_next(MarkupIterator *self, SubString *literal,
676
                    int *field_present, SubString *field_name,
677
                    SubString *format_spec, Py_UCS4 *conversion,
678
                    int *format_spec_needs_expanding)
679
37.5M
{
680
37.5M
    int at_end;
681
37.5M
    Py_UCS4 c = 0;
682
37.5M
    Py_ssize_t start;
683
37.5M
    Py_ssize_t len;
684
37.5M
    int markup_follows = 0;
685
686
    /* initialize all of the output variables */
687
37.5M
    SubString_init(literal, NULL, 0, 0);
688
37.5M
    SubString_init(field_name, NULL, 0, 0);
689
37.5M
    SubString_init(format_spec, NULL, 0, 0);
690
37.5M
    *conversion = '\0';
691
37.5M
    *format_spec_needs_expanding = 0;
692
37.5M
    *field_present = 0;
693
694
    /* No more input, end of iterator.  This is the normal exit
695
       path. */
696
37.5M
    if (self->str.start >= self->str.end)
697
9.90M
        return 1;
698
699
27.6M
    start = self->str.start;
700
701
    /* First read any literal text. Read until the end of string, an
702
       escaped '{' or '}', or an unescaped '{'.  In order to never
703
       allocate memory and so I can just pass pointers around, if
704
       there's an escaped '{' or '}' then we'll return the literal
705
       including the brace, but no format object.  The next time
706
       through, we'll return the rest of the literal, skipping past
707
       the second consecutive brace. */
708
84.8M
    while (self->str.start < self->str.end) {
709
75.8M
        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
710
18.5M
        case '{':
711
18.5M
        case '}':
712
18.5M
            markup_follows = 1;
713
18.5M
            break;
714
57.2M
        default:
715
57.2M
            continue;
716
75.8M
        }
717
18.5M
        break;
718
75.8M
    }
719
720
27.6M
    at_end = self->str.start >= self->str.end;
721
27.6M
    len = self->str.start - start;
722
723
27.6M
    if ((c == '}') && (at_end ||
724
0
                       (c != PyUnicode_READ_CHAR(self->str.str,
725
0
                                                 self->str.start)))) {
726
0
        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
727
0
                        "in format string");
728
0
        return 0;
729
0
    }
730
27.6M
    if (at_end && c == '{') {
731
0
        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
732
0
                        "in format string");
733
0
        return 0;
734
0
    }
735
27.6M
    if (!at_end) {
736
18.5M
        if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
737
            /* escaped } or {, skip it in the input.  there is no
738
               markup object following us, just this literal text */
739
0
            self->str.start++;
740
0
            markup_follows = 0;
741
0
        }
742
18.5M
        else
743
18.5M
            len--;
744
18.5M
    }
745
746
    /* record the literal text */
747
27.6M
    literal->str = self->str.str;
748
27.6M
    literal->start = start;
749
27.6M
    literal->end = start + len;
750
751
27.6M
    if (!markup_follows)
752
9.01M
        return 2;
753
754
    /* this is markup; parse the field */
755
18.5M
    *field_present = 1;
756
18.5M
    if (!parse_field(&self->str, field_name, format_spec,
757
18.5M
                     format_spec_needs_expanding, conversion))
758
0
        return 0;
759
18.5M
    return 2;
760
18.5M
}
761
762
763
/* do the !r or !s conversion on obj */
764
static PyObject *
765
do_conversion(PyObject *obj, Py_UCS4 conversion)
766
638k
{
767
    /* XXX in pre-3.0, do we need to convert this to unicode, since it
768
       might have returned a string? */
769
638k
    switch (conversion) {
770
638k
    case 'r':
771
638k
        return PyObject_Repr(obj);
772
0
    case 's':
773
0
        return PyObject_Str(obj);
774
0
    case 'a':
775
0
        return PyObject_ASCII(obj);
776
0
    default:
777
0
        if (conversion > 32 && conversion < 127) {
778
                /* It's the ASCII subrange; casting to char is safe
779
                   (assuming the execution character set is an ASCII
780
                   superset). */
781
0
                PyErr_Format(PyExc_ValueError,
782
0
                     "Unknown conversion specifier %c",
783
0
                     (char)conversion);
784
0
        } else
785
0
                PyErr_Format(PyExc_ValueError,
786
0
                     "Unknown conversion specifier \\x%x",
787
0
                     (unsigned int)conversion);
788
0
        return NULL;
789
638k
    }
790
638k
}
791
792
/* given:
793
794
   {field_name!conversion:format_spec}
795
796
   compute the result and write it to output.
797
   format_spec_needs_expanding is an optimization.  if it's false,
798
   just output the string directly, otherwise recursively expand the
799
   format_spec string.
800
801
   field_name is allowed to be zero length, in which case we
802
   are doing auto field numbering.
803
*/
804
805
static int
806
output_markup(SubString *field_name, SubString *format_spec,
807
              int format_spec_needs_expanding, Py_UCS4 conversion,
808
              _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
809
              int recursion_depth, AutoNumber *auto_number)
810
18.5M
{
811
18.5M
    PyObject *tmp = NULL;
812
18.5M
    PyObject *fieldobj = NULL;
813
18.5M
    SubString expanded_format_spec;
814
18.5M
    SubString *actual_format_spec;
815
18.5M
    int result = 0;
816
817
    /* convert field_name to an object */
818
18.5M
    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
819
18.5M
    if (fieldobj == NULL)
820
0
        goto done;
821
822
18.5M
    if (conversion != '\0') {
823
638k
        tmp = do_conversion(fieldobj, conversion);
824
638k
        if (tmp == NULL)
825
0
            goto done;
826
827
        /* do the assignment, transferring ownership: fieldobj = tmp */
828
638k
        Py_SETREF(fieldobj, tmp);
829
638k
        tmp = NULL;
830
638k
    }
831
832
    /* if needed, recursively compute the format_spec */
833
18.5M
    if (format_spec_needs_expanding) {
834
0
        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
835
0
                           auto_number);
836
0
        if (tmp == NULL)
837
0
            goto done;
838
839
        /* note that in the case we're expanding the format string,
840
           tmp must be kept around until after the call to
841
           render_field. */
842
0
        SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
843
0
        actual_format_spec = &expanded_format_spec;
844
0
    }
845
18.5M
    else
846
18.5M
        actual_format_spec = format_spec;
847
848
18.5M
    if (render_field(fieldobj, actual_format_spec, writer) == 0)
849
2
        goto done;
850
851
18.5M
    result = 1;
852
853
18.5M
done:
854
18.5M
    Py_XDECREF(fieldobj);
855
18.5M
    Py_XDECREF(tmp);
856
857
18.5M
    return result;
858
18.5M
}
859
860
/*
861
    do_markup is the top-level loop for the format() method.  It
862
    searches through the format string for escapes to markup codes, and
863
    calls other functions to move non-markup text to the output,
864
    and to perform the markup to the output.
865
*/
866
static int
867
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
868
          _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
869
9.90M
{
870
9.90M
    MarkupIterator iter;
871
9.90M
    int format_spec_needs_expanding;
872
9.90M
    int result;
873
9.90M
    int field_present;
874
9.90M
    SubString literal;
875
9.90M
    SubString field_name;
876
9.90M
    SubString format_spec;
877
9.90M
    Py_UCS4 conversion;
878
879
9.90M
    MarkupIterator_init(&iter, input->str, input->start, input->end);
880
37.5M
    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
881
37.5M
                                         &field_name, &format_spec,
882
37.5M
                                         &conversion,
883
37.5M
                                         &format_spec_needs_expanding)) == 2) {
884
27.6M
        if (literal.end != literal.start) {
885
18.9M
            if (!field_present && iter.str.start == iter.str.end)
886
9.01M
                writer->overallocate = 0;
887
18.9M
            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
888
18.9M
                                                literal.start, literal.end) < 0)
889
0
                return 0;
890
18.9M
        }
891
892
27.6M
        if (field_present) {
893
18.5M
            if (iter.str.start == iter.str.end)
894
888k
                writer->overallocate = 0;
895
18.5M
            if (!output_markup(&field_name, &format_spec,
896
18.5M
                               format_spec_needs_expanding, conversion, writer,
897
18.5M
                               args, kwargs, recursion_depth, auto_number))
898
2
                return 0;
899
18.5M
        }
900
27.6M
    }
901
9.90M
    return result;
902
9.90M
}
903
904
905
/*
906
    build_string allocates the output string and then
907
    calls do_markup to do the heavy lifting.
908
*/
909
static PyObject *
910
build_string(SubString *input, PyObject *args, PyObject *kwargs,
911
             int recursion_depth, AutoNumber *auto_number)
912
9.90M
{
913
9.90M
    _PyUnicodeWriter writer;
914
915
    /* check the recursion level */
916
9.90M
    if (recursion_depth <= 0) {
917
0
        PyErr_SetString(PyExc_ValueError,
918
0
                        "Max string recursion exceeded");
919
0
        return NULL;
920
0
    }
921
922
9.90M
    _PyUnicodeWriter_Init(&writer);
923
9.90M
    writer.overallocate = 1;
924
9.90M
    writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
925
926
9.90M
    if (!do_markup(input, args, kwargs, &writer, recursion_depth,
927
9.90M
                   auto_number)) {
928
2
        _PyUnicodeWriter_Dealloc(&writer);
929
2
        return NULL;
930
2
    }
931
932
9.90M
    return _PyUnicodeWriter_Finish(&writer);
933
9.90M
}
934
935
/************************************************************************/
936
/*********** main routine ***********************************************/
937
/************************************************************************/
938
939
/* this is the main entry point */
940
static PyObject *
941
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
942
9.90M
{
943
9.90M
    SubString input;
944
945
    /* PEP 3101 says only 2 levels, so that
946
       "{0:{1}}".format('abc', 's')            # works
947
       "{0:{1:{2}}}".format('abc', 's', '')    # fails
948
    */
949
9.90M
    int recursion_depth = 2;
950
951
9.90M
    AutoNumber auto_number;
952
9.90M
    AutoNumber_Init(&auto_number);
953
9.90M
    SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
954
9.90M
    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
955
9.90M
}
956
957
/* Variant of do_string_format() that takes a single object: `obj` is
   forwarded in the kwargs position and NULL is passed for args.
   NOTE(review): presumably the implementation behind str.format_map();
   confirm against the caller. */
static PyObject *
958
do_string_format_map(PyObject *self, PyObject *obj)
959
0
{
960
0
    return do_string_format(self, NULL, obj);
961
0
}
962
963
964
/************************************************************************/
965
/*********** formatteriterator ******************************************/
966
/************************************************************************/
967
968
/* This is used to implement string.Formatter.parse().  It exists so
969
   Formatter can share code with the built in unicode.format() method.
970
   It's really just a wrapper around MarkupIterator that is callable
971
   from Python. */
972
973
/* Instance layout of the "formatteriterator" type. */
typedef struct {
974
    PyObject_HEAD
975
    PyObject *str;              /* strong reference to the string being parsed; released in formatteriter_dealloc */
976
    MarkupIterator it_markup;   /* parse state, initialized over `str` by formatter_parser */
977
} formatteriterobject;
978
979
static void
980
formatteriter_dealloc(PyObject *op)
981
0
{
982
0
    formatteriterobject *it = (formatteriterobject*)op;
983
0
    Py_XDECREF(it->str);
984
0
    PyObject_Free(it);
985
0
}
986
987
/* returns a tuple:
988
   (literal, field_name, format_spec, conversion)
989
990
   literal is any literal text to output.  might be zero length
991
   field_name is the string before the ':'.  might be None
992
   format_spec is the string after the ':'.  mibht be None
993
   conversion is either None, or the string after the '!'
994
*/
995
static PyObject *
996
formatteriter_next(PyObject *op)
997
0
{
998
0
    formatteriterobject *it = (formatteriterobject*)op;
999
0
    SubString literal;
1000
0
    SubString field_name;
1001
0
    SubString format_spec;
1002
0
    Py_UCS4 conversion;
1003
0
    int format_spec_needs_expanding;
1004
0
    int field_present;
1005
0
    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1006
0
                                     &field_name, &format_spec, &conversion,
1007
0
                                     &format_spec_needs_expanding);
1008
1009
    /* all of the SubString objects point into it->str, so no
1010
       memory management needs to be done on them */
1011
0
    assert(0 <= result && result <= 2);
1012
0
    if (result == 0 || result == 1)
1013
        /* if 0, error has already been set, if 1, iterator is empty */
1014
0
        return NULL;
1015
0
    else {
1016
0
        PyObject *literal_str = NULL;
1017
0
        PyObject *field_name_str = NULL;
1018
0
        PyObject *format_spec_str = NULL;
1019
0
        PyObject *conversion_str = NULL;
1020
0
        PyObject *tuple = NULL;
1021
1022
0
        literal_str = SubString_new_object(&literal);
1023
0
        if (literal_str == NULL)
1024
0
            goto done;
1025
1026
0
        field_name_str = SubString_new_object(&field_name);
1027
0
        if (field_name_str == NULL)
1028
0
            goto done;
1029
1030
        /* if field_name is non-zero length, return a string for
1031
           format_spec (even if zero length), else return None */
1032
0
        format_spec_str = (field_present ?
1033
0
                           SubString_new_object_or_empty :
1034
0
                           SubString_new_object)(&format_spec);
1035
0
        if (format_spec_str == NULL)
1036
0
            goto done;
1037
1038
        /* if the conversion is not specified, return a None,
1039
           otherwise create a one length string with the conversion
1040
           character */
1041
0
        if (conversion == '\0') {
1042
0
            conversion_str = Py_NewRef(Py_None);
1043
0
        }
1044
0
        else
1045
0
            conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1046
0
                                                       &conversion, 1);
1047
0
        if (conversion_str == NULL)
1048
0
            goto done;
1049
1050
0
        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1051
0
                             conversion_str);
1052
0
    done:
1053
0
        Py_XDECREF(literal_str);
1054
0
        Py_XDECREF(field_name_str);
1055
0
        Py_XDECREF(format_spec_str);
1056
0
        Py_XDECREF(conversion_str);
1057
0
        return tuple;
1058
0
    }
1059
0
}
1060
1061
/* Method table for formatteriterator.  It defines no methods: the
   only entry is the terminating sentinel. */
static PyMethodDef formatteriter_methods[] = {
1062
    {NULL,              NULL}           /* sentinel */
1063
};
1064
1065
static PyTypeObject PyFormatterIter_Type = {
1066
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1067
    "formatteriterator",                /* tp_name */
1068
    sizeof(formatteriterobject),        /* tp_basicsize */
1069
    0,                                  /* tp_itemsize */
1070
    /* methods */
1071
    formatteriter_dealloc,              /* tp_dealloc */
1072
    0,                                  /* tp_vectorcall_offset */
1073
    0,                                  /* tp_getattr */
1074
    0,                                  /* tp_setattr */
1075
    0,                                  /* tp_as_async */
1076
    0,                                  /* tp_repr */
1077
    0,                                  /* tp_as_number */
1078
    0,                                  /* tp_as_sequence */
1079
    0,                                  /* tp_as_mapping */
1080
    0,                                  /* tp_hash */
1081
    0,                                  /* tp_call */
1082
    0,                                  /* tp_str */
1083
    PyObject_GenericGetAttr,            /* tp_getattro */
1084
    0,                                  /* tp_setattro */
1085
    0,                                  /* tp_as_buffer */
1086
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1087
    0,                                  /* tp_doc */
1088
    0,                                  /* tp_traverse */
1089
    0,                                  /* tp_clear */
1090
    0,                                  /* tp_richcompare */
1091
    0,                                  /* tp_weaklistoffset */
1092
    PyObject_SelfIter,                  /* tp_iter */
1093
    formatteriter_next,                 /* tp_iternext */
1094
    formatteriter_methods,              /* tp_methods */
1095
    0,
1096
};
1097
1098
/* unicode_formatter_parser is used to implement
1099
   string.Formatter.vformat.  it parses a string and returns tuples
1100
   describing the parsed elements.  It's a wrapper around
1101
   stringlib/string_format.h's MarkupIterator */
1102
static PyObject *
1103
formatter_parser(PyObject *Py_UNUSED(module), PyObject *self)
1104
0
{
1105
0
    formatteriterobject *it;
1106
1107
0
    if (!PyUnicode_Check(self)) {
1108
0
        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1109
0
        return NULL;
1110
0
    }
1111
1112
0
    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1113
0
    if (it == NULL)
1114
0
        return NULL;
1115
1116
    /* take ownership, give the object to the iterator */
1117
0
    it->str = Py_NewRef(self);
1118
1119
    /* initialize the contained MarkupIterator */
1120
0
    MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1121
0
    return (PyObject *)it;
1122
0
}
1123
1124
1125
/************************************************************************/
1126
/*********** fieldnameiterator ******************************************/
1127
/************************************************************************/
1128
1129
1130
/* This is used to implement string.Formatter.get_field().  It parses the
1131
   field name into attribute and item values.  It's a Python-callable
1132
   wrapper around FieldNameIterator */
1133
1134
/* Instance layout of the "fieldnameiterator" type. */
typedef struct {
1135
    PyObject_HEAD
1136
    PyObject *str;                /* strong reference to the field-name string; released in fieldnameiter_dealloc */
1137
    FieldNameIterator it_field;   /* iteration state, filled in by field_name_split() in formatter_field_name_split */
1138
} fieldnameiterobject;
1139
1140
static void
1141
fieldnameiter_dealloc(PyObject *op)
1142
0
{
1143
0
    fieldnameiterobject *it = (fieldnameiterobject*)op;
1144
0
    Py_XDECREF(it->str);
1145
0
    PyObject_Free(it);
1146
0
}
1147
1148
/* returns a tuple:
1149
   (is_attr, value)
1150
   is_attr is true if we used attribute syntax (e.g., '.foo')
1151
              false if we used index syntax (e.g., '[foo]')
1152
   value is an integer or string
1153
*/
1154
static PyObject *
1155
fieldnameiter_next(PyObject *op)
1156
0
{
1157
0
    fieldnameiterobject *it = (fieldnameiterobject*)op;
1158
0
    int result;
1159
0
    int is_attr;
1160
0
    Py_ssize_t idx;
1161
0
    SubString name;
1162
1163
0
    result = FieldNameIterator_next(&it->it_field, &is_attr,
1164
0
                                    &idx, &name);
1165
0
    if (result == 0 || result == 1)
1166
        /* if 0, error has already been set, if 1, iterator is empty */
1167
0
        return NULL;
1168
0
    else {
1169
0
        PyObject* result = NULL;
1170
0
        PyObject* is_attr_obj = NULL;
1171
0
        PyObject* obj = NULL;
1172
1173
0
        is_attr_obj = PyBool_FromLong(is_attr);
1174
0
        if (is_attr_obj == NULL)
1175
0
            goto done;
1176
1177
        /* either an integer or a string */
1178
0
        if (idx != -1)
1179
0
            obj = PyLong_FromSsize_t(idx);
1180
0
        else
1181
0
            obj = SubString_new_object(&name);
1182
0
        if (obj == NULL)
1183
0
            goto done;
1184
1185
        /* return a tuple of values */
1186
0
        result = PyTuple_Pack(2, is_attr_obj, obj);
1187
1188
0
    done:
1189
0
        Py_XDECREF(is_attr_obj);
1190
0
        Py_XDECREF(obj);
1191
0
        return result;
1192
0
    }
1193
0
}
1194
1195
/* Method table for fieldnameiterator.  It defines no methods: the
   only entry is the terminating sentinel. */
static PyMethodDef fieldnameiter_methods[] = {
1196
    {NULL,              NULL}           /* sentinel */
1197
};
1198
1199
static PyTypeObject PyFieldNameIter_Type = {
1200
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1201
    "fieldnameiterator",                /* tp_name */
1202
    sizeof(fieldnameiterobject),        /* tp_basicsize */
1203
    0,                                  /* tp_itemsize */
1204
    /* methods */
1205
    fieldnameiter_dealloc,              /* tp_dealloc */
1206
    0,                                  /* tp_vectorcall_offset */
1207
    0,                                  /* tp_getattr */
1208
    0,                                  /* tp_setattr */
1209
    0,                                  /* tp_as_async */
1210
    0,                                  /* tp_repr */
1211
    0,                                  /* tp_as_number */
1212
    0,                                  /* tp_as_sequence */
1213
    0,                                  /* tp_as_mapping */
1214
    0,                                  /* tp_hash */
1215
    0,                                  /* tp_call */
1216
    0,                                  /* tp_str */
1217
    PyObject_GenericGetAttr,            /* tp_getattro */
1218
    0,                                  /* tp_setattro */
1219
    0,                                  /* tp_as_buffer */
1220
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1221
    0,                                  /* tp_doc */
1222
    0,                                  /* tp_traverse */
1223
    0,                                  /* tp_clear */
1224
    0,                                  /* tp_richcompare */
1225
    0,                                  /* tp_weaklistoffset */
1226
    PyObject_SelfIter,                  /* tp_iter */
1227
    fieldnameiter_next,                 /* tp_iternext */
1228
    fieldnameiter_methods,              /* tp_methods */
1229
    0};
1230
1231
/* unicode_formatter_field_name_split is used to implement
1232
   string.Formatter.vformat.  it takes a PEP 3101 "field name", and
1233
   returns a tuple of (first, rest): "first", the part before the
1234
   first '.' or '['; and "rest", an iterator for the rest of the field
1235
   name.  it's a wrapper around stringlib/string_format.h's
1236
   field_name_split.  The iterator it returns is a
1237
   FieldNameIterator */
1238
static PyObject *
1239
formatter_field_name_split(PyObject *Py_UNUSED(module), PyObject *self)
1240
0
{
1241
0
    SubString first;
1242
0
    Py_ssize_t first_idx;
1243
0
    fieldnameiterobject *it;
1244
1245
0
    PyObject *first_obj = NULL;
1246
0
    PyObject *result = NULL;
1247
1248
0
    if (!PyUnicode_Check(self)) {
1249
0
        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1250
0
        return NULL;
1251
0
    }
1252
1253
0
    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1254
0
    if (it == NULL)
1255
0
        return NULL;
1256
1257
    /* take ownership, give the object to the iterator.  this is
1258
       just to keep the field_name alive */
1259
0
    it->str = Py_NewRef(self);
1260
1261
    /* Pass in auto_number = NULL. We'll return an empty string for
1262
       first_obj in that case. */
1263
0
    if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1264
0
                          &first, &first_idx, &it->it_field, NULL))
1265
0
        goto done;
1266
1267
    /* first becomes an integer, if possible; else a string */
1268
0
    if (first_idx != -1)
1269
0
        first_obj = PyLong_FromSsize_t(first_idx);
1270
0
    else
1271
        /* convert "first" into a string object */
1272
0
        first_obj = SubString_new_object(&first);
1273
0
    if (first_obj == NULL)
1274
0
        goto done;
1275
1276
    /* return a tuple of values */
1277
0
    result = PyTuple_Pack(2, first_obj, it);
1278
1279
0
done:
1280
0
    Py_XDECREF(it);
1281
0
    Py_XDECREF(first_obj);
1282
0
    return result;
1283
0
}