Coverage Report

Created: 2026-03-08 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/unicode_format.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
// PyUnicode_Format() implementation
42
43
#include "Python.h"
44
#include "pycore_abstract.h"      // _PyIndex_Check()
45
#include "pycore_format.h"        // F_ALT
46
#include "pycore_long.h"          // _PyLong_FormatWriter()
47
#include "pycore_object.h"        // _PyObject_IsUniquelyReferenced()
48
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
49
50
51
0
/* Short local alias for _Py_MAX_UNICODE (see pycore_unicodeobject.h);
   formatchar() uses it as the upper bound for "%c" ordinals. */
#define MAX_UNICODE _Py_MAX_UNICODE
52
25.1M
/* Short local alias for _PyUnicode_EnsureUnicode(). */
#define ensure_unicode _PyUnicode_EnsureUnicode
53
54
struct unicode_formatter_t {
55
    PyObject *args;
56
    int args_owned;
57
    Py_ssize_t arglen, argidx;
58
    PyObject *dict;
59
60
    int fmtkind;
61
    Py_ssize_t fmtcnt, fmtpos;
62
    const void *fmtdata;
63
    PyObject *fmtstr;
64
65
    _PyUnicodeWriter writer;
66
};
67
68
69
struct unicode_format_arg_t {
70
    Py_UCS4 ch;
71
    int flags;
72
    Py_ssize_t width;
73
    int prec;
74
    int sign;
75
    Py_ssize_t fmtstart;
76
    PyObject *key;
77
};
78
79
80
// Raise EXC with the message FMT, prefixed by the identity of the argument
// being formatted: its mapping key when arg->key is set, else its index when
// ctx->argidx is non-negative, else no identity at all.
//
// The macro reads the local variables `ctx` and `arg` of the calling
// function.  At least one variadic argument is required: use
// FORMAT_ERROR(EXC, "...%s", "") when there are no arguments.
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
    if (arg->key == NULL && ctx->argidx < 0) {                              \
        PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);          \
    }                                                                       \
    else if (arg->key == NULL) {                                            \
        PyErr_Format((EXC), "format argument %zd: " FMT,                    \
                     ctx->argidx, __VA_ARGS__);                             \
    }                                                                       \
    else {                                                                  \
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
                     arg->key, __VA_ARGS__);                                \
    }                                                                       \
} while (0)
94
95
96
static PyObject *
97
unicode_format_getnextarg(struct unicode_formatter_t *ctx, int allowone)
98
53.5M
{
99
53.5M
    Py_ssize_t argidx = ctx->argidx;
100
101
53.5M
    if (argidx < ctx->arglen && (allowone || ctx->arglen >= 0)) {
102
53.5M
        ctx->argidx++;
103
53.5M
        if (ctx->arglen >= 0) {
104
39.1M
            return PyTuple_GetItem(ctx->args, argidx);
105
39.1M
        }
106
14.4M
        else if (allowone) {
107
14.4M
            return ctx->args;
108
14.4M
        }
109
53.5M
    }
110
0
    PyErr_Format(PyExc_TypeError,
111
0
                 "not enough arguments for format string (got %zd)",
112
0
                 ctx->arglen < 0 ? 1 : ctx->arglen);
113
0
    return NULL;
114
53.5M
}
115
116
117
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
118
119
/* Format a float into the writer if the writer is not NULL, or into *p_output
120
   otherwise.
121
122
   Return 0 on success, raise an exception and return -1 on error. */
123
static int
124
formatfloat(PyObject *v,
125
            struct unicode_formatter_t *ctx,
126
            struct unicode_format_arg_t *arg,
127
            PyObject **p_output,
128
            _PyUnicodeWriter *writer)
129
109
{
130
109
    char *p;
131
109
    double x;
132
109
    Py_ssize_t len;
133
109
    int prec;
134
109
    int dtoa_flags = 0;
135
136
109
    x = PyFloat_AsDouble(v);
137
109
    if (x == -1.0 && PyErr_Occurred()) {
138
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
139
0
            FORMAT_ERROR(PyExc_TypeError,
140
0
                         "%%%c requires a real number, not %T",
141
0
                         arg->ch, v);
142
0
        }
143
0
        return -1;
144
0
    }
145
146
109
    prec = arg->prec;
147
109
    if (prec < 0)
148
0
        prec = 6;
149
150
109
    if (arg->flags & F_ALT)
151
0
        dtoa_flags |= Py_DTSF_ALT;
152
109
    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
153
109
    if (p == NULL)
154
0
        return -1;
155
109
    len = strlen(p);
156
109
    if (writer) {
157
0
        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
158
0
            PyMem_Free(p);
159
0
            return -1;
160
0
        }
161
0
    }
162
109
    else
163
109
        *p_output = _PyUnicode_FromASCII(p, len);
164
109
    PyMem_Free(p);
165
109
    return 0;
166
109
}
167
168
169
/* formatlong() emulates the format codes d, u, o, x and X, and
170
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
171
 * Python's regular ints.
172
 * Return value:  a new PyUnicodeObject*, or NULL if error.
173
 *     The output string is of the form
174
 *         "-"? ("0x" | "0X")? digit+
175
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
176
 *         set in flags.  The case of hex digits will be correct,
177
 *     There will be at least prec digits, zero-filled on the left if
178
 *         necessary to get that many.
179
 * val          object to be converted
180
 * flags        bitmask of format flags; only F_ALT is looked at
181
 * prec         minimum number of digits; 0-fill on left if needed
182
 * type         a character in [duoxX]; u acts the same as d
183
 *
184
 * CAUTION:  o, x and X conversions on regular ints can never
185
 * produce a '-' sign, but can for Python's unbounded ints.
186
 */
187
PyObject *
188
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
189
3.73M
{
190
3.73M
    PyObject *result = NULL;
191
3.73M
    char *buf;
192
3.73M
    Py_ssize_t i;
193
3.73M
    int sign;           /* 1 if '-', else 0 */
194
3.73M
    int len;            /* number of characters */
195
3.73M
    Py_ssize_t llen;
196
3.73M
    int numdigits;      /* len == numnondigits + numdigits */
197
3.73M
    int numnondigits = 0;
198
199
    /* Avoid exceeding SSIZE_T_MAX */
200
3.73M
    if (prec > INT_MAX-3) {
201
0
        PyErr_SetString(PyExc_OverflowError,
202
0
                        "precision too large");
203
0
        return NULL;
204
0
    }
205
206
3.73M
    assert(PyLong_Check(val));
207
208
3.73M
    switch (type) {
209
0
    default:
210
0
        Py_UNREACHABLE();
211
3.66M
    case 'd':
212
3.66M
    case 'i':
213
3.66M
    case 'u':
214
        /* int and int subclasses should print numerically when a numeric */
215
        /* format code is used (see issue18780) */
216
3.66M
        result = PyNumber_ToBase(val, 10);
217
3.66M
        break;
218
66.9k
    case 'o':
219
66.9k
        numnondigits = 2;
220
66.9k
        result = PyNumber_ToBase(val, 8);
221
66.9k
        break;
222
86
    case 'x':
223
2.64k
    case 'X':
224
2.64k
        numnondigits = 2;
225
2.64k
        result = PyNumber_ToBase(val, 16);
226
2.64k
        break;
227
3.73M
    }
228
3.73M
    if (!result)
229
0
        return NULL;
230
231
3.73M
    assert(_PyUnicode_IsModifiable(result));
232
3.73M
    assert(PyUnicode_IS_ASCII(result));
233
234
    /* To modify the string in-place, there can only be one reference. */
235
3.73M
    if (!_PyObject_IsUniquelyReferenced(result)) {
236
0
        Py_DECREF(result);
237
0
        PyErr_BadInternalCall();
238
0
        return NULL;
239
0
    }
240
3.73M
    buf = PyUnicode_DATA(result);
241
3.73M
    llen = PyUnicode_GET_LENGTH(result);
242
3.73M
    if (llen > INT_MAX) {
243
0
        Py_DECREF(result);
244
0
        PyErr_SetString(PyExc_ValueError,
245
0
                        "string too large in _PyUnicode_FormatLong");
246
0
        return NULL;
247
0
    }
248
3.73M
    len = (int)llen;
249
3.73M
    sign = buf[0] == '-';
250
3.73M
    numnondigits += sign;
251
3.73M
    numdigits = len - numnondigits;
252
3.73M
    assert(numdigits > 0);
253
254
    /* Get rid of base marker unless F_ALT */
255
3.73M
    if (((alt) == 0 &&
256
3.73M
        (type == 'o' || type == 'x' || type == 'X'))) {
257
69.5k
        assert(buf[sign] == '0');
258
69.5k
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
259
69.5k
               buf[sign+1] == 'o');
260
69.5k
        numnondigits -= 2;
261
69.5k
        buf += 2;
262
69.5k
        len -= 2;
263
69.5k
        if (sign)
264
0
            buf[0] = '-';
265
69.5k
        assert(len == numnondigits + numdigits);
266
69.5k
        assert(numdigits > 0);
267
69.5k
    }
268
269
    /* Fill with leading zeroes to meet minimum width. */
270
3.73M
    if (prec > numdigits) {
271
0
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
272
0
                                numnondigits + prec);
273
0
        char *b1;
274
0
        if (!r1) {
275
0
            Py_DECREF(result);
276
0
            return NULL;
277
0
        }
278
0
        b1 = PyBytes_AS_STRING(r1);
279
0
        for (i = 0; i < numnondigits; ++i)
280
0
            *b1++ = *buf++;
281
0
        for (i = 0; i < prec - numdigits; i++)
282
0
            *b1++ = '0';
283
0
        for (i = 0; i < numdigits; i++)
284
0
            *b1++ = *buf++;
285
0
        *b1 = '\0';
286
0
        Py_SETREF(result, r1);
287
0
        buf = PyBytes_AS_STRING(result);
288
0
        len = numnondigits + prec;
289
0
    }
290
291
    /* Fix up case for hex conversions. */
292
3.73M
    if (type == 'X') {
293
        /* Need to convert all lower case letters to upper case.
294
           and need to convert 0x to 0X (and -0x to -0X). */
295
7.52k
        for (i = 0; i < len; i++)
296
4.96k
            if (buf[i] >= 'a' && buf[i] <= 'x')
297
1.92k
                buf[i] -= 'a'-'A';
298
2.56k
    }
299
3.73M
    if (!PyUnicode_Check(result)
300
3.73M
        || buf != PyUnicode_DATA(result)) {
301
69.5k
        PyObject *unicode;
302
69.5k
        unicode = _PyUnicode_FromASCII(buf, len);
303
69.5k
        Py_SETREF(result, unicode);
304
69.5k
    }
305
3.66M
    else if (len != PyUnicode_GET_LENGTH(result)) {
306
0
        if (PyUnicode_Resize(&result, len) < 0)
307
0
            Py_CLEAR(result);
308
0
    }
309
3.73M
    return result;
310
3.73M
}
311
312
313
/* Format an integer or a float as an integer.
314
 * Return 1 if the number has been formatted into the writer,
315
 *        0 if the number has been formatted into *p_output
316
 *       -1 and raise an exception on error */
317
static int
318
mainformatlong(PyObject *v,
319
               struct unicode_formatter_t *ctx,
320
               struct unicode_format_arg_t *arg,
321
               PyObject **p_output,
322
               _PyUnicodeWriter *writer)
323
16.6M
{
324
16.6M
    PyObject *iobj, *res;
325
16.6M
    char type = (char)arg->ch;
326
327
16.6M
    if (!PyNumber_Check(v))
328
4.49M
        goto wrongtype;
329
330
    /* make sure number is a type of integer for o, x, and X */
331
12.1M
    if (!PyLong_Check(v)) {
332
0
        if (type == 'o' || type == 'x' || type == 'X') {
333
0
            iobj = _PyNumber_Index(v);
334
0
        }
335
0
        else {
336
0
            iobj = PyNumber_Long(v);
337
0
        }
338
0
        if (iobj == NULL ) {
339
0
            if (PyErr_ExceptionMatches(PyExc_TypeError))
340
0
                goto wrongtype;
341
0
            return -1;
342
0
        }
343
0
        assert(PyLong_Check(iobj));
344
0
    }
345
12.1M
    else {
346
12.1M
        iobj = Py_NewRef(v);
347
12.1M
    }
348
349
12.1M
    if (PyLong_CheckExact(v)
350
12.1M
        && arg->width == -1 && arg->prec == -1
351
8.45M
        && !(arg->flags & (F_SIGN | F_BLANK))
352
8.45M
        && type != 'X')
353
8.45M
    {
354
        /* Fast path */
355
8.45M
        int alternate = arg->flags & F_ALT;
356
8.45M
        int base;
357
358
8.45M
        switch(type)
359
8.45M
        {
360
0
            default:
361
0
                Py_UNREACHABLE();
362
8.45M
            case 'd':
363
8.45M
            case 'i':
364
8.45M
            case 'u':
365
8.45M
                base = 10;
366
8.45M
                break;
367
0
            case 'o':
368
0
                base = 8;
369
0
                break;
370
41
            case 'x':
371
41
            case 'X':
372
41
                base = 16;
373
41
                break;
374
8.45M
        }
375
376
8.45M
        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
377
0
            Py_DECREF(iobj);
378
0
            return -1;
379
0
        }
380
8.45M
        Py_DECREF(iobj);
381
8.45M
        return 1;
382
8.45M
    }
383
384
3.73M
    res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
385
3.73M
    Py_DECREF(iobj);
386
3.73M
    if (res == NULL)
387
0
        return -1;
388
3.73M
    *p_output = res;
389
3.73M
    return 0;
390
391
4.49M
wrongtype:
392
4.49M
    switch(type)
393
4.49M
    {
394
0
        case 'o':
395
0
        case 'x':
396
0
        case 'X':
397
0
            FORMAT_ERROR(PyExc_TypeError,
398
0
                         "%%%c requires an integer, not %T",
399
0
                         arg->ch, v);
400
0
            break;
401
4.49M
        default:
402
4.49M
            FORMAT_ERROR(PyExc_TypeError,
403
4.49M
                         "%%%c requires a real number, not %T",
404
4.49M
                         arg->ch, v);
405
4.49M
            break;
406
4.49M
    }
407
4.49M
    return -1;
408
4.49M
}
409
410
411
static Py_UCS4
412
formatchar(PyObject *v,
413
           struct unicode_formatter_t *ctx,
414
           struct unicode_format_arg_t *arg)
415
0
{
416
    /* presume that the buffer is at least 3 characters long */
417
0
    if (PyUnicode_Check(v)) {
418
0
        if (PyUnicode_GET_LENGTH(v) == 1) {
419
0
            return PyUnicode_READ_CHAR(v, 0);
420
0
        }
421
0
        FORMAT_ERROR(PyExc_TypeError,
422
0
                     "%%c requires an integer or a unicode character, "
423
0
                     "not a string of length %zd",
424
0
                     PyUnicode_GET_LENGTH(v));
425
0
        return (Py_UCS4) -1;
426
0
    }
427
0
    else {
428
0
        int overflow;
429
0
        long x = PyLong_AsLongAndOverflow(v, &overflow);
430
0
        if (x == -1 && PyErr_Occurred()) {
431
0
            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
432
0
                FORMAT_ERROR(PyExc_TypeError,
433
0
                             "%%c requires an integer or a unicode character, "
434
0
                             "not %T",
435
0
                             v);
436
0
            }
437
0
            return (Py_UCS4) -1;
438
0
        }
439
440
0
        if (x < 0 || x > MAX_UNICODE) {
441
            /* this includes an overflow in converting to C long */
442
0
            FORMAT_ERROR(PyExc_OverflowError,
443
0
                         "%%c argument not in range(0x110000)%s", "");
444
0
            return (Py_UCS4) -1;
445
0
        }
446
447
0
        return (Py_UCS4) x;
448
0
    }
449
0
}
450
451
452
/* Parse options of an argument: flags, width, precision.
453
   Handle also "%(name)" syntax.
454
455
   Return 0 if the argument has been formatted into arg->str.
456
   Return 1 if the argument has been written into ctx->writer,
457
   Raise an exception and return -1 on error. */
458
static int
459
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
460
                         struct unicode_format_arg_t *arg)
461
53.5M
{
462
53.5M
#define FORMAT_READ(ctx) \
463
57.6M
        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
464
465
53.5M
    PyObject *v;
466
467
53.5M
    if (arg->ch == '(') {
468
        /* Get argument value from a dictionary. Example: "%(name)s". */
469
37.5k
        Py_ssize_t keystart;
470
37.5k
        Py_ssize_t keylen;
471
37.5k
        int pcount = 1;
472
473
37.5k
        if (ctx->dict == NULL) {
474
0
            PyErr_Format(PyExc_TypeError,
475
0
                         "format requires a mapping, not %T",
476
0
                         ctx->args);
477
0
            return -1;
478
0
        }
479
37.5k
        ++ctx->fmtpos;
480
37.5k
        --ctx->fmtcnt;
481
37.5k
        keystart = ctx->fmtpos;
482
        /* Skip over balanced parentheses */
483
338k
        while (pcount > 0 && --ctx->fmtcnt >= 0) {
484
300k
            arg->ch = FORMAT_READ(ctx);
485
300k
            if (arg->ch == ')')
486
37.5k
                --pcount;
487
262k
            else if (arg->ch == '(')
488
0
                ++pcount;
489
300k
            ctx->fmtpos++;
490
300k
        }
491
37.5k
        keylen = ctx->fmtpos - keystart - 1;
492
37.5k
        if (ctx->fmtcnt < 0 || pcount > 0) {
493
0
            PyErr_Format(PyExc_ValueError,
494
0
                         "stray %% or incomplete format key at position %zd",
495
0
                         arg->fmtstart);
496
0
            return -1;
497
0
        }
498
37.5k
        arg->key = PyUnicode_Substring(ctx->fmtstr,
499
37.5k
                                       keystart, keystart + keylen);
500
37.5k
        if (arg->key == NULL)
501
0
            return -1;
502
37.5k
        if (ctx->args_owned) {
503
26.8k
            ctx->args_owned = 0;
504
26.8k
            Py_DECREF(ctx->args);
505
26.8k
        }
506
37.5k
        ctx->args = PyObject_GetItem(ctx->dict, arg->key);
507
37.5k
        if (ctx->args == NULL)
508
0
            return -1;
509
37.5k
        ctx->args_owned = 1;
510
37.5k
        ctx->arglen = -3;
511
37.5k
        ctx->argidx = -4;
512
37.5k
    }
513
53.4M
    else {
514
53.4M
        if (ctx->arglen < -1) {
515
0
            PyErr_Format(PyExc_ValueError,
516
0
                         "format requires a parenthesised mapping key "
517
0
                         "at position %zd",
518
0
                         arg->fmtstart);
519
0
            return -1;
520
0
        }
521
53.4M
    }
522
523
    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
524
53.5M
    while (--ctx->fmtcnt >= 0) {
525
53.5M
        arg->ch = FORMAT_READ(ctx);
526
53.5M
        ctx->fmtpos++;
527
53.5M
        switch (arg->ch) {
528
0
        case '-': arg->flags |= F_LJUST; continue;
529
0
        case '+': arg->flags |= F_SIGN; continue;
530
0
        case ' ': arg->flags |= F_BLANK; continue;
531
41
        case '#': arg->flags |= F_ALT; continue;
532
69.5k
        case '0': arg->flags |= F_ZERO; continue;
533
53.5M
        }
534
53.5M
        break;
535
53.5M
    }
536
537
    /* Parse width. Example: "%10s" => width=10 */
538
53.5M
    if (arg->ch == '*') {
539
55.4k
        if (ctx->arglen < -1) {
540
0
            PyErr_Format(PyExc_ValueError,
541
0
                    "* cannot be used with a parenthesised mapping key "
542
0
                    "at position %zd",
543
0
                    arg->fmtstart);
544
0
            return -1;
545
0
        }
546
55.4k
        v = unicode_format_getnextarg(ctx, 0);
547
55.4k
        if (v == NULL)
548
0
            return -1;
549
55.4k
        if (!PyLong_Check(v)) {
550
0
            FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
551
0
            return -1;
552
0
        }
553
55.4k
        arg->width = PyLong_AsSsize_t(v);
554
55.4k
        if (arg->width == -1 && PyErr_Occurred()) {
555
0
            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
556
0
                FORMAT_ERROR(PyExc_OverflowError,
557
0
                             "too big for width%s", "");
558
0
            }
559
0
            return -1;
560
0
        }
561
55.4k
        if (arg->width < 0) {
562
0
            arg->flags |= F_LJUST;
563
0
            arg->width = -arg->width;
564
0
        }
565
55.4k
        if (--ctx->fmtcnt >= 0) {
566
55.4k
            arg->ch = FORMAT_READ(ctx);
567
55.4k
            ctx->fmtpos++;
568
55.4k
        }
569
55.4k
    }
570
53.4M
    else if (arg->ch >= '0' && arg->ch <= '9') {
571
3.67M
        arg->width = arg->ch - '0';
572
3.67M
        while (--ctx->fmtcnt >= 0) {
573
3.67M
            arg->ch = FORMAT_READ(ctx);
574
3.67M
            ctx->fmtpos++;
575
3.67M
            if (arg->ch < '0' || arg->ch > '9')
576
3.67M
                break;
577
            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
578
               mixing signed and unsigned comparison. Since arg->ch is between
579
               '0' and '9', casting to int is safe. */
580
0
            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
581
0
                PyErr_Format(PyExc_ValueError,
582
0
                             "width too big at position %zd",
583
0
                             arg->fmtstart);
584
0
                return -1;
585
0
            }
586
0
            arg->width = arg->width*10 + (arg->ch - '0');
587
0
        }
588
3.67M
    }
589
590
    /* Parse precision. Example: "%.3f" => prec=3 */
591
53.5M
    if (arg->ch == '.') {
592
109
        arg->prec = 0;
593
109
        if (--ctx->fmtcnt >= 0) {
594
109
            arg->ch = FORMAT_READ(ctx);
595
109
            ctx->fmtpos++;
596
109
        }
597
109
        if (arg->ch == '*') {
598
0
            if (ctx->arglen < -1) {
599
0
                PyErr_Format(PyExc_ValueError,
600
0
                        "* cannot be used with a parenthesised mapping key "
601
0
                        "at position %zd",
602
0
                        arg->fmtstart);
603
0
                return -1;
604
0
            }
605
0
            v = unicode_format_getnextarg(ctx, 0);
606
0
            if (v == NULL)
607
0
                return -1;
608
0
            if (!PyLong_Check(v)) {
609
0
                FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
610
0
                return -1;
611
0
            }
612
0
            arg->prec = PyLong_AsInt(v);
613
0
            if (arg->prec == -1 && PyErr_Occurred()) {
614
0
                if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
615
0
                    FORMAT_ERROR(PyExc_OverflowError,
616
0
                                 "too big for precision%s", "");
617
0
                }
618
0
                return -1;
619
0
            }
620
0
            if (arg->prec < 0)
621
0
                arg->prec = 0;
622
0
            if (--ctx->fmtcnt >= 0) {
623
0
                arg->ch = FORMAT_READ(ctx);
624
0
                ctx->fmtpos++;
625
0
            }
626
0
        }
627
109
        else if (arg->ch >= '0' && arg->ch <= '9') {
628
109
            arg->prec = arg->ch - '0';
629
109
            while (--ctx->fmtcnt >= 0) {
630
109
                arg->ch = FORMAT_READ(ctx);
631
109
                ctx->fmtpos++;
632
109
                if (arg->ch < '0' || arg->ch > '9')
633
109
                    break;
634
0
                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
635
0
                    PyErr_Format(PyExc_ValueError,
636
0
                                 "precision too big at position %zd",
637
0
                                 arg->fmtstart);
638
0
                    return -1;
639
0
                }
640
0
                arg->prec = arg->prec*10 + (arg->ch - '0');
641
0
            }
642
109
        }
643
109
    }
644
645
    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
646
53.5M
    if (ctx->fmtcnt >= 0) {
647
53.5M
        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
648
0
            if (--ctx->fmtcnt >= 0) {
649
0
                arg->ch = FORMAT_READ(ctx);
650
0
                ctx->fmtpos++;
651
0
            }
652
0
        }
653
53.5M
    }
654
53.5M
    if (ctx->fmtcnt < 0) {
655
0
        PyErr_Format(PyExc_ValueError,
656
0
                     "stray %% at position %zd", arg->fmtstart);
657
0
        return -1;
658
0
    }
659
53.5M
    return 0;
660
661
53.5M
#undef FORMAT_READ
662
53.5M
}
663
664
665
/* Format one argument. Supported conversion specifiers:
666
667
   - "s", "r", "a": any type
668
   - "i", "d", "u": int or float
669
   - "o", "x", "X": int
670
   - "e", "E", "f", "F", "g", "G": float
671
   - "c": int or str (1 character)
672
673
   When possible, the output is written directly into the Unicode writer
674
   (ctx->writer). A string is created when padding is required.
675
676
   Return 0 if the argument has been formatted into *p_str,
677
          1 if the argument has been written into ctx->writer,
678
         -1 on error. */
679
static int
680
unicode_format_arg_format(struct unicode_formatter_t *ctx,
681
                          struct unicode_format_arg_t *arg,
682
                          PyObject **p_str)
683
53.5M
{
684
53.5M
    PyObject *v;
685
53.5M
    _PyUnicodeWriter *writer = &ctx->writer;
686
687
53.5M
    if (ctx->fmtcnt == 0)
688
16.4M
        ctx->writer.overallocate = 0;
689
690
53.5M
    v = unicode_format_getnextarg(ctx, 1);
691
53.5M
    if (v == NULL)
692
0
        return -1;
693
694
695
53.5M
    switch (arg->ch) {
696
36.8M
    case 's':
697
36.8M
    case 'r':
698
36.8M
    case 'a':
699
36.8M
        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
700
            /* Fast path */
701
135
            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
702
0
                return -1;
703
135
            return 1;
704
135
        }
705
706
36.8M
        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
707
36.8M
            *p_str = Py_NewRef(v);
708
36.8M
        }
709
11.6k
        else {
710
11.6k
            if (arg->ch == 's')
711
90
                *p_str = PyObject_Str(v);
712
11.5k
            else if (arg->ch == 'r')
713
11.5k
                *p_str = PyObject_Repr(v);
714
0
            else
715
0
                *p_str = PyObject_ASCII(v);
716
11.6k
        }
717
36.8M
        break;
718
719
0
    case 'i':
720
16.6M
    case 'd':
721
16.6M
    case 'u':
722
16.6M
    case 'o':
723
16.6M
    case 'x':
724
16.6M
    case 'X':
725
16.6M
    {
726
16.6M
        int ret = mainformatlong(v, ctx, arg, p_str, writer);
727
16.6M
        if (ret != 0)
728
12.9M
            return ret;
729
3.73M
        arg->sign = 1;
730
3.73M
        break;
731
16.6M
    }
732
733
0
    case 'e':
734
0
    case 'E':
735
109
    case 'f':
736
109
    case 'F':
737
109
    case 'g':
738
109
    case 'G':
739
109
        if (arg->width == -1 && arg->prec == -1
740
0
            && !(arg->flags & (F_SIGN | F_BLANK)))
741
0
        {
742
            /* Fast path */
743
0
            if (formatfloat(v, ctx, arg, NULL, writer) == -1)
744
0
                return -1;
745
0
            return 1;
746
0
        }
747
748
109
        arg->sign = 1;
749
109
        if (formatfloat(v, ctx, arg, p_str, NULL) == -1)
750
0
            return -1;
751
109
        break;
752
753
109
    case 'c':
754
0
    {
755
0
        Py_UCS4 ch = formatchar(v, ctx, arg);
756
0
        if (ch == (Py_UCS4) -1)
757
0
            return -1;
758
0
        if (arg->width == -1 && arg->prec == -1) {
759
            /* Fast path */
760
0
            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
761
0
                return -1;
762
0
            return 1;
763
0
        }
764
0
        *p_str = PyUnicode_FromOrdinal(ch);
765
0
        break;
766
0
    }
767
768
0
    default:
769
0
        if (arg->ch < 128 && Py_ISALPHA(arg->ch)) {
770
0
            PyErr_Format(PyExc_ValueError,
771
0
                         "unsupported format %%%c at position %zd",
772
0
                         (int)arg->ch, arg->fmtstart);
773
0
        }
774
0
        else if (arg->ch == '\'') {
775
0
            PyErr_Format(PyExc_ValueError,
776
0
                         "stray %% at position %zd or unexpected "
777
0
                         "format character \"'\" at position %zd",
778
0
                         arg->fmtstart,
779
0
                         ctx->fmtpos - 1);
780
0
        }
781
0
        else if (arg->ch >= 32 && arg->ch < 127) {
782
0
            PyErr_Format(PyExc_ValueError,
783
0
                         "stray %% at position %zd or unexpected "
784
0
                         "format character '%c' at position %zd",
785
0
                         arg->fmtstart,
786
0
                         (int)arg->ch, ctx->fmtpos - 1);
787
0
        }
788
0
        else if (Py_UNICODE_ISPRINTABLE(arg->ch)) {
789
0
            PyErr_Format(PyExc_ValueError,
790
0
                         "stray %% at position %zd or unexpected "
791
0
                         "format character '%c' (U+%04X) at position %zd",
792
0
                         arg->fmtstart,
793
0
                         (int)arg->ch, (int)arg->ch, ctx->fmtpos - 1);
794
0
        }
795
0
        else {
796
0
            PyErr_Format(PyExc_ValueError,
797
0
                         "stray %% at position %zd or unexpected "
798
0
                         "format character U+%04X at position %zd",
799
0
                         arg->fmtstart, (int)arg->ch, ctx->fmtpos - 1);
800
0
        }
801
0
        return -1;
802
53.5M
    }
803
40.5M
    if (*p_str == NULL)
804
0
        return -1;
805
40.5M
    assert (PyUnicode_Check(*p_str));
806
40.5M
    return 0;
807
40.5M
}
808
809
810
static int
811
unicode_format_arg_output(struct unicode_formatter_t *ctx,
812
                          struct unicode_format_arg_t *arg,
813
                          PyObject *str)
814
40.5M
{
815
40.5M
    Py_ssize_t len;
816
40.5M
    int kind;
817
40.5M
    const void *pbuf;
818
40.5M
    Py_ssize_t pindex;
819
40.5M
    Py_UCS4 signchar;
820
40.5M
    Py_ssize_t buflen;
821
40.5M
    Py_UCS4 maxchar;
822
40.5M
    Py_ssize_t sublen;
823
40.5M
    _PyUnicodeWriter *writer = &ctx->writer;
824
40.5M
    Py_UCS4 fill;
825
826
40.5M
    fill = ' ';
827
40.5M
    if (arg->sign && arg->flags & F_ZERO)
828
69.5k
        fill = '0';
829
830
40.5M
    len = PyUnicode_GET_LENGTH(str);
831
40.5M
    if ((arg->width == -1 || arg->width <= len)
832
40.4M
        && (arg->prec == -1 || arg->prec >= len)
833
40.4M
        && !(arg->flags & (F_SIGN | F_BLANK)))
834
40.4M
    {
835
        /* Fast path */
836
40.4M
        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
837
0
            return -1;
838
40.4M
        return 0;
839
40.4M
    }
840
841
    /* Truncate the string for "s", "r" and "a" formats
842
       if the precision is set */
843
84.2k
    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
844
0
        if (arg->prec >= 0 && len > arg->prec)
845
0
            len = arg->prec;
846
0
    }
847
848
    /* Adjust sign and width */
849
84.2k
    kind = PyUnicode_KIND(str);
850
84.2k
    pbuf = PyUnicode_DATA(str);
851
84.2k
    pindex = 0;
852
84.2k
    signchar = '\0';
853
84.2k
    if (arg->sign) {
854
84.2k
        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
855
84.2k
        if (ch == '-' || ch == '+') {
856
0
            signchar = ch;
857
0
            len--;
858
0
            pindex++;
859
0
        }
860
84.2k
        else if (arg->flags & F_SIGN)
861
0
            signchar = '+';
862
84.2k
        else if (arg->flags & F_BLANK)
863
0
            signchar = ' ';
864
84.2k
        else
865
84.2k
            arg->sign = 0;
866
84.2k
    }
867
84.2k
    if (arg->width < len)
868
109
        arg->width = len;
869
870
    /* Prepare the writer */
871
84.2k
    maxchar = writer->maxchar;
872
84.2k
    if (!(arg->flags & F_LJUST)) {
873
84.2k
        if (arg->sign) {
874
0
            if ((arg->width-1) > len)
875
0
                maxchar = Py_MAX(maxchar, fill);
876
0
        }
877
84.2k
        else {
878
84.2k
            if (arg->width > len)
879
84.1k
                maxchar = Py_MAX(maxchar, fill);
880
84.2k
        }
881
84.2k
    }
882
84.2k
    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
883
66.9k
        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
884
66.9k
        maxchar = Py_MAX(maxchar, strmaxchar);
885
66.9k
    }
886
887
84.2k
    buflen = arg->width;
888
84.2k
    if (arg->sign && len == arg->width)
889
0
        buflen++;
890
84.2k
    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
891
0
        return -1;
892
893
    /* Write the sign if needed */
894
84.2k
    if (arg->sign) {
895
0
        if (fill != ' ') {
896
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
897
0
            writer->pos += 1;
898
0
        }
899
0
        if (arg->width > len)
900
0
            arg->width--;
901
0
    }
902
903
    /* Write the numeric prefix for "x", "X" and "o" formats
904
       if the alternate form is used.
905
       For example, write "0x" for the "%#x" format. */
906
84.2k
    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
907
0
        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
908
0
        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
909
0
        if (fill != ' ') {
910
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
911
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
912
0
            writer->pos += 2;
913
0
            pindex += 2;
914
0
        }
915
0
        arg->width -= 2;
916
0
        if (arg->width < 0)
917
0
            arg->width = 0;
918
0
        len -= 2;
919
0
    }
920
921
    /* Pad left with the fill character if needed */
922
84.2k
    if (arg->width > len && !(arg->flags & F_LJUST)) {
923
84.1k
        sublen = arg->width - len;
924
84.1k
        _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
925
84.1k
        writer->pos += sublen;
926
84.1k
        arg->width = len;
927
84.1k
    }
928
929
    /* If padding with spaces: write sign if needed and/or numeric prefix if
930
       the alternate form is used */
931
84.2k
    if (fill == ' ') {
932
17.1k
        if (arg->sign) {
933
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
934
0
            writer->pos += 1;
935
0
        }
936
17.1k
        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
937
0
            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
938
0
            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
939
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
940
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
941
0
            writer->pos += 2;
942
0
            pindex += 2;
943
0
        }
944
17.1k
    }
945
946
    /* Write characters */
947
84.2k
    if (len) {
948
84.2k
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
949
84.2k
                                      str, pindex, len);
950
84.2k
        writer->pos += len;
951
84.2k
    }
952
953
    /* Pad right with the fill character if needed */
954
84.2k
    if (arg->width > len) {
955
0
        sublen = arg->width - len;
956
0
        _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
957
0
        writer->pos += sublen;
958
0
    }
959
84.2k
    return 0;
960
84.2k
}
961
962
963
/* Helper of PyUnicode_Format(): format one arg.
964
   Return 0 on success, raise an exception and return -1 on error. */
965
static int
966
unicode_format_arg(struct unicode_formatter_t *ctx)
967
53.5M
{
968
53.5M
    struct unicode_format_arg_t arg;
969
53.5M
    PyObject *str;
970
53.5M
    int ret;
971
972
53.5M
    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
973
53.5M
    if (arg.ch == '%') {
974
0
        ctx->fmtpos++;
975
0
        ctx->fmtcnt--;
976
0
        if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0)
977
0
            return -1;
978
0
        return 0;
979
0
    }
980
53.5M
    arg.flags = 0;
981
53.5M
    arg.width = -1;
982
53.5M
    arg.prec = -1;
983
53.5M
    arg.sign = 0;
984
53.5M
    arg.fmtstart = ctx->fmtpos - 1;
985
53.5M
    arg.key = NULL;
986
53.5M
    str = NULL;
987
988
53.5M
    ret = unicode_format_arg_parse(ctx, &arg);
989
53.5M
    if (ret == -1) {
990
0
        goto onError;
991
0
    }
992
993
53.5M
    ret = unicode_format_arg_format(ctx, &arg, &str);
994
53.5M
    if (ret == -1) {
995
4.49M
        goto onError;
996
4.49M
    }
997
998
49.0M
    if (ret != 1) {
999
40.5M
        ret = unicode_format_arg_output(ctx, &arg, str);
1000
40.5M
        Py_DECREF(str);
1001
40.5M
        if (ret == -1) {
1002
0
            goto onError;
1003
0
        }
1004
40.5M
    }
1005
1006
49.0M
    if (ctx->dict && (ctx->argidx < ctx->arglen)) {
1007
        // XXX: Never happens?
1008
0
        PyErr_SetString(PyExc_TypeError,
1009
0
                        "not all arguments converted during string formatting");
1010
0
        goto onError;
1011
0
    }
1012
49.0M
    Py_XDECREF(arg.key);
1013
49.0M
    return 0;
1014
1015
4.49M
  onError:
1016
4.49M
    Py_XDECREF(arg.key);
1017
4.49M
    return -1;
1018
49.0M
}
1019
1020
1021
/* Build a string from `format` and `args` using '%' conversions.

   Runs of literal text are copied to the output unchanged; each '%'
   hands control to unicode_format_arg().  `args` may be a tuple, a
   mapping (other than a tuple or str), or any other single object.

   Return the object produced by _PyUnicodeWriter_Finish() on success.
   On failure return NULL with an exception set; this includes a
   bad-internal-call error when either parameter is NULL. */
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{
    struct unicode_formatter_t ctx;

    if (format == NULL || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    if (ensure_unicode(format) < 0) {
        return NULL;
    }

    /* Format string cursor. */
    ctx.fmtstr = format;
    ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
    ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
    ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
    ctx.fmtpos = 0;

    /* Output writer. */
    _PyUnicodeWriter_Init(&ctx.writer);
    ctx.writer.min_length = ctx.fmtcnt + 100;
    ctx.writer.overallocate = 1;

    /* Argument bookkeeping. */
    ctx.args = args;
    ctx.args_owned = 0;
    const int args_is_tuple = PyTuple_Check(args);
    if (args_is_tuple) {
        ctx.arglen = PyTuple_Size(args);
        ctx.argidx = 0;
        ctx.dict = NULL;
    }
    else {
        /* Negative markers for a non-tuple argument; the final check
           below reports this case as "got 1". */
        ctx.arglen = -1;
        ctx.argidx = -2;
        if (PyMapping_Check(args) && !PyUnicode_Check(args)) {
            ctx.dict = args;
        }
        else {
            ctx.dict = NULL;
        }
    }

    while (--ctx.fmtcnt >= 0) {
        if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) == '%') {
            /* Conversion specifier: step over the '%' and format it. */
            ctx.fmtpos++;
            if (unicode_format_arg(&ctx) == -1) {
                goto onError;
            }
            continue;
        }

        /* Literal text: extend the run up to the next '%' or the end. */
        const Py_ssize_t run_start = ctx.fmtpos++;
        for (; ctx.fmtcnt >= 0
               && PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%';
             ctx.fmtpos++, ctx.fmtcnt--) {
        }
        if (ctx.fmtcnt < 0) {
            /* The scan ran off the end and overshot by one position.
               Nothing follows this run, so stop overallocating. */
            ctx.fmtpos--;
            ctx.writer.overallocate = 0;
        }

        if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
                                            run_start, ctx.fmtpos) < 0) {
            goto onError;
        }
    }

    /* Positional arguments left over after the whole format was used. */
    if (ctx.dict == NULL && ctx.argidx < ctx.arglen) {
        Py_ssize_t required;
        Py_ssize_t got;
        if (ctx.arglen < 0) {
            required = 0;
            got = 1;
        }
        else {
            required = ctx.argidx;
            got = ctx.arglen;
        }
        PyErr_Format(PyExc_TypeError,
                     "not all arguments converted during string formatting "
                     "(required %zd, got %zd)",
                     required, got);
        goto onError;
    }

    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return _PyUnicodeWriter_Finish(&ctx.writer);

  onError:
    _PyUnicodeWriter_Dealloc(&ctx.writer);
    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return NULL;
}