Coverage Report

Created: 2026-04-12 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/unicode_format.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
// PyUnicode_Format() implementation
42
43
#include "Python.h"
44
#include "pycore_abstract.h"      // _PyIndex_Check()
45
#include "pycore_format.h"        // F_ALT
46
#include "pycore_long.h"          // _PyLong_FormatWriter()
47
#include "pycore_object.h"        // _PyObject_IsUniquelyReferenced()
48
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
49
50
51
0
#define MAX_UNICODE _Py_MAX_UNICODE
52
17.3M
#define ensure_unicode _PyUnicode_EnsureUnicode
53
54
struct unicode_formatter_t {
55
    PyObject *args;
56
    int args_owned;
57
    Py_ssize_t arglen, argidx;
58
    PyObject *dict;
59
60
    int fmtkind;
61
    Py_ssize_t fmtcnt, fmtpos;
62
    const void *fmtdata;
63
    PyObject *fmtstr;
64
65
    _PyUnicodeWriter writer;
66
};
67
68
69
struct unicode_format_arg_t {
70
    Py_UCS4 ch;
71
    int flags;
72
    Py_ssize_t width;
73
    int prec;
74
    int sign;
75
    Py_ssize_t fmtstart;
76
    PyObject *key;
77
};
78
79
80
// Use FORMAT_ERROR(EXC, "...%s", "") when there are no arguments.
81
1.20M
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
82
1.20M
    if (arg->key != NULL) {                                                 \
83
0
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
84
0
                     arg->key, __VA_ARGS__);                                \
85
0
    }                                                                       \
86
1.20M
    else if (ctx->argidx >= 0) {                                            \
87
0
        PyErr_Format((EXC), "format argument %zd: " FMT,                    \
88
0
                     ctx->argidx, __VA_ARGS__);                             \
89
0
    }                                                                       \
90
1.20M
    else {                                                                  \
91
1.20M
        PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);          \
92
1.20M
    }                                                                       \
93
1.20M
} while (0)
94
95
96
static PyObject *
97
unicode_format_getnextarg(struct unicode_formatter_t *ctx, int allowone)
98
33.3M
{
99
33.3M
    Py_ssize_t argidx = ctx->argidx;
100
101
33.3M
    if (argidx < ctx->arglen && (allowone || ctx->arglen >= 0)) {
102
33.3M
        ctx->argidx++;
103
33.3M
        if (ctx->arglen >= 0) {
104
27.7M
            return PyTuple_GetItem(ctx->args, argidx);
105
27.7M
        }
106
5.59M
        else if (allowone) {
107
5.59M
            return ctx->args;
108
5.59M
        }
109
33.3M
    }
110
0
    PyErr_Format(PyExc_TypeError,
111
0
                 "not enough arguments for format string (got %zd)",
112
0
                 ctx->arglen < 0 ? 1 : ctx->arglen);
113
0
    return NULL;
114
33.3M
}
115
116
117
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
118
119
/* Format a float into the writer if the writer is not NULL, or into *p_output
120
   otherwise.
121
122
   Return 0 on success, raise an exception and return -1 on error. */
123
static int
124
formatfloat(PyObject *v,
125
            struct unicode_formatter_t *ctx,
126
            struct unicode_format_arg_t *arg,
127
            PyObject **p_output,
128
            _PyUnicodeWriter *writer)
129
104
{
130
104
    char *p;
131
104
    double x;
132
104
    Py_ssize_t len;
133
104
    int prec;
134
104
    int dtoa_flags = 0;
135
136
104
    x = PyFloat_AsDouble(v);
137
104
    if (x == -1.0 && PyErr_Occurred()) {
138
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
139
0
            FORMAT_ERROR(PyExc_TypeError,
140
0
                         "%%%c requires a real number, not %T",
141
0
                         arg->ch, v);
142
0
        }
143
0
        return -1;
144
0
    }
145
146
104
    prec = arg->prec;
147
104
    if (prec < 0)
148
0
        prec = 6;
149
150
104
    if (arg->flags & F_ALT)
151
0
        dtoa_flags |= Py_DTSF_ALT;
152
104
    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
153
104
    if (p == NULL)
154
0
        return -1;
155
104
    len = strlen(p);
156
104
    if (writer) {
157
0
        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
158
0
            PyMem_Free(p);
159
0
            return -1;
160
0
        }
161
0
    }
162
104
    else
163
104
        *p_output = _PyUnicode_FromASCII(p, len);
164
104
    PyMem_Free(p);
165
104
    return 0;
166
104
}
167
168
169
/* _PyUnicode_FormatLong() emulates the format codes d, i, u, o, x and X, and
170
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
171
 * Python's regular ints.
172
 * Return value:  a new PyUnicodeObject*, or NULL if error.
173
 *     The output string is of the form
174
 *         "-"? ("0x" | "0X")? digit+
175
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
176
 *         set in flags.  The case of hex digits will be correct,
177
 *     There will be at least prec digits, zero-filled on the left if
178
 *         necessary to get that many.
179
 * val          object to be converted
180
 * alt          nonzero if F_ALT is set; the base marker is then kept
181
 * prec         minimum number of digits; 0-fill on left if needed
182
 * type         a character in [diuoxX]; i and u act the same as d
183
 *
184
 * CAUTION:  o, x and X conversions on regular ints can never
185
 * produce a '-' sign, but can for Python's unbounded ints.
186
 */
187
PyObject *
188
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
189
9.35M
{
190
9.35M
    PyObject *result = NULL;
191
9.35M
    char *buf;
192
9.35M
    Py_ssize_t i;
193
9.35M
    int sign;           /* 1 if '-', else 0 */
194
9.35M
    int len;            /* number of characters */
195
9.35M
    Py_ssize_t llen;
196
9.35M
    int numdigits;      /* len == numnondigits + numdigits */
197
9.35M
    int numnondigits = 0;
198
199
    /* Avoid overflowing int in "numnondigits + prec" (numnondigits <= 3) */
200
9.35M
    if (prec > INT_MAX-3) {
201
0
        PyErr_SetString(PyExc_OverflowError,
202
0
                        "precision too large");
203
0
        return NULL;
204
0
    }
205
206
9.35M
    assert(PyLong_Check(val));
207
208
9.35M
    switch (type) {
209
0
    default:
210
0
        Py_UNREACHABLE();
211
3.66M
    case 'd':
212
3.66M
    case 'i':
213
3.66M
    case 'u':
214
        /* int and int subclasses should print numerically when a numeric */
215
        /* format code is used (see issue18780) */
216
3.66M
        result = PyNumber_ToBase(val, 10);
217
3.66M
        break;
218
81.7k
    case 'o':
219
81.7k
        numnondigits = 2;
220
81.7k
        result = PyNumber_ToBase(val, 8);
221
81.7k
        break;
222
83
    case 'x':
223
5.60M
    case 'X':
224
5.60M
        numnondigits = 2;
225
5.60M
        result = PyNumber_ToBase(val, 16);
226
5.60M
        break;
227
9.35M
    }
228
9.35M
    if (!result)
229
0
        return NULL;
230
231
9.35M
    assert(_PyUnicode_IsModifiable(result));
232
9.35M
    assert(PyUnicode_IS_ASCII(result));
233
234
    /* To modify the string in-place, there can only be one reference. */
235
9.35M
    if (!_PyObject_IsUniquelyReferenced(result)) {
236
0
        Py_DECREF(result);
237
0
        PyErr_BadInternalCall();
238
0
        return NULL;
239
0
    }
240
9.35M
    buf = PyUnicode_DATA(result);
241
9.35M
    llen = PyUnicode_GET_LENGTH(result);
242
9.35M
    if (llen > INT_MAX) {
243
0
        Py_DECREF(result);
244
0
        PyErr_SetString(PyExc_ValueError,
245
0
                        "string too large in _PyUnicode_FormatLong");
246
0
        return NULL;
247
0
    }
248
9.35M
    len = (int)llen;
249
9.35M
    sign = buf[0] == '-';
250
9.35M
    numnondigits += sign;
251
9.35M
    numdigits = len - numnondigits;
252
9.35M
    assert(numdigits > 0);
253
254
    /* Get rid of base marker unless F_ALT */
255
9.35M
    if (((alt) == 0 &&
256
9.35M
        (type == 'o' || type == 'x' || type == 'X'))) {
257
5.68M
        assert(buf[sign] == '0');
258
5.68M
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
259
5.68M
               buf[sign+1] == 'o');
260
5.68M
        numnondigits -= 2;
261
5.68M
        buf += 2;
262
5.68M
        len -= 2;
263
5.68M
        if (sign)
264
0
            buf[0] = '-';
265
5.68M
        assert(len == numnondigits + numdigits);
266
5.68M
        assert(numdigits > 0);
267
5.68M
    }
268
269
    /* Fill with leading zeroes to meet the minimum number of digits (prec). */
270
9.35M
    if (prec > numdigits) {
271
0
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
272
0
                                numnondigits + prec);
273
0
        char *b1;
274
0
        if (!r1) {
275
0
            Py_DECREF(result);
276
0
            return NULL;
277
0
        }
278
0
        b1 = PyBytes_AS_STRING(r1);
279
0
        for (i = 0; i < numnondigits; ++i)
280
0
            *b1++ = *buf++;
281
0
        for (i = 0; i < prec - numdigits; i++)
282
0
            *b1++ = '0';
283
0
        for (i = 0; i < numdigits; i++)
284
0
            *b1++ = *buf++;
285
0
        *b1 = '\0';
286
0
        Py_SETREF(result, r1);
287
0
        buf = PyBytes_AS_STRING(result);
288
0
        len = numnondigits + prec;
289
0
    }
290
291
    /* Fix up case for hex conversions. */
292
9.35M
    if (type == 'X') {
293
        /* Need to convert all lower case letters to upper case.
294
           and need to convert 0x to 0X (and -0x to -0X). */
295
39.2M
        for (i = 0; i < len; i++)
296
33.6M
            if (buf[i] >= 'a' && buf[i] <= 'x')
297
7.15M
                buf[i] -= 'a'-'A';
298
5.60M
    }
299
9.35M
    if (!PyUnicode_Check(result)
300
9.35M
        || buf != PyUnicode_DATA(result)) {
301
5.68M
        PyObject *unicode;
302
5.68M
        unicode = _PyUnicode_FromASCII(buf, len);
303
5.68M
        Py_SETREF(result, unicode);
304
5.68M
    }
305
3.66M
    else if (len != PyUnicode_GET_LENGTH(result)) {
306
0
        if (PyUnicode_Resize(&result, len) < 0)
307
0
            Py_CLEAR(result);
308
0
    }
309
9.35M
    return result;
310
9.35M
}
311
312
313
/* Format an integer or a float as an integer.
314
 * Return 1 if the number has been formatted into the writer,
315
 *        0 if the number has been formatted into *p_output
316
 *       -1 and raise an exception on error */
317
static int
318
mainformatlong(PyObject *v,
319
               struct unicode_formatter_t *ctx,
320
               struct unicode_format_arg_t *arg,
321
               PyObject **p_output,
322
               _PyUnicodeWriter *writer)
323
13.2M
{
324
13.2M
    PyObject *iobj, *res;
325
13.2M
    char type = (char)arg->ch;
326
327
13.2M
    if (!PyNumber_Check(v))
328
1.20M
        goto wrongtype;
329
330
    /* make sure number is a type of integer for o, x, and X */
331
12.0M
    if (!PyLong_Check(v)) {
332
0
        if (type == 'o' || type == 'x' || type == 'X') {
333
0
            iobj = _PyNumber_Index(v);
334
0
        }
335
0
        else {
336
0
            iobj = PyNumber_Long(v);
337
0
        }
338
0
        if (iobj == NULL ) {
339
0
            if (PyErr_ExceptionMatches(PyExc_TypeError))
340
0
                goto wrongtype;
341
0
            return -1;
342
0
        }
343
0
        assert(PyLong_Check(iobj));
344
0
    }
345
12.0M
    else {
346
12.0M
        iobj = Py_NewRef(v);
347
12.0M
    }
348
349
12.0M
    if (PyLong_CheckExact(v)
350
12.0M
        && arg->width == -1 && arg->prec == -1
351
8.32M
        && !(arg->flags & (F_SIGN | F_BLANK))
352
8.32M
        && type != 'X')
353
2.71M
    {
354
        /* Fast path */
355
2.71M
        int alternate = arg->flags & F_ALT;
356
2.71M
        int base;
357
358
2.71M
        switch(type)
359
2.71M
        {
360
0
            default:
361
0
                Py_UNREACHABLE();
362
2.71M
            case 'd':
363
2.71M
            case 'i':
364
2.71M
            case 'u':
365
2.71M
                base = 10;
366
2.71M
                break;
367
0
            case 'o':
368
0
                base = 8;
369
0
                break;
370
43
            case 'x':
371
43
            case 'X':
372
43
                base = 16;
373
43
                break;
374
2.71M
        }
375
376
2.71M
        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
377
0
            Py_DECREF(iobj);
378
0
            return -1;
379
0
        }
380
2.71M
        Py_DECREF(iobj);
381
2.71M
        return 1;
382
2.71M
    }
383
384
9.35M
    res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
385
9.35M
    Py_DECREF(iobj);
386
9.35M
    if (res == NULL)
387
0
        return -1;
388
9.35M
    *p_output = res;
389
9.35M
    return 0;
390
391
1.20M
wrongtype:
392
1.20M
    switch(type)
393
1.20M
    {
394
0
        case 'o':
395
0
        case 'x':
396
0
        case 'X':
397
0
            FORMAT_ERROR(PyExc_TypeError,
398
0
                         "%%%c requires an integer, not %T",
399
0
                         arg->ch, v);
400
0
            break;
401
1.20M
        default:
402
1.20M
            FORMAT_ERROR(PyExc_TypeError,
403
1.20M
                         "%%%c requires a real number, not %T",
404
1.20M
                         arg->ch, v);
405
1.20M
            break;
406
1.20M
    }
407
1.20M
    return -1;
408
1.20M
}
409
410
411
static Py_UCS4
412
formatchar(PyObject *v,
413
           struct unicode_formatter_t *ctx,
414
           struct unicode_format_arg_t *arg)
415
0
{
416
    /* presume that the buffer is at least 3 characters long */
417
0
    if (PyUnicode_Check(v)) {
418
0
        if (PyUnicode_GET_LENGTH(v) == 1) {
419
0
            return PyUnicode_READ_CHAR(v, 0);
420
0
        }
421
0
        FORMAT_ERROR(PyExc_TypeError,
422
0
                     "%%c requires an integer or a unicode character, "
423
0
                     "not a string of length %zd",
424
0
                     PyUnicode_GET_LENGTH(v));
425
0
        return (Py_UCS4) -1;
426
0
    }
427
0
    else {
428
0
        int overflow;
429
0
        long x = PyLong_AsLongAndOverflow(v, &overflow);
430
0
        if (x == -1 && PyErr_Occurred()) {
431
0
            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
432
0
                FORMAT_ERROR(PyExc_TypeError,
433
0
                             "%%c requires an integer or a unicode character, "
434
0
                             "not %T",
435
0
                             v);
436
0
            }
437
0
            return (Py_UCS4) -1;
438
0
        }
439
440
0
        if (x < 0 || x > MAX_UNICODE) {
441
            /* this includes an overflow in converting to C long */
442
0
            FORMAT_ERROR(PyExc_OverflowError,
443
0
                         "%%c argument not in range(0x110000)%s", "");
444
0
            return (Py_UCS4) -1;
445
0
        }
446
447
0
        return (Py_UCS4) x;
448
0
    }
449
0
}
450
451
452
/* Parse options of an argument: flags, width, precision.
453
   Handle also "%(name)" syntax.
454
455
   Return 0 on success: flags, width and precision are stored in *arg.
456
   (This function only parses; it never returns 1.)
457
   Raise an exception and return -1 on error. */
458
static int
459
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
460
                         struct unicode_format_arg_t *arg)
461
33.2M
{
462
33.2M
#define FORMAT_READ(ctx) \
463
37.5M
        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
464
465
33.2M
    PyObject *v;
466
467
33.2M
    if (arg->ch == '(') {
468
        /* Get argument value from a dictionary. Example: "%(name)s". */
469
56.7k
        Py_ssize_t keystart;
470
56.7k
        Py_ssize_t keylen;
471
56.7k
        int pcount = 1;
472
473
56.7k
        if (ctx->dict == NULL) {
474
0
            PyErr_Format(PyExc_TypeError,
475
0
                         "format requires a mapping, not %T",
476
0
                         ctx->args);
477
0
            return -1;
478
0
        }
479
56.7k
        ++ctx->fmtpos;
480
56.7k
        --ctx->fmtcnt;
481
56.7k
        keystart = ctx->fmtpos;
482
        /* Skip over balanced parentheses */
483
510k
        while (pcount > 0 && --ctx->fmtcnt >= 0) {
484
453k
            arg->ch = FORMAT_READ(ctx);
485
453k
            if (arg->ch == ')')
486
56.7k
                --pcount;
487
397k
            else if (arg->ch == '(')
488
0
                ++pcount;
489
453k
            ctx->fmtpos++;
490
453k
        }
491
56.7k
        keylen = ctx->fmtpos - keystart - 1;
492
56.7k
        if (ctx->fmtcnt < 0 || pcount > 0) {
493
0
            PyErr_Format(PyExc_ValueError,
494
0
                         "stray %% or incomplete format key at position %zd",
495
0
                         arg->fmtstart);
496
0
            return -1;
497
0
        }
498
56.7k
        arg->key = PyUnicode_Substring(ctx->fmtstr,
499
56.7k
                                       keystart, keystart + keylen);
500
56.7k
        if (arg->key == NULL)
501
0
            return -1;
502
56.7k
        if (ctx->args_owned) {
503
40.5k
            ctx->args_owned = 0;
504
40.5k
            Py_DECREF(ctx->args);
505
40.5k
        }
506
56.7k
        ctx->args = PyObject_GetItem(ctx->dict, arg->key);
507
56.7k
        if (ctx->args == NULL)
508
0
            return -1;
509
56.7k
        ctx->args_owned = 1;
510
56.7k
        ctx->arglen = -3;
511
56.7k
        ctx->argidx = -4;
512
56.7k
    }
513
33.2M
    else {
514
33.2M
        if (ctx->arglen < -1) {
515
0
            PyErr_Format(PyExc_ValueError,
516
0
                         "format requires a parenthesised mapping key "
517
0
                         "at position %zd",
518
0
                         arg->fmtstart);
519
0
            return -1;
520
0
        }
521
33.2M
    }
522
523
    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
524
33.3M
    while (--ctx->fmtcnt >= 0) {
525
33.3M
        arg->ch = FORMAT_READ(ctx);
526
33.3M
        ctx->fmtpos++;
527
33.3M
        switch (arg->ch) {
528
0
        case '-': arg->flags |= F_LJUST; continue;
529
0
        case '+': arg->flags |= F_SIGN; continue;
530
0
        case ' ': arg->flags |= F_BLANK; continue;
531
43
        case '#': arg->flags |= F_ALT; continue;
532
83.9k
        case '0': arg->flags |= F_ZERO; continue;
533
33.3M
        }
534
33.2M
        break;
535
33.3M
    }
536
537
    /* Parse width. Example: "%10s" => width=10 */
538
33.2M
    if (arg->ch == '*') {
539
67.7k
        if (ctx->arglen < -1) {
540
0
            PyErr_Format(PyExc_ValueError,
541
0
                    "* cannot be used with a parenthesised mapping key "
542
0
                    "at position %zd",
543
0
                    arg->fmtstart);
544
0
            return -1;
545
0
        }
546
67.7k
        v = unicode_format_getnextarg(ctx, 0);
547
67.7k
        if (v == NULL)
548
0
            return -1;
549
67.7k
        if (!PyLong_Check(v)) {
550
0
            FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
551
0
            return -1;
552
0
        }
553
67.7k
        arg->width = PyLong_AsSsize_t(v);
554
67.7k
        if (arg->width == -1 && PyErr_Occurred()) {
555
0
            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
556
0
                FORMAT_ERROR(PyExc_OverflowError,
557
0
                             "too big for width%s", "");
558
0
            }
559
0
            return -1;
560
0
        }
561
67.7k
        if (arg->width < 0) {
562
0
            arg->flags |= F_LJUST;
563
0
            arg->width = -arg->width;
564
0
        }
565
67.7k
        if (--ctx->fmtcnt >= 0) {
566
67.7k
            arg->ch = FORMAT_READ(ctx);
567
67.7k
            ctx->fmtpos++;
568
67.7k
        }
569
67.7k
    }
570
33.2M
    else if (arg->ch >= '0' && arg->ch <= '9') {
571
3.67M
        arg->width = arg->ch - '0';
572
3.67M
        while (--ctx->fmtcnt >= 0) {
573
3.67M
            arg->ch = FORMAT_READ(ctx);
574
3.67M
            ctx->fmtpos++;
575
3.67M
            if (arg->ch < '0' || arg->ch > '9')
576
3.67M
                break;
577
            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
578
               mixing signed and unsigned comparison. Since arg->ch is between
579
               '0' and '9', casting to int is safe. */
580
0
            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
581
0
                PyErr_Format(PyExc_ValueError,
582
0
                             "width too big at position %zd",
583
0
                             arg->fmtstart);
584
0
                return -1;
585
0
            }
586
0
            arg->width = arg->width*10 + (arg->ch - '0');
587
0
        }
588
3.67M
    }
589
590
    /* Parse precision. Example: "%.3f" => prec=3 */
591
33.2M
    if (arg->ch == '.') {
592
104
        arg->prec = 0;
593
104
        if (--ctx->fmtcnt >= 0) {
594
104
            arg->ch = FORMAT_READ(ctx);
595
104
            ctx->fmtpos++;
596
104
        }
597
104
        if (arg->ch == '*') {
598
0
            if (ctx->arglen < -1) {
599
0
                PyErr_Format(PyExc_ValueError,
600
0
                        "* cannot be used with a parenthesised mapping key "
601
0
                        "at position %zd",
602
0
                        arg->fmtstart);
603
0
                return -1;
604
0
            }
605
0
            v = unicode_format_getnextarg(ctx, 0);
606
0
            if (v == NULL)
607
0
                return -1;
608
0
            if (!PyLong_Check(v)) {
609
0
                FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
610
0
                return -1;
611
0
            }
612
0
            arg->prec = PyLong_AsInt(v);
613
0
            if (arg->prec == -1 && PyErr_Occurred()) {
614
0
                if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
615
0
                    FORMAT_ERROR(PyExc_OverflowError,
616
0
                                 "too big for precision%s", "");
617
0
                }
618
0
                return -1;
619
0
            }
620
0
            if (arg->prec < 0)
621
0
                arg->prec = 0;
622
0
            if (--ctx->fmtcnt >= 0) {
623
0
                arg->ch = FORMAT_READ(ctx);
624
0
                ctx->fmtpos++;
625
0
            }
626
0
        }
627
104
        else if (arg->ch >= '0' && arg->ch <= '9') {
628
104
            arg->prec = arg->ch - '0';
629
104
            while (--ctx->fmtcnt >= 0) {
630
104
                arg->ch = FORMAT_READ(ctx);
631
104
                ctx->fmtpos++;
632
104
                if (arg->ch < '0' || arg->ch > '9')
633
104
                    break;
634
0
                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
635
0
                    PyErr_Format(PyExc_ValueError,
636
0
                                 "precision too big at position %zd",
637
0
                                 arg->fmtstart);
638
0
                    return -1;
639
0
                }
640
0
                arg->prec = arg->prec*10 + (arg->ch - '0');
641
0
            }
642
104
        }
643
104
    }
644
645
    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
646
33.2M
    if (ctx->fmtcnt >= 0) {
647
33.2M
        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
648
0
            if (--ctx->fmtcnt >= 0) {
649
0
                arg->ch = FORMAT_READ(ctx);
650
0
                ctx->fmtpos++;
651
0
            }
652
0
        }
653
33.2M
    }
654
33.2M
    if (ctx->fmtcnt < 0) {
655
0
        PyErr_Format(PyExc_ValueError,
656
0
                     "stray %% at position %zd", arg->fmtstart);
657
0
        return -1;
658
0
    }
659
33.2M
    return 0;
660
661
33.2M
#undef FORMAT_READ
662
33.2M
}
663
664
665
/* Format one argument. Supported conversion specifiers:
666
667
   - "s", "r", "a": any type
668
   - "i", "d", "u": int or float
669
   - "o", "x", "X": int
670
   - "e", "E", "f", "F", "g", "G": float
671
   - "c": int or str (1 character)
672
673
   When possible, the output is written directly into the Unicode writer
674
   (ctx->writer). A string is created when padding is required.
675
676
   Return 0 if the argument has been formatted into *p_str,
677
          1 if the argument has been written into ctx->writer,
678
         -1 on error. */
679
static int
680
unicode_format_arg_format(struct unicode_formatter_t *ctx,
681
                          struct unicode_format_arg_t *arg,
682
                          PyObject **p_str)
683
33.2M
{
684
33.2M
    PyObject *v;
685
33.2M
    _PyUnicodeWriter *writer = &ctx->writer;
686
687
33.2M
    if (ctx->fmtcnt == 0)
688
13.1M
        ctx->writer.overallocate = 0;
689
690
33.2M
    v = unicode_format_getnextarg(ctx, 1);
691
33.2M
    if (v == NULL)
692
0
        return -1;
693
694
695
33.2M
    switch (arg->ch) {
696
19.9M
    case 's':
697
20.0M
    case 'r':
698
20.0M
    case 'a':
699
20.0M
        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
700
            /* Fast path */
701
166
            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
702
0
                return -1;
703
166
            return 1;
704
166
        }
705
706
20.0M
        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
707
14.3M
            *p_str = Py_NewRef(v);
708
14.3M
        }
709
5.61M
        else {
710
5.61M
            if (arg->ch == 's')
711
5.60M
                *p_str = PyObject_Str(v);
712
9.96k
            else if (arg->ch == 'r')
713
9.96k
                *p_str = PyObject_Repr(v);
714
0
            else
715
0
                *p_str = PyObject_ASCII(v);
716
5.61M
        }
717
20.0M
        break;
718
719
0
    case 'i':
720
7.58M
    case 'd':
721
7.58M
    case 'u':
722
7.66M
    case 'o':
723
7.66M
    case 'x':
724
13.2M
    case 'X':
725
13.2M
    {
726
13.2M
        int ret = mainformatlong(v, ctx, arg, p_str, writer);
727
13.2M
        if (ret != 0)
728
3.92M
            return ret;
729
9.35M
        arg->sign = 1;
730
9.35M
        break;
731
13.2M
    }
732
733
0
    case 'e':
734
0
    case 'E':
735
104
    case 'f':
736
104
    case 'F':
737
104
    case 'g':
738
104
    case 'G':
739
104
        if (arg->width == -1 && arg->prec == -1
740
0
            && !(arg->flags & (F_SIGN | F_BLANK)))
741
0
        {
742
            /* Fast path */
743
0
            if (formatfloat(v, ctx, arg, NULL, writer) == -1)
744
0
                return -1;
745
0
            return 1;
746
0
        }
747
748
104
        arg->sign = 1;
749
104
        if (formatfloat(v, ctx, arg, p_str, NULL) == -1)
750
0
            return -1;
751
104
        break;
752
753
104
    case 'c':
754
0
    {
755
0
        Py_UCS4 ch = formatchar(v, ctx, arg);
756
0
        if (ch == (Py_UCS4) -1)
757
0
            return -1;
758
0
        if (arg->width == -1 && arg->prec == -1) {
759
            /* Fast path */
760
0
            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
761
0
                return -1;
762
0
            return 1;
763
0
        }
764
0
        *p_str = PyUnicode_FromOrdinal(ch);
765
0
        break;
766
0
    }
767
768
0
    default:
769
0
        if (arg->ch < 128 && Py_ISALPHA(arg->ch)) {
770
0
            PyErr_Format(PyExc_ValueError,
771
0
                         "unsupported format %%%c at position %zd",
772
0
                         (int)arg->ch, arg->fmtstart);
773
0
        }
774
0
        else if (arg->ch == '\'') {
775
0
            PyErr_Format(PyExc_ValueError,
776
0
                         "stray %% at position %zd or unexpected "
777
0
                         "format character \"'\" at position %zd",
778
0
                         arg->fmtstart,
779
0
                         ctx->fmtpos - 1);
780
0
        }
781
0
        else if (arg->ch >= 32 && arg->ch < 127) {
782
0
            PyErr_Format(PyExc_ValueError,
783
0
                         "stray %% at position %zd or unexpected "
784
0
                         "format character '%c' at position %zd",
785
0
                         arg->fmtstart,
786
0
                         (int)arg->ch, ctx->fmtpos - 1);
787
0
        }
788
0
        else if (Py_UNICODE_ISPRINTABLE(arg->ch)) {
789
0
            PyErr_Format(PyExc_ValueError,
790
0
                         "stray %% at position %zd or unexpected "
791
0
                         "format character '%c' (U+%04X) at position %zd",
792
0
                         arg->fmtstart,
793
0
                         (int)arg->ch, (int)arg->ch, ctx->fmtpos - 1);
794
0
        }
795
0
        else {
796
0
            PyErr_Format(PyExc_ValueError,
797
0
                         "stray %% at position %zd or unexpected "
798
0
                         "format character U+%04X at position %zd",
799
0
                         arg->fmtstart, (int)arg->ch, ctx->fmtpos - 1);
800
0
        }
801
0
        return -1;
802
33.2M
    }
803
29.3M
    if (*p_str == NULL)
804
0
        return -1;
805
29.3M
    assert (PyUnicode_Check(*p_str));
806
29.3M
    return 0;
807
29.3M
}
808
809
810
static int
811
unicode_format_arg_output(struct unicode_formatter_t *ctx,
812
                          struct unicode_format_arg_t *arg,
813
                          PyObject *str)
814
29.3M
{
815
29.3M
    Py_ssize_t len;
816
29.3M
    int kind;
817
29.3M
    const void *pbuf;
818
29.3M
    Py_ssize_t pindex;
819
29.3M
    Py_UCS4 signchar;
820
29.3M
    Py_ssize_t buflen;
821
29.3M
    Py_UCS4 maxchar;
822
29.3M
    Py_ssize_t sublen;
823
29.3M
    _PyUnicodeWriter *writer = &ctx->writer;
824
29.3M
    Py_UCS4 fill;
825
826
29.3M
    fill = ' ';
827
29.3M
    if (arg->sign && arg->flags & F_ZERO)
828
83.9k
        fill = '0';
829
830
29.3M
    len = PyUnicode_GET_LENGTH(str);
831
29.3M
    if ((arg->width == -1 || arg->width <= len)
832
29.2M
        && (arg->prec == -1 || arg->prec >= len)
833
29.2M
        && !(arg->flags & (F_SIGN | F_BLANK)))
834
29.2M
    {
835
        /* Fast path */
836
29.2M
        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
837
0
            return -1;
838
29.2M
        return 0;
839
29.2M
    }
840
841
    /* Truncate the string for "s", "r" and "a" formats
842
       if the precision is set */
843
99.0k
    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
844
0
        if (arg->prec >= 0 && len > arg->prec)
845
0
            len = arg->prec;
846
0
    }
847
848
    /* Adjust sign and width */
849
99.0k
    kind = PyUnicode_KIND(str);
850
99.0k
    pbuf = PyUnicode_DATA(str);
851
99.0k
    pindex = 0;
852
99.0k
    signchar = '\0';
853
99.0k
    if (arg->sign) {
854
99.0k
        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
855
99.0k
        if (ch == '-' || ch == '+') {
856
0
            signchar = ch;
857
0
            len--;
858
0
            pindex++;
859
0
        }
860
99.0k
        else if (arg->flags & F_SIGN)
861
0
            signchar = '+';
862
99.0k
        else if (arg->flags & F_BLANK)
863
0
            signchar = ' ';
864
99.0k
        else
865
99.0k
            arg->sign = 0;
866
99.0k
    }
867
99.0k
    if (arg->width < len)
868
104
        arg->width = len;
869
870
    /* Prepare the writer */
871
99.0k
    maxchar = writer->maxchar;
872
99.0k
    if (!(arg->flags & F_LJUST)) {
873
99.0k
        if (arg->sign) {
874
0
            if ((arg->width-1) > len)
875
0
                maxchar = Py_MAX(maxchar, fill);
876
0
        }
877
99.0k
        else {
878
99.0k
            if (arg->width > len)
879
98.9k
                maxchar = Py_MAX(maxchar, fill);
880
99.0k
        }
881
99.0k
    }
882
99.0k
    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
883
81.7k
        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
884
81.7k
        maxchar = Py_MAX(maxchar, strmaxchar);
885
81.7k
    }
886
887
99.0k
    buflen = arg->width;
888
99.0k
    if (arg->sign && len == arg->width)
889
0
        buflen++;
890
99.0k
    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
891
0
        return -1;
892
893
    /* Write the sign if needed */
894
99.0k
    if (arg->sign) {
895
0
        if (fill != ' ') {
896
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
897
0
            writer->pos += 1;
898
0
        }
899
0
        if (arg->width > len)
900
0
            arg->width--;
901
0
    }
902
903
    /* Write the numeric prefix for "x", "X" and "o" formats
904
       if the alternate form is used.
905
       For example, write "0x" for the "%#x" format. */
906
99.0k
    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
907
0
        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
908
0
        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
909
0
        if (fill != ' ') {
910
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
911
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
912
0
            writer->pos += 2;
913
0
            pindex += 2;
914
0
        }
915
0
        arg->width -= 2;
916
0
        if (arg->width < 0)
917
0
            arg->width = 0;
918
0
        len -= 2;
919
0
    }
920
921
    /* Pad left with the fill character if needed */
922
99.0k
    if (arg->width > len && !(arg->flags & F_LJUST)) {
923
98.9k
        sublen = arg->width - len;
924
98.9k
        _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
925
98.9k
        writer->pos += sublen;
926
98.9k
        arg->width = len;
927
98.9k
    }
928
929
    /* If padding with spaces: write sign if needed and/or numeric prefix if
930
       the alternate form is used */
931
99.0k
    if (fill == ' ') {
932
17.1k
        if (arg->sign) {
933
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
934
0
            writer->pos += 1;
935
0
        }
936
17.1k
        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
937
0
            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
938
0
            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
939
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
940
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
941
0
            writer->pos += 2;
942
0
            pindex += 2;
943
0
        }
944
17.1k
    }
945
946
    /* Write characters */
947
99.0k
    if (len) {
948
99.0k
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
949
99.0k
                                      str, pindex, len);
950
99.0k
        writer->pos += len;
951
99.0k
    }
952
953
    /* Pad right with the fill character if needed */
954
99.0k
    if (arg->width > len) {
955
0
        sublen = arg->width - len;
956
0
        _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
957
0
        writer->pos += sublen;
958
0
    }
959
99.0k
    return 0;
960
99.0k
}
961
962
963
/* Helper of PyUnicode_Format(): format one arg.
964
   Return 0 on success, raise an exception and return -1 on error. */
965
static int
966
unicode_format_arg(struct unicode_formatter_t *ctx)
967
33.2M
{
968
33.2M
    struct unicode_format_arg_t arg;
969
33.2M
    PyObject *str;
970
33.2M
    int ret;
971
972
33.2M
    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
973
33.2M
    if (arg.ch == '%') {
974
0
        ctx->fmtpos++;
975
0
        ctx->fmtcnt--;
976
0
        if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0)
977
0
            return -1;
978
0
        return 0;
979
0
    }
980
33.2M
    arg.flags = 0;
981
33.2M
    arg.width = -1;
982
33.2M
    arg.prec = -1;
983
33.2M
    arg.sign = 0;
984
33.2M
    arg.fmtstart = ctx->fmtpos - 1;
985
33.2M
    arg.key = NULL;
986
33.2M
    str = NULL;
987
988
33.2M
    ret = unicode_format_arg_parse(ctx, &arg);
989
33.2M
    if (ret == -1) {
990
0
        goto onError;
991
0
    }
992
993
33.2M
    ret = unicode_format_arg_format(ctx, &arg, &str);
994
33.2M
    if (ret == -1) {
995
1.20M
        goto onError;
996
1.20M
    }
997
998
32.0M
    if (ret != 1) {
999
29.3M
        ret = unicode_format_arg_output(ctx, &arg, str);
1000
29.3M
        Py_DECREF(str);
1001
29.3M
        if (ret == -1) {
1002
0
            goto onError;
1003
0
        }
1004
29.3M
    }
1005
1006
32.0M
    if (ctx->dict && (ctx->argidx < ctx->arglen)) {
1007
        // XXX: Never happens?
1008
0
        PyErr_SetString(PyExc_TypeError,
1009
0
                        "not all arguments converted during string formatting");
1010
0
        goto onError;
1011
0
    }
1012
32.0M
    Py_XDECREF(arg.key);
1013
32.0M
    return 0;
1014
1015
1.20M
  onError:
1016
1.20M
    Py_XDECREF(arg.key);
1017
1.20M
    return -1;
1018
32.0M
}
1019
1020
1021
/* Build a string from `format` and `args`: runs of characters other than
   '%' are copied to the output, and each '%' specifier is handled by
   unicode_format_arg().

   `args` is treated as a positional tuple when it is a tuple, and as a
   mapping when PyMapping_Check() accepts it and it is neither a tuple nor a
   str; anything else is a single argument.

   Return the result of _PyUnicodeWriter_Finish() on success, NULL on
   failure. */
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{
    struct unicode_formatter_t ctx;
    int args_is_tuple;

    if (format == NULL || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (ensure_unicode(format) < 0) {
        return NULL;
    }

    ctx.fmtstr = format;
    ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
    ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
    ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
    ctx.fmtpos = 0;

    _PyUnicodeWriter_Init(&ctx.writer);
    ctx.writer.min_length = ctx.fmtcnt + 100;
    ctx.writer.overallocate = 1;

    args_is_tuple = PyTuple_Check(args);
    if (args_is_tuple) {
        ctx.arglen = PyTuple_Size(args);
        ctx.argidx = 0;
    }
    else {
        /* Not a tuple: argidx (-2) < arglen (-1) holds for as long as
           argidx keeps this initial value, which the final check below
           relies on. */
        ctx.arglen = -1;
        ctx.argidx = -2;
    }
    ctx.args_owned = 0;
    ctx.args = args;
    if (PyMapping_Check(args) && !args_is_tuple && !PyUnicode_Check(args)) {
        ctx.dict = args;
    }
    else {
        ctx.dict = NULL;
    }

    /* ctx.fmtcnt counts the characters that remain after the one at
       ctx.fmtpos. */
    while (--ctx.fmtcnt >= 0) {
        Py_ssize_t literal_start;

        if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) == '%') {
            /* Step over the '%' and let the helper consume the rest. */
            ctx.fmtpos++;
            if (unicode_format_arg(&ctx) == -1) {
                goto fail;
            }
            continue;
        }

        /* Literal run: scan forward to the next '%' or past the end. */
        literal_start = ctx.fmtpos++;
        while (ctx.fmtcnt >= 0 &&
               PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
            ctx.fmtpos++;
            ctx.fmtcnt--;
        }
        if (ctx.fmtcnt < 0) {
            /* The scan stepped one position too far; this is the last
               chunk, so overallocation is switched off. */
            ctx.fmtpos--;
            ctx.writer.overallocate = 0;
        }

        if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
                                            literal_start, ctx.fmtpos) < 0) {
            goto fail;
        }
    }

    /* Without a mapping, arguments left unconsumed are an error. */
    if (!ctx.dict && ctx.argidx < ctx.arglen) {
        PyErr_Format(PyExc_TypeError,
                     "not all arguments converted during string formatting "
                     "(required %zd, got %zd)",
                     ctx.arglen < 0 ? 0 : ctx.argidx,
                     ctx.arglen < 0 ? 1 : ctx.arglen);
        goto fail;
    }

    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return _PyUnicodeWriter_Finish(&ctx.writer);

  fail:
    _PyUnicodeWriter_Dealloc(&ctx.writer);
    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return NULL;
}