Coverage Report

Created: 2026-05-16 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/unicode_format.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
// PyUnicode_Format() implementation
42
43
#include "Python.h"
44
#include "pycore_abstract.h"      // _PyIndex_Check()
45
#include "pycore_format.h"        // F_ALT
46
#include "pycore_long.h"          // _PyLong_FormatWriter()
47
#include "pycore_object.h"        // _PyObject_IsUniquelyReferenced()
48
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
49
50
51
0
// Upper bound that formatchar() accepts for an integer "%c" argument.
#define MAX_UNICODE _Py_MAX_UNICODE
52
12.6M
// Local shorthand for _PyUnicode_EnsureUnicode.
#define ensure_unicode _PyUnicode_EnsureUnicode
53
54
struct unicode_formatter_t {
55
    PyObject *args;
56
    int args_owned;
57
    Py_ssize_t arglen, argidx;
58
    PyObject *dict;
59
60
    int fmtkind;
61
    Py_ssize_t fmtcnt, fmtpos;
62
    const void *fmtdata;
63
    PyObject *fmtstr;
64
65
    _PyUnicodeWriter writer;
66
};
67
68
69
struct unicode_format_arg_t {
70
    Py_UCS4 ch;
71
    int flags;
72
    Py_ssize_t width;
73
    int prec;
74
    int sign;
75
    Py_ssize_t fmtstart;
76
    PyObject *key;
77
};
78
79
80
// Raise EXC with a message that identifies the offending format argument:
// by mapping key when arg->key is set, else by index when ctx->argidx is
// non-negative, else with no identifier at all.
// `ctx` and `arg` must be in scope where the macro is expanded.
// At least one variadic argument is required; when the message takes no
// arguments, use FORMAT_ERROR(exc, "...%s", "").
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
    if (arg->key == NULL && ctx->argidx < 0) {                              \
        PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);          \
    }                                                                       \
    else if (arg->key == NULL) {                                            \
        PyErr_Format((EXC), "format argument %zd: " FMT,                    \
                     ctx->argidx, __VA_ARGS__);                             \
    }                                                                       \
    else {                                                                  \
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
                     arg->key, __VA_ARGS__);                                \
    }                                                                       \
} while (0)
94
95
96
static PyObject *
97
unicode_format_getnextarg(struct unicode_formatter_t *ctx, int allowone)
98
23.7M
{
99
23.7M
    Py_ssize_t argidx = ctx->argidx;
100
101
23.7M
    if (argidx < ctx->arglen && (allowone || ctx->arglen >= 0)) {
102
23.7M
        ctx->argidx++;
103
23.7M
        if (ctx->arglen >= 0) {
104
18.4M
            return PyTuple_GetItem(ctx->args, argidx);
105
18.4M
        }
106
5.28M
        else if (allowone) {
107
5.28M
            return ctx->args;
108
5.28M
        }
109
23.7M
    }
110
0
    PyErr_Format(PyExc_TypeError,
111
0
                 "not enough arguments for format string (got %zd)",
112
0
                 ctx->arglen < 0 ? 1 : ctx->arglen);
113
0
    return NULL;
114
23.7M
}
115
116
117
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
118
119
/* Format a float into the writer if the writer is not NULL, or into *p_output
120
   otherwise.
121
122
   Return 0 on success, raise an exception and return -1 on error. */
123
static int
124
formatfloat(PyObject *v,
125
            struct unicode_formatter_t *ctx,
126
            struct unicode_format_arg_t *arg,
127
            PyObject **p_output,
128
            _PyUnicodeWriter *writer)
129
108
{
130
108
    char *p;
131
108
    double x;
132
108
    Py_ssize_t len;
133
108
    int prec;
134
108
    int dtoa_flags = 0;
135
136
108
    x = PyFloat_AsDouble(v);
137
108
    if (x == -1.0 && PyErr_Occurred()) {
138
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
139
0
            FORMAT_ERROR(PyExc_TypeError,
140
0
                         "%%%c requires a real number, not %T",
141
0
                         arg->ch, v);
142
0
        }
143
0
        return -1;
144
0
    }
145
146
108
    prec = arg->prec;
147
108
    if (prec < 0)
148
0
        prec = 6;
149
150
108
    if (arg->flags & F_ALT)
151
0
        dtoa_flags |= Py_DTSF_ALT;
152
108
    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
153
108
    if (p == NULL)
154
0
        return -1;
155
108
    len = strlen(p);
156
108
    if (writer) {
157
0
        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
158
0
            PyMem_Free(p);
159
0
            return -1;
160
0
        }
161
0
    }
162
108
    else
163
108
        *p_output = _PyUnicode_FromASCII(p, len);
164
108
    PyMem_Free(p);
165
108
    return 0;
166
108
}
167
168
169
/* formatlong() emulates the format codes d, u, o, x and X, and
170
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
171
 * Python's regular ints.
172
 * Return value:  a new PyUnicodeObject*, or NULL if error.
173
 *     The output string is of the form
174
 *         "-"? ("0x" | "0X")? digit+
175
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
176
 *         set in flags.  The case of hex digits will be correct,
177
 *     There will be at least prec digits, zero-filled on the left if
178
 *         necessary to get that many.
179
 * val          object to be converted
180
 * flags        bitmask of format flags; only F_ALT is looked at
181
 * prec         minimum number of digits; 0-fill on left if needed
182
 * type         a character in [duoxX]; u acts the same as d
183
 *
184
 * CAUTION:  o, x and X conversions on regular ints can never
185
 * produce a '-' sign, but can for Python's unbounded ints.
186
 */
187
PyObject *
188
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
189
5.11M
{
190
5.11M
    PyObject *result = NULL;
191
5.11M
    char *buf;
192
5.11M
    Py_ssize_t i;
193
5.11M
    int sign;           /* 1 if '-', else 0 */
194
5.11M
    int len;            /* number of characters */
195
5.11M
    Py_ssize_t llen;
196
5.11M
    int numdigits;      /* len == numnondigits + numdigits */
197
5.11M
    int numnondigits = 0;
198
199
    /* Avoid exceeding SSIZE_T_MAX */
200
5.11M
    if (prec > INT_MAX-3) {
201
0
        PyErr_SetString(PyExc_OverflowError,
202
0
                        "precision too large");
203
0
        return NULL;
204
0
    }
205
206
5.11M
    assert(PyLong_Check(val));
207
208
5.11M
    switch (type) {
209
0
    default:
210
0
        Py_UNREACHABLE();
211
30
    case 'd':
212
30
    case 'i':
213
30
    case 'u':
214
        /* int and int subclasses should print numerically when a numeric */
215
        /* format code is used (see issue18780) */
216
30
        result = PyNumber_ToBase(val, 10);
217
30
        break;
218
78.3k
    case 'o':
219
78.3k
        numnondigits = 2;
220
78.3k
        result = PyNumber_ToBase(val, 8);
221
78.3k
        break;
222
87
    case 'x':
223
5.03M
    case 'X':
224
5.03M
        numnondigits = 2;
225
5.03M
        result = PyNumber_ToBase(val, 16);
226
5.03M
        break;
227
5.11M
    }
228
5.11M
    if (!result)
229
0
        return NULL;
230
231
5.11M
    assert(_PyUnicode_IsModifiable(result));
232
5.11M
    assert(PyUnicode_IS_ASCII(result));
233
234
    /* To modify the string in-place, there can only be one reference. */
235
5.11M
    if (!_PyObject_IsUniquelyReferenced(result)) {
236
0
        Py_DECREF(result);
237
0
        PyErr_BadInternalCall();
238
0
        return NULL;
239
0
    }
240
5.11M
    buf = PyUnicode_DATA(result);
241
5.11M
    llen = PyUnicode_GET_LENGTH(result);
242
5.11M
    if (llen > INT_MAX) {
243
0
        Py_DECREF(result);
244
0
        PyErr_SetString(PyExc_ValueError,
245
0
                        "string too large in _PyUnicode_FormatLong");
246
0
        return NULL;
247
0
    }
248
5.11M
    len = (int)llen;
249
5.11M
    sign = buf[0] == '-';
250
5.11M
    numnondigits += sign;
251
5.11M
    numdigits = len - numnondigits;
252
5.11M
    assert(numdigits > 0);
253
254
    /* Get rid of base marker unless F_ALT */
255
5.11M
    if (((alt) == 0 &&
256
5.11M
        (type == 'o' || type == 'x' || type == 'X'))) {
257
5.11M
        assert(buf[sign] == '0');
258
5.11M
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
259
5.11M
               buf[sign+1] == 'o');
260
5.11M
        numnondigits -= 2;
261
5.11M
        buf += 2;
262
5.11M
        len -= 2;
263
5.11M
        if (sign)
264
0
            buf[0] = '-';
265
5.11M
        assert(len == numnondigits + numdigits);
266
5.11M
        assert(numdigits > 0);
267
5.11M
    }
268
269
    /* Fill with leading zeroes to meet minimum width. */
270
5.11M
    if (prec > numdigits) {
271
0
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
272
0
                                numnondigits + prec);
273
0
        char *b1;
274
0
        if (!r1) {
275
0
            Py_DECREF(result);
276
0
            return NULL;
277
0
        }
278
0
        b1 = PyBytes_AS_STRING(r1);
279
0
        for (i = 0; i < numnondigits; ++i)
280
0
            *b1++ = *buf++;
281
0
        for (i = 0; i < prec - numdigits; i++)
282
0
            *b1++ = '0';
283
0
        for (i = 0; i < numdigits; i++)
284
0
            *b1++ = *buf++;
285
0
        *b1 = '\0';
286
0
        Py_SETREF(result, r1);
287
0
        buf = PyBytes_AS_STRING(result);
288
0
        len = numnondigits + prec;
289
0
    }
290
291
    /* Fix up case for hex conversions. */
292
5.11M
    if (type == 'X') {
293
        /* Need to convert all lower case letters to upper case.
294
           and need to convert 0x to 0X (and -0x to -0X). */
295
35.1M
        for (i = 0; i < len; i++)
296
30.1M
            if (buf[i] >= 'a' && buf[i] <= 'x')
297
6.41M
                buf[i] -= 'a'-'A';
298
5.03M
    }
299
5.11M
    if (!PyUnicode_Check(result)
300
5.11M
        || buf != PyUnicode_DATA(result)) {
301
5.11M
        PyObject *unicode;
302
5.11M
        unicode = _PyUnicode_FromASCII(buf, len);
303
5.11M
        Py_SETREF(result, unicode);
304
5.11M
    }
305
30
    else if (len != PyUnicode_GET_LENGTH(result)) {
306
0
        if (PyUnicode_Resize(&result, len) < 0)
307
0
            Py_CLEAR(result);
308
0
    }
309
5.11M
    return result;
310
5.11M
}
311
312
313
/* Format an integer or a float as an integer.
314
 * Return 1 if the number has been formatted into the writer,
315
 *        0 if the number has been formatted into *p_output
316
 *       -1 and raise an exception on error */
317
static int
318
mainformatlong(PyObject *v,
319
               struct unicode_formatter_t *ctx,
320
               struct unicode_format_arg_t *arg,
321
               PyObject **p_output,
322
               _PyUnicodeWriter *writer)
323
8.71M
{
324
8.71M
    PyObject *iobj, *res;
325
8.71M
    char type = (char)arg->ch;
326
327
8.71M
    if (!PyNumber_Check(v))
328
1.06M
        goto wrongtype;
329
330
    /* make sure number is a type of integer for o, x, and X */
331
7.64M
    if (!PyLong_Check(v)) {
332
0
        if (type == 'o' || type == 'x' || type == 'X') {
333
0
            iobj = _PyNumber_Index(v);
334
0
        }
335
0
        else {
336
0
            iobj = PyNumber_Long(v);
337
0
        }
338
0
        if (iobj == NULL ) {
339
0
            if (PyErr_ExceptionMatches(PyExc_TypeError))
340
0
                goto wrongtype;
341
0
            return -1;
342
0
        }
343
0
        assert(PyLong_Check(iobj));
344
0
    }
345
7.64M
    else {
346
7.64M
        iobj = Py_NewRef(v);
347
7.64M
    }
348
349
7.64M
    if (PyLong_CheckExact(v)
350
7.64M
        && arg->width == -1 && arg->prec == -1
351
7.56M
        && !(arg->flags & (F_SIGN | F_BLANK))
352
7.56M
        && type != 'X')
353
2.52M
    {
354
        /* Fast path */
355
2.52M
        int alternate = arg->flags & F_ALT;
356
2.52M
        int base;
357
358
2.52M
        switch(type)
359
2.52M
        {
360
0
            default:
361
0
                Py_UNREACHABLE();
362
2.52M
            case 'd':
363
2.52M
            case 'i':
364
2.52M
            case 'u':
365
2.52M
                base = 10;
366
2.52M
                break;
367
0
            case 'o':
368
0
                base = 8;
369
0
                break;
370
51
            case 'x':
371
51
            case 'X':
372
51
                base = 16;
373
51
                break;
374
2.52M
        }
375
376
2.52M
        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
377
0
            Py_DECREF(iobj);
378
0
            return -1;
379
0
        }
380
2.52M
        Py_DECREF(iobj);
381
2.52M
        return 1;
382
2.52M
    }
383
384
5.11M
    res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
385
5.11M
    Py_DECREF(iobj);
386
5.11M
    if (res == NULL)
387
0
        return -1;
388
5.11M
    *p_output = res;
389
5.11M
    return 0;
390
391
1.06M
wrongtype:
392
1.06M
    switch(type)
393
1.06M
    {
394
0
        case 'o':
395
0
        case 'x':
396
0
        case 'X':
397
0
            FORMAT_ERROR(PyExc_TypeError,
398
0
                         "%%%c requires an integer, not %T",
399
0
                         arg->ch, v);
400
0
            break;
401
1.06M
        default:
402
1.06M
            FORMAT_ERROR(PyExc_TypeError,
403
1.06M
                         "%%%c requires a real number, not %T",
404
1.06M
                         arg->ch, v);
405
1.06M
            break;
406
1.06M
    }
407
1.06M
    return -1;
408
1.06M
}
409
410
411
static Py_UCS4
412
formatchar(PyObject *v,
413
           struct unicode_formatter_t *ctx,
414
           struct unicode_format_arg_t *arg)
415
0
{
416
    /* presume that the buffer is at least 3 characters long */
417
0
    if (PyUnicode_Check(v)) {
418
0
        if (PyUnicode_GET_LENGTH(v) == 1) {
419
0
            return PyUnicode_READ_CHAR(v, 0);
420
0
        }
421
0
        FORMAT_ERROR(PyExc_TypeError,
422
0
                     "%%c requires an integer or a unicode character, "
423
0
                     "not a string of length %zd",
424
0
                     PyUnicode_GET_LENGTH(v));
425
0
        return (Py_UCS4) -1;
426
0
    }
427
0
    else {
428
0
        int overflow;
429
0
        long x = PyLong_AsLongAndOverflow(v, &overflow);
430
0
        if (x == -1 && PyErr_Occurred()) {
431
0
            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
432
0
                FORMAT_ERROR(PyExc_TypeError,
433
0
                             "%%c requires an integer or a unicode character, "
434
0
                             "not %T",
435
0
                             v);
436
0
            }
437
0
            return (Py_UCS4) -1;
438
0
        }
439
440
0
        if (x < 0 || x > MAX_UNICODE) {
441
            /* this includes an overflow in converting to C long */
442
0
            FORMAT_ERROR(PyExc_OverflowError,
443
0
                         "%%c argument not in range(0x110000)%s", "");
444
0
            return (Py_UCS4) -1;
445
0
        }
446
447
0
        return (Py_UCS4) x;
448
0
    }
449
0
}
450
451
452
/* Parse options of an argument: flags, width, precision.
453
   Handle also "%(name)" syntax.
454
455
   Return 0 if the argument has been formatted into arg->str.
456
   Return 1 if the argument has been written into ctx->writer,
457
   Raise an exception and return -1 on error. */
458
static int
459
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
460
                         struct unicode_format_arg_t *arg)
461
23.6M
{
462
23.6M
#define FORMAT_READ(ctx) \
463
24.2M
        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
464
465
23.6M
    PyObject *v;
466
467
23.6M
    if (arg->ch == '(') {
468
        /* Get argument value from a dictionary. Example: "%(name)s". */
469
52.8k
        Py_ssize_t keystart;
470
52.8k
        Py_ssize_t keylen;
471
52.8k
        int pcount = 1;
472
473
52.8k
        if (ctx->dict == NULL) {
474
0
            PyErr_Format(PyExc_TypeError,
475
0
                         "format requires a mapping, not %T",
476
0
                         ctx->args);
477
0
            return -1;
478
0
        }
479
52.8k
        ++ctx->fmtpos;
480
52.8k
        --ctx->fmtcnt;
481
52.8k
        keystart = ctx->fmtpos;
482
        /* Skip over balanced parentheses */
483
475k
        while (pcount > 0 && --ctx->fmtcnt >= 0) {
484
422k
            arg->ch = FORMAT_READ(ctx);
485
422k
            if (arg->ch == ')')
486
52.8k
                --pcount;
487
369k
            else if (arg->ch == '(')
488
0
                ++pcount;
489
422k
            ctx->fmtpos++;
490
422k
        }
491
52.8k
        keylen = ctx->fmtpos - keystart - 1;
492
52.8k
        if (ctx->fmtcnt < 0 || pcount > 0) {
493
0
            PyErr_Format(PyExc_ValueError,
494
0
                         "stray %% or incomplete format key at position %zd",
495
0
                         arg->fmtstart);
496
0
            return -1;
497
0
        }
498
52.8k
        arg->key = PyUnicode_Substring(ctx->fmtstr,
499
52.8k
                                       keystart, keystart + keylen);
500
52.8k
        if (arg->key == NULL)
501
0
            return -1;
502
52.8k
        if (ctx->args_owned) {
503
37.7k
            ctx->args_owned = 0;
504
37.7k
            Py_DECREF(ctx->args);
505
37.7k
        }
506
52.8k
        ctx->args = PyObject_GetItem(ctx->dict, arg->key);
507
52.8k
        if (ctx->args == NULL)
508
0
            return -1;
509
52.8k
        ctx->args_owned = 1;
510
52.8k
        ctx->arglen = -3;
511
52.8k
        ctx->argidx = -4;
512
52.8k
    }
513
23.6M
    else {
514
23.6M
        if (ctx->arglen < -1) {
515
0
            PyErr_Format(PyExc_ValueError,
516
0
                         "format requires a parenthesised mapping key "
517
0
                         "at position %zd",
518
0
                         arg->fmtstart);
519
0
            return -1;
520
0
        }
521
23.6M
    }
522
523
    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
524
23.7M
    while (--ctx->fmtcnt >= 0) {
525
23.7M
        arg->ch = FORMAT_READ(ctx);
526
23.7M
        ctx->fmtpos++;
527
23.7M
        switch (arg->ch) {
528
0
        case '-': arg->flags |= F_LJUST; continue;
529
0
        case '+': arg->flags |= F_SIGN; continue;
530
0
        case ' ': arg->flags |= F_BLANK; continue;
531
51
        case '#': arg->flags |= F_ALT; continue;
532
80.4k
        case '0': arg->flags |= F_ZERO; continue;
533
23.7M
        }
534
23.6M
        break;
535
23.7M
    }
536
537
    /* Parse width. Example: "%10s" => width=10 */
538
23.6M
    if (arg->ch == '*') {
539
64.9k
        if (ctx->arglen < -1) {
540
0
            PyErr_Format(PyExc_ValueError,
541
0
                    "* cannot be used with a parenthesised mapping key "
542
0
                    "at position %zd",
543
0
                    arg->fmtstart);
544
0
            return -1;
545
0
        }
546
64.9k
        v = unicode_format_getnextarg(ctx, 0);
547
64.9k
        if (v == NULL)
548
0
            return -1;
549
64.9k
        if (!PyLong_Check(v)) {
550
0
            FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
551
0
            return -1;
552
0
        }
553
64.9k
        arg->width = PyLong_AsSsize_t(v);
554
64.9k
        if (arg->width == -1 && PyErr_Occurred()) {
555
0
            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
556
0
                FORMAT_ERROR(PyExc_OverflowError,
557
0
                             "too big for width%s", "");
558
0
            }
559
0
            return -1;
560
0
        }
561
64.9k
        if (arg->width < 0) {
562
0
            arg->flags |= F_LJUST;
563
0
            arg->width = -arg->width;
564
0
        }
565
64.9k
        if (--ctx->fmtcnt >= 0) {
566
64.9k
            arg->ch = FORMAT_READ(ctx);
567
64.9k
            ctx->fmtpos++;
568
64.9k
        }
569
64.9k
    }
570
23.6M
    else if (arg->ch >= '0' && arg->ch <= '9') {
571
15.5k
        arg->width = arg->ch - '0';
572
15.5k
        while (--ctx->fmtcnt >= 0) {
573
15.5k
            arg->ch = FORMAT_READ(ctx);
574
15.5k
            ctx->fmtpos++;
575
15.5k
            if (arg->ch < '0' || arg->ch > '9')
576
15.5k
                break;
577
            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
578
               mixing signed and unsigned comparison. Since arg->ch is between
579
               '0' and '9', casting to int is safe. */
580
0
            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
581
0
                PyErr_Format(PyExc_ValueError,
582
0
                             "width too big at position %zd",
583
0
                             arg->fmtstart);
584
0
                return -1;
585
0
            }
586
0
            arg->width = arg->width*10 + (arg->ch - '0');
587
0
        }
588
15.5k
    }
589
590
    /* Parse precision. Example: "%.3f" => prec=3 */
591
23.6M
    if (arg->ch == '.') {
592
108
        arg->prec = 0;
593
108
        if (--ctx->fmtcnt >= 0) {
594
108
            arg->ch = FORMAT_READ(ctx);
595
108
            ctx->fmtpos++;
596
108
        }
597
108
        if (arg->ch == '*') {
598
0
            if (ctx->arglen < -1) {
599
0
                PyErr_Format(PyExc_ValueError,
600
0
                        "* cannot be used with a parenthesised mapping key "
601
0
                        "at position %zd",
602
0
                        arg->fmtstart);
603
0
                return -1;
604
0
            }
605
0
            v = unicode_format_getnextarg(ctx, 0);
606
0
            if (v == NULL)
607
0
                return -1;
608
0
            if (!PyLong_Check(v)) {
609
0
                FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
610
0
                return -1;
611
0
            }
612
0
            arg->prec = PyLong_AsInt(v);
613
0
            if (arg->prec == -1 && PyErr_Occurred()) {
614
0
                if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
615
0
                    FORMAT_ERROR(PyExc_OverflowError,
616
0
                                 "too big for precision%s", "");
617
0
                }
618
0
                return -1;
619
0
            }
620
0
            if (arg->prec < 0)
621
0
                arg->prec = 0;
622
0
            if (--ctx->fmtcnt >= 0) {
623
0
                arg->ch = FORMAT_READ(ctx);
624
0
                ctx->fmtpos++;
625
0
            }
626
0
        }
627
108
        else if (arg->ch >= '0' && arg->ch <= '9') {
628
108
            arg->prec = arg->ch - '0';
629
108
            while (--ctx->fmtcnt >= 0) {
630
108
                arg->ch = FORMAT_READ(ctx);
631
108
                ctx->fmtpos++;
632
108
                if (arg->ch < '0' || arg->ch > '9')
633
108
                    break;
634
0
                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
635
0
                    PyErr_Format(PyExc_ValueError,
636
0
                                 "precision too big at position %zd",
637
0
                                 arg->fmtstart);
638
0
                    return -1;
639
0
                }
640
0
                arg->prec = arg->prec*10 + (arg->ch - '0');
641
0
            }
642
108
        }
643
108
    }
644
645
    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
646
23.6M
    if (ctx->fmtcnt >= 0) {
647
23.6M
        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
648
0
            if (--ctx->fmtcnt >= 0) {
649
0
                arg->ch = FORMAT_READ(ctx);
650
0
                ctx->fmtpos++;
651
0
            }
652
0
        }
653
23.6M
    }
654
23.6M
    if (ctx->fmtcnt < 0) {
655
0
        PyErr_Format(PyExc_ValueError,
656
0
                     "stray %% at position %zd", arg->fmtstart);
657
0
        return -1;
658
0
    }
659
23.6M
    return 0;
660
661
23.6M
#undef FORMAT_READ
662
23.6M
}
663
664
665
/* Format one argument. Supported conversion specifiers:
666
667
   - "s", "r", "a": any type
668
   - "i", "d", "u": int or float
669
   - "o", "x", "X": int
670
   - "e", "E", "f", "F", "g", "G": float
671
   - "c": int or str (1 character)
672
673
   When possible, the output is written directly into the Unicode writer
674
   (ctx->writer). A string is created when padding is required.
675
676
   Return 0 if the argument has been formatted into *p_str,
677
          1 if the argument has been written into ctx->writer,
678
         -1 on error. */
679
static int
680
unicode_format_arg_format(struct unicode_formatter_t *ctx,
681
                          struct unicode_format_arg_t *arg,
682
                          PyObject **p_str)
683
23.6M
{
684
23.6M
    PyObject *v;
685
23.6M
    _PyUnicodeWriter *writer = &ctx->writer;
686
687
23.6M
    if (ctx->fmtcnt == 0)
688
8.61M
        ctx->writer.overallocate = 0;
689
690
23.6M
    v = unicode_format_getnextarg(ctx, 1);
691
23.6M
    if (v == NULL)
692
0
        return -1;
693
694
695
23.6M
    switch (arg->ch) {
696
14.9M
    case 's':
697
14.9M
    case 'r':
698
14.9M
    case 'a':
699
14.9M
        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
700
            /* Fast path */
701
178
            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
702
0
                return -1;
703
178
            return 1;
704
178
        }
705
706
14.9M
        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
707
9.92M
            *p_str = Py_NewRef(v);
708
9.92M
        }
709
5.04M
        else {
710
5.04M
            if (arg->ch == 's')
711
5.03M
                *p_str = PyObject_Str(v);
712
10.6k
            else if (arg->ch == 'r')
713
10.6k
                *p_str = PyObject_Repr(v);
714
0
            else
715
0
                *p_str = PyObject_ASCII(v);
716
5.04M
        }
717
14.9M
        break;
718
719
0
    case 'i':
720
3.59M
    case 'd':
721
3.59M
    case 'u':
722
3.67M
    case 'o':
723
3.67M
    case 'x':
724
8.71M
    case 'X':
725
8.71M
    {
726
8.71M
        int ret = mainformatlong(v, ctx, arg, p_str, writer);
727
8.71M
        if (ret != 0)
728
3.59M
            return ret;
729
5.11M
        arg->sign = 1;
730
5.11M
        break;
731
8.71M
    }
732
733
0
    case 'e':
734
0
    case 'E':
735
108
    case 'f':
736
108
    case 'F':
737
108
    case 'g':
738
108
    case 'G':
739
108
        if (arg->width == -1 && arg->prec == -1
740
0
            && !(arg->flags & (F_SIGN | F_BLANK)))
741
0
        {
742
            /* Fast path */
743
0
            if (formatfloat(v, ctx, arg, NULL, writer) == -1)
744
0
                return -1;
745
0
            return 1;
746
0
        }
747
748
108
        arg->sign = 1;
749
108
        if (formatfloat(v, ctx, arg, p_str, NULL) == -1)
750
0
            return -1;
751
108
        break;
752
753
108
    case 'c':
754
0
    {
755
0
        Py_UCS4 ch = formatchar(v, ctx, arg);
756
0
        if (ch == (Py_UCS4) -1)
757
0
            return -1;
758
0
        if (arg->width == -1 && arg->prec == -1) {
759
            /* Fast path */
760
0
            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
761
0
                return -1;
762
0
            return 1;
763
0
        }
764
0
        *p_str = PyUnicode_FromOrdinal(ch);
765
0
        break;
766
0
    }
767
768
0
    default:
769
0
        if (arg->ch < 128 && Py_ISALPHA(arg->ch)) {
770
0
            PyErr_Format(PyExc_ValueError,
771
0
                         "unsupported format %%%c at position %zd",
772
0
                         (int)arg->ch, arg->fmtstart);
773
0
        }
774
0
        else if (arg->ch == '\'') {
775
0
            PyErr_Format(PyExc_ValueError,
776
0
                         "stray %% at position %zd or unexpected "
777
0
                         "format character \"'\" at position %zd",
778
0
                         arg->fmtstart,
779
0
                         ctx->fmtpos - 1);
780
0
        }
781
0
        else if (arg->ch >= 32 && arg->ch < 127) {
782
0
            PyErr_Format(PyExc_ValueError,
783
0
                         "stray %% at position %zd or unexpected "
784
0
                         "format character '%c' at position %zd",
785
0
                         arg->fmtstart,
786
0
                         (int)arg->ch, ctx->fmtpos - 1);
787
0
        }
788
0
        else if (Py_UNICODE_ISPRINTABLE(arg->ch)) {
789
0
            PyErr_Format(PyExc_ValueError,
790
0
                         "stray %% at position %zd or unexpected "
791
0
                         "format character '%c' (U+%04X) at position %zd",
792
0
                         arg->fmtstart,
793
0
                         (int)arg->ch, (int)arg->ch, ctx->fmtpos - 1);
794
0
        }
795
0
        else {
796
0
            PyErr_Format(PyExc_ValueError,
797
0
                         "stray %% at position %zd or unexpected "
798
0
                         "format character U+%04X at position %zd",
799
0
                         arg->fmtstart, (int)arg->ch, ctx->fmtpos - 1);
800
0
        }
801
0
        return -1;
802
23.6M
    }
803
20.0M
    if (*p_str == NULL)
804
0
        return -1;
805
20.0M
    assert (PyUnicode_Check(*p_str));
806
20.0M
    return 0;
807
20.0M
}
808
809
810
static int
811
unicode_format_arg_output(struct unicode_formatter_t *ctx,
812
                          struct unicode_format_arg_t *arg,
813
                          PyObject *str)
814
20.0M
{
815
20.0M
    Py_ssize_t len;
816
20.0M
    int kind;
817
20.0M
    const void *pbuf;
818
20.0M
    Py_ssize_t pindex;
819
20.0M
    Py_UCS4 signchar;
820
20.0M
    Py_ssize_t buflen;
821
20.0M
    Py_UCS4 maxchar;
822
20.0M
    Py_ssize_t sublen;
823
20.0M
    _PyUnicodeWriter *writer = &ctx->writer;
824
20.0M
    Py_UCS4 fill;
825
826
20.0M
    fill = ' ';
827
20.0M
    if (arg->sign && arg->flags & F_ZERO)
828
80.4k
        fill = '0';
829
830
20.0M
    len = PyUnicode_GET_LENGTH(str);
831
20.0M
    if ((arg->width == -1 || arg->width <= len)
832
20.0M
        && (arg->prec == -1 || arg->prec >= len)
833
20.0M
        && !(arg->flags & (F_SIGN | F_BLANK)))
834
20.0M
    {
835
        /* Fast path */
836
20.0M
        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
837
0
            return -1;
838
20.0M
        return 0;
839
20.0M
    }
840
841
    /* Truncate the string for "s", "r" and "a" formats
842
       if the precision is set */
843
78.6k
    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
844
0
        if (arg->prec >= 0 && len > arg->prec)
845
0
            len = arg->prec;
846
0
    }
847
848
    /* Adjust sign and width */
849
78.6k
    kind = PyUnicode_KIND(str);
850
78.6k
    pbuf = PyUnicode_DATA(str);
851
78.6k
    pindex = 0;
852
78.6k
    signchar = '\0';
853
78.6k
    if (arg->sign) {
854
78.6k
        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
855
78.6k
        if (ch == '-' || ch == '+') {
856
0
            signchar = ch;
857
0
            len--;
858
0
            pindex++;
859
0
        }
860
78.6k
        else if (arg->flags & F_SIGN)
861
0
            signchar = '+';
862
78.6k
        else if (arg->flags & F_BLANK)
863
0
            signchar = ' ';
864
78.6k
        else
865
78.6k
            arg->sign = 0;
866
78.6k
    }
867
78.6k
    if (arg->width < len)
868
108
        arg->width = len;
869
870
    /* Prepare the writer */
871
78.6k
    maxchar = writer->maxchar;
872
78.6k
    if (!(arg->flags & F_LJUST)) {
873
78.6k
        if (arg->sign) {
874
0
            if ((arg->width-1) > len)
875
0
                maxchar = Py_MAX(maxchar, fill);
876
0
        }
877
78.6k
        else {
878
78.6k
            if (arg->width > len)
879
78.5k
                maxchar = Py_MAX(maxchar, fill);
880
78.6k
        }
881
78.6k
    }
882
78.6k
    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
883
78.3k
        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
884
78.3k
        maxchar = Py_MAX(maxchar, strmaxchar);
885
78.3k
    }
886
887
78.6k
    buflen = arg->width;
888
78.6k
    if (arg->sign && len == arg->width)
889
0
        buflen++;
890
78.6k
    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
891
0
        return -1;
892
893
    /* Write the sign if needed */
894
78.6k
    if (arg->sign) {
895
0
        if (fill != ' ') {
896
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
897
0
            writer->pos += 1;
898
0
        }
899
0
        if (arg->width > len)
900
0
            arg->width--;
901
0
    }
902
903
    /* Write the numeric prefix for "x", "X" and "o" formats
904
       if the alternate form is used.
905
       For example, write "0x" for the "%#x" format. */
906
78.6k
    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
907
0
        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
908
0
        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
909
0
        if (fill != ' ') {
910
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
911
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
912
0
            writer->pos += 2;
913
0
            pindex += 2;
914
0
        }
915
0
        arg->width -= 2;
916
0
        if (arg->width < 0)
917
0
            arg->width = 0;
918
0
        len -= 2;
919
0
    }
920
921
    /* Pad left with the fill character if needed */
922
78.6k
    if (arg->width > len && !(arg->flags & F_LJUST)) {
923
78.5k
        sublen = arg->width - len;
924
78.5k
        _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
925
78.5k
        writer->pos += sublen;
926
78.5k
        arg->width = len;
927
78.5k
    }
928
929
    /* If padding with spaces: write sign if needed and/or numeric prefix if
930
       the alternate form is used */
931
78.6k
    if (fill == ' ') {
932
108
        if (arg->sign) {
933
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
934
0
            writer->pos += 1;
935
0
        }
936
108
        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
937
0
            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
938
0
            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
939
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
940
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
941
0
            writer->pos += 2;
942
0
            pindex += 2;
943
0
        }
944
108
    }
945
946
    /* Write characters */
947
78.6k
    if (len) {
948
78.6k
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
949
78.6k
                                      str, pindex, len);
950
78.6k
        writer->pos += len;
951
78.6k
    }
952
953
    /* Pad right with the fill character if needed */
954
78.6k
    if (arg->width > len) {
955
0
        sublen = arg->width - len;
956
0
        _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
957
0
        writer->pos += sublen;
958
0
    }
959
78.6k
    return 0;
960
78.6k
}
961
962
963
/* Helper of PyUnicode_Format(): format one arg.
964
   Return 0 on success, raise an exception and return -1 on error. */
965
static int
966
unicode_format_arg(struct unicode_formatter_t *ctx)
967
23.6M
{
968
23.6M
    struct unicode_format_arg_t arg;
969
23.6M
    PyObject *str;
970
23.6M
    int ret;
971
972
23.6M
    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
973
23.6M
    if (arg.ch == '%') {
974
0
        ctx->fmtpos++;
975
0
        ctx->fmtcnt--;
976
0
        if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0)
977
0
            return -1;
978
0
        return 0;
979
0
    }
980
23.6M
    arg.flags = 0;
981
23.6M
    arg.width = -1;
982
23.6M
    arg.prec = -1;
983
23.6M
    arg.sign = 0;
984
23.6M
    arg.fmtstart = ctx->fmtpos - 1;
985
23.6M
    arg.key = NULL;
986
23.6M
    str = NULL;
987
988
23.6M
    ret = unicode_format_arg_parse(ctx, &arg);
989
23.6M
    if (ret == -1) {
990
0
        goto onError;
991
0
    }
992
993
23.6M
    ret = unicode_format_arg_format(ctx, &arg, &str);
994
23.6M
    if (ret == -1) {
995
1.06M
        goto onError;
996
1.06M
    }
997
998
22.6M
    if (ret != 1) {
999
20.0M
        ret = unicode_format_arg_output(ctx, &arg, str);
1000
20.0M
        Py_DECREF(str);
1001
20.0M
        if (ret == -1) {
1002
0
            goto onError;
1003
0
        }
1004
20.0M
    }
1005
1006
22.6M
    if (ctx->dict && (ctx->argidx < ctx->arglen)) {
1007
        // XXX: Never happens?
1008
0
        PyErr_SetString(PyExc_TypeError,
1009
0
                        "not all arguments converted during string formatting");
1010
0
        goto onError;
1011
0
    }
1012
22.6M
    Py_XDECREF(arg.key);
1013
22.6M
    return 0;
1014
1015
1.06M
  onError:
1016
1.06M
    Py_XDECREF(arg.key);
1017
1.06M
    return -1;
1018
22.6M
}
1019
1020
1021
/* Build a new string from the format string `format` and the argument(s)
   `args`.

   Literal runs of the format string are copied to the output; every '%' is
   handed to unicode_format_arg().  `args` may be a tuple (arguments are
   consumed positionally), a non-tuple, non-str mapping (kept in ctx.dict),
   or any other single object.

   Return the result of _PyUnicodeWriter_Finish() on success.  On error,
   return NULL; every failure path visible here has an exception set by the
   failing call or by this function. */
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{
    struct unicode_formatter_t ctx;

    if (format == NULL || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (ensure_unicode(format) < 0) {
        return NULL;
    }

    /* Format string cursor: fmtpos is the read index, fmtcnt the number of
       characters left. */
    ctx.fmtstr = format;
    ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
    ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
    ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
    ctx.fmtpos = 0;

    _PyUnicodeWriter_Init(&ctx.writer);
    ctx.writer.min_length = ctx.fmtcnt + 100;
    ctx.writer.overallocate = 1;

    /* Argument cursor: a tuple is walked by index, anything else is marked
       with arglen == -1 and argidx == -2. */
    const int args_is_tuple = PyTuple_Check(args);
    if (args_is_tuple) {
        ctx.arglen = PyTuple_Size(args);
        ctx.argidx = 0;
    }
    else {
        ctx.arglen = -1;
        ctx.argidx = -2;
    }
    ctx.args_owned = 0;
    ctx.dict = (PyMapping_Check(args) && !args_is_tuple
                && !PyUnicode_Check(args)) ? args : NULL;
    ctx.args = args;

    while (--ctx.fmtcnt >= 0) {
        Py_UCS4 ch = PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos);

        if (ch == '%') {
            /* Conversion specifier: skip the '%' and format one argument. */
            ctx.fmtpos++;
            if (unicode_format_arg(&ctx) == -1) {
                goto onError;
            }
            continue;
        }

        /* Literal text: extend the run up to the next '%' or the end of the
           format string, then copy it in one go. */
        Py_ssize_t literal_start = ctx.fmtpos++;
        while (ctx.fmtcnt >= 0 &&
               PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
            ctx.fmtpos++;
            ctx.fmtcnt--;
        }
        if (ctx.fmtcnt < 0) {
            /* End of the format string reached: step back by one and stop
               overallocating, since this is the last write. */
            ctx.fmtpos--;
            ctx.writer.overallocate = 0;
        }

        if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
                                            literal_start, ctx.fmtpos) < 0) {
            goto onError;
        }
    }

    if (ctx.argidx < ctx.arglen && !ctx.dict) {
        /* Values reported in the message; with a non-tuple argument
           (arglen < 0) they are fixed at 0 and 1. */
        Py_ssize_t required = ctx.arglen < 0 ? 0 : ctx.argidx;
        Py_ssize_t got = ctx.arglen < 0 ? 1 : ctx.arglen;
        PyErr_Format(PyExc_TypeError,
                     "not all arguments converted during string formatting "
                     "(required %zd, got %zd)",
                     required,
                     got);
        goto onError;
    }

    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return _PyUnicodeWriter_Finish(&ctx.writer);

  onError:
    _PyUnicodeWriter_Dealloc(&ctx.writer);
    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return NULL;
}