Coverage Report

Created: 2026-03-23 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/unicode_format.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
// PyUnicode_Format() implementation
42
43
#include "Python.h"
44
#include "pycore_abstract.h"      // _PyIndex_Check()
45
#include "pycore_format.h"        // F_ALT
46
#include "pycore_long.h"          // _PyLong_FormatWriter()
47
#include "pycore_object.h"        // _PyObject_IsUniquelyReferenced()
48
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
49
50
51
0
/* File-local short names for helpers declared in the internal headers. */

/* Alias of _Py_MAX_UNICODE (see pycore_unicodeobject.h). */
#define MAX_UNICODE _Py_MAX_UNICODE

/* Alias of _PyUnicode_EnsureUnicode. */
#define ensure_unicode _PyUnicode_EnsureUnicode
53
54
struct unicode_formatter_t {
55
    PyObject *args;
56
    int args_owned;
57
    Py_ssize_t arglen, argidx;
58
    PyObject *dict;
59
60
    int fmtkind;
61
    Py_ssize_t fmtcnt, fmtpos;
62
    const void *fmtdata;
63
    PyObject *fmtstr;
64
65
    _PyUnicodeWriter writer;
66
};
67
68
69
struct unicode_format_arg_t {
70
    Py_UCS4 ch;
71
    int flags;
72
    Py_ssize_t width;
73
    int prec;
74
    int sign;
75
    Py_ssize_t fmtstart;
76
    PyObject *key;
77
};
78
79
80
// Raise exception EXC with a message that identifies the offending argument.
//
// The expansion refers to `ctx` (struct unicode_formatter_t *) and `arg`
// (struct unicode_format_arg_t *), which must be in scope at the call site.
// The message is prefixed with the mapping key when there is one, else with
// the argument index when it is not negative, else with no identification.
//
// __VA_ARGS__ must not be empty: use FORMAT_ERROR(EXC, "...%s", "") when
// there are no arguments.
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
    if (arg->key) {                                                         \
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
                     arg->key, __VA_ARGS__);                                \
    }                                                                       \
    else if (ctx->argidx < 0) {                                             \
        PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);          \
    }                                                                       \
    else {                                                                  \
        PyErr_Format((EXC), "format argument %zd: " FMT,                    \
                     ctx->argidx, __VA_ARGS__);                             \
    }                                                                       \
} while (0)
94
95
96
static PyObject *
97
unicode_format_getnextarg(struct unicode_formatter_t *ctx, int allowone)
98
26.7M
{
99
26.7M
    Py_ssize_t argidx = ctx->argidx;
100
101
26.7M
    if (argidx < ctx->arglen && (allowone || ctx->arglen >= 0)) {
102
26.7M
        ctx->argidx++;
103
26.7M
        if (ctx->arglen >= 0) {
104
22.6M
            return PyTuple_GetItem(ctx->args, argidx);
105
22.6M
        }
106
4.11M
        else if (allowone) {
107
4.11M
            return ctx->args;
108
4.11M
        }
109
26.7M
    }
110
0
    PyErr_Format(PyExc_TypeError,
111
0
                 "not enough arguments for format string (got %zd)",
112
0
                 ctx->arglen < 0 ? 1 : ctx->arglen);
113
0
    return NULL;
114
26.7M
}
115
116
117
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
118
119
/* Format a float into the writer if the writer is not NULL, or into *p_output
120
   otherwise.
121
122
   Return 0 on success, raise an exception and return -1 on error. */
123
static int
124
formatfloat(PyObject *v,
125
            struct unicode_formatter_t *ctx,
126
            struct unicode_format_arg_t *arg,
127
            PyObject **p_output,
128
            _PyUnicodeWriter *writer)
129
103
{
130
103
    char *p;
131
103
    double x;
132
103
    Py_ssize_t len;
133
103
    int prec;
134
103
    int dtoa_flags = 0;
135
136
103
    x = PyFloat_AsDouble(v);
137
103
    if (x == -1.0 && PyErr_Occurred()) {
138
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
139
0
            FORMAT_ERROR(PyExc_TypeError,
140
0
                         "%%%c requires a real number, not %T",
141
0
                         arg->ch, v);
142
0
        }
143
0
        return -1;
144
0
    }
145
146
103
    prec = arg->prec;
147
103
    if (prec < 0)
148
0
        prec = 6;
149
150
103
    if (arg->flags & F_ALT)
151
0
        dtoa_flags |= Py_DTSF_ALT;
152
103
    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
153
103
    if (p == NULL)
154
0
        return -1;
155
103
    len = strlen(p);
156
103
    if (writer) {
157
0
        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
158
0
            PyMem_Free(p);
159
0
            return -1;
160
0
        }
161
0
    }
162
103
    else
163
103
        *p_output = _PyUnicode_FromASCII(p, len);
164
103
    PyMem_Free(p);
165
103
    return 0;
166
103
}
167
168
169
/* formatlong() emulates the format codes d, u, o, x and X, and
170
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
171
 * Python's regular ints.
172
 * Return value:  a new PyUnicodeObject*, or NULL if error.
173
 *     The output string is of the form
174
 *         "-"? ("0x" | "0X")? digit+
175
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
176
 *         set in flags.  The case of hex digits will be correct,
177
 *     There will be at least prec digits, zero-filled on the left if
178
 *         necessary to get that many.
179
 * val          object to be converted
180
 * flags        bitmask of format flags; only F_ALT is looked at
181
 * prec         minimum number of digits; 0-fill on left if needed
182
 * type         a character in [duoxX]; u acts the same as d
183
 *
184
 * CAUTION:  o, x and X conversions on regular ints can never
185
 * produce a '-' sign, but can for Python's unbounded ints.
186
 */
187
PyObject *
188
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
189
8.35M
{
190
8.35M
    PyObject *result = NULL;
191
8.35M
    char *buf;
192
8.35M
    Py_ssize_t i;
193
8.35M
    int sign;           /* 1 if '-', else 0 */
194
8.35M
    int len;            /* number of characters */
195
8.35M
    Py_ssize_t llen;
196
8.35M
    int numdigits;      /* len == numnondigits + numdigits */
197
8.35M
    int numnondigits = 0;
198
199
    /* Avoid exceeding SSIZE_T_MAX */
200
8.35M
    if (prec > INT_MAX-3) {
201
0
        PyErr_SetString(PyExc_OverflowError,
202
0
                        "precision too large");
203
0
        return NULL;
204
0
    }
205
206
8.35M
    assert(PyLong_Check(val));
207
208
8.35M
    switch (type) {
209
0
    default:
210
0
        Py_UNREACHABLE();
211
3.66M
    case 'd':
212
3.66M
    case 'i':
213
3.66M
    case 'u':
214
        /* int and int subclasses should print numerically when a numeric */
215
        /* format code is used (see issue18780) */
216
3.66M
        result = PyNumber_ToBase(val, 10);
217
3.66M
        break;
218
69.1k
    case 'o':
219
69.1k
        numnondigits = 2;
220
69.1k
        result = PyNumber_ToBase(val, 8);
221
69.1k
        break;
222
78
    case 'x':
223
4.62M
    case 'X':
224
4.62M
        numnondigits = 2;
225
4.62M
        result = PyNumber_ToBase(val, 16);
226
4.62M
        break;
227
8.35M
    }
228
8.35M
    if (!result)
229
0
        return NULL;
230
231
8.35M
    assert(_PyUnicode_IsModifiable(result));
232
8.35M
    assert(PyUnicode_IS_ASCII(result));
233
234
    /* To modify the string in-place, there can only be one reference. */
235
8.35M
    if (!_PyObject_IsUniquelyReferenced(result)) {
236
0
        Py_DECREF(result);
237
0
        PyErr_BadInternalCall();
238
0
        return NULL;
239
0
    }
240
8.35M
    buf = PyUnicode_DATA(result);
241
8.35M
    llen = PyUnicode_GET_LENGTH(result);
242
8.35M
    if (llen > INT_MAX) {
243
0
        Py_DECREF(result);
244
0
        PyErr_SetString(PyExc_ValueError,
245
0
                        "string too large in _PyUnicode_FormatLong");
246
0
        return NULL;
247
0
    }
248
8.35M
    len = (int)llen;
249
8.35M
    sign = buf[0] == '-';
250
8.35M
    numnondigits += sign;
251
8.35M
    numdigits = len - numnondigits;
252
8.35M
    assert(numdigits > 0);
253
254
    /* Get rid of base marker unless F_ALT */
255
8.35M
    if (((alt) == 0 &&
256
8.35M
        (type == 'o' || type == 'x' || type == 'X'))) {
257
4.69M
        assert(buf[sign] == '0');
258
4.69M
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
259
4.69M
               buf[sign+1] == 'o');
260
4.69M
        numnondigits -= 2;
261
4.69M
        buf += 2;
262
4.69M
        len -= 2;
263
4.69M
        if (sign)
264
0
            buf[0] = '-';
265
4.69M
        assert(len == numnondigits + numdigits);
266
4.69M
        assert(numdigits > 0);
267
4.69M
    }
268
269
    /* Fill with leading zeroes to meet minimum width. */
270
8.35M
    if (prec > numdigits) {
271
0
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
272
0
                                numnondigits + prec);
273
0
        char *b1;
274
0
        if (!r1) {
275
0
            Py_DECREF(result);
276
0
            return NULL;
277
0
        }
278
0
        b1 = PyBytes_AS_STRING(r1);
279
0
        for (i = 0; i < numnondigits; ++i)
280
0
            *b1++ = *buf++;
281
0
        for (i = 0; i < prec - numdigits; i++)
282
0
            *b1++ = '0';
283
0
        for (i = 0; i < numdigits; i++)
284
0
            *b1++ = *buf++;
285
0
        *b1 = '\0';
286
0
        Py_SETREF(result, r1);
287
0
        buf = PyBytes_AS_STRING(result);
288
0
        len = numnondigits + prec;
289
0
    }
290
291
    /* Fix up case for hex conversions. */
292
8.35M
    if (type == 'X') {
293
        /* Need to convert all lower case letters to upper case.
294
           and need to convert 0x to 0X (and -0x to -0X). */
295
32.4M
        for (i = 0; i < len; i++)
296
27.7M
            if (buf[i] >= 'a' && buf[i] <= 'x')
297
5.94M
                buf[i] -= 'a'-'A';
298
4.62M
    }
299
8.35M
    if (!PyUnicode_Check(result)
300
8.35M
        || buf != PyUnicode_DATA(result)) {
301
4.69M
        PyObject *unicode;
302
4.69M
        unicode = _PyUnicode_FromASCII(buf, len);
303
4.69M
        Py_SETREF(result, unicode);
304
4.69M
    }
305
3.66M
    else if (len != PyUnicode_GET_LENGTH(result)) {
306
0
        if (PyUnicode_Resize(&result, len) < 0)
307
0
            Py_CLEAR(result);
308
0
    }
309
8.35M
    return result;
310
8.35M
}
311
312
313
/* Format an integer or a float as an integer.
314
 * Return 1 if the number has been formatted into the writer,
315
 *        0 if the number has been formatted into *p_output
316
 *       -1 and raise an exception on error */
317
static int
318
mainformatlong(PyObject *v,
319
               struct unicode_formatter_t *ctx,
320
               struct unicode_format_arg_t *arg,
321
               PyObject **p_output,
322
               _PyUnicodeWriter *writer)
323
10.9M
{
324
10.9M
    PyObject *iobj, *res;
325
10.9M
    char type = (char)arg->ch;
326
327
10.9M
    if (!PyNumber_Check(v))
328
754k
        goto wrongtype;
329
330
    /* make sure number is a type of integer for o, x, and X */
331
10.1M
    if (!PyLong_Check(v)) {
332
0
        if (type == 'o' || type == 'x' || type == 'X') {
333
0
            iobj = _PyNumber_Index(v);
334
0
        }
335
0
        else {
336
0
            iobj = PyNumber_Long(v);
337
0
        }
338
0
        if (iobj == NULL ) {
339
0
            if (PyErr_ExceptionMatches(PyExc_TypeError))
340
0
                goto wrongtype;
341
0
            return -1;
342
0
        }
343
0
        assert(PyLong_Check(iobj));
344
0
    }
345
10.1M
    else {
346
10.1M
        iobj = Py_NewRef(v);
347
10.1M
    }
348
349
10.1M
    if (PyLong_CheckExact(v)
350
10.1M
        && arg->width == -1 && arg->prec == -1
351
6.44M
        && !(arg->flags & (F_SIGN | F_BLANK))
352
6.44M
        && type != 'X')
353
1.82M
    {
354
        /* Fast path */
355
1.82M
        int alternate = arg->flags & F_ALT;
356
1.82M
        int base;
357
358
1.82M
        switch(type)
359
1.82M
        {
360
0
            default:
361
0
                Py_UNREACHABLE();
362
1.82M
            case 'd':
363
1.82M
            case 'i':
364
1.82M
            case 'u':
365
1.82M
                base = 10;
366
1.82M
                break;
367
0
            case 'o':
368
0
                base = 8;
369
0
                break;
370
42
            case 'x':
371
42
            case 'X':
372
42
                base = 16;
373
42
                break;
374
1.82M
        }
375
376
1.82M
        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
377
0
            Py_DECREF(iobj);
378
0
            return -1;
379
0
        }
380
1.82M
        Py_DECREF(iobj);
381
1.82M
        return 1;
382
1.82M
    }
383
384
8.35M
    res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
385
8.35M
    Py_DECREF(iobj);
386
8.35M
    if (res == NULL)
387
0
        return -1;
388
8.35M
    *p_output = res;
389
8.35M
    return 0;
390
391
754k
wrongtype:
392
754k
    switch(type)
393
754k
    {
394
0
        case 'o':
395
0
        case 'x':
396
0
        case 'X':
397
0
            FORMAT_ERROR(PyExc_TypeError,
398
0
                         "%%%c requires an integer, not %T",
399
0
                         arg->ch, v);
400
0
            break;
401
754k
        default:
402
754k
            FORMAT_ERROR(PyExc_TypeError,
403
754k
                         "%%%c requires a real number, not %T",
404
754k
                         arg->ch, v);
405
754k
            break;
406
754k
    }
407
754k
    return -1;
408
754k
}
409
410
411
static Py_UCS4
412
formatchar(PyObject *v,
413
           struct unicode_formatter_t *ctx,
414
           struct unicode_format_arg_t *arg)
415
0
{
416
    /* presume that the buffer is at least 3 characters long */
417
0
    if (PyUnicode_Check(v)) {
418
0
        if (PyUnicode_GET_LENGTH(v) == 1) {
419
0
            return PyUnicode_READ_CHAR(v, 0);
420
0
        }
421
0
        FORMAT_ERROR(PyExc_TypeError,
422
0
                     "%%c requires an integer or a unicode character, "
423
0
                     "not a string of length %zd",
424
0
                     PyUnicode_GET_LENGTH(v));
425
0
        return (Py_UCS4) -1;
426
0
    }
427
0
    else {
428
0
        int overflow;
429
0
        long x = PyLong_AsLongAndOverflow(v, &overflow);
430
0
        if (x == -1 && PyErr_Occurred()) {
431
0
            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
432
0
                FORMAT_ERROR(PyExc_TypeError,
433
0
                             "%%c requires an integer or a unicode character, "
434
0
                             "not %T",
435
0
                             v);
436
0
            }
437
0
            return (Py_UCS4) -1;
438
0
        }
439
440
0
        if (x < 0 || x > MAX_UNICODE) {
441
            /* this includes an overflow in converting to C long */
442
0
            FORMAT_ERROR(PyExc_OverflowError,
443
0
                         "%%c argument not in range(0x110000)%s", "");
444
0
            return (Py_UCS4) -1;
445
0
        }
446
447
0
        return (Py_UCS4) x;
448
0
    }
449
0
}
450
451
452
/* Parse options of an argument: flags, width, precision.
453
   Handle also "%(name)" syntax.
454
455
   Return 0 if the argument has been formatted into arg->str.
456
   Return 1 if the argument has been written into ctx->writer,
457
   Raise an exception and return -1 on error. */
458
static int
459
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
460
                         struct unicode_format_arg_t *arg)
461
26.7M
{
462
26.7M
#define FORMAT_READ(ctx) \
463
30.8M
        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
464
465
26.7M
    PyObject *v;
466
467
26.7M
    if (arg->ch == '(') {
468
        /* Get argument value from a dictionary. Example: "%(name)s". */
469
40.7k
        Py_ssize_t keystart;
470
40.7k
        Py_ssize_t keylen;
471
40.7k
        int pcount = 1;
472
473
40.7k
        if (ctx->dict == NULL) {
474
0
            PyErr_Format(PyExc_TypeError,
475
0
                         "format requires a mapping, not %T",
476
0
                         ctx->args);
477
0
            return -1;
478
0
        }
479
40.7k
        ++ctx->fmtpos;
480
40.7k
        --ctx->fmtcnt;
481
40.7k
        keystart = ctx->fmtpos;
482
        /* Skip over balanced parentheses */
483
366k
        while (pcount > 0 && --ctx->fmtcnt >= 0) {
484
325k
            arg->ch = FORMAT_READ(ctx);
485
325k
            if (arg->ch == ')')
486
40.7k
                --pcount;
487
284k
            else if (arg->ch == '(')
488
0
                ++pcount;
489
325k
            ctx->fmtpos++;
490
325k
        }
491
40.7k
        keylen = ctx->fmtpos - keystart - 1;
492
40.7k
        if (ctx->fmtcnt < 0 || pcount > 0) {
493
0
            PyErr_Format(PyExc_ValueError,
494
0
                         "stray %% or incomplete format key at position %zd",
495
0
                         arg->fmtstart);
496
0
            return -1;
497
0
        }
498
40.7k
        arg->key = PyUnicode_Substring(ctx->fmtstr,
499
40.7k
                                       keystart, keystart + keylen);
500
40.7k
        if (arg->key == NULL)
501
0
            return -1;
502
40.7k
        if (ctx->args_owned) {
503
29.0k
            ctx->args_owned = 0;
504
29.0k
            Py_DECREF(ctx->args);
505
29.0k
        }
506
40.7k
        ctx->args = PyObject_GetItem(ctx->dict, arg->key);
507
40.7k
        if (ctx->args == NULL)
508
0
            return -1;
509
40.7k
        ctx->args_owned = 1;
510
40.7k
        ctx->arglen = -3;
511
40.7k
        ctx->argidx = -4;
512
40.7k
    }
513
26.6M
    else {
514
26.6M
        if (ctx->arglen < -1) {
515
0
            PyErr_Format(PyExc_ValueError,
516
0
                         "format requires a parenthesised mapping key "
517
0
                         "at position %zd",
518
0
                         arg->fmtstart);
519
0
            return -1;
520
0
        }
521
26.6M
    }
522
523
    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
524
26.7M
    while (--ctx->fmtcnt >= 0) {
525
26.7M
        arg->ch = FORMAT_READ(ctx);
526
26.7M
        ctx->fmtpos++;
527
26.7M
        switch (arg->ch) {
528
0
        case '-': arg->flags |= F_LJUST; continue;
529
0
        case '+': arg->flags |= F_SIGN; continue;
530
0
        case ' ': arg->flags |= F_BLANK; continue;
531
42
        case '#': arg->flags |= F_ALT; continue;
532
71.8k
        case '0': arg->flags |= F_ZERO; continue;
533
26.7M
        }
534
26.7M
        break;
535
26.7M
    }
536
537
    /* Parse width. Example: "%10s" => width=10 */
538
26.7M
    if (arg->ch == '*') {
539
57.3k
        if (ctx->arglen < -1) {
540
0
            PyErr_Format(PyExc_ValueError,
541
0
                    "* cannot be used with a parenthesised mapping key "
542
0
                    "at position %zd",
543
0
                    arg->fmtstart);
544
0
            return -1;
545
0
        }
546
57.3k
        v = unicode_format_getnextarg(ctx, 0);
547
57.3k
        if (v == NULL)
548
0
            return -1;
549
57.3k
        if (!PyLong_Check(v)) {
550
0
            FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
551
0
            return -1;
552
0
        }
553
57.3k
        arg->width = PyLong_AsSsize_t(v);
554
57.3k
        if (arg->width == -1 && PyErr_Occurred()) {
555
0
            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
556
0
                FORMAT_ERROR(PyExc_OverflowError,
557
0
                             "too big for width%s", "");
558
0
            }
559
0
            return -1;
560
0
        }
561
57.3k
        if (arg->width < 0) {
562
0
            arg->flags |= F_LJUST;
563
0
            arg->width = -arg->width;
564
0
        }
565
57.3k
        if (--ctx->fmtcnt >= 0) {
566
57.3k
            arg->ch = FORMAT_READ(ctx);
567
57.3k
            ctx->fmtpos++;
568
57.3k
        }
569
57.3k
    }
570
26.6M
    else if (arg->ch >= '0' && arg->ch <= '9') {
571
3.67M
        arg->width = arg->ch - '0';
572
3.67M
        while (--ctx->fmtcnt >= 0) {
573
3.67M
            arg->ch = FORMAT_READ(ctx);
574
3.67M
            ctx->fmtpos++;
575
3.67M
            if (arg->ch < '0' || arg->ch > '9')
576
3.67M
                break;
577
            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
578
               mixing signed and unsigned comparison. Since arg->ch is between
579
               '0' and '9', casting to int is safe. */
580
0
            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
581
0
                PyErr_Format(PyExc_ValueError,
582
0
                             "width too big at position %zd",
583
0
                             arg->fmtstart);
584
0
                return -1;
585
0
            }
586
0
            arg->width = arg->width*10 + (arg->ch - '0');
587
0
        }
588
3.67M
    }
589
590
    /* Parse precision. Example: "%.3f" => prec=3 */
591
26.7M
    if (arg->ch == '.') {
592
103
        arg->prec = 0;
593
103
        if (--ctx->fmtcnt >= 0) {
594
103
            arg->ch = FORMAT_READ(ctx);
595
103
            ctx->fmtpos++;
596
103
        }
597
103
        if (arg->ch == '*') {
598
0
            if (ctx->arglen < -1) {
599
0
                PyErr_Format(PyExc_ValueError,
600
0
                        "* cannot be used with a parenthesised mapping key "
601
0
                        "at position %zd",
602
0
                        arg->fmtstart);
603
0
                return -1;
604
0
            }
605
0
            v = unicode_format_getnextarg(ctx, 0);
606
0
            if (v == NULL)
607
0
                return -1;
608
0
            if (!PyLong_Check(v)) {
609
0
                FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
610
0
                return -1;
611
0
            }
612
0
            arg->prec = PyLong_AsInt(v);
613
0
            if (arg->prec == -1 && PyErr_Occurred()) {
614
0
                if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
615
0
                    FORMAT_ERROR(PyExc_OverflowError,
616
0
                                 "too big for precision%s", "");
617
0
                }
618
0
                return -1;
619
0
            }
620
0
            if (arg->prec < 0)
621
0
                arg->prec = 0;
622
0
            if (--ctx->fmtcnt >= 0) {
623
0
                arg->ch = FORMAT_READ(ctx);
624
0
                ctx->fmtpos++;
625
0
            }
626
0
        }
627
103
        else if (arg->ch >= '0' && arg->ch <= '9') {
628
103
            arg->prec = arg->ch - '0';
629
103
            while (--ctx->fmtcnt >= 0) {
630
103
                arg->ch = FORMAT_READ(ctx);
631
103
                ctx->fmtpos++;
632
103
                if (arg->ch < '0' || arg->ch > '9')
633
103
                    break;
634
0
                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
635
0
                    PyErr_Format(PyExc_ValueError,
636
0
                                 "precision too big at position %zd",
637
0
                                 arg->fmtstart);
638
0
                    return -1;
639
0
                }
640
0
                arg->prec = arg->prec*10 + (arg->ch - '0');
641
0
            }
642
103
        }
643
103
    }
644
645
    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
646
26.7M
    if (ctx->fmtcnt >= 0) {
647
26.7M
        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
648
0
            if (--ctx->fmtcnt >= 0) {
649
0
                arg->ch = FORMAT_READ(ctx);
650
0
                ctx->fmtpos++;
651
0
            }
652
0
        }
653
26.7M
    }
654
26.7M
    if (ctx->fmtcnt < 0) {
655
0
        PyErr_Format(PyExc_ValueError,
656
0
                     "stray %% at position %zd", arg->fmtstart);
657
0
        return -1;
658
0
    }
659
26.7M
    return 0;
660
661
26.7M
#undef FORMAT_READ
662
26.7M
}
663
664
665
/* Format one argument. Supported conversion specifiers:
666
667
   - "s", "r", "a": any type
668
   - "i", "d", "u": int or float
669
   - "o", "x", "X": int
670
   - "e", "E", "f", "F", "g", "G": float
671
   - "c": int or str (1 character)
672
673
   When possible, the output is written directly into the Unicode writer
674
   (ctx->writer). A string is created when padding is required.
675
676
   Return 0 if the argument has been formatted into *p_str,
677
          1 if the argument has been written into ctx->writer,
678
         -1 on error. */
679
static int
680
unicode_format_arg_format(struct unicode_formatter_t *ctx,
681
                          struct unicode_format_arg_t *arg,
682
                          PyObject **p_str)
683
26.7M
{
684
26.7M
    PyObject *v;
685
26.7M
    _PyUnicodeWriter *writer = &ctx->writer;
686
687
26.7M
    if (ctx->fmtcnt == 0)
688
10.8M
        ctx->writer.overallocate = 0;
689
690
26.7M
    v = unicode_format_getnextarg(ctx, 1);
691
26.7M
    if (v == NULL)
692
0
        return -1;
693
694
695
26.7M
    switch (arg->ch) {
696
15.7M
    case 's':
697
15.7M
    case 'r':
698
15.7M
    case 'a':
699
15.7M
        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
700
            /* Fast path */
701
133
            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
702
0
                return -1;
703
133
            return 1;
704
133
        }
705
706
15.7M
        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
707
11.1M
            *p_str = Py_NewRef(v);
708
11.1M
        }
709
4.62M
        else {
710
4.62M
            if (arg->ch == 's')
711
4.61M
                *p_str = PyObject_Str(v);
712
9.52k
            else if (arg->ch == 'r')
713
9.52k
                *p_str = PyObject_Repr(v);
714
0
            else
715
0
                *p_str = PyObject_ASCII(v);
716
4.62M
        }
717
15.7M
        break;
718
719
0
    case 'i':
720
6.24M
    case 'd':
721
6.24M
    case 'u':
722
6.31M
    case 'o':
723
6.31M
    case 'x':
724
10.9M
    case 'X':
725
10.9M
    {
726
10.9M
        int ret = mainformatlong(v, ctx, arg, p_str, writer);
727
10.9M
        if (ret != 0)
728
2.58M
            return ret;
729
8.35M
        arg->sign = 1;
730
8.35M
        break;
731
10.9M
    }
732
733
0
    case 'e':
734
0
    case 'E':
735
103
    case 'f':
736
103
    case 'F':
737
103
    case 'g':
738
103
    case 'G':
739
103
        if (arg->width == -1 && arg->prec == -1
740
0
            && !(arg->flags & (F_SIGN | F_BLANK)))
741
0
        {
742
            /* Fast path */
743
0
            if (formatfloat(v, ctx, arg, NULL, writer) == -1)
744
0
                return -1;
745
0
            return 1;
746
0
        }
747
748
103
        arg->sign = 1;
749
103
        if (formatfloat(v, ctx, arg, p_str, NULL) == -1)
750
0
            return -1;
751
103
        break;
752
753
103
    case 'c':
754
0
    {
755
0
        Py_UCS4 ch = formatchar(v, ctx, arg);
756
0
        if (ch == (Py_UCS4) -1)
757
0
            return -1;
758
0
        if (arg->width == -1 && arg->prec == -1) {
759
            /* Fast path */
760
0
            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
761
0
                return -1;
762
0
            return 1;
763
0
        }
764
0
        *p_str = PyUnicode_FromOrdinal(ch);
765
0
        break;
766
0
    }
767
768
0
    default:
769
0
        if (arg->ch < 128 && Py_ISALPHA(arg->ch)) {
770
0
            PyErr_Format(PyExc_ValueError,
771
0
                         "unsupported format %%%c at position %zd",
772
0
                         (int)arg->ch, arg->fmtstart);
773
0
        }
774
0
        else if (arg->ch == '\'') {
775
0
            PyErr_Format(PyExc_ValueError,
776
0
                         "stray %% at position %zd or unexpected "
777
0
                         "format character \"'\" at position %zd",
778
0
                         arg->fmtstart,
779
0
                         ctx->fmtpos - 1);
780
0
        }
781
0
        else if (arg->ch >= 32 && arg->ch < 127) {
782
0
            PyErr_Format(PyExc_ValueError,
783
0
                         "stray %% at position %zd or unexpected "
784
0
                         "format character '%c' at position %zd",
785
0
                         arg->fmtstart,
786
0
                         (int)arg->ch, ctx->fmtpos - 1);
787
0
        }
788
0
        else if (Py_UNICODE_ISPRINTABLE(arg->ch)) {
789
0
            PyErr_Format(PyExc_ValueError,
790
0
                         "stray %% at position %zd or unexpected "
791
0
                         "format character '%c' (U+%04X) at position %zd",
792
0
                         arg->fmtstart,
793
0
                         (int)arg->ch, (int)arg->ch, ctx->fmtpos - 1);
794
0
        }
795
0
        else {
796
0
            PyErr_Format(PyExc_ValueError,
797
0
                         "stray %% at position %zd or unexpected "
798
0
                         "format character U+%04X at position %zd",
799
0
                         arg->fmtstart, (int)arg->ch, ctx->fmtpos - 1);
800
0
        }
801
0
        return -1;
802
26.7M
    }
803
24.1M
    if (*p_str == NULL)
804
0
        return -1;
805
24.1M
    assert (PyUnicode_Check(*p_str));
806
24.1M
    return 0;
807
24.1M
}
808
809
810
static int
811
unicode_format_arg_output(struct unicode_formatter_t *ctx,
812
                          struct unicode_format_arg_t *arg,
813
                          PyObject *str)
814
24.1M
{
815
24.1M
    Py_ssize_t len;
816
24.1M
    int kind;
817
24.1M
    const void *pbuf;
818
24.1M
    Py_ssize_t pindex;
819
24.1M
    Py_UCS4 signchar;
820
24.1M
    Py_ssize_t buflen;
821
24.1M
    Py_UCS4 maxchar;
822
24.1M
    Py_ssize_t sublen;
823
24.1M
    _PyUnicodeWriter *writer = &ctx->writer;
824
24.1M
    Py_UCS4 fill;
825
826
24.1M
    fill = ' ';
827
24.1M
    if (arg->sign && arg->flags & F_ZERO)
828
71.8k
        fill = '0';
829
830
24.1M
    len = PyUnicode_GET_LENGTH(str);
831
24.1M
    if ((arg->width == -1 || arg->width <= len)
832
24.0M
        && (arg->prec == -1 || arg->prec >= len)
833
24.0M
        && !(arg->flags & (F_SIGN | F_BLANK)))
834
24.0M
    {
835
        /* Fast path */
836
24.0M
        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
837
0
            return -1;
838
24.0M
        return 0;
839
24.0M
    }
840
841
    /* Truncate the string for "s", "r" and "a" formats
842
       if the precision is set */
843
86.4k
    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
844
0
        if (arg->prec >= 0 && len > arg->prec)
845
0
            len = arg->prec;
846
0
    }
847
848
    /* Adjust sign and width */
849
86.4k
    kind = PyUnicode_KIND(str);
850
86.4k
    pbuf = PyUnicode_DATA(str);
851
86.4k
    pindex = 0;
852
86.4k
    signchar = '\0';
853
86.4k
    if (arg->sign) {
854
86.4k
        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
855
86.4k
        if (ch == '-' || ch == '+') {
856
0
            signchar = ch;
857
0
            len--;
858
0
            pindex++;
859
0
        }
860
86.4k
        else if (arg->flags & F_SIGN)
861
0
            signchar = '+';
862
86.4k
        else if (arg->flags & F_BLANK)
863
0
            signchar = ' ';
864
86.4k
        else
865
86.4k
            arg->sign = 0;
866
86.4k
    }
867
86.4k
    if (arg->width < len)
868
103
        arg->width = len;
869
870
    /* Prepare the writer */
871
86.4k
    maxchar = writer->maxchar;
872
86.4k
    if (!(arg->flags & F_LJUST)) {
873
86.4k
        if (arg->sign) {
874
0
            if ((arg->width-1) > len)
875
0
                maxchar = Py_MAX(maxchar, fill);
876
0
        }
877
86.4k
        else {
878
86.4k
            if (arg->width > len)
879
86.3k
                maxchar = Py_MAX(maxchar, fill);
880
86.4k
        }
881
86.4k
    }
882
86.4k
    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
883
69.1k
        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
884
69.1k
        maxchar = Py_MAX(maxchar, strmaxchar);
885
69.1k
    }
886
887
86.4k
    buflen = arg->width;
888
86.4k
    if (arg->sign && len == arg->width)
889
0
        buflen++;
890
86.4k
    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
891
0
        return -1;
892
893
    /* Write the sign if needed */
894
86.4k
    if (arg->sign) {
895
0
        if (fill != ' ') {
896
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
897
0
            writer->pos += 1;
898
0
        }
899
0
        if (arg->width > len)
900
0
            arg->width--;
901
0
    }
902
903
    /* Write the numeric prefix for "x", "X" and "o" formats
904
       if the alternate form is used.
905
       For example, write "0x" for the "%#x" format. */
906
86.4k
    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
907
0
        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
908
0
        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
909
0
        if (fill != ' ') {
910
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
911
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
912
0
            writer->pos += 2;
913
0
            pindex += 2;
914
0
        }
915
0
        arg->width -= 2;
916
0
        if (arg->width < 0)
917
0
            arg->width = 0;
918
0
        len -= 2;
919
0
    }
920
921
    /* Pad left with the fill character if needed */
922
86.4k
    if (arg->width > len && !(arg->flags & F_LJUST)) {
923
86.3k
        sublen = arg->width - len;
924
86.3k
        _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
925
86.3k
        writer->pos += sublen;
926
86.3k
        arg->width = len;
927
86.3k
    }
928
929
    /* If padding with spaces: write sign if needed and/or numeric prefix if
930
       the alternate form is used */
931
86.4k
    if (fill == ' ') {
932
17.1k
        if (arg->sign) {
933
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
934
0
            writer->pos += 1;
935
0
        }
936
17.1k
        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
937
0
            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
938
0
            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
939
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
940
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
941
0
            writer->pos += 2;
942
0
            pindex += 2;
943
0
        }
944
17.1k
    }
945
946
    /* Write characters */
947
86.4k
    if (len) {
948
86.4k
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
949
86.4k
                                      str, pindex, len);
950
86.4k
        writer->pos += len;
951
86.4k
    }
952
953
    /* Pad right with the fill character if needed */
954
86.4k
    if (arg->width > len) {
955
0
        sublen = arg->width - len;
956
0
        _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
957
0
        writer->pos += sublen;
958
0
    }
959
86.4k
    return 0;
960
86.4k
}
961
962
963
/* Helper of PyUnicode_Format(): format one arg.
964
   Return 0 on success, raise an exception and return -1 on error. */
965
static int
966
unicode_format_arg(struct unicode_formatter_t *ctx)
967
26.7M
{
968
26.7M
    struct unicode_format_arg_t arg;
969
26.7M
    PyObject *str;
970
26.7M
    int ret;
971
972
26.7M
    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
973
26.7M
    if (arg.ch == '%') {
974
0
        ctx->fmtpos++;
975
0
        ctx->fmtcnt--;
976
0
        if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0)
977
0
            return -1;
978
0
        return 0;
979
0
    }
980
26.7M
    arg.flags = 0;
981
26.7M
    arg.width = -1;
982
26.7M
    arg.prec = -1;
983
26.7M
    arg.sign = 0;
984
26.7M
    arg.fmtstart = ctx->fmtpos - 1;
985
26.7M
    arg.key = NULL;
986
26.7M
    str = NULL;
987
988
26.7M
    ret = unicode_format_arg_parse(ctx, &arg);
989
26.7M
    if (ret == -1) {
990
0
        goto onError;
991
0
    }
992
993
26.7M
    ret = unicode_format_arg_format(ctx, &arg, &str);
994
26.7M
    if (ret == -1) {
995
754k
        goto onError;
996
754k
    }
997
998
25.9M
    if (ret != 1) {
999
24.1M
        ret = unicode_format_arg_output(ctx, &arg, str);
1000
24.1M
        Py_DECREF(str);
1001
24.1M
        if (ret == -1) {
1002
0
            goto onError;
1003
0
        }
1004
24.1M
    }
1005
1006
25.9M
    if (ctx->dict && (ctx->argidx < ctx->arglen)) {
1007
        // XXX: Never happens?
1008
0
        PyErr_SetString(PyExc_TypeError,
1009
0
                        "not all arguments converted during string formatting");
1010
0
        goto onError;
1011
0
    }
1012
25.9M
    Py_XDECREF(arg.key);
1013
25.9M
    return 0;
1014
1015
754k
  onError:
1016
754k
    Py_XDECREF(arg.key);
1017
754k
    return -1;
1018
25.9M
}
1019
1020
1021
/* Apply printf-style formatting: interpret "format" as a format string and
   substitute the values taken from "args" (a tuple, a mapping, or a single
   object).

   Return the string produced by the writer, or NULL with an exception set
   on error. */
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{
    struct unicode_formatter_t ctx;

    if (args == NULL || format == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    if (ensure_unicode(format) < 0) {
        return NULL;
    }

    /* Format string cursor: fmtpos is the read index, fmtcnt the number of
       characters left. */
    ctx.fmtstr = format;
    ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
    ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
    ctx.fmtpos = 0;
    ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);

    /* Arguments: a tuple is consumed item by item; anything else is
       treated as a single value (arglen == -1, argidx == -2). */
    const int is_tuple = PyTuple_Check(args);
    ctx.args = args;
    ctx.args_owned = 0;
    if (is_tuple) {
        ctx.arglen = PyTuple_Size(args);
        ctx.argidx = 0;
    }
    else {
        ctx.arglen = -1;
        ctx.argidx = -2;
    }
    /* A mapping that is neither a tuple nor a str is stored in ctx.dict. */
    ctx.dict = (PyMapping_Check(args) && !is_tuple && !PyUnicode_Check(args))
               ? args : NULL;

    _PyUnicodeWriter_Init(&ctx.writer);
    ctx.writer.overallocate = 1;
    ctx.writer.min_length = ctx.fmtcnt + 100;

    while (--ctx.fmtcnt >= 0) {
        if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) == '%') {
            /* Format specifier: step over the '%' and format one argument. */
            ctx.fmtpos++;
            if (unicode_format_arg(&ctx) == -1) {
                goto onError;
            }
            continue;
        }

        /* Literal text: find the end of the run and copy it in one go. */
        const Py_ssize_t run_start = ctx.fmtpos++;
        for (; ctx.fmtcnt >= 0
               && PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%';
             ctx.fmtcnt--)
        {
            ctx.fmtpos++;
        }
        if (ctx.fmtcnt < 0) {
            /* The run reaches the end of the format string: nothing more
               will be appended, so stop overallocating. */
            ctx.fmtpos--;
            ctx.writer.overallocate = 0;
        }

        if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
                                            run_start, ctx.fmtpos) < 0) {
            goto onError;
        }
    }

    /* Leftover positional arguments are an error (mappings are exempt). */
    if (!ctx.dict && ctx.argidx < ctx.arglen) {
        const int single_arg = (ctx.arglen < 0);
        const Py_ssize_t required = single_arg ? 0 : ctx.argidx;
        const Py_ssize_t got = single_arg ? 1 : ctx.arglen;
        PyErr_Format(PyExc_TypeError,
                     "not all arguments converted during string formatting "
                     "(required %zd, got %zd)",
                     required, got);
        goto onError;
    }

    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return _PyUnicodeWriter_Finish(&ctx.writer);

  onError:
    _PyUnicodeWriter_Dealloc(&ctx.writer);
    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return NULL;
}