Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/unicode_format.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
// PyUnicode_Format() implementation
42
43
#include "Python.h"
44
#include "pycore_abstract.h"      // _PyIndex_Check()
45
#include "pycore_format.h"        // F_ALT
46
#include "pycore_long.h"          // _PyLong_FormatWriter()
47
#include "pycore_object.h"        // _PyObject_IsUniquelyReferenced()
48
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
49
50
51
0
#define MAX_UNICODE _Py_MAX_UNICODE
52
32.7M
#define ensure_unicode _PyUnicode_EnsureUnicode
53
54
struct unicode_formatter_t {
55
    PyObject *args;
56
    int args_owned;
57
    Py_ssize_t arglen, argidx;
58
    PyObject *dict;
59
60
    int fmtkind;
61
    Py_ssize_t fmtcnt, fmtpos;
62
    const void *fmtdata;
63
    PyObject *fmtstr;
64
65
    _PyUnicodeWriter writer;
66
};
67
68
69
struct unicode_format_arg_t {
70
    Py_UCS4 ch;
71
    int flags;
72
    Py_ssize_t width;
73
    int prec;
74
    int sign;
75
    Py_ssize_t fmtstart;
76
    PyObject *key;
77
};
78
79
80
// Use FORMAT_ERROR(EXC, "...%s", "") when there are no arguments.
81
4.77M
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
82
4.77M
    if (arg->key != NULL) {                                                 \
83
0
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
84
0
                     arg->key, __VA_ARGS__);                                \
85
0
    }                                                                       \
86
4.77M
    else if (ctx->argidx >= 0) {                                            \
87
0
        PyErr_Format((EXC), "format argument %zd: " FMT,                    \
88
0
                     ctx->argidx, __VA_ARGS__);                             \
89
0
    }                                                                       \
90
4.77M
    else {                                                                  \
91
4.77M
        PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);          \
92
4.77M
    }                                                                       \
93
4.77M
} while (0)
94
95
96
static PyObject *
97
unicode_format_getnextarg(struct unicode_formatter_t *ctx, int allowone)
98
69.1M
{
99
69.1M
    Py_ssize_t argidx = ctx->argidx;
100
101
69.1M
    if (argidx < ctx->arglen && (allowone || ctx->arglen >= 0)) {
102
69.1M
        ctx->argidx++;
103
69.1M
        if (ctx->arglen >= 0) {
104
53.7M
            return PyTuple_GetItem(ctx->args, argidx);
105
53.7M
        }
106
15.3M
        else if (allowone) {
107
15.3M
            return ctx->args;
108
15.3M
        }
109
69.1M
    }
110
0
    PyErr_Format(PyExc_TypeError,
111
0
                 "not enough arguments for format string (got %zd)",
112
0
                 ctx->arglen < 0 ? 1 : ctx->arglen);
113
0
    return NULL;
114
69.1M
}
115
116
117
/* With writer == NULL, *p_output receives a new reference to a PyUnicode object, or NULL on failure. */
118
119
/* Format a float into the writer if the writer is not NULL, or into *p_output
120
   otherwise.
121
122
   Return 0 on success, raise an exception and return -1 on error. */
123
static int
124
formatfloat(PyObject *v,
125
            struct unicode_formatter_t *ctx,
126
            struct unicode_format_arg_t *arg,
127
            PyObject **p_output,
128
            _PyUnicodeWriter *writer)
129
106
{
130
106
    char *p;
131
106
    double x;
132
106
    Py_ssize_t len;
133
106
    int prec;
134
106
    int dtoa_flags = 0;
135
136
106
    x = PyFloat_AsDouble(v);
137
106
    if (x == -1.0 && PyErr_Occurred()) {
138
0
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
139
0
            FORMAT_ERROR(PyExc_TypeError,
140
0
                         "%%%c requires a real number, not %T",
141
0
                         arg->ch, v);
142
0
        }
143
0
        return -1;
144
0
    }
145
146
106
    prec = arg->prec;
147
106
    if (prec < 0)
148
0
        prec = 6;
149
150
106
    if (arg->flags & F_ALT)
151
0
        dtoa_flags |= Py_DTSF_ALT;
152
106
    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
153
106
    if (p == NULL)
154
0
        return -1;
155
106
    len = strlen(p);
156
106
    if (writer) {
157
0
        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
158
0
            PyMem_Free(p);
159
0
            return -1;
160
0
        }
161
0
    }
162
106
    else
163
106
        *p_output = _PyUnicode_FromASCII(p, len);
164
106
    PyMem_Free(p);
165
106
    return 0;
166
106
}
167
168
169
/* _PyUnicode_FormatLong() emulates the format codes d, i, u, o, x and X, and
170
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
171
 * Python's regular ints.
172
 * Return value:  a new PyUnicodeObject*, or NULL if error.
173
 *     The output string is of the form
174
 *         "-"? ("0o" | "0x" | "0X")? digit+
175
 *     "0o"/"0x"/"0X" are present only for o, x and X conversions, with a
176
 *         nonzero alt argument (F_ALT).  The case of hex digits will be correct.
177
 *     There will be at least prec digits, zero-filled on the left if
178
 *         necessary to get that many.
179
 * val          object to be converted
180
 * alt          nonzero to keep the "0o"/"0x"/"0X" base prefix (F_ALT)
181
 * prec         minimum number of digits; 0-fill on left if needed
182
 * type         a character in [diuoxX]; i and u act the same as d
183
 *
184
 * CAUTION:  o, x and X conversions on regular ints can never
185
 * produce a '-' sign, but can for Python's unbounded ints.
186
 */
187
PyObject *
188
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
189
9.62M
{
190
9.62M
    PyObject *result = NULL;
191
9.62M
    char *buf;
192
9.62M
    Py_ssize_t i;
193
9.62M
    int sign;           /* 1 if '-', else 0 */
194
9.62M
    int len;            /* number of characters */
195
9.62M
    Py_ssize_t llen;
196
9.62M
    int numdigits;      /* len == numnondigits + numdigits */
197
9.62M
    int numnondigits = 0;
198
199
    /* Avoid int overflow in numnondigits + prec (numnondigits is at most 3) */
200
9.62M
    if (prec > INT_MAX-3) {
201
0
        PyErr_SetString(PyExc_OverflowError,
202
0
                        "precision too large");
203
0
        return NULL;
204
0
    }
205
206
9.62M
    assert(PyLong_Check(val));
207
208
9.62M
    switch (type) {
209
0
    default:
210
0
        Py_UNREACHABLE();
211
3.66M
    case 'd':
212
3.66M
    case 'i':
213
3.66M
    case 'u':
214
        /* int and int subclasses should print numerically when a numeric */
215
        /* format code is used (see issue18780) */
216
3.66M
        result = PyNumber_ToBase(val, 10);
217
3.66M
        break;
218
0
    case 'o':
219
0
        numnondigits = 2;
220
0
        result = PyNumber_ToBase(val, 8);
221
0
        break;
222
87
    case 'x':
223
5.96M
    case 'X':
224
5.96M
        numnondigits = 2;
225
5.96M
        result = PyNumber_ToBase(val, 16);
226
5.96M
        break;
227
9.62M
    }
228
9.62M
    if (!result)
229
0
        return NULL;
230
231
9.62M
    assert(_PyUnicode_IsModifiable(result));
232
9.62M
    assert(PyUnicode_IS_ASCII(result));
233
234
    /* To modify the string in-place, there can only be one reference. */
235
9.62M
    if (!_PyObject_IsUniquelyReferenced(result)) {
236
0
        Py_DECREF(result);
237
0
        PyErr_BadInternalCall();
238
0
        return NULL;
239
0
    }
240
9.62M
    buf = PyUnicode_DATA(result);
241
9.62M
    llen = PyUnicode_GET_LENGTH(result);
242
9.62M
    if (llen > INT_MAX) {
243
0
        Py_DECREF(result);
244
0
        PyErr_SetString(PyExc_ValueError,
245
0
                        "string too large in _PyUnicode_FormatLong");
246
0
        return NULL;
247
0
    }
248
9.62M
    len = (int)llen;
249
9.62M
    sign = buf[0] == '-';
250
9.62M
    numnondigits += sign;
251
9.62M
    numdigits = len - numnondigits;
252
9.62M
    assert(numdigits > 0);
253
254
    /* Get rid of base marker unless F_ALT */
255
9.62M
    if (((alt) == 0 &&
256
9.62M
        (type == 'o' || type == 'x' || type == 'X'))) {
257
5.96M
        assert(buf[sign] == '0');
258
5.96M
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
259
5.96M
               buf[sign+1] == 'o');
260
5.96M
        numnondigits -= 2;
261
5.96M
        buf += 2;
262
5.96M
        len -= 2;
263
5.96M
        if (sign)
264
0
            buf[0] = '-';
265
5.96M
        assert(len == numnondigits + numdigits);
266
5.96M
        assert(numdigits > 0);
267
5.96M
    }
268
269
    /* Fill with leading zeroes to reach the minimum number of digits (prec). */
270
9.62M
    if (prec > numdigits) {
271
0
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
272
0
                                numnondigits + prec);
273
0
        char *b1;
274
0
        if (!r1) {
275
0
            Py_DECREF(result);
276
0
            return NULL;
277
0
        }
278
0
        b1 = PyBytes_AS_STRING(r1);
279
0
        for (i = 0; i < numnondigits; ++i)
280
0
            *b1++ = *buf++;
281
0
        for (i = 0; i < prec - numdigits; i++)
282
0
            *b1++ = '0';
283
0
        for (i = 0; i < numdigits; i++)
284
0
            *b1++ = *buf++;
285
0
        *b1 = '\0';
286
0
        Py_SETREF(result, r1);
287
0
        buf = PyBytes_AS_STRING(result);
288
0
        len = numnondigits + prec;
289
0
    }
290
291
    /* Fix up case for hex conversions. */
292
9.62M
    if (type == 'X') {
293
        /* Need to convert all lower case letters to upper case,
294
           and need to convert 0x to 0X (and -0x to -0X). */
295
41.2M
        for (i = 0; i < len; i++)
296
35.2M
            if (buf[i] >= 'a' && buf[i] <= 'x')
297
7.41M
                buf[i] -= 'a'-'A';
298
5.96M
    }
299
9.62M
    if (!PyUnicode_Check(result)
300
9.62M
        || buf != PyUnicode_DATA(result)) {
301
5.96M
        PyObject *unicode;
302
5.96M
        unicode = _PyUnicode_FromASCII(buf, len);
303
5.96M
        Py_SETREF(result, unicode);
304
5.96M
    }
305
3.66M
    else if (len != PyUnicode_GET_LENGTH(result)) {
306
0
        if (PyUnicode_Resize(&result, len) < 0)
307
0
            Py_CLEAR(result);
308
0
    }
309
9.62M
    return result;
310
9.62M
}
311
312
313
/* Format an integer or a float as an integer.
314
 * Return 1 if the number has been formatted into the writer,
315
 *        0 if the number has been formatted into *p_output
316
 *       -1 and raise an exception on error */
317
static int
318
mainformatlong(PyObject *v,
319
               struct unicode_formatter_t *ctx,
320
               struct unicode_format_arg_t *arg,
321
               PyObject **p_output,
322
               _PyUnicodeWriter *writer)
323
23.7M
{
324
23.7M
    PyObject *iobj, *res;
325
23.7M
    char type = (char)arg->ch;
326
327
23.7M
    if (!PyNumber_Check(v))
328
4.77M
        goto wrongtype;
329
330
    /* make sure number is a type of integer for o, x, and X */
331
18.9M
    if (!PyLong_Check(v)) {
332
0
        if (type == 'o' || type == 'x' || type == 'X') {
333
0
            iobj = _PyNumber_Index(v);
334
0
        }
335
0
        else {
336
0
            iobj = PyNumber_Long(v);
337
0
        }
338
0
        if (iobj == NULL ) {
339
0
            if (PyErr_ExceptionMatches(PyExc_TypeError))
340
0
                goto wrongtype;
341
0
            return -1;
342
0
        }
343
0
        assert(PyLong_Check(iobj));
344
0
    }
345
18.9M
    else {
346
18.9M
        iobj = Py_NewRef(v);
347
18.9M
    }
348
349
18.9M
    if (PyLong_CheckExact(v)
350
18.9M
        && arg->width == -1 && arg->prec == -1
351
15.3M
        && !(arg->flags & (F_SIGN | F_BLANK))
352
15.3M
        && type != 'X')
353
9.36M
    {
354
        /* Fast path */
355
9.36M
        int alternate = arg->flags & F_ALT;
356
9.36M
        int base;
357
358
9.36M
        switch(type)
359
9.36M
        {
360
0
            default:
361
0
                Py_UNREACHABLE();
362
9.36M
            case 'd':
363
9.36M
            case 'i':
364
9.36M
            case 'u':
365
9.36M
                base = 10;
366
9.36M
                break;
367
0
            case 'o':
368
0
                base = 8;
369
0
                break;
370
33
            case 'x':
371
33
            case 'X':
372
33
                base = 16;
373
33
                break;
374
9.36M
        }
375
376
9.36M
        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
377
0
            Py_DECREF(iobj);
378
0
            return -1;
379
0
        }
380
9.36M
        Py_DECREF(iobj);
381
9.36M
        return 1;
382
9.36M
    }
383
384
9.62M
    res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
385
9.62M
    Py_DECREF(iobj);
386
9.62M
    if (res == NULL)
387
0
        return -1;
388
9.62M
    *p_output = res;
389
9.62M
    return 0;
390
391
4.77M
wrongtype:
392
4.77M
    switch(type)
393
4.77M
    {
394
0
        case 'o':
395
0
        case 'x':
396
0
        case 'X':
397
0
            FORMAT_ERROR(PyExc_TypeError,
398
0
                         "%%%c requires an integer, not %T",
399
0
                         arg->ch, v);
400
0
            break;
401
4.77M
        default:
402
4.77M
            FORMAT_ERROR(PyExc_TypeError,
403
4.77M
                         "%%%c requires a real number, not %T",
404
4.77M
                         arg->ch, v);
405
4.77M
            break;
406
4.77M
    }
407
4.77M
    return -1;
408
4.77M
}
409
410
411
static Py_UCS4
412
formatchar(PyObject *v,
413
           struct unicode_formatter_t *ctx,
414
           struct unicode_format_arg_t *arg)
415
0
{
416
    /* Returns the character to emit for %c, or (Py_UCS4)-1 with an exception set. */
417
0
    if (PyUnicode_Check(v)) {
418
0
        if (PyUnicode_GET_LENGTH(v) == 1) {
419
0
            return PyUnicode_READ_CHAR(v, 0);
420
0
        }
421
0
        FORMAT_ERROR(PyExc_TypeError,
422
0
                     "%%c requires an integer or a unicode character, "
423
0
                     "not a string of length %zd",
424
0
                     PyUnicode_GET_LENGTH(v));
425
0
        return (Py_UCS4) -1;
426
0
    }
427
0
    else {
428
0
        int overflow;
429
0
        long x = PyLong_AsLongAndOverflow(v, &overflow);
430
0
        if (x == -1 && PyErr_Occurred()) {
431
0
            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
432
0
                FORMAT_ERROR(PyExc_TypeError,
433
0
                             "%%c requires an integer or a unicode character, "
434
0
                             "not %T",
435
0
                             v);
436
0
            }
437
0
            return (Py_UCS4) -1;
438
0
        }
439
440
0
        if (x < 0 || x > MAX_UNICODE) {
441
            /* this includes an overflow in converting to C long */
442
0
            FORMAT_ERROR(PyExc_OverflowError,
443
0
                         "%%c argument not in range(0x110000)%s", "");
444
0
            return (Py_UCS4) -1;
445
0
        }
446
447
0
        return (Py_UCS4) x;
448
0
    }
449
0
}
450
451
452
/* Parse options of an argument: flags, width, precision.
453
   Handle also "%(name)" syntax.
454
455
   Return 0 on success, with the parsed options stored in *arg and
456
   arg->ch set to the conversion character.
457
   Raise an exception and return -1 on error. */
458
static int
459
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
460
                         struct unicode_format_arg_t *arg)
461
69.1M
{
462
69.1M
#define FORMAT_READ(ctx) \
463
73.0M
        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
464
465
69.1M
    PyObject *v;
466
467
69.1M
    if (arg->ch == '(') {
468
        /* Get argument value from a dictionary. Example: "%(name)s". */
469
39.0k
        Py_ssize_t keystart;
470
39.0k
        Py_ssize_t keylen;
471
39.0k
        int pcount = 1;
472
473
39.0k
        if (ctx->dict == NULL) {
474
0
            PyErr_Format(PyExc_TypeError,
475
0
                         "format requires a mapping, not %T",
476
0
                         ctx->args);
477
0
            return -1;
478
0
        }
479
39.0k
        ++ctx->fmtpos;
480
39.0k
        --ctx->fmtcnt;
481
39.0k
        keystart = ctx->fmtpos;
482
        /* Skip over balanced parentheses */
483
351k
        while (pcount > 0 && --ctx->fmtcnt >= 0) {
484
312k
            arg->ch = FORMAT_READ(ctx);
485
312k
            if (arg->ch == ')')
486
39.0k
                --pcount;
487
273k
            else if (arg->ch == '(')
488
0
                ++pcount;
489
312k
            ctx->fmtpos++;
490
312k
        }
491
39.0k
        keylen = ctx->fmtpos - keystart - 1;
492
39.0k
        if (ctx->fmtcnt < 0 || pcount > 0) {
493
0
            PyErr_Format(PyExc_ValueError,
494
0
                         "stray %% or incomplete format key at position %zd",
495
0
                         arg->fmtstart);
496
0
            return -1;
497
0
        }
498
39.0k
        arg->key = PyUnicode_Substring(ctx->fmtstr,
499
39.0k
                                       keystart, keystart + keylen);
500
39.0k
        if (arg->key == NULL)
501
0
            return -1;
502
39.0k
        if (ctx->args_owned) {
503
27.9k
            ctx->args_owned = 0;
504
27.9k
            Py_DECREF(ctx->args);
505
27.9k
        }
506
39.0k
        ctx->args = PyObject_GetItem(ctx->dict, arg->key);
507
39.0k
        if (ctx->args == NULL)
508
0
            return -1;
509
39.0k
        ctx->args_owned = 1;
510
39.0k
        ctx->arglen = -3;
511
39.0k
        ctx->argidx = -4;
512
39.0k
    }
513
69.0M
    else {
514
69.0M
        if (ctx->arglen < -1) {
515
0
            PyErr_Format(PyExc_ValueError,
516
0
                         "format requires a parenthesised mapping key "
517
0
                         "at position %zd",
518
0
                         arg->fmtstart);
519
0
            return -1;
520
0
        }
521
69.0M
    }
522
523
    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
524
69.1M
    while (--ctx->fmtcnt >= 0) {
525
69.1M
        arg->ch = FORMAT_READ(ctx);
526
69.1M
        ctx->fmtpos++;
527
69.1M
        switch (arg->ch) {
528
0
        case '-': arg->flags |= F_LJUST; continue;
529
0
        case '+': arg->flags |= F_SIGN; continue;
530
0
        case ' ': arg->flags |= F_BLANK; continue;
531
33
        case '#': arg->flags |= F_ALT; continue;
532
2.16k
        case '0': arg->flags |= F_ZERO; continue;
533
69.1M
        }
534
69.1M
        break;
535
69.1M
    }
536
537
    /* Parse width. Example: "%10s" => width=10 */
538
69.1M
    if (arg->ch == '*') {
539
0
        if (ctx->arglen < -1) {
540
0
            PyErr_Format(PyExc_ValueError,
541
0
                    "* cannot be used with a parenthesised mapping key "
542
0
                    "at position %zd",
543
0
                    arg->fmtstart);
544
0
            return -1;
545
0
        }
546
0
        v = unicode_format_getnextarg(ctx, 0);
547
0
        if (v == NULL)
548
0
            return -1;
549
0
        if (!PyLong_Check(v)) {
550
0
            FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
551
0
            return -1;
552
0
        }
553
0
        arg->width = PyLong_AsSsize_t(v);
554
0
        if (arg->width == -1 && PyErr_Occurred()) {
555
0
            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
556
0
                FORMAT_ERROR(PyExc_OverflowError,
557
0
                             "too big for width%s", "");
558
0
            }
559
0
            return -1;
560
0
        }
561
0
        if (arg->width < 0) {
562
0
            arg->flags |= F_LJUST;
563
0
            arg->width = -arg->width;
564
0
        }
565
0
        if (--ctx->fmtcnt >= 0) {
566
0
            arg->ch = FORMAT_READ(ctx);
567
0
            ctx->fmtpos++;
568
0
        }
569
0
    }
570
69.1M
    else if (arg->ch >= '0' && arg->ch <= '9') {
571
3.66M
        arg->width = arg->ch - '0';
572
3.66M
        while (--ctx->fmtcnt >= 0) {
573
3.66M
            arg->ch = FORMAT_READ(ctx);
574
3.66M
            ctx->fmtpos++;
575
3.66M
            if (arg->ch < '0' || arg->ch > '9')
576
3.66M
                break;
577
            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
578
               mixing signed and unsigned comparison. Since arg->ch is between
579
               '0' and '9', casting to int is safe. */
580
0
            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
581
0
                PyErr_Format(PyExc_ValueError,
582
0
                             "width too big at position %zd",
583
0
                             arg->fmtstart);
584
0
                return -1;
585
0
            }
586
0
            arg->width = arg->width*10 + (arg->ch - '0');
587
0
        }
588
3.66M
    }
589
590
    /* Parse precision. Example: "%.3f" => prec=3 */
591
69.1M
    if (arg->ch == '.') {
592
106
        arg->prec = 0;
593
106
        if (--ctx->fmtcnt >= 0) {
594
106
            arg->ch = FORMAT_READ(ctx);
595
106
            ctx->fmtpos++;
596
106
        }
597
106
        if (arg->ch == '*') {
598
0
            if (ctx->arglen < -1) {
599
0
                PyErr_Format(PyExc_ValueError,
600
0
                        "* cannot be used with a parenthesised mapping key "
601
0
                        "at position %zd",
602
0
                        arg->fmtstart);
603
0
                return -1;
604
0
            }
605
0
            v = unicode_format_getnextarg(ctx, 0);
606
0
            if (v == NULL)
607
0
                return -1;
608
0
            if (!PyLong_Check(v)) {
609
0
                FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
610
0
                return -1;
611
0
            }
612
0
            arg->prec = PyLong_AsInt(v);
613
0
            if (arg->prec == -1 && PyErr_Occurred()) {
614
0
                if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
615
0
                    FORMAT_ERROR(PyExc_OverflowError,
616
0
                                 "too big for precision%s", "");
617
0
                }
618
0
                return -1;
619
0
            }
620
0
            if (arg->prec < 0)
621
0
                arg->prec = 0;
622
0
            if (--ctx->fmtcnt >= 0) {
623
0
                arg->ch = FORMAT_READ(ctx);
624
0
                ctx->fmtpos++;
625
0
            }
626
0
        }
627
106
        else if (arg->ch >= '0' && arg->ch <= '9') {
628
106
            arg->prec = arg->ch - '0';
629
106
            while (--ctx->fmtcnt >= 0) {
630
106
                arg->ch = FORMAT_READ(ctx);
631
106
                ctx->fmtpos++;
632
106
                if (arg->ch < '0' || arg->ch > '9')
633
106
                    break;
634
0
                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
635
0
                    PyErr_Format(PyExc_ValueError,
636
0
                                 "precision too big at position %zd",
637
0
                                 arg->fmtstart);
638
0
                    return -1;
639
0
                }
640
0
                arg->prec = arg->prec*10 + (arg->ch - '0');
641
0
            }
642
106
        }
643
106
    }
644
645
    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
646
69.1M
    if (ctx->fmtcnt >= 0) {
647
69.1M
        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
648
0
            if (--ctx->fmtcnt >= 0) {
649
0
                arg->ch = FORMAT_READ(ctx);
650
0
                ctx->fmtpos++;
651
0
            }
652
0
        }
653
69.1M
    }
654
69.1M
    if (ctx->fmtcnt < 0) {
655
0
        PyErr_Format(PyExc_ValueError,
656
0
                     "stray %% at position %zd", arg->fmtstart);
657
0
        return -1;
658
0
    }
659
69.1M
    return 0;
660
661
69.1M
#undef FORMAT_READ
662
69.1M
}
663
664
665
/* Format one argument. Supported conversion specifiers:
666
667
   - "s", "r", "a": any type
668
   - "i", "d", "u": int or float
669
   - "o", "x", "X": int
670
   - "e", "E", "f", "F", "g", "G": float
671
   - "c": int or str (1 character)
672
673
   When possible, the output is written directly into the Unicode writer
674
   (ctx->writer). A string is created when padding is required.
675
676
   Return 0 if the argument has been formatted into *p_str,
677
          1 if the argument has been written into ctx->writer,
678
         -1 on error. */
679
static int
680
unicode_format_arg_format(struct unicode_formatter_t *ctx,
681
                          struct unicode_format_arg_t *arg,
682
                          PyObject **p_str)
683
69.1M
{
684
69.1M
    PyObject *v;
685
69.1M
    _PyUnicodeWriter *writer = &ctx->writer;
686
687
69.1M
    if (ctx->fmtcnt == 0)
688
23.6M
        ctx->writer.overallocate = 0;
689
690
69.1M
    v = unicode_format_getnextarg(ctx, 1);
691
69.1M
    if (v == NULL)
692
0
        return -1;
693
694
695
69.1M
    switch (arg->ch) {
696
45.3M
    case 's':
697
45.3M
    case 'r':
698
45.3M
    case 'a':
699
45.3M
        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
700
            /* Fast path */
701
113
            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
702
0
                return -1;
703
113
            return 1;
704
113
        }
705
706
45.3M
        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
707
39.3M
            *p_str = Py_NewRef(v);
708
39.3M
        }
709
5.96M
        else {
710
5.96M
            if (arg->ch == 's')
711
5.96M
                *p_str = PyObject_Str(v);
712
6.48k
            else if (arg->ch == 'r')
713
6.48k
                *p_str = PyObject_Repr(v);
714
0
            else
715
0
                *p_str = PyObject_ASCII(v);
716
5.96M
        }
717
45.3M
        break;
718
719
0
    case 'i':
720
17.8M
    case 'd':
721
17.8M
    case 'u':
722
17.8M
    case 'o':
723
17.8M
    case 'x':
724
23.7M
    case 'X':
725
23.7M
    {
726
23.7M
        int ret = mainformatlong(v, ctx, arg, p_str, writer);
727
23.7M
        if (ret != 0)
728
14.1M
            return ret;
729
9.62M
        arg->sign = 1;
730
9.62M
        break;
731
23.7M
    }
732
733
0
    case 'e':
734
0
    case 'E':
735
106
    case 'f':
736
106
    case 'F':
737
106
    case 'g':
738
106
    case 'G':
739
106
        if (arg->width == -1 && arg->prec == -1
740
0
            && !(arg->flags & (F_SIGN | F_BLANK)))
741
0
        {
742
            /* Fast path */
743
0
            if (formatfloat(v, ctx, arg, NULL, writer) == -1)
744
0
                return -1;
745
0
            return 1;
746
0
        }
747
748
106
        arg->sign = 1;
749
106
        if (formatfloat(v, ctx, arg, p_str, NULL) == -1)
750
0
            return -1;
751
106
        break;
752
753
106
    case 'c':
754
0
    {
755
0
        Py_UCS4 ch = formatchar(v, ctx, arg);
756
0
        if (ch == (Py_UCS4) -1)
757
0
            return -1;
758
0
        if (arg->width == -1 && arg->prec == -1) {
759
            /* Fast path */
760
0
            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
761
0
                return -1;
762
0
            return 1;
763
0
        }
764
0
        *p_str = PyUnicode_FromOrdinal(ch);
765
0
        break;
766
0
    }
767
768
0
    default:
769
0
        if (arg->ch < 128 && Py_ISALPHA(arg->ch)) {
770
0
            PyErr_Format(PyExc_ValueError,
771
0
                         "unsupported format %%%c at position %zd",
772
0
                         (int)arg->ch, arg->fmtstart);
773
0
        }
774
0
        else if (arg->ch == '\'') {
775
0
            PyErr_Format(PyExc_ValueError,
776
0
                         "stray %% at position %zd or unexpected "
777
0
                         "format character \"'\" at position %zd",
778
0
                         arg->fmtstart,
779
0
                         ctx->fmtpos - 1);
780
0
        }
781
0
        else if (arg->ch >= 32 && arg->ch < 127) {
782
0
            PyErr_Format(PyExc_ValueError,
783
0
                         "stray %% at position %zd or unexpected "
784
0
                         "format character '%c' at position %zd",
785
0
                         arg->fmtstart,
786
0
                         (int)arg->ch, ctx->fmtpos - 1);
787
0
        }
788
0
        else if (Py_UNICODE_ISPRINTABLE(arg->ch)) {
789
0
            PyErr_Format(PyExc_ValueError,
790
0
                         "stray %% at position %zd or unexpected "
791
0
                         "format character '%c' (U+%04X) at position %zd",
792
0
                         arg->fmtstart,
793
0
                         (int)arg->ch, (int)arg->ch, ctx->fmtpos - 1);
794
0
        }
795
0
        else {
796
0
            PyErr_Format(PyExc_ValueError,
797
0
                         "stray %% at position %zd or unexpected "
798
0
                         "format character U+%04X at position %zd",
799
0
                         arg->fmtstart, (int)arg->ch, ctx->fmtpos - 1);
800
0
        }
801
0
        return -1;
802
69.1M
    }
803
54.9M
    if (*p_str == NULL)
804
0
        return -1;
805
54.9M
    assert (PyUnicode_Check(*p_str));
806
54.9M
    return 0;
807
54.9M
}
808
809
810
static int
811
unicode_format_arg_output(struct unicode_formatter_t *ctx,
812
                          struct unicode_format_arg_t *arg,
813
                          PyObject *str)
814
54.9M
{
815
54.9M
    Py_ssize_t len;
816
54.9M
    int kind;
817
54.9M
    const void *pbuf;
818
54.9M
    Py_ssize_t pindex;
819
54.9M
    Py_UCS4 signchar;
820
54.9M
    Py_ssize_t buflen;
821
54.9M
    Py_UCS4 maxchar;
822
54.9M
    Py_ssize_t sublen;
823
54.9M
    _PyUnicodeWriter *writer = &ctx->writer;
824
54.9M
    Py_UCS4 fill;
825
826
54.9M
    fill = ' ';
827
54.9M
    if (arg->sign && arg->flags & F_ZERO)
828
2.16k
        fill = '0';
829
830
54.9M
    len = PyUnicode_GET_LENGTH(str);
831
54.9M
    if ((arg->width == -1 || arg->width <= len)
832
54.9M
        && (arg->prec == -1 || arg->prec >= len)
833
54.9M
        && !(arg->flags & (F_SIGN | F_BLANK)))
834
54.9M
    {
835
        /* Fast path */
836
54.9M
        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
837
0
            return -1;
838
54.9M
        return 0;
839
54.9M
    }
840
841
    /* Truncate the string for "s", "r" and "a" formats
842
       if the precision is set */
843
17.3k
    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
844
0
        if (arg->prec >= 0 && len > arg->prec)
845
0
            len = arg->prec;
846
0
    }
847
848
    /* Adjust sign and width */
849
17.3k
    kind = PyUnicode_KIND(str);
850
17.3k
    pbuf = PyUnicode_DATA(str);
851
17.3k
    pindex = 0;
852
17.3k
    signchar = '\0';
853
17.3k
    if (arg->sign) {
854
17.3k
        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
855
17.3k
        if (ch == '-' || ch == '+') {
856
0
            signchar = ch;
857
0
            len--;
858
0
            pindex++;
859
0
        }
860
17.3k
        else if (arg->flags & F_SIGN)
861
0
            signchar = '+';
862
17.3k
        else if (arg->flags & F_BLANK)
863
0
            signchar = ' ';
864
17.3k
        else
865
17.3k
            arg->sign = 0;
866
17.3k
    }
867
17.3k
    if (arg->width < len)
868
106
        arg->width = len;
869
870
    /* Prepare the writer */
871
17.3k
    maxchar = writer->maxchar;
872
17.3k
    if (!(arg->flags & F_LJUST)) {
873
17.3k
        if (arg->sign) {
874
0
            if ((arg->width-1) > len)
875
0
                maxchar = Py_MAX(maxchar, fill);
876
0
        }
877
17.3k
        else {
878
17.3k
            if (arg->width > len)
879
17.2k
                maxchar = Py_MAX(maxchar, fill);
880
17.3k
        }
881
17.3k
    }
882
17.3k
    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
883
0
        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
884
0
        maxchar = Py_MAX(maxchar, strmaxchar);
885
0
    }
886
887
17.3k
    buflen = arg->width;
888
17.3k
    if (arg->sign && len == arg->width)
889
0
        buflen++;
890
17.3k
    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
891
0
        return -1;
892
893
    /* Write the sign if needed */
894
17.3k
    if (arg->sign) {
895
0
        if (fill != ' ') {
896
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
897
0
            writer->pos += 1;
898
0
        }
899
0
        if (arg->width > len)
900
0
            arg->width--;
901
0
    }
902
903
    /* Write the numeric prefix for "x", "X" and "o" formats
904
       if the alternate form is used.
905
       For example, write "0x" for the "%#x" format. */
906
17.3k
    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
907
0
        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
908
0
        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
909
0
        if (fill != ' ') {
910
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
911
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
912
0
            writer->pos += 2;
913
0
            pindex += 2;
914
0
        }
915
0
        arg->width -= 2;
916
0
        if (arg->width < 0)
917
0
            arg->width = 0;
918
0
        len -= 2;
919
0
    }
920
921
    /* Pad left with the fill character if needed */
922
17.3k
    if (arg->width > len && !(arg->flags & F_LJUST)) {
923
17.2k
        sublen = arg->width - len;
924
17.2k
        _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
925
17.2k
        writer->pos += sublen;
926
17.2k
        arg->width = len;
927
17.2k
    }
928
929
    /* If padding with spaces: write sign if needed and/or numeric prefix if
930
       the alternate form is used */
931
17.3k
    if (fill == ' ') {
932
17.1k
        if (arg->sign) {
933
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
934
0
            writer->pos += 1;
935
0
        }
936
17.1k
        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
937
0
            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
938
0
            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
939
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
940
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
941
0
            writer->pos += 2;
942
0
            pindex += 2;
943
0
        }
944
17.1k
    }
945
946
    /* Write characters */
947
17.3k
    if (len) {
948
17.3k
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
949
17.3k
                                      str, pindex, len);
950
17.3k
        writer->pos += len;
951
17.3k
    }
952
953
    /* Pad right with the fill character if needed */
954
17.3k
    if (arg->width > len) {
955
0
        sublen = arg->width - len;
956
0
        _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
957
0
        writer->pos += sublen;
958
0
    }
959
17.3k
    return 0;
960
17.3k
}
961
962
963
/* Helper of PyUnicode_Format(): format one arg.
964
   Return 0 on success, raise an exception and return -1 on error. */
965
static int
966
unicode_format_arg(struct unicode_formatter_t *ctx)
967
69.1M
{
968
69.1M
    struct unicode_format_arg_t arg;
969
69.1M
    PyObject *str;
970
69.1M
    int ret;
971
972
69.1M
    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
973
69.1M
    if (arg.ch == '%') {
974
0
        ctx->fmtpos++;
975
0
        ctx->fmtcnt--;
976
0
        if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0)
977
0
            return -1;
978
0
        return 0;
979
0
    }
980
69.1M
    arg.flags = 0;
981
69.1M
    arg.width = -1;
982
69.1M
    arg.prec = -1;
983
69.1M
    arg.sign = 0;
984
69.1M
    arg.fmtstart = ctx->fmtpos - 1;
985
69.1M
    arg.key = NULL;
986
69.1M
    str = NULL;
987
988
69.1M
    ret = unicode_format_arg_parse(ctx, &arg);
989
69.1M
    if (ret == -1) {
990
0
        goto onError;
991
0
    }
992
993
69.1M
    ret = unicode_format_arg_format(ctx, &arg, &str);
994
69.1M
    if (ret == -1) {
995
4.77M
        goto onError;
996
4.77M
    }
997
998
64.3M
    if (ret != 1) {
999
54.9M
        ret = unicode_format_arg_output(ctx, &arg, str);
1000
54.9M
        Py_DECREF(str);
1001
54.9M
        if (ret == -1) {
1002
0
            goto onError;
1003
0
        }
1004
54.9M
    }
1005
1006
64.3M
    if (ctx->dict && (ctx->argidx < ctx->arglen)) {
1007
        // XXX: Never happens?
1008
0
        PyErr_SetString(PyExc_TypeError,
1009
0
                        "not all arguments converted during string formatting");
1010
0
        goto onError;
1011
0
    }
1012
64.3M
    Py_XDECREF(arg.key);
1013
64.3M
    return 0;
1014
1015
4.77M
  onError:
1016
4.77M
    Py_XDECREF(arg.key);
1017
4.77M
    return -1;
1018
64.3M
}
1019
1020
1021
/* Build a new string from `format` and `args`: runs of characters without
   '%' are copied verbatim, and every '%' specifier is expanded by
   unicode_format_arg().

   Return the result of _PyUnicodeWriter_Finish() on success.  On error
   return NULL; this includes a NULL `format` or `args`
   (PyErr_BadInternalCall()), a failing ensure_unicode(format), a failing
   specifier, and positional arguments left unconverted. */
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{
    struct unicode_formatter_t ctx;

    if (format == NULL || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    if (ensure_unicode(format) < 0) {
        return NULL;
    }

    /* Format string cursor. */
    ctx.fmtstr = format;
    ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
    ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
    ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
    ctx.fmtpos = 0;

    /* Output writer. */
    _PyUnicodeWriter_Init(&ctx.writer);
    ctx.writer.min_length = ctx.fmtcnt + 100;
    ctx.writer.overallocate = 1;

    /* Argument bookkeeping.  A non-tuple gets arglen -1 / argidx -2, so
       "argidx < arglen" holds until something changes argidx
       (presumably the argument fetcher, once the single value has been
       consumed -- confirm there). */
    const int is_tuple = PyTuple_Check(args);
    if (is_tuple) {
        ctx.arglen = PyTuple_Size(args);
        ctx.argidx = 0;
    }
    else {
        ctx.arglen = -1;
        ctx.argidx = -2;
    }
    ctx.args_owned = 0;
    ctx.dict = (PyMapping_Check(args) && !is_tuple && !PyUnicode_Check(args))
                   ? args
                   : NULL;
    ctx.args = args;

    /* fmtcnt counts the characters still to be scanned; it is
       decremented once for every character consumed. */
    while (--ctx.fmtcnt >= 0) {
        if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) == '%') {
            /* Step over the '%' and expand the specifier. */
            ctx.fmtpos++;
            if (unicode_format_arg(&ctx) == -1) {
                goto fail;
            }
            continue;
        }

        /* Literal run: extend up to the next '%' or the end. */
        Py_ssize_t literal_start = ctx.fmtpos++;
        while (ctx.fmtcnt >= 0 &&
               PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
            ctx.fmtpos++;
            ctx.fmtcnt--;
        }
        if (ctx.fmtcnt < 0) {
            /* The scan went one past the end: step back.  This is the
               last write, so stop over-allocating. */
            ctx.fmtpos--;
            ctx.writer.overallocate = 0;
        }

        if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
                                            literal_start, ctx.fmtpos) < 0) {
            goto fail;
        }
    }

    if (ctx.argidx < ctx.arglen && !ctx.dict) {
        const Py_ssize_t required = ctx.arglen < 0 ? 0 : ctx.argidx;
        const Py_ssize_t got = ctx.arglen < 0 ? 1 : ctx.arglen;
        PyErr_Format(PyExc_TypeError,
                     "not all arguments converted during string formatting "
                     "(required %zd, got %zd)",
                     required, got);
        goto fail;
    }

    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return _PyUnicodeWriter_Finish(&ctx.writer);

  fail:
    _PyUnicodeWriter_Dealloc(&ctx.writer);
    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return NULL;
}