Coverage Report

Created: 2026-01-09 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/unicode_format.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
// PyUnicode_Format() implementation
42
43
#include "Python.h"
44
#include "pycore_abstract.h"      // _PyIndex_Check()
45
#include "pycore_format.h"        // F_ALT
46
#include "pycore_long.h"          // _PyLong_FormatWriter()
47
#include "pycore_object.h"        // _PyObject_IsUniquelyReferenced()
48
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
49
50
51
0
#define MAX_UNICODE _Py_MAX_UNICODE
52
32.0M
#define ensure_unicode _PyUnicode_EnsureUnicode
53
54
struct unicode_formatter_t {
55
    PyObject *args;
56
    int args_owned;
57
    Py_ssize_t arglen, argidx;
58
    PyObject *dict;
59
60
    int fmtkind;
61
    Py_ssize_t fmtcnt, fmtpos;
62
    const void *fmtdata;
63
    PyObject *fmtstr;
64
65
    _PyUnicodeWriter writer;
66
};
67
68
69
struct unicode_format_arg_t {
70
    Py_UCS4 ch;
71
    int flags;
72
    Py_ssize_t width;
73
    int prec;
74
    int sign;
75
};
76
77
78
static PyObject *
79
unicode_format_getnextarg(struct unicode_formatter_t *ctx)
80
67.2M
{
81
67.2M
    Py_ssize_t argidx = ctx->argidx;
82
83
67.2M
    if (argidx < ctx->arglen) {
84
67.2M
        ctx->argidx++;
85
67.2M
        if (ctx->arglen < 0)
86
14.1M
            return ctx->args;
87
53.0M
        else
88
53.0M
            return PyTuple_GetItem(ctx->args, argidx);
89
67.2M
    }
90
0
    PyErr_SetString(PyExc_TypeError,
91
0
                    "not enough arguments for format string");
92
0
    return NULL;
93
67.2M
}
94
95
96
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
97
98
/* Format a float into the writer if the writer is not NULL, or into *p_output
99
   otherwise.
100
101
   Return 0 on success, raise an exception and return -1 on error. */
102
static int
103
formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
104
            PyObject **p_output,
105
            _PyUnicodeWriter *writer)
106
109
{
107
109
    char *p;
108
109
    double x;
109
109
    Py_ssize_t len;
110
109
    int prec;
111
109
    int dtoa_flags = 0;
112
113
109
    x = PyFloat_AsDouble(v);
114
109
    if (x == -1.0 && PyErr_Occurred())
115
0
        return -1;
116
117
109
    prec = arg->prec;
118
109
    if (prec < 0)
119
0
        prec = 6;
120
121
109
    if (arg->flags & F_ALT)
122
0
        dtoa_flags |= Py_DTSF_ALT;
123
109
    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
124
109
    if (p == NULL)
125
0
        return -1;
126
109
    len = strlen(p);
127
109
    if (writer) {
128
0
        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
129
0
            PyMem_Free(p);
130
0
            return -1;
131
0
        }
132
0
    }
133
109
    else
134
109
        *p_output = _PyUnicode_FromASCII(p, len);
135
109
    PyMem_Free(p);
136
109
    return 0;
137
109
}
138
139
140
/* formatlong() emulates the format codes d, u, o, x and X, and
141
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
142
 * Python's regular ints.
143
 * Return value:  a new PyUnicodeObject*, or NULL if error.
144
 *     The output string is of the form
145
 *         "-"? ("0x" | "0X")? digit+
146
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
147
 *         set in flags.  The case of hex digits will be correct,
148
 *     There will be at least prec digits, zero-filled on the left if
149
 *         necessary to get that many.
150
 * val          object to be converted
151
 * flags        bitmask of format flags; only F_ALT is looked at
152
 * prec         minimum number of digits; 0-fill on left if needed
153
 * type         a character in [duoxX]; u acts the same as d
154
 *
155
 * CAUTION:  o, x and X conversions on regular ints can never
156
 * produce a '-' sign, but can for Python's unbounded ints.
157
 */
158
PyObject *
159
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
160
10.9M
{
161
10.9M
    PyObject *result = NULL;
162
10.9M
    char *buf;
163
10.9M
    Py_ssize_t i;
164
10.9M
    int sign;           /* 1 if '-', else 0 */
165
10.9M
    int len;            /* number of characters */
166
10.9M
    Py_ssize_t llen;
167
10.9M
    int numdigits;      /* len == numnondigits + numdigits */
168
10.9M
    int numnondigits = 0;
169
170
    /* Avoid exceeding SSIZE_T_MAX */
171
10.9M
    if (prec > INT_MAX-3) {
172
0
        PyErr_SetString(PyExc_OverflowError,
173
0
                        "precision too large");
174
0
        return NULL;
175
0
    }
176
177
10.9M
    assert(PyLong_Check(val));
178
179
10.9M
    switch (type) {
180
0
    default:
181
0
        Py_UNREACHABLE();
182
5.25M
    case 'd':
183
5.25M
    case 'i':
184
5.25M
    case 'u':
185
        /* int and int subclasses should print numerically when a numeric */
186
        /* format code is used (see issue18780) */
187
5.25M
        result = PyNumber_ToBase(val, 10);
188
5.25M
        break;
189
0
    case 'o':
190
0
        numnondigits = 2;
191
0
        result = PyNumber_ToBase(val, 8);
192
0
        break;
193
99
    case 'x':
194
5.64M
    case 'X':
195
5.64M
        numnondigits = 2;
196
5.64M
        result = PyNumber_ToBase(val, 16);
197
5.64M
        break;
198
10.9M
    }
199
10.9M
    if (!result)
200
0
        return NULL;
201
202
10.9M
    assert(_PyUnicode_IsModifiable(result));
203
10.9M
    assert(PyUnicode_IS_ASCII(result));
204
205
    /* To modify the string in-place, there can only be one reference. */
206
10.9M
    if (!_PyObject_IsUniquelyReferenced(result)) {
207
0
        Py_DECREF(result);
208
0
        PyErr_BadInternalCall();
209
0
        return NULL;
210
0
    }
211
10.9M
    buf = PyUnicode_DATA(result);
212
10.9M
    llen = PyUnicode_GET_LENGTH(result);
213
10.9M
    if (llen > INT_MAX) {
214
0
        Py_DECREF(result);
215
0
        PyErr_SetString(PyExc_ValueError,
216
0
                        "string too large in _PyUnicode_FormatLong");
217
0
        return NULL;
218
0
    }
219
10.9M
    len = (int)llen;
220
10.9M
    sign = buf[0] == '-';
221
10.9M
    numnondigits += sign;
222
10.9M
    numdigits = len - numnondigits;
223
10.9M
    assert(numdigits > 0);
224
225
    /* Get rid of base marker unless F_ALT */
226
10.9M
    if (((alt) == 0 &&
227
10.9M
        (type == 'o' || type == 'x' || type == 'X'))) {
228
5.64M
        assert(buf[sign] == '0');
229
5.64M
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
230
5.64M
               buf[sign+1] == 'o');
231
5.64M
        numnondigits -= 2;
232
5.64M
        buf += 2;
233
5.64M
        len -= 2;
234
5.64M
        if (sign)
235
0
            buf[0] = '-';
236
5.64M
        assert(len == numnondigits + numdigits);
237
5.64M
        assert(numdigits > 0);
238
5.64M
    }
239
240
    /* Fill with leading zeroes to meet minimum width. */
241
10.9M
    if (prec > numdigits) {
242
0
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
243
0
                                numnondigits + prec);
244
0
        char *b1;
245
0
        if (!r1) {
246
0
            Py_DECREF(result);
247
0
            return NULL;
248
0
        }
249
0
        b1 = PyBytes_AS_STRING(r1);
250
0
        for (i = 0; i < numnondigits; ++i)
251
0
            *b1++ = *buf++;
252
0
        for (i = 0; i < prec - numdigits; i++)
253
0
            *b1++ = '0';
254
0
        for (i = 0; i < numdigits; i++)
255
0
            *b1++ = *buf++;
256
0
        *b1 = '\0';
257
0
        Py_SETREF(result, r1);
258
0
        buf = PyBytes_AS_STRING(result);
259
0
        len = numnondigits + prec;
260
0
    }
261
262
    /* Fix up case for hex conversions. */
263
10.9M
    if (type == 'X') {
264
        /* Need to convert all lower case letters to upper case.
265
           and need to convert 0x to 0X (and -0x to -0X). */
266
38.8M
        for (i = 0; i < len; i++)
267
33.1M
            if (buf[i] >= 'a' && buf[i] <= 'x')
268
6.88M
                buf[i] -= 'a'-'A';
269
5.64M
    }
270
10.9M
    if (!PyUnicode_Check(result)
271
10.9M
        || buf != PyUnicode_DATA(result)) {
272
5.64M
        PyObject *unicode;
273
5.64M
        unicode = _PyUnicode_FromASCII(buf, len);
274
5.64M
        Py_SETREF(result, unicode);
275
5.64M
    }
276
5.25M
    else if (len != PyUnicode_GET_LENGTH(result)) {
277
0
        if (PyUnicode_Resize(&result, len) < 0)
278
0
            Py_CLEAR(result);
279
0
    }
280
10.9M
    return result;
281
10.9M
}
282
283
284
/* Format an integer or a float as an integer.
285
 * Return 1 if the number has been formatted into the writer,
286
 *        0 if the number has been formatted into *p_output
287
 *       -1 and raise an exception on error */
288
static int
289
mainformatlong(PyObject *v,
290
               struct unicode_format_arg_t *arg,
291
               PyObject **p_output,
292
               _PyUnicodeWriter *writer)
293
23.7M
{
294
23.7M
    PyObject *iobj, *res;
295
23.7M
    char type = (char)arg->ch;
296
297
23.7M
    if (!PyNumber_Check(v))
298
4.46M
        goto wrongtype;
299
300
    /* make sure number is a type of integer for o, x, and X */
301
19.2M
    if (!PyLong_Check(v)) {
302
0
        if (type == 'o' || type == 'x' || type == 'X') {
303
0
            iobj = _PyNumber_Index(v);
304
0
        }
305
0
        else {
306
0
            iobj = PyNumber_Long(v);
307
0
        }
308
0
        if (iobj == NULL ) {
309
0
            if (PyErr_ExceptionMatches(PyExc_TypeError))
310
0
                goto wrongtype;
311
0
            return -1;
312
0
        }
313
0
        assert(PyLong_Check(iobj));
314
0
    }
315
19.2M
    else {
316
19.2M
        iobj = Py_NewRef(v);
317
19.2M
    }
318
319
19.2M
    if (PyLong_CheckExact(v)
320
19.2M
        && arg->width == -1 && arg->prec == -1
321
14.0M
        && !(arg->flags & (F_SIGN | F_BLANK))
322
14.0M
        && type != 'X')
323
8.36M
    {
324
        /* Fast path */
325
8.36M
        int alternate = arg->flags & F_ALT;
326
8.36M
        int base;
327
328
8.36M
        switch(type)
329
8.36M
        {
330
0
            default:
331
0
                Py_UNREACHABLE();
332
8.36M
            case 'd':
333
8.36M
            case 'i':
334
8.36M
            case 'u':
335
8.36M
                base = 10;
336
8.36M
                break;
337
0
            case 'o':
338
0
                base = 8;
339
0
                break;
340
35
            case 'x':
341
35
            case 'X':
342
35
                base = 16;
343
35
                break;
344
8.36M
        }
345
346
8.36M
        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
347
0
            Py_DECREF(iobj);
348
0
            return -1;
349
0
        }
350
8.36M
        Py_DECREF(iobj);
351
8.36M
        return 1;
352
8.36M
    }
353
354
10.9M
    res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
355
10.9M
    Py_DECREF(iobj);
356
10.9M
    if (res == NULL)
357
0
        return -1;
358
10.9M
    *p_output = res;
359
10.9M
    return 0;
360
361
4.46M
wrongtype:
362
4.46M
    switch(type)
363
4.46M
    {
364
0
        case 'o':
365
0
        case 'x':
366
0
        case 'X':
367
0
            PyErr_Format(PyExc_TypeError,
368
0
                    "%%%c format: an integer is required, "
369
0
                    "not %.200s",
370
0
                    type, Py_TYPE(v)->tp_name);
371
0
            break;
372
4.46M
        default:
373
4.46M
            PyErr_Format(PyExc_TypeError,
374
4.46M
                    "%%%c format: a real number is required, "
375
4.46M
                    "not %.200s",
376
4.46M
                    type, Py_TYPE(v)->tp_name);
377
4.46M
            break;
378
4.46M
    }
379
4.46M
    return -1;
380
4.46M
}
381
382
383
static Py_UCS4
384
formatchar(PyObject *v)
385
0
{
386
    /* presume that the buffer is at least 3 characters long */
387
0
    if (PyUnicode_Check(v)) {
388
0
        if (PyUnicode_GET_LENGTH(v) == 1) {
389
0
            return PyUnicode_READ_CHAR(v, 0);
390
0
        }
391
0
        PyErr_Format(PyExc_TypeError,
392
0
                     "%%c requires an int or a unicode character, "
393
0
                     "not a string of length %zd",
394
0
                     PyUnicode_GET_LENGTH(v));
395
0
        return (Py_UCS4) -1;
396
0
    }
397
0
    else {
398
0
        int overflow;
399
0
        long x = PyLong_AsLongAndOverflow(v, &overflow);
400
0
        if (x == -1 && PyErr_Occurred()) {
401
0
            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
402
0
                PyErr_Format(PyExc_TypeError,
403
0
                             "%%c requires an int or a unicode character, not %T",
404
0
                             v);
405
0
                return (Py_UCS4) -1;
406
0
            }
407
0
            return (Py_UCS4) -1;
408
0
        }
409
410
0
        if (x < 0 || x > MAX_UNICODE) {
411
            /* this includes an overflow in converting to C long */
412
0
            PyErr_SetString(PyExc_OverflowError,
413
0
                            "%c arg not in range(0x110000)");
414
0
            return (Py_UCS4) -1;
415
0
        }
416
417
0
        return (Py_UCS4) x;
418
0
    }
419
0
}
420
421
422
/* Parse options of an argument: flags, width, precision.
423
   Handle also "%(name)" syntax.
424
425
   Return 0 if the argument has been formatted into arg->str.
426
   Return 1 if the argument has been written into ctx->writer,
427
   Raise an exception and return -1 on error. */
428
static int
429
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
430
                         struct unicode_format_arg_t *arg)
431
67.2M
{
432
67.2M
#define FORMAT_READ(ctx) \
433
72.8M
        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
434
435
67.2M
    PyObject *v;
436
437
67.2M
    if (arg->ch == '(') {
438
        /* Get argument value from a dictionary. Example: "%(name)s". */
439
38.6k
        Py_ssize_t keystart;
440
38.6k
        Py_ssize_t keylen;
441
38.6k
        PyObject *key;
442
38.6k
        int pcount = 1;
443
444
38.6k
        if (ctx->dict == NULL) {
445
0
            PyErr_SetString(PyExc_TypeError,
446
0
                            "format requires a mapping");
447
0
            return -1;
448
0
        }
449
38.6k
        ++ctx->fmtpos;
450
38.6k
        --ctx->fmtcnt;
451
38.6k
        keystart = ctx->fmtpos;
452
        /* Skip over balanced parentheses */
453
347k
        while (pcount > 0 && --ctx->fmtcnt >= 0) {
454
309k
            arg->ch = FORMAT_READ(ctx);
455
309k
            if (arg->ch == ')')
456
38.6k
                --pcount;
457
270k
            else if (arg->ch == '(')
458
0
                ++pcount;
459
309k
            ctx->fmtpos++;
460
309k
        }
461
38.6k
        keylen = ctx->fmtpos - keystart - 1;
462
38.6k
        if (ctx->fmtcnt < 0 || pcount > 0) {
463
0
            PyErr_SetString(PyExc_ValueError,
464
0
                            "incomplete format key");
465
0
            return -1;
466
0
        }
467
38.6k
        key = PyUnicode_Substring(ctx->fmtstr,
468
38.6k
                                  keystart, keystart + keylen);
469
38.6k
        if (key == NULL)
470
0
            return -1;
471
38.6k
        if (ctx->args_owned) {
472
27.5k
            ctx->args_owned = 0;
473
27.5k
            Py_DECREF(ctx->args);
474
27.5k
        }
475
38.6k
        ctx->args = PyObject_GetItem(ctx->dict, key);
476
38.6k
        Py_DECREF(key);
477
38.6k
        if (ctx->args == NULL)
478
0
            return -1;
479
38.6k
        ctx->args_owned = 1;
480
38.6k
        ctx->arglen = -1;
481
38.6k
        ctx->argidx = -2;
482
38.6k
    }
483
484
    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
485
67.2M
    while (--ctx->fmtcnt >= 0) {
486
67.2M
        arg->ch = FORMAT_READ(ctx);
487
67.2M
        ctx->fmtpos++;
488
67.2M
        switch (arg->ch) {
489
0
        case '-': arg->flags |= F_LJUST; continue;
490
0
        case '+': arg->flags |= F_SIGN; continue;
491
0
        case ' ': arg->flags |= F_BLANK; continue;
492
35
        case '#': arg->flags |= F_ALT; continue;
493
1.67k
        case '0': arg->flags |= F_ZERO; continue;
494
67.2M
        }
495
67.2M
        break;
496
67.2M
    }
497
498
    /* Parse width. Example: "%10s" => width=10 */
499
67.2M
    if (arg->ch == '*') {
500
0
        v = unicode_format_getnextarg(ctx);
501
0
        if (v == NULL)
502
0
            return -1;
503
0
        if (!PyLong_Check(v)) {
504
0
            PyErr_SetString(PyExc_TypeError,
505
0
                            "* wants int");
506
0
            return -1;
507
0
        }
508
0
        arg->width = PyLong_AsSsize_t(v);
509
0
        if (arg->width == -1 && PyErr_Occurred())
510
0
            return -1;
511
0
        if (arg->width < 0) {
512
0
            arg->flags |= F_LJUST;
513
0
            arg->width = -arg->width;
514
0
        }
515
0
        if (--ctx->fmtcnt >= 0) {
516
0
            arg->ch = FORMAT_READ(ctx);
517
0
            ctx->fmtpos++;
518
0
        }
519
0
    }
520
67.2M
    else if (arg->ch >= '0' && arg->ch <= '9') {
521
5.25M
        arg->width = arg->ch - '0';
522
5.25M
        while (--ctx->fmtcnt >= 0) {
523
5.25M
            arg->ch = FORMAT_READ(ctx);
524
5.25M
            ctx->fmtpos++;
525
5.25M
            if (arg->ch < '0' || arg->ch > '9')
526
5.25M
                break;
527
            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
528
               mixing signed and unsigned comparison. Since arg->ch is between
529
               '0' and '9', casting to int is safe. */
530
0
            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
531
0
                PyErr_SetString(PyExc_ValueError,
532
0
                                "width too big");
533
0
                return -1;
534
0
            }
535
0
            arg->width = arg->width*10 + (arg->ch - '0');
536
0
        }
537
5.25M
    }
538
539
    /* Parse precision. Example: "%.3f" => prec=3 */
540
67.2M
    if (arg->ch == '.') {
541
109
        arg->prec = 0;
542
109
        if (--ctx->fmtcnt >= 0) {
543
109
            arg->ch = FORMAT_READ(ctx);
544
109
            ctx->fmtpos++;
545
109
        }
546
109
        if (arg->ch == '*') {
547
0
            v = unicode_format_getnextarg(ctx);
548
0
            if (v == NULL)
549
0
                return -1;
550
0
            if (!PyLong_Check(v)) {
551
0
                PyErr_SetString(PyExc_TypeError,
552
0
                                "* wants int");
553
0
                return -1;
554
0
            }
555
0
            arg->prec = PyLong_AsInt(v);
556
0
            if (arg->prec == -1 && PyErr_Occurred())
557
0
                return -1;
558
0
            if (arg->prec < 0)
559
0
                arg->prec = 0;
560
0
            if (--ctx->fmtcnt >= 0) {
561
0
                arg->ch = FORMAT_READ(ctx);
562
0
                ctx->fmtpos++;
563
0
            }
564
0
        }
565
109
        else if (arg->ch >= '0' && arg->ch <= '9') {
566
109
            arg->prec = arg->ch - '0';
567
109
            while (--ctx->fmtcnt >= 0) {
568
109
                arg->ch = FORMAT_READ(ctx);
569
109
                ctx->fmtpos++;
570
109
                if (arg->ch < '0' || arg->ch > '9')
571
109
                    break;
572
0
                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
573
0
                    PyErr_SetString(PyExc_ValueError,
574
0
                                    "precision too big");
575
0
                    return -1;
576
0
                }
577
0
                arg->prec = arg->prec*10 + (arg->ch - '0');
578
0
            }
579
109
        }
580
109
    }
581
582
    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
583
67.2M
    if (ctx->fmtcnt >= 0) {
584
67.2M
        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
585
0
            if (--ctx->fmtcnt >= 0) {
586
0
                arg->ch = FORMAT_READ(ctx);
587
0
                ctx->fmtpos++;
588
0
            }
589
0
        }
590
67.2M
    }
591
67.2M
    if (ctx->fmtcnt < 0) {
592
0
        PyErr_SetString(PyExc_ValueError,
593
0
                        "incomplete format");
594
0
        return -1;
595
0
    }
596
67.2M
    return 0;
597
598
67.2M
#undef FORMAT_READ
599
67.2M
}
600
601
602
/* Format one argument. Supported conversion specifiers:
603
604
   - "s", "r", "a": any type
605
   - "i", "d", "u": int or float
606
   - "o", "x", "X": int
607
   - "e", "E", "f", "F", "g", "G": float
608
   - "c": int or str (1 character)
609
610
   When possible, the output is written directly into the Unicode writer
611
   (ctx->writer). A string is created when padding is required.
612
613
   Return 0 if the argument has been formatted into *p_str,
614
          1 if the argument has been written into ctx->writer,
615
         -1 on error. */
616
static int
617
unicode_format_arg_format(struct unicode_formatter_t *ctx,
618
                          struct unicode_format_arg_t *arg,
619
                          PyObject **p_str)
620
67.2M
{
621
67.2M
    PyObject *v;
622
67.2M
    _PyUnicodeWriter *writer = &ctx->writer;
623
624
67.2M
    if (ctx->fmtcnt == 0)
625
23.6M
        ctx->writer.overallocate = 0;
626
627
67.2M
    v = unicode_format_getnextarg(ctx);
628
67.2M
    if (v == NULL)
629
0
        return -1;
630
631
632
67.2M
    switch (arg->ch) {
633
43.5M
    case 's':
634
43.5M
    case 'r':
635
43.5M
    case 'a':
636
43.5M
        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
637
            /* Fast path */
638
121
            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
639
0
                return -1;
640
121
            return 1;
641
121
        }
642
643
43.5M
        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
644
37.8M
            *p_str = Py_NewRef(v);
645
37.8M
        }
646
5.65M
        else {
647
5.65M
            if (arg->ch == 's')
648
5.64M
                *p_str = PyObject_Str(v);
649
6.16k
            else if (arg->ch == 'r')
650
6.16k
                *p_str = PyObject_Repr(v);
651
0
            else
652
0
                *p_str = PyObject_ASCII(v);
653
5.65M
        }
654
43.5M
        break;
655
656
0
    case 'i':
657
18.0M
    case 'd':
658
18.0M
    case 'u':
659
18.0M
    case 'o':
660
18.0M
    case 'x':
661
23.7M
    case 'X':
662
23.7M
    {
663
23.7M
        int ret = mainformatlong(v, arg, p_str, writer);
664
23.7M
        if (ret != 0)
665
12.8M
            return ret;
666
10.9M
        arg->sign = 1;
667
10.9M
        break;
668
23.7M
    }
669
670
0
    case 'e':
671
0
    case 'E':
672
109
    case 'f':
673
109
    case 'F':
674
109
    case 'g':
675
109
    case 'G':
676
109
        if (arg->width == -1 && arg->prec == -1
677
0
            && !(arg->flags & (F_SIGN | F_BLANK)))
678
0
        {
679
            /* Fast path */
680
0
            if (formatfloat(v, arg, NULL, writer) == -1)
681
0
                return -1;
682
0
            return 1;
683
0
        }
684
685
109
        arg->sign = 1;
686
109
        if (formatfloat(v, arg, p_str, NULL) == -1)
687
0
            return -1;
688
109
        break;
689
690
109
    case 'c':
691
0
    {
692
0
        Py_UCS4 ch = formatchar(v);
693
0
        if (ch == (Py_UCS4) -1)
694
0
            return -1;
695
0
        if (arg->width == -1 && arg->prec == -1) {
696
            /* Fast path */
697
0
            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
698
0
                return -1;
699
0
            return 1;
700
0
        }
701
0
        *p_str = PyUnicode_FromOrdinal(ch);
702
0
        break;
703
0
    }
704
705
0
    default:
706
0
        PyErr_Format(PyExc_ValueError,
707
0
                     "unsupported format character '%c' (0x%x) "
708
0
                     "at index %zd",
709
0
                     (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?',
710
0
                     (int)arg->ch,
711
0
                     ctx->fmtpos - 1);
712
0
        return -1;
713
67.2M
    }
714
54.4M
    if (*p_str == NULL)
715
0
        return -1;
716
54.4M
    assert (PyUnicode_Check(*p_str));
717
54.4M
    return 0;
718
54.4M
}
719
720
721
static int
722
unicode_format_arg_output(struct unicode_formatter_t *ctx,
723
                          struct unicode_format_arg_t *arg,
724
                          PyObject *str)
725
54.4M
{
726
54.4M
    Py_ssize_t len;
727
54.4M
    int kind;
728
54.4M
    const void *pbuf;
729
54.4M
    Py_ssize_t pindex;
730
54.4M
    Py_UCS4 signchar;
731
54.4M
    Py_ssize_t buflen;
732
54.4M
    Py_UCS4 maxchar;
733
54.4M
    Py_ssize_t sublen;
734
54.4M
    _PyUnicodeWriter *writer = &ctx->writer;
735
54.4M
    Py_UCS4 fill;
736
737
54.4M
    fill = ' ';
738
54.4M
    if (arg->sign && arg->flags & F_ZERO)
739
1.67k
        fill = '0';
740
741
54.4M
    len = PyUnicode_GET_LENGTH(str);
742
54.4M
    if ((arg->width == -1 || arg->width <= len)
743
54.4M
        && (arg->prec == -1 || arg->prec >= len)
744
54.4M
        && !(arg->flags & (F_SIGN | F_BLANK)))
745
54.4M
    {
746
        /* Fast path */
747
54.4M
        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
748
0
            return -1;
749
54.4M
        return 0;
750
54.4M
    }
751
752
    /* Truncate the string for "s", "r" and "a" formats
753
       if the precision is set */
754
17.7k
    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
755
0
        if (arg->prec >= 0 && len > arg->prec)
756
0
            len = arg->prec;
757
0
    }
758
759
    /* Adjust sign and width */
760
17.7k
    kind = PyUnicode_KIND(str);
761
17.7k
    pbuf = PyUnicode_DATA(str);
762
17.7k
    pindex = 0;
763
17.7k
    signchar = '\0';
764
17.7k
    if (arg->sign) {
765
17.7k
        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
766
17.7k
        if (ch == '-' || ch == '+') {
767
0
            signchar = ch;
768
0
            len--;
769
0
            pindex++;
770
0
        }
771
17.7k
        else if (arg->flags & F_SIGN)
772
0
            signchar = '+';
773
17.7k
        else if (arg->flags & F_BLANK)
774
0
            signchar = ' ';
775
17.7k
        else
776
17.7k
            arg->sign = 0;
777
17.7k
    }
778
17.7k
    if (arg->width < len)
779
109
        arg->width = len;
780
781
    /* Prepare the writer */
782
17.7k
    maxchar = writer->maxchar;
783
17.7k
    if (!(arg->flags & F_LJUST)) {
784
17.7k
        if (arg->sign) {
785
0
            if ((arg->width-1) > len)
786
0
                maxchar = Py_MAX(maxchar, fill);
787
0
        }
788
17.7k
        else {
789
17.7k
            if (arg->width > len)
790
17.6k
                maxchar = Py_MAX(maxchar, fill);
791
17.7k
        }
792
17.7k
    }
793
17.7k
    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
794
0
        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
795
0
        maxchar = Py_MAX(maxchar, strmaxchar);
796
0
    }
797
798
17.7k
    buflen = arg->width;
799
17.7k
    if (arg->sign && len == arg->width)
800
0
        buflen++;
801
17.7k
    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
802
0
        return -1;
803
804
    /* Write the sign if needed */
805
17.7k
    if (arg->sign) {
806
0
        if (fill != ' ') {
807
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
808
0
            writer->pos += 1;
809
0
        }
810
0
        if (arg->width > len)
811
0
            arg->width--;
812
0
    }
813
814
    /* Write the numeric prefix for "x", "X" and "o" formats
815
       if the alternate form is used.
816
       For example, write "0x" for the "%#x" format. */
817
17.7k
    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
818
0
        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
819
0
        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
820
0
        if (fill != ' ') {
821
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
822
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
823
0
            writer->pos += 2;
824
0
            pindex += 2;
825
0
        }
826
0
        arg->width -= 2;
827
0
        if (arg->width < 0)
828
0
            arg->width = 0;
829
0
        len -= 2;
830
0
    }
831
832
    /* Pad left with the fill character if needed */
833
17.7k
    if (arg->width > len && !(arg->flags & F_LJUST)) {
834
17.6k
        sublen = arg->width - len;
835
17.6k
        _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
836
17.6k
        writer->pos += sublen;
837
17.6k
        arg->width = len;
838
17.6k
    }
839
840
    /* If padding with spaces: write sign if needed and/or numeric prefix if
841
       the alternate form is used */
842
17.7k
    if (fill == ' ') {
843
17.5k
        if (arg->sign) {
844
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
845
0
            writer->pos += 1;
846
0
        }
847
17.5k
        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
848
0
            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
849
0
            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
850
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
851
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
852
0
            writer->pos += 2;
853
0
            pindex += 2;
854
0
        }
855
17.5k
    }
856
857
    /* Write characters */
858
17.7k
    if (len) {
859
17.7k
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
860
17.7k
                                      str, pindex, len);
861
17.7k
        writer->pos += len;
862
17.7k
    }
863
864
    /* Pad right with the fill character if needed */
865
17.7k
    if (arg->width > len) {
866
0
        sublen = arg->width - len;
867
0
        _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
868
0
        writer->pos += sublen;
869
0
    }
870
17.7k
    return 0;
871
17.7k
}
872
873
874
/* Helper of PyUnicode_Format(): format one arg.
875
   Return 0 on success, raise an exception and return -1 on error. */
876
static int
877
unicode_format_arg(struct unicode_formatter_t *ctx)
878
67.2M
{
879
67.2M
    struct unicode_format_arg_t arg;
880
67.2M
    PyObject *str;
881
67.2M
    int ret;
882
883
67.2M
    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
884
67.2M
    if (arg.ch == '%') {
885
0
        ctx->fmtpos++;
886
0
        ctx->fmtcnt--;
887
0
        if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0)
888
0
            return -1;
889
0
        return 0;
890
0
    }
891
67.2M
    arg.flags = 0;
892
67.2M
    arg.width = -1;
893
67.2M
    arg.prec = -1;
894
67.2M
    arg.sign = 0;
895
67.2M
    str = NULL;
896
897
67.2M
    ret = unicode_format_arg_parse(ctx, &arg);
898
67.2M
    if (ret == -1)
899
0
        return -1;
900
901
67.2M
    ret = unicode_format_arg_format(ctx, &arg, &str);
902
67.2M
    if (ret == -1)
903
4.46M
        return -1;
904
905
62.7M
    if (ret != 1) {
906
54.4M
        ret = unicode_format_arg_output(ctx, &arg, str);
907
54.4M
        Py_DECREF(str);
908
54.4M
        if (ret == -1)
909
0
            return -1;
910
54.4M
    }
911
912
62.7M
    if (ctx->dict && (ctx->argidx < ctx->arglen)) {
913
0
        PyErr_SetString(PyExc_TypeError,
914
0
                        "not all arguments converted during string formatting");
915
0
        return -1;
916
0
    }
917
62.7M
    return 0;
918
62.7M
}
919
920
921
/* Build a new string by applying printf-style formatting: 'format' is the
   format string and 'args' supplies the values (a tuple, a mapping, or a
   single object).

   Return the result of _PyUnicodeWriter_Finish() on success.  Return NULL
   on failure; a NULL 'format' or 'args' is reported through
   PyErr_BadInternalCall(). */
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{
    struct unicode_formatter_t ctx;

    if (format == NULL || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    /* NOTE(review): ensure_unicode() presumably raises on failure --
       confirm; this function only propagates NULL. */
    if (ensure_unicode(format) < 0) {
        return NULL;
    }

    /* Format-string cursor: fmtpos advances, fmtcnt counts down. */
    ctx.fmtstr = format;
    ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
    ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
    ctx.fmtpos = 0;
    ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);

    /* Output writer: ask for the format length plus 100 up front and
       allow over-allocation while more output may follow. */
    _PyUnicodeWriter_Init(&ctx.writer);
    ctx.writer.min_length = ctx.fmtcnt + 100;
    ctx.writer.overallocate = 1;

    /* Classify the arguments.  A tuple is consumed item by item and is
       never treated as a mapping; any other object uses the (-1, -2)
       arglen/argidx sentinels and is additionally used as a mapping when
       it supports the mapping protocol and is not a str. */
    ctx.args = args;
    ctx.args_owned = 0;
    if (PyTuple_Check(args)) {
        ctx.arglen = PyTuple_Size(args);
        ctx.argidx = 0;
        ctx.dict = NULL;
    }
    else {
        ctx.arglen = -1;
        ctx.argidx = -2;
        ctx.dict = (PyMapping_Check(args) && !PyUnicode_Check(args))
                   ? args : NULL;
    }

    while (--ctx.fmtcnt >= 0) {
        Py_ssize_t literal_start;

        if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) == '%') {
            /* Conversion specifier: step over the '%' and delegate. */
            ctx.fmtpos++;
            if (unicode_format_arg(&ctx) == -1) {
                goto fail;
            }
            continue;
        }

        /* Literal run: scan forward to the next '%' or until the count
           is exhausted, then copy the whole run in one call. */
        literal_start = ctx.fmtpos;
        ctx.fmtpos++;
        while (ctx.fmtcnt >= 0 &&
               PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
            ctx.fmtcnt--;
            ctx.fmtpos++;
        }
        if (ctx.fmtcnt < 0) {
            /* The scan stepped one position too far at the end of the
               format; back up.  Nothing follows this run, so stop
               over-allocating. */
            ctx.fmtpos--;
            ctx.writer.overallocate = 0;
        }

        if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
                                            literal_start, ctx.fmtpos) < 0) {
            goto fail;
        }
    }

    /* Without a mapping, argidx must have caught up with arglen. */
    if (ctx.dict == NULL && ctx.argidx < ctx.arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not all arguments converted during string formatting");
        goto fail;
    }

    /* Drop the owned reference (if any) before finishing the writer. */
    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return _PyUnicodeWriter_Finish(&ctx.writer);

  fail:
    _PyUnicodeWriter_Dealloc(&ctx.writer);
    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return NULL;
}