Coverage Report

Created: 2025-12-14 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/unicode_format.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
// PyUnicode_Format() implementation
42
43
#include "Python.h"
44
#include "pycore_abstract.h"      // _PyIndex_Check()
45
#include "pycore_format.h"        // F_ALT
46
#include "pycore_long.h"          // _PyLong_FormatWriter()
47
#include "pycore_object.h"        // _PyObject_IsUniquelyReferenced()
48
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
49
50
51
0
/* File-local shorthand for _Py_MAX_UNICODE. */
#define MAX_UNICODE _Py_MAX_UNICODE
52
30.5M
/* File-local shorthand for _PyUnicode_EnsureUnicode(). */
#define ensure_unicode _PyUnicode_EnsureUnicode
53
54
/* State shared by the helpers that implement PyUnicode_Format(): the
   arguments being consumed, the format string being scanned and the
   writer receiving the output. */
struct unicode_formatter_t {
55
    /* Object the next argument is taken from. After a "%(name)" lookup it
       is replaced by the value fetched from `dict`. */
    PyObject *args;
56
    /* Non-zero when this struct holds its own reference to `args`. */
    int args_owned;
57
    /* arglen: number of available arguments; a negative value means `args`
       itself is the single argument.
       argidx: index of the next argument to hand out. */
    Py_ssize_t arglen, argidx;
58
    /* Mapping used for "%(name)" lookups, or NULL when there is none. */
    PyObject *dict;
59
60
    /* Kind of the format string data, as passed to PyUnicode_READ(). */
    int fmtkind;
61
    /* fmtcnt: countdown of format characters left to read.
       fmtpos: index of the next format character to read. */
    Py_ssize_t fmtcnt, fmtpos;
62
    /* Raw character data of the format string. */
    const void *fmtdata;
63
    /* The format string object itself. */
    PyObject *fmtstr;
64
65
    /* Writer that accumulates the formatted output. */
    _PyUnicodeWriter writer;
66
};
67
68
69
/* Parsed description of a single "%..." conversion specification. */
struct unicode_format_arg_t {
70
    /* Most recently read format character; once parsing is complete this
       is the conversion character (for example 's' or 'd'). */
    Py_UCS4 ch;
71
    /* Bitmask of F_LJUST, F_SIGN, F_BLANK, F_ALT and F_ZERO. */
    int flags;
72
    /* Minimum field width, or -1 when none was given. */
    Py_ssize_t width;
73
    /* Precision, or -1 when none was given. */
    int prec;
74
    /* Non-zero for numeric conversions, where sign handling and zero
       padding apply. */
    int sign;
75
};
76
77
78
static PyObject *
79
unicode_format_getnextarg(struct unicode_formatter_t *ctx)
80
64.1M
{
81
64.1M
    Py_ssize_t argidx = ctx->argidx;
82
83
64.1M
    if (argidx < ctx->arglen) {
84
64.1M
        ctx->argidx++;
85
64.1M
        if (ctx->arglen < 0)
86
15.1M
            return ctx->args;
87
49.0M
        else
88
49.0M
            return PyTuple_GetItem(ctx->args, argidx);
89
64.1M
    }
90
0
    PyErr_SetString(PyExc_TypeError,
91
0
                    "not enough arguments for format string");
92
0
    return NULL;
93
64.1M
}
94
95
96
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
97
98
/* Format a float into the writer if the writer is not NULL, or into *p_output
99
   otherwise.
100
101
   Return 0 on success, raise an exception and return -1 on error. */
102
static int
103
formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
104
            PyObject **p_output,
105
            _PyUnicodeWriter *writer)
106
115
{
107
115
    char *p;
108
115
    double x;
109
115
    Py_ssize_t len;
110
115
    int prec;
111
115
    int dtoa_flags = 0;
112
113
115
    x = PyFloat_AsDouble(v);
114
115
    if (x == -1.0 && PyErr_Occurred())
115
0
        return -1;
116
117
115
    prec = arg->prec;
118
115
    if (prec < 0)
119
0
        prec = 6;
120
121
115
    if (arg->flags & F_ALT)
122
0
        dtoa_flags |= Py_DTSF_ALT;
123
115
    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
124
115
    if (p == NULL)
125
0
        return -1;
126
115
    len = strlen(p);
127
115
    if (writer) {
128
0
        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
129
0
            PyMem_Free(p);
130
0
            return -1;
131
0
        }
132
0
    }
133
115
    else
134
115
        *p_output = _PyUnicode_FromASCII(p, len);
135
115
    PyMem_Free(p);
136
115
    return 0;
137
115
}
138
139
140
/* formatlong() emulates the format codes d, u, o, x and X, and
141
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
142
 * Python's regular ints.
143
 * Return value:  a new PyUnicodeObject*, or NULL if error.
144
 *     The output string is of the form
145
 *         "-"? ("0x" | "0X")? digit+
146
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
147
 *         set in flags.  The case of hex digits will be correct,
148
 *     There will be at least prec digits, zero-filled on the left if
149
 *         necessary to get that many.
150
 * val          object to be converted
151
 * flags        bitmask of format flags; only F_ALT is looked at
152
 * prec         minimum number of digits; 0-fill on left if needed
153
 * type         a character in [duoxX]; u acts the same as d
154
 *
155
 * CAUTION:  o, x and X conversions on regular ints can never
156
 * produce a '-' sign, but can for Python's unbounded ints.
157
 */
158
PyObject *
159
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
160
8.61M
{
161
8.61M
    PyObject *result = NULL;
162
8.61M
    char *buf;
163
8.61M
    Py_ssize_t i;
164
8.61M
    int sign;           /* 1 if '-', else 0 */
165
8.61M
    int len;            /* number of characters */
166
8.61M
    Py_ssize_t llen;
167
8.61M
    int numdigits;      /* len == numnondigits + numdigits */
168
8.61M
    int numnondigits = 0;
169
170
    /* Avoid exceeding SSIZE_T_MAX */
171
8.61M
    if (prec > INT_MAX-3) {
172
0
        PyErr_SetString(PyExc_OverflowError,
173
0
                        "precision too large");
174
0
        return NULL;
175
0
    }
176
177
8.61M
    assert(PyLong_Check(val));
178
179
8.61M
    switch (type) {
180
0
    default:
181
0
        Py_UNREACHABLE();
182
4.47M
    case 'd':
183
4.47M
    case 'i':
184
4.47M
    case 'u':
185
        /* int and int subclasses should print numerically when a numeric */
186
        /* format code is used (see issue18780) */
187
4.47M
        result = PyNumber_ToBase(val, 10);
188
4.47M
        break;
189
0
    case 'o':
190
0
        numnondigits = 2;
191
0
        result = PyNumber_ToBase(val, 8);
192
0
        break;
193
100
    case 'x':
194
4.13M
    case 'X':
195
4.13M
        numnondigits = 2;
196
4.13M
        result = PyNumber_ToBase(val, 16);
197
4.13M
        break;
198
8.61M
    }
199
8.61M
    if (!result)
200
0
        return NULL;
201
202
8.61M
    assert(_PyUnicode_IsModifiable(result));
203
8.61M
    assert(PyUnicode_IS_ASCII(result));
204
205
    /* To modify the string in-place, there can only be one reference. */
206
8.61M
    if (!_PyObject_IsUniquelyReferenced(result)) {
207
0
        Py_DECREF(result);
208
0
        PyErr_BadInternalCall();
209
0
        return NULL;
210
0
    }
211
8.61M
    buf = PyUnicode_DATA(result);
212
8.61M
    llen = PyUnicode_GET_LENGTH(result);
213
8.61M
    if (llen > INT_MAX) {
214
0
        Py_DECREF(result);
215
0
        PyErr_SetString(PyExc_ValueError,
216
0
                        "string too large in _PyUnicode_FormatLong");
217
0
        return NULL;
218
0
    }
219
8.61M
    len = (int)llen;
220
8.61M
    sign = buf[0] == '-';
221
8.61M
    numnondigits += sign;
222
8.61M
    numdigits = len - numnondigits;
223
8.61M
    assert(numdigits > 0);
224
225
    /* Get rid of base marker unless F_ALT */
226
8.61M
    if (((alt) == 0 &&
227
8.61M
        (type == 'o' || type == 'x' || type == 'X'))) {
228
4.13M
        assert(buf[sign] == '0');
229
4.13M
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
230
4.13M
               buf[sign+1] == 'o');
231
4.13M
        numnondigits -= 2;
232
4.13M
        buf += 2;
233
4.13M
        len -= 2;
234
4.13M
        if (sign)
235
0
            buf[0] = '-';
236
4.13M
        assert(len == numnondigits + numdigits);
237
4.13M
        assert(numdigits > 0);
238
4.13M
    }
239
240
    /* Fill with leading zeroes to meet minimum width. */
241
8.61M
    if (prec > numdigits) {
242
0
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
243
0
                                numnondigits + prec);
244
0
        char *b1;
245
0
        if (!r1) {
246
0
            Py_DECREF(result);
247
0
            return NULL;
248
0
        }
249
0
        b1 = PyBytes_AS_STRING(r1);
250
0
        for (i = 0; i < numnondigits; ++i)
251
0
            *b1++ = *buf++;
252
0
        for (i = 0; i < prec - numdigits; i++)
253
0
            *b1++ = '0';
254
0
        for (i = 0; i < numdigits; i++)
255
0
            *b1++ = *buf++;
256
0
        *b1 = '\0';
257
0
        Py_SETREF(result, r1);
258
0
        buf = PyBytes_AS_STRING(result);
259
0
        len = numnondigits + prec;
260
0
    }
261
262
    /* Fix up case for hex conversions. */
263
8.61M
    if (type == 'X') {
264
        /* Need to convert all lower case letters to upper case.
265
           and need to convert 0x to 0X (and -0x to -0X). */
266
28.0M
        for (i = 0; i < len; i++)
267
23.9M
            if (buf[i] >= 'a' && buf[i] <= 'x')
268
4.88M
                buf[i] -= 'a'-'A';
269
4.13M
    }
270
8.61M
    if (!PyUnicode_Check(result)
271
8.61M
        || buf != PyUnicode_DATA(result)) {
272
4.13M
        PyObject *unicode;
273
4.13M
        unicode = _PyUnicode_FromASCII(buf, len);
274
4.13M
        Py_SETREF(result, unicode);
275
4.13M
    }
276
4.47M
    else if (len != PyUnicode_GET_LENGTH(result)) {
277
0
        if (PyUnicode_Resize(&result, len) < 0)
278
0
            Py_CLEAR(result);
279
0
    }
280
8.61M
    return result;
281
8.61M
}
282
283
284
/* Format an integer or a float as an integer.
285
 * Return 1 if the number has been formatted into the writer,
286
 *        0 if the number has been formatted into *p_output
287
 *       -1 and raise an exception on error */
288
static int
289
mainformatlong(PyObject *v,
290
               struct unicode_format_arg_t *arg,
291
               PyObject **p_output,
292
               _PyUnicodeWriter *writer)
293
21.5M
{
294
21.5M
    PyObject *iobj, *res;
295
21.5M
    char type = (char)arg->ch;
296
297
21.5M
    if (!PyNumber_Check(v))
298
4.65M
        goto wrongtype;
299
300
    /* make sure number is a type of integer for o, x, and X */
301
16.8M
    if (!PyLong_Check(v)) {
302
0
        if (type == 'o' || type == 'x' || type == 'X') {
303
0
            iobj = _PyNumber_Index(v);
304
0
        }
305
0
        else {
306
0
            iobj = PyNumber_Long(v);
307
0
        }
308
0
        if (iobj == NULL ) {
309
0
            if (PyErr_ExceptionMatches(PyExc_TypeError))
310
0
                goto wrongtype;
311
0
            return -1;
312
0
        }
313
0
        assert(PyLong_Check(iobj));
314
0
    }
315
16.8M
    else {
316
16.8M
        iobj = Py_NewRef(v);
317
16.8M
    }
318
319
16.8M
    if (PyLong_CheckExact(v)
320
16.8M
        && arg->width == -1 && arg->prec == -1
321
12.3M
        && !(arg->flags & (F_SIGN | F_BLANK))
322
12.3M
        && type != 'X')
323
8.24M
    {
324
        /* Fast path */
325
8.24M
        int alternate = arg->flags & F_ALT;
326
8.24M
        int base;
327
328
8.24M
        switch(type)
329
8.24M
        {
330
0
            default:
331
0
                Py_UNREACHABLE();
332
8.24M
            case 'd':
333
8.24M
            case 'i':
334
8.24M
            case 'u':
335
8.24M
                base = 10;
336
8.24M
                break;
337
0
            case 'o':
338
0
                base = 8;
339
0
                break;
340
21
            case 'x':
341
21
            case 'X':
342
21
                base = 16;
343
21
                break;
344
8.24M
        }
345
346
8.24M
        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
347
0
            Py_DECREF(iobj);
348
0
            return -1;
349
0
        }
350
8.24M
        Py_DECREF(iobj);
351
8.24M
        return 1;
352
8.24M
    }
353
354
8.61M
    res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
355
8.61M
    Py_DECREF(iobj);
356
8.61M
    if (res == NULL)
357
0
        return -1;
358
8.61M
    *p_output = res;
359
8.61M
    return 0;
360
361
4.65M
wrongtype:
362
4.65M
    switch(type)
363
4.65M
    {
364
0
        case 'o':
365
0
        case 'x':
366
0
        case 'X':
367
0
            PyErr_Format(PyExc_TypeError,
368
0
                    "%%%c format: an integer is required, "
369
0
                    "not %.200s",
370
0
                    type, Py_TYPE(v)->tp_name);
371
0
            break;
372
4.65M
        default:
373
4.65M
            PyErr_Format(PyExc_TypeError,
374
4.65M
                    "%%%c format: a real number is required, "
375
4.65M
                    "not %.200s",
376
4.65M
                    type, Py_TYPE(v)->tp_name);
377
4.65M
            break;
378
4.65M
    }
379
4.65M
    return -1;
380
4.65M
}
381
382
383
static Py_UCS4
384
formatchar(PyObject *v)
385
0
{
386
    /* presume that the buffer is at least 3 characters long */
387
0
    if (PyUnicode_Check(v)) {
388
0
        if (PyUnicode_GET_LENGTH(v) == 1) {
389
0
            return PyUnicode_READ_CHAR(v, 0);
390
0
        }
391
0
        PyErr_Format(PyExc_TypeError,
392
0
                     "%%c requires an int or a unicode character, "
393
0
                     "not a string of length %zd",
394
0
                     PyUnicode_GET_LENGTH(v));
395
0
        return (Py_UCS4) -1;
396
0
    }
397
0
    else {
398
0
        int overflow;
399
0
        long x = PyLong_AsLongAndOverflow(v, &overflow);
400
0
        if (x == -1 && PyErr_Occurred()) {
401
0
            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
402
0
                PyErr_Format(PyExc_TypeError,
403
0
                             "%%c requires an int or a unicode character, not %T",
404
0
                             v);
405
0
                return (Py_UCS4) -1;
406
0
            }
407
0
            return (Py_UCS4) -1;
408
0
        }
409
410
0
        if (x < 0 || x > MAX_UNICODE) {
411
            /* this includes an overflow in converting to C long */
412
0
            PyErr_SetString(PyExc_OverflowError,
413
0
                            "%c arg not in range(0x110000)");
414
0
            return (Py_UCS4) -1;
415
0
        }
416
417
0
        return (Py_UCS4) x;
418
0
    }
419
0
}
420
421
422
/* Parse options of an argument: flags, width, precision.
423
   Handle also "%(name)" syntax.
424
425
   Return 0 if the argument has been formatted into arg->str.
426
   Return 1 if the argument has been written into ctx->writer,
427
   Raise an exception and return -1 on error. */
428
static int
429
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
430
                         struct unicode_format_arg_t *arg)
431
64.1M
{
432
64.1M
#define FORMAT_READ(ctx) \
433
68.9M
        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
434
435
64.1M
    PyObject *v;
436
437
64.1M
    if (arg->ch == '(') {
438
        /* Get argument value from a dictionary. Example: "%(name)s". */
439
37.2k
        Py_ssize_t keystart;
440
37.2k
        Py_ssize_t keylen;
441
37.2k
        PyObject *key;
442
37.2k
        int pcount = 1;
443
444
37.2k
        if (ctx->dict == NULL) {
445
0
            PyErr_SetString(PyExc_TypeError,
446
0
                            "format requires a mapping");
447
0
            return -1;
448
0
        }
449
37.2k
        ++ctx->fmtpos;
450
37.2k
        --ctx->fmtcnt;
451
37.2k
        keystart = ctx->fmtpos;
452
        /* Skip over balanced parentheses */
453
335k
        while (pcount > 0 && --ctx->fmtcnt >= 0) {
454
298k
            arg->ch = FORMAT_READ(ctx);
455
298k
            if (arg->ch == ')')
456
37.2k
                --pcount;
457
260k
            else if (arg->ch == '(')
458
0
                ++pcount;
459
298k
            ctx->fmtpos++;
460
298k
        }
461
37.2k
        keylen = ctx->fmtpos - keystart - 1;
462
37.2k
        if (ctx->fmtcnt < 0 || pcount > 0) {
463
0
            PyErr_SetString(PyExc_ValueError,
464
0
                            "incomplete format key");
465
0
            return -1;
466
0
        }
467
37.2k
        key = PyUnicode_Substring(ctx->fmtstr,
468
37.2k
                                  keystart, keystart + keylen);
469
37.2k
        if (key == NULL)
470
0
            return -1;
471
37.2k
        if (ctx->args_owned) {
472
26.6k
            ctx->args_owned = 0;
473
26.6k
            Py_DECREF(ctx->args);
474
26.6k
        }
475
37.2k
        ctx->args = PyObject_GetItem(ctx->dict, key);
476
37.2k
        Py_DECREF(key);
477
37.2k
        if (ctx->args == NULL)
478
0
            return -1;
479
37.2k
        ctx->args_owned = 1;
480
37.2k
        ctx->arglen = -1;
481
37.2k
        ctx->argidx = -2;
482
37.2k
    }
483
484
    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
485
64.2M
    while (--ctx->fmtcnt >= 0) {
486
64.2M
        arg->ch = FORMAT_READ(ctx);
487
64.2M
        ctx->fmtpos++;
488
64.2M
        switch (arg->ch) {
489
0
        case '-': arg->flags |= F_LJUST; continue;
490
0
        case '+': arg->flags |= F_SIGN; continue;
491
0
        case ' ': arg->flags |= F_BLANK; continue;
492
21
        case '#': arg->flags |= F_ALT; continue;
493
1.66k
        case '0': arg->flags |= F_ZERO; continue;
494
64.2M
        }
495
64.1M
        break;
496
64.2M
    }
497
498
    /* Parse width. Example: "%10s" => width=10 */
499
64.1M
    if (arg->ch == '*') {
500
0
        v = unicode_format_getnextarg(ctx);
501
0
        if (v == NULL)
502
0
            return -1;
503
0
        if (!PyLong_Check(v)) {
504
0
            PyErr_SetString(PyExc_TypeError,
505
0
                            "* wants int");
506
0
            return -1;
507
0
        }
508
0
        arg->width = PyLong_AsSsize_t(v);
509
0
        if (arg->width == -1 && PyErr_Occurred())
510
0
            return -1;
511
0
        if (arg->width < 0) {
512
0
            arg->flags |= F_LJUST;
513
0
            arg->width = -arg->width;
514
0
        }
515
0
        if (--ctx->fmtcnt >= 0) {
516
0
            arg->ch = FORMAT_READ(ctx);
517
0
            ctx->fmtpos++;
518
0
        }
519
0
    }
520
64.1M
    else if (arg->ch >= '0' && arg->ch <= '9') {
521
4.47M
        arg->width = arg->ch - '0';
522
4.47M
        while (--ctx->fmtcnt >= 0) {
523
4.47M
            arg->ch = FORMAT_READ(ctx);
524
4.47M
            ctx->fmtpos++;
525
4.47M
            if (arg->ch < '0' || arg->ch > '9')
526
4.47M
                break;
527
            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
528
               mixing signed and unsigned comparison. Since arg->ch is between
529
               '0' and '9', casting to int is safe. */
530
0
            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
531
0
                PyErr_SetString(PyExc_ValueError,
532
0
                                "width too big");
533
0
                return -1;
534
0
            }
535
0
            arg->width = arg->width*10 + (arg->ch - '0');
536
0
        }
537
4.47M
    }
538
539
    /* Parse precision. Example: "%.3f" => prec=3 */
540
64.1M
    if (arg->ch == '.') {
541
115
        arg->prec = 0;
542
115
        if (--ctx->fmtcnt >= 0) {
543
115
            arg->ch = FORMAT_READ(ctx);
544
115
            ctx->fmtpos++;
545
115
        }
546
115
        if (arg->ch == '*') {
547
0
            v = unicode_format_getnextarg(ctx);
548
0
            if (v == NULL)
549
0
                return -1;
550
0
            if (!PyLong_Check(v)) {
551
0
                PyErr_SetString(PyExc_TypeError,
552
0
                                "* wants int");
553
0
                return -1;
554
0
            }
555
0
            arg->prec = PyLong_AsInt(v);
556
0
            if (arg->prec == -1 && PyErr_Occurred())
557
0
                return -1;
558
0
            if (arg->prec < 0)
559
0
                arg->prec = 0;
560
0
            if (--ctx->fmtcnt >= 0) {
561
0
                arg->ch = FORMAT_READ(ctx);
562
0
                ctx->fmtpos++;
563
0
            }
564
0
        }
565
115
        else if (arg->ch >= '0' && arg->ch <= '9') {
566
115
            arg->prec = arg->ch - '0';
567
115
            while (--ctx->fmtcnt >= 0) {
568
115
                arg->ch = FORMAT_READ(ctx);
569
115
                ctx->fmtpos++;
570
115
                if (arg->ch < '0' || arg->ch > '9')
571
115
                    break;
572
0
                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
573
0
                    PyErr_SetString(PyExc_ValueError,
574
0
                                    "precision too big");
575
0
                    return -1;
576
0
                }
577
0
                arg->prec = arg->prec*10 + (arg->ch - '0');
578
0
            }
579
115
        }
580
115
    }
581
582
    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
583
64.1M
    if (ctx->fmtcnt >= 0) {
584
64.1M
        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
585
0
            if (--ctx->fmtcnt >= 0) {
586
0
                arg->ch = FORMAT_READ(ctx);
587
0
                ctx->fmtpos++;
588
0
            }
589
0
        }
590
64.1M
    }
591
64.1M
    if (ctx->fmtcnt < 0) {
592
0
        PyErr_SetString(PyExc_ValueError,
593
0
                        "incomplete format");
594
0
        return -1;
595
0
    }
596
64.1M
    return 0;
597
598
64.1M
#undef FORMAT_READ
599
64.1M
}
600
601
602
/* Format one argument. Supported conversion specifiers:
603
604
   - "s", "r", "a": any type
605
   - "i", "d", "u": int or float
606
   - "o", "x", "X": int
607
   - "e", "E", "f", "F", "g", "G": float
608
   - "c": int or str (1 character)
609
610
   When possible, the output is written directly into the Unicode writer
611
   (ctx->writer). A string is created when padding is required.
612
613
   Return 0 if the argument has been formatted into *p_str,
614
          1 if the argument has been written into ctx->writer,
615
         -1 on error. */
616
static int
617
unicode_format_arg_format(struct unicode_formatter_t *ctx,
618
                          struct unicode_format_arg_t *arg,
619
                          PyObject **p_str)
620
64.1M
{
621
64.1M
    PyObject *v;
622
64.1M
    _PyUnicodeWriter *writer = &ctx->writer;
623
624
64.1M
    if (ctx->fmtcnt == 0)
625
21.3M
        ctx->writer.overallocate = 0;
626
627
64.1M
    v = unicode_format_getnextarg(ctx);
628
64.1M
    if (v == NULL)
629
0
        return -1;
630
631
632
64.1M
    switch (arg->ch) {
633
42.6M
    case 's':
634
42.6M
    case 'r':
635
42.6M
    case 'a':
636
42.6M
        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
637
            /* Fast path */
638
92
            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
639
0
                return -1;
640
92
            return 1;
641
92
        }
642
643
42.6M
        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
644
38.5M
            *p_str = Py_NewRef(v);
645
38.5M
        }
646
4.13M
        else {
647
4.13M
            if (arg->ch == 's')
648
4.13M
                *p_str = PyObject_Str(v);
649
5.31k
            else if (arg->ch == 'r')
650
5.31k
                *p_str = PyObject_Repr(v);
651
0
            else
652
0
                *p_str = PyObject_ASCII(v);
653
4.13M
        }
654
42.6M
        break;
655
656
0
    case 'i':
657
17.3M
    case 'd':
658
17.3M
    case 'u':
659
17.3M
    case 'o':
660
17.3M
    case 'x':
661
21.5M
    case 'X':
662
21.5M
    {
663
21.5M
        int ret = mainformatlong(v, arg, p_str, writer);
664
21.5M
        if (ret != 0)
665
12.9M
            return ret;
666
8.61M
        arg->sign = 1;
667
8.61M
        break;
668
21.5M
    }
669
670
0
    case 'e':
671
0
    case 'E':
672
115
    case 'f':
673
115
    case 'F':
674
115
    case 'g':
675
115
    case 'G':
676
115
        if (arg->width == -1 && arg->prec == -1
677
0
            && !(arg->flags & (F_SIGN | F_BLANK)))
678
0
        {
679
            /* Fast path */
680
0
            if (formatfloat(v, arg, NULL, writer) == -1)
681
0
                return -1;
682
0
            return 1;
683
0
        }
684
685
115
        arg->sign = 1;
686
115
        if (formatfloat(v, arg, p_str, NULL) == -1)
687
0
            return -1;
688
115
        break;
689
690
115
    case 'c':
691
0
    {
692
0
        Py_UCS4 ch = formatchar(v);
693
0
        if (ch == (Py_UCS4) -1)
694
0
            return -1;
695
0
        if (arg->width == -1 && arg->prec == -1) {
696
            /* Fast path */
697
0
            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
698
0
                return -1;
699
0
            return 1;
700
0
        }
701
0
        *p_str = PyUnicode_FromOrdinal(ch);
702
0
        break;
703
0
    }
704
705
0
    default:
706
0
        PyErr_Format(PyExc_ValueError,
707
0
                     "unsupported format character '%c' (0x%x) "
708
0
                     "at index %zd",
709
0
                     (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?',
710
0
                     (int)arg->ch,
711
0
                     ctx->fmtpos - 1);
712
0
        return -1;
713
64.1M
    }
714
51.2M
    if (*p_str == NULL)
715
0
        return -1;
716
51.2M
    assert (PyUnicode_Check(*p_str));
717
51.2M
    return 0;
718
51.2M
}
719
720
721
static int
722
unicode_format_arg_output(struct unicode_formatter_t *ctx,
723
                          struct unicode_format_arg_t *arg,
724
                          PyObject *str)
725
51.2M
{
726
51.2M
    Py_ssize_t len;
727
51.2M
    int kind;
728
51.2M
    const void *pbuf;
729
51.2M
    Py_ssize_t pindex;
730
51.2M
    Py_UCS4 signchar;
731
51.2M
    Py_ssize_t buflen;
732
51.2M
    Py_UCS4 maxchar;
733
51.2M
    Py_ssize_t sublen;
734
51.2M
    _PyUnicodeWriter *writer = &ctx->writer;
735
51.2M
    Py_UCS4 fill;
736
737
51.2M
    fill = ' ';
738
51.2M
    if (arg->sign && arg->flags & F_ZERO)
739
1.66k
        fill = '0';
740
741
51.2M
    len = PyUnicode_GET_LENGTH(str);
742
51.2M
    if ((arg->width == -1 || arg->width <= len)
743
51.2M
        && (arg->prec == -1 || arg->prec >= len)
744
51.2M
        && !(arg->flags & (F_SIGN | F_BLANK)))
745
51.2M
    {
746
        /* Fast path */
747
51.2M
        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
748
0
            return -1;
749
51.2M
        return 0;
750
51.2M
    }
751
752
    /* Truncate the string for "s", "r" and "a" formats
753
       if the precision is set */
754
14.5k
    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
755
0
        if (arg->prec >= 0 && len > arg->prec)
756
0
            len = arg->prec;
757
0
    }
758
759
    /* Adjust sign and width */
760
14.5k
    kind = PyUnicode_KIND(str);
761
14.5k
    pbuf = PyUnicode_DATA(str);
762
14.5k
    pindex = 0;
763
14.5k
    signchar = '\0';
764
14.5k
    if (arg->sign) {
765
14.5k
        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
766
14.5k
        if (ch == '-' || ch == '+') {
767
0
            signchar = ch;
768
0
            len--;
769
0
            pindex++;
770
0
        }
771
14.5k
        else if (arg->flags & F_SIGN)
772
0
            signchar = '+';
773
14.5k
        else if (arg->flags & F_BLANK)
774
0
            signchar = ' ';
775
14.5k
        else
776
14.5k
            arg->sign = 0;
777
14.5k
    }
778
14.5k
    if (arg->width < len)
779
115
        arg->width = len;
780
781
    /* Prepare the writer */
782
14.5k
    maxchar = writer->maxchar;
783
14.5k
    if (!(arg->flags & F_LJUST)) {
784
14.5k
        if (arg->sign) {
785
0
            if ((arg->width-1) > len)
786
0
                maxchar = Py_MAX(maxchar, fill);
787
0
        }
788
14.5k
        else {
789
14.5k
            if (arg->width > len)
790
14.4k
                maxchar = Py_MAX(maxchar, fill);
791
14.5k
        }
792
14.5k
    }
793
14.5k
    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
794
0
        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
795
0
        maxchar = Py_MAX(maxchar, strmaxchar);
796
0
    }
797
798
14.5k
    buflen = arg->width;
799
14.5k
    if (arg->sign && len == arg->width)
800
0
        buflen++;
801
14.5k
    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
802
0
        return -1;
803
804
    /* Write the sign if needed */
805
14.5k
    if (arg->sign) {
806
0
        if (fill != ' ') {
807
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
808
0
            writer->pos += 1;
809
0
        }
810
0
        if (arg->width > len)
811
0
            arg->width--;
812
0
    }
813
814
    /* Write the numeric prefix for "x", "X" and "o" formats
815
       if the alternate form is used.
816
       For example, write "0x" for the "%#x" format. */
817
14.5k
    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
818
0
        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
819
0
        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
820
0
        if (fill != ' ') {
821
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
822
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
823
0
            writer->pos += 2;
824
0
            pindex += 2;
825
0
        }
826
0
        arg->width -= 2;
827
0
        if (arg->width < 0)
828
0
            arg->width = 0;
829
0
        len -= 2;
830
0
    }
831
832
    /* Pad left with the fill character if needed */
833
14.5k
    if (arg->width > len && !(arg->flags & F_LJUST)) {
834
14.4k
        sublen = arg->width - len;
835
14.4k
        _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
836
14.4k
        writer->pos += sublen;
837
14.4k
        arg->width = len;
838
14.4k
    }
839
840
    /* If padding with spaces: write sign if needed and/or numeric prefix if
841
       the alternate form is used */
842
14.5k
    if (fill == ' ') {
843
14.3k
        if (arg->sign) {
844
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
845
0
            writer->pos += 1;
846
0
        }
847
14.3k
        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
848
0
            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
849
0
            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
850
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
851
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
852
0
            writer->pos += 2;
853
0
            pindex += 2;
854
0
        }
855
14.3k
    }
856
857
    /* Write characters */
858
14.5k
    if (len) {
859
14.5k
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
860
14.5k
                                      str, pindex, len);
861
14.5k
        writer->pos += len;
862
14.5k
    }
863
864
    /* Pad right with the fill character if needed */
865
14.5k
    if (arg->width > len) {
866
0
        sublen = arg->width - len;
867
0
        _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
868
0
        writer->pos += sublen;
869
0
    }
870
14.5k
    return 0;
871
14.5k
}
872
873
874
/* Helper of PyUnicode_Format(): format one arg.
875
   Return 0 on success, raise an exception and return -1 on error. */
876
static int
877
unicode_format_arg(struct unicode_formatter_t *ctx)
878
64.1M
{
879
64.1M
    struct unicode_format_arg_t arg;
880
64.1M
    PyObject *str;
881
64.1M
    int ret;
882
883
64.1M
    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
884
64.1M
    if (arg.ch == '%') {
885
0
        ctx->fmtpos++;
886
0
        ctx->fmtcnt--;
887
0
        if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0)
888
0
            return -1;
889
0
        return 0;
890
0
    }
891
64.1M
    arg.flags = 0;
892
64.1M
    arg.width = -1;
893
64.1M
    arg.prec = -1;
894
64.1M
    arg.sign = 0;
895
64.1M
    str = NULL;
896
897
64.1M
    ret = unicode_format_arg_parse(ctx, &arg);
898
64.1M
    if (ret == -1)
899
0
        return -1;
900
901
64.1M
    ret = unicode_format_arg_format(ctx, &arg, &str);
902
64.1M
    if (ret == -1)
903
4.65M
        return -1;
904
905
59.5M
    if (ret != 1) {
906
51.2M
        ret = unicode_format_arg_output(ctx, &arg, str);
907
51.2M
        Py_DECREF(str);
908
51.2M
        if (ret == -1)
909
0
            return -1;
910
51.2M
    }
911
912
59.5M
    if (ctx->dict && (ctx->argidx < ctx->arglen)) {
913
0
        PyErr_SetString(PyExc_TypeError,
914
0
                        "not all arguments converted during string formatting");
915
0
        return -1;
916
0
    }
917
59.5M
    return 0;
918
59.5M
}
919
920
921
/* Build a new string from the printf-style template `format` and the
   values in `args` (a tuple, a mapping, or a single object).

   Literal runs of the template are copied to the output unchanged; each
   '%' hands control to unicode_format_arg().

   Return the value of _PyUnicodeWriter_Finish() on success.  On error an
   exception is set and NULL is returned. */
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{
    struct unicode_formatter_t ctx;

    if (format == NULL || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    if (ensure_unicode(format) < 0) {
        return NULL;
    }

    /* Cursor over the format string: fmtpos is the read index, fmtcnt the
       number of characters not yet consumed. */
    ctx.fmtstr = format;
    ctx.fmtdata = PyUnicode_DATA(format);
    ctx.fmtkind = PyUnicode_KIND(format);
    ctx.fmtcnt = PyUnicode_GET_LENGTH(format);
    ctx.fmtpos = 0;

    /* Output writer: size hint is the template length plus 100, with
       overallocation enabled. */
    _PyUnicodeWriter_Init(&ctx.writer);
    ctx.writer.min_length = ctx.fmtcnt + 100;
    ctx.writer.overallocate = 1;

    /* Classify the arguments. */
    ctx.args = args;
    ctx.args_owned = 0;
    if (PyTuple_Check(args)) {
        ctx.arglen = PyTuple_Size(args);
        ctx.argidx = 0;
        ctx.dict = NULL;
    }
    else {
        /* Not a tuple: sentinel values with argidx < arglen. */
        ctx.arglen = -1;
        ctx.argidx = -2;
        /* Strings pass PyMapping_Check() but are never used as a mapping. */
        if (PyMapping_Check(args) && !PyUnicode_Check(args)) {
            ctx.dict = args;
        }
        else {
            ctx.dict = NULL;
        }
    }

    while (--ctx.fmtcnt >= 0) {
        if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) == '%') {
            /* Step over the '%' and handle the specifier that follows. */
            ctx.fmtpos++;
            if (unicode_format_arg(&ctx) == -1) {
                goto onError;
            }
            continue;
        }

        /* Literal text: extend the run up to the next '%' or the end of
           the template, then copy it in a single call. */
        Py_ssize_t run_start = ctx.fmtpos++;
        while (ctx.fmtcnt >= 0 &&
               PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
            ctx.fmtcnt--;
            ctx.fmtpos++;
        }
        if (ctx.fmtcnt < 0) {
            /* The scan went one position past the last character; step
               back.  This is the final write, so stop overallocating. */
            ctx.fmtpos--;
            ctx.writer.overallocate = 0;
        }

        if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
                                            run_start, ctx.fmtpos) < 0) {
            goto onError;
        }
    }

    /* Outside mapping mode, argidx must have reached arglen by now. */
    if (!ctx.dict && ctx.argidx < ctx.arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not all arguments converted during string formatting");
        goto onError;
    }

    /* args_owned starts at 0 above.
       NOTE(review): presumably a helper sets it when it replaces ctx.args
       with a reference this function must release -- confirm. */
    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return _PyUnicodeWriter_Finish(&ctx.writer);

  onError:
    _PyUnicodeWriter_Dealloc(&ctx.writer);
    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return NULL;
}