Coverage Report

Created: 2025-10-12 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/unicode_format.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
// PyUnicode_Format() implementation
42
43
#include "Python.h"
44
#include "pycore_abstract.h"      // _PyIndex_Check()
45
#include "pycore_format.h"        // F_ALT
46
#include "pycore_long.h"          // _PyLong_FormatWriter()
47
#include "pycore_object.h"        // _PyObject_IsUniquelyReferenced()
48
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
49
50
51
0
#define MAX_UNICODE _Py_MAX_UNICODE
52
23.3M
#define ensure_unicode _PyUnicode_EnsureUnicode
53
54
struct unicode_formatter_t {
55
    PyObject *args;
56
    int args_owned;
57
    Py_ssize_t arglen, argidx;
58
    PyObject *dict;
59
60
    int fmtkind;
61
    Py_ssize_t fmtcnt, fmtpos;
62
    const void *fmtdata;
63
    PyObject *fmtstr;
64
65
    _PyUnicodeWriter writer;
66
};
67
68
69
struct unicode_format_arg_t {
70
    Py_UCS4 ch;
71
    int flags;
72
    Py_ssize_t width;
73
    int prec;
74
    int sign;
75
};
76
77
78
static PyObject *
79
unicode_format_getnextarg(struct unicode_formatter_t *ctx)
80
45.4M
{
81
45.4M
    Py_ssize_t argidx = ctx->argidx;
82
83
45.4M
    if (argidx < ctx->arglen) {
84
45.4M
        ctx->argidx++;
85
45.4M
        if (ctx->arglen < 0)
86
17.8M
            return ctx->args;
87
27.6M
        else
88
27.6M
            return PyTuple_GetItem(ctx->args, argidx);
89
45.4M
    }
90
0
    PyErr_SetString(PyExc_TypeError,
91
0
                    "not enough arguments for format string");
92
0
    return NULL;
93
45.4M
}
94
95
96
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
97
98
/* Format a float into the writer if the writer is not NULL, or into *p_output
99
   otherwise.
100
101
   Return 0 on success, raise an exception and return -1 on error. */
102
static int
103
formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
104
            PyObject **p_output,
105
            _PyUnicodeWriter *writer)
106
0
{
107
0
    char *p;
108
0
    double x;
109
0
    Py_ssize_t len;
110
0
    int prec;
111
0
    int dtoa_flags = 0;
112
113
0
    x = PyFloat_AsDouble(v);
114
0
    if (x == -1.0 && PyErr_Occurred())
115
0
        return -1;
116
117
0
    prec = arg->prec;
118
0
    if (prec < 0)
119
0
        prec = 6;
120
121
0
    if (arg->flags & F_ALT)
122
0
        dtoa_flags |= Py_DTSF_ALT;
123
0
    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
124
0
    if (p == NULL)
125
0
        return -1;
126
0
    len = strlen(p);
127
0
    if (writer) {
128
0
        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
129
0
            PyMem_Free(p);
130
0
            return -1;
131
0
        }
132
0
    }
133
0
    else
134
0
        *p_output = _PyUnicode_FromASCII(p, len);
135
0
    PyMem_Free(p);
136
0
    return 0;
137
0
}
138
139
140
/* _PyUnicode_FormatLong() emulates the format codes d, i, u, o, x and X, and
141
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
142
 * Python's regular ints.
143
 * Return value:  a new PyUnicodeObject*, or NULL if error.
144
 *     The output string is of the form
145
 *         "-"? ("0o" | "0x" | "0X")? digit+
146
 *     "0o", "0x" and "0X" are present only for o, x and X conversions
147
 *         called with a nonzero alt.  The case of hex digits will be correct.
148
 *     There will be at least prec digits, zero-filled on the left if
149
 *         necessary to get that many.
150
 * val          object to be converted
151
 * alt          nonzero to keep the base prefix; mainformatlong() passes arg->flags & F_ALT
152
 * prec         minimum number of digits; 0-fill on left if needed
153
 * type         a character in [diuoxX]; i and u act the same as d
154
 *
155
 * CAUTION:  o, x and X conversions on regular ints can never
156
 * produce a '-' sign, but can for Python's unbounded ints.
157
 */
158
PyObject *
159
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
160
1.53k
{
161
1.53k
    PyObject *result = NULL;
162
1.53k
    char *buf;
163
1.53k
    Py_ssize_t i;
164
1.53k
    int sign;           /* 1 if '-', else 0 */
165
1.53k
    int len;            /* number of characters */
166
1.53k
    Py_ssize_t llen;
167
1.53k
    int numdigits;      /* len == numnondigits + numdigits */
168
1.53k
    int numnondigits = 0;
169
170
    /* Avoid exceeding SSIZE_T_MAX */
171
1.53k
    if (prec > INT_MAX-3) {
172
0
        PyErr_SetString(PyExc_OverflowError,
173
0
                        "precision too large");
174
0
        return NULL;
175
0
    }
176
177
1.53k
    assert(PyLong_Check(val));
178
179
1.53k
    switch (type) {
180
0
    default:
181
0
        Py_UNREACHABLE();
182
0
    case 'd':
183
0
    case 'i':
184
0
    case 'u':
185
        /* int and int subclasses should print numerically when a numeric */
186
        /* format code is used (see issue18780) */
187
0
        result = PyNumber_ToBase(val, 10);
188
0
        break;
189
0
    case 'o':
190
0
        numnondigits = 2;
191
0
        result = PyNumber_ToBase(val, 8);
192
0
        break;
193
0
    case 'x':
194
1.53k
    case 'X':
195
1.53k
        numnondigits = 2;
196
1.53k
        result = PyNumber_ToBase(val, 16);
197
1.53k
        break;
198
1.53k
    }
199
1.53k
    if (!result)
200
0
        return NULL;
201
202
1.53k
    assert(_PyUnicode_IsModifiable(result));
203
1.53k
    assert(PyUnicode_IS_ASCII(result));
204
205
    /* To modify the string in-place, there can only be one reference. */
206
1.53k
    if (!_PyObject_IsUniquelyReferenced(result)) {
207
0
        Py_DECREF(result);
208
0
        PyErr_BadInternalCall();
209
0
        return NULL;
210
0
    }
211
1.53k
    buf = PyUnicode_DATA(result);
212
1.53k
    llen = PyUnicode_GET_LENGTH(result);
213
1.53k
    if (llen > INT_MAX) {
214
0
        Py_DECREF(result);
215
0
        PyErr_SetString(PyExc_ValueError,
216
0
                        "string too large in _PyUnicode_FormatLong");
217
0
        return NULL;
218
0
    }
219
1.53k
    len = (int)llen;
220
1.53k
    sign = buf[0] == '-';
221
1.53k
    numnondigits += sign;
222
1.53k
    numdigits = len - numnondigits;
223
1.53k
    assert(numdigits > 0);
224
225
    /* Get rid of base marker unless F_ALT */
226
1.53k
    if (((alt) == 0 &&
227
1.53k
        (type == 'o' || type == 'x' || type == 'X'))) {
228
1.53k
        assert(buf[sign] == '0');
229
1.53k
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
230
1.53k
               buf[sign+1] == 'o');
231
1.53k
        numnondigits -= 2;
232
1.53k
        buf += 2;
233
1.53k
        len -= 2;
234
1.53k
        if (sign)
235
0
            buf[0] = '-';
236
1.53k
        assert(len == numnondigits + numdigits);
237
1.53k
        assert(numdigits > 0);
238
1.53k
    }
239
240
    /* Fill with leading zeroes to meet minimum width. */
241
1.53k
    if (prec > numdigits) {
242
0
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
243
0
                                numnondigits + prec);
244
0
        char *b1;
245
0
        if (!r1) {
246
0
            Py_DECREF(result);
247
0
            return NULL;
248
0
        }
249
0
        b1 = PyBytes_AS_STRING(r1);
250
0
        for (i = 0; i < numnondigits; ++i)
251
0
            *b1++ = *buf++;
252
0
        for (i = 0; i < prec - numdigits; i++)
253
0
            *b1++ = '0';
254
0
        for (i = 0; i < numdigits; i++)
255
0
            *b1++ = *buf++;
256
0
        *b1 = '\0';
257
0
        Py_SETREF(result, r1);
258
0
        buf = PyBytes_AS_STRING(result);
259
0
        len = numnondigits + prec;
260
0
    }
261
262
    /* Fix up case for hex conversions. */
263
1.53k
    if (type == 'X') {
264
        /* Need to convert all lower case letters to upper case.
265
           and need to convert 0x to 0X (and -0x to -0X). */
266
4.51k
        for (i = 0; i < len; i++)
267
2.97k
            if (buf[i] >= 'a' && buf[i] <= 'x')
268
1.15k
                buf[i] -= 'a'-'A';
269
1.53k
    }
270
1.53k
    if (!PyUnicode_Check(result)
271
1.53k
        || buf != PyUnicode_DATA(result)) {
272
1.53k
        PyObject *unicode;
273
1.53k
        unicode = _PyUnicode_FromASCII(buf, len);
274
1.53k
        Py_SETREF(result, unicode);
275
1.53k
    }
276
0
    else if (len != PyUnicode_GET_LENGTH(result)) {
277
0
        if (PyUnicode_Resize(&result, len) < 0)
278
0
            Py_CLEAR(result);
279
0
    }
280
1.53k
    return result;
281
1.53k
}
282
283
284
/* Format an integer or a float as an integer.
285
 * Return 1 if the number has been formatted into the writer,
286
 *        0 if the number has been formatted into *p_output
287
 *       -1 and raise an exception on error */
288
static int
289
mainformatlong(PyObject *v,
290
               struct unicode_format_arg_t *arg,
291
               PyObject **p_output,
292
               _PyUnicodeWriter *writer)
293
11.1M
{
294
11.1M
    PyObject *iobj, *res;
295
11.1M
    char type = (char)arg->ch;
296
297
11.1M
    if (!PyNumber_Check(v))
298
4.23M
        goto wrongtype;
299
300
    /* make sure number is a type of integer for o, x, and X */
301
6.87M
    if (!PyLong_Check(v)) {
302
0
        if (type == 'o' || type == 'x' || type == 'X') {
303
0
            iobj = _PyNumber_Index(v);
304
0
        }
305
0
        else {
306
0
            iobj = PyNumber_Long(v);
307
0
        }
308
0
        if (iobj == NULL ) {
309
0
            if (PyErr_ExceptionMatches(PyExc_TypeError))
310
0
                goto wrongtype;
311
0
            return -1;
312
0
        }
313
0
        assert(PyLong_Check(iobj));
314
0
    }
315
6.87M
    else {
316
6.87M
        iobj = Py_NewRef(v);
317
6.87M
    }
318
319
6.87M
    if (PyLong_CheckExact(v)
320
6.87M
        && arg->width == -1 && arg->prec == -1
321
6.86M
        && !(arg->flags & (F_SIGN | F_BLANK))
322
6.86M
        && type != 'X')
323
6.86M
    {
324
        /* Fast path */
325
6.86M
        int alternate = arg->flags & F_ALT;
326
6.86M
        int base;
327
328
6.86M
        switch(type)
329
6.86M
        {
330
0
            default:
331
0
                Py_UNREACHABLE();
332
6.86M
            case 'd':
333
6.86M
            case 'i':
334
6.86M
            case 'u':
335
6.86M
                base = 10;
336
6.86M
                break;
337
0
            case 'o':
338
0
                base = 8;
339
0
                break;
340
0
            case 'x':
341
0
            case 'X':
342
0
                base = 16;
343
0
                break;
344
6.86M
        }
345
346
6.86M
        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
347
0
            Py_DECREF(iobj);
348
0
            return -1;
349
0
        }
350
6.86M
        Py_DECREF(iobj);
351
6.86M
        return 1;
352
6.86M
    }
353
354
1.53k
    res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
355
1.53k
    Py_DECREF(iobj);
356
1.53k
    if (res == NULL)
357
0
        return -1;
358
1.53k
    *p_output = res;
359
1.53k
    return 0;
360
361
4.23M
wrongtype:
362
4.23M
    switch(type)
363
4.23M
    {
364
0
        case 'o':
365
0
        case 'x':
366
0
        case 'X':
367
0
            PyErr_Format(PyExc_TypeError,
368
0
                    "%%%c format: an integer is required, "
369
0
                    "not %.200s",
370
0
                    type, Py_TYPE(v)->tp_name);
371
0
            break;
372
4.23M
        default:
373
4.23M
            PyErr_Format(PyExc_TypeError,
374
4.23M
                    "%%%c format: a real number is required, "
375
4.23M
                    "not %.200s",
376
4.23M
                    type, Py_TYPE(v)->tp_name);
377
4.23M
            break;
378
4.23M
    }
379
4.23M
    return -1;
380
4.23M
}
381
382
383
static Py_UCS4
384
formatchar(PyObject *v)
385
0
{
386
    /* presume that the buffer is at least 3 characters long */
387
0
    if (PyUnicode_Check(v)) {
388
0
        if (PyUnicode_GET_LENGTH(v) == 1) {
389
0
            return PyUnicode_READ_CHAR(v, 0);
390
0
        }
391
0
        PyErr_Format(PyExc_TypeError,
392
0
                     "%%c requires an int or a unicode character, "
393
0
                     "not a string of length %zd",
394
0
                     PyUnicode_GET_LENGTH(v));
395
0
        return (Py_UCS4) -1;
396
0
    }
397
0
    else {
398
0
        int overflow;
399
0
        long x = PyLong_AsLongAndOverflow(v, &overflow);
400
0
        if (x == -1 && PyErr_Occurred()) {
401
0
            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
402
0
                PyErr_Format(PyExc_TypeError,
403
0
                             "%%c requires an int or a unicode character, not %T",
404
0
                             v);
405
0
                return (Py_UCS4) -1;
406
0
            }
407
0
            return (Py_UCS4) -1;
408
0
        }
409
410
0
        if (x < 0 || x > MAX_UNICODE) {
411
            /* this includes an overflow in converting to C long */
412
0
            PyErr_SetString(PyExc_OverflowError,
413
0
                            "%c arg not in range(0x110000)");
414
0
            return (Py_UCS4) -1;
415
0
        }
416
417
0
        return (Py_UCS4) x;
418
0
    }
419
0
}
420
421
422
/* Parse options of an argument: flags, width, precision.
423
   Handle also "%(name)" syntax.
424
425
   Return 0 on success: the flags, width and precision are stored in *arg
426
   and arg->ch is left holding the conversion character.
427
   Raise an exception and return -1 on error. */
428
static int
429
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
430
                         struct unicode_format_arg_t *arg)
431
45.4M
{
432
45.4M
#define FORMAT_READ(ctx) \
433
45.8M
        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
434
435
45.4M
    PyObject *v;
436
437
45.4M
    if (arg->ch == '(') {
438
        /* Get argument value from a dictionary. Example: "%(name)s". */
439
39.1k
        Py_ssize_t keystart;
440
39.1k
        Py_ssize_t keylen;
441
39.1k
        PyObject *key;
442
39.1k
        int pcount = 1;
443
444
39.1k
        if (ctx->dict == NULL) {
445
0
            PyErr_SetString(PyExc_TypeError,
446
0
                            "format requires a mapping");
447
0
            return -1;
448
0
        }
449
39.1k
        ++ctx->fmtpos;
450
39.1k
        --ctx->fmtcnt;
451
39.1k
        keystart = ctx->fmtpos;
452
        /* Skip over balanced parentheses */
453
352k
        while (pcount > 0 && --ctx->fmtcnt >= 0) {
454
313k
            arg->ch = FORMAT_READ(ctx);
455
313k
            if (arg->ch == ')')
456
39.1k
                --pcount;
457
273k
            else if (arg->ch == '(')
458
0
                ++pcount;
459
313k
            ctx->fmtpos++;
460
313k
        }
461
39.1k
        keylen = ctx->fmtpos - keystart - 1;
462
39.1k
        if (ctx->fmtcnt < 0 || pcount > 0) {
463
0
            PyErr_SetString(PyExc_ValueError,
464
0
                            "incomplete format key");
465
0
            return -1;
466
0
        }
467
39.1k
        key = PyUnicode_Substring(ctx->fmtstr,
468
39.1k
                                  keystart, keystart + keylen);
469
39.1k
        if (key == NULL)
470
0
            return -1;
471
39.1k
        if (ctx->args_owned) {
472
27.9k
            ctx->args_owned = 0;
473
27.9k
            Py_DECREF(ctx->args);
474
27.9k
        }
475
39.1k
        ctx->args = PyObject_GetItem(ctx->dict, key);
476
39.1k
        Py_DECREF(key);
477
39.1k
        if (ctx->args == NULL)
478
0
            return -1;
479
39.1k
        ctx->args_owned = 1;
480
39.1k
        ctx->arglen = -1;
481
39.1k
        ctx->argidx = -2;
482
39.1k
    }
483
484
    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
485
45.4M
    while (--ctx->fmtcnt >= 0) {
486
45.4M
        arg->ch = FORMAT_READ(ctx);
487
45.4M
        ctx->fmtpos++;
488
45.4M
        switch (arg->ch) {
489
0
        case '-': arg->flags |= F_LJUST; continue;
490
0
        case '+': arg->flags |= F_SIGN; continue;
491
0
        case ' ': arg->flags |= F_BLANK; continue;
492
0
        case '#': arg->flags |= F_ALT; continue;
493
1.53k
        case '0': arg->flags |= F_ZERO; continue;
494
45.4M
        }
495
45.4M
        break;
496
45.4M
    }
497
498
    /* Parse width. Example: "%10s" => width=10 */
499
45.4M
    if (arg->ch == '*') {
500
0
        v = unicode_format_getnextarg(ctx);
501
0
        if (v == NULL)
502
0
            return -1;
503
0
        if (!PyLong_Check(v)) {
504
0
            PyErr_SetString(PyExc_TypeError,
505
0
                            "* wants int");
506
0
            return -1;
507
0
        }
508
0
        arg->width = PyLong_AsSsize_t(v);
509
0
        if (arg->width == -1 && PyErr_Occurred())
510
0
            return -1;
511
0
        if (arg->width < 0) {
512
0
            arg->flags |= F_LJUST;
513
0
            arg->width = -arg->width;
514
0
        }
515
0
        if (--ctx->fmtcnt >= 0) {
516
0
            arg->ch = FORMAT_READ(ctx);
517
0
            ctx->fmtpos++;
518
0
        }
519
0
    }
520
45.4M
    else if (arg->ch >= '0' && arg->ch <= '9') {
521
1.53k
        arg->width = arg->ch - '0';
522
1.53k
        while (--ctx->fmtcnt >= 0) {
523
1.53k
            arg->ch = FORMAT_READ(ctx);
524
1.53k
            ctx->fmtpos++;
525
1.53k
            if (arg->ch < '0' || arg->ch > '9')
526
1.53k
                break;
527
            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
528
               mixing signed and unsigned comparison. Since arg->ch is between
529
               '0' and '9', casting to int is safe. */
530
0
            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
531
0
                PyErr_SetString(PyExc_ValueError,
532
0
                                "width too big");
533
0
                return -1;
534
0
            }
535
0
            arg->width = arg->width*10 + (arg->ch - '0');
536
0
        }
537
1.53k
    }
538
539
    /* Parse precision. Example: "%.3f" => prec=3 */
540
45.4M
    if (arg->ch == '.') {
541
0
        arg->prec = 0;
542
0
        if (--ctx->fmtcnt >= 0) {
543
0
            arg->ch = FORMAT_READ(ctx);
544
0
            ctx->fmtpos++;
545
0
        }
546
0
        if (arg->ch == '*') {
547
0
            v = unicode_format_getnextarg(ctx);
548
0
            if (v == NULL)
549
0
                return -1;
550
0
            if (!PyLong_Check(v)) {
551
0
                PyErr_SetString(PyExc_TypeError,
552
0
                                "* wants int");
553
0
                return -1;
554
0
            }
555
0
            arg->prec = PyLong_AsInt(v);
556
0
            if (arg->prec == -1 && PyErr_Occurred())
557
0
                return -1;
558
0
            if (arg->prec < 0)
559
0
                arg->prec = 0;
560
0
            if (--ctx->fmtcnt >= 0) {
561
0
                arg->ch = FORMAT_READ(ctx);
562
0
                ctx->fmtpos++;
563
0
            }
564
0
        }
565
0
        else if (arg->ch >= '0' && arg->ch <= '9') {
566
0
            arg->prec = arg->ch - '0';
567
0
            while (--ctx->fmtcnt >= 0) {
568
0
                arg->ch = FORMAT_READ(ctx);
569
0
                ctx->fmtpos++;
570
0
                if (arg->ch < '0' || arg->ch > '9')
571
0
                    break;
572
0
                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
573
0
                    PyErr_SetString(PyExc_ValueError,
574
0
                                    "precision too big");
575
0
                    return -1;
576
0
                }
577
0
                arg->prec = arg->prec*10 + (arg->ch - '0');
578
0
            }
579
0
        }
580
0
    }
581
582
    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
583
45.4M
    if (ctx->fmtcnt >= 0) {
584
45.4M
        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
585
0
            if (--ctx->fmtcnt >= 0) {
586
0
                arg->ch = FORMAT_READ(ctx);
587
0
                ctx->fmtpos++;
588
0
            }
589
0
        }
590
45.4M
    }
591
45.4M
    if (ctx->fmtcnt < 0) {
592
0
        PyErr_SetString(PyExc_ValueError,
593
0
                        "incomplete format");
594
0
        return -1;
595
0
    }
596
45.4M
    return 0;
597
598
45.4M
#undef FORMAT_READ
599
45.4M
}
600
601
602
/* Format one argument. Supported conversion specifiers:
603
604
   - "s", "r", "a": any type
605
   - "i", "d", "u": int or float
606
   - "o", "x", "X": int
607
   - "e", "E", "f", "F", "g", "G": float
608
   - "c": int or str (1 character)
609
610
   When possible, the output is written directly into the Unicode writer
611
   (ctx->writer). A string is created when padding is required.
612
613
   Return 0 if the argument has been formatted into *p_str,
614
          1 if the argument has been written into ctx->writer,
615
         -1 on error. */
616
static int
617
unicode_format_arg_format(struct unicode_formatter_t *ctx,
618
                          struct unicode_format_arg_t *arg,
619
                          PyObject **p_str)
620
45.4M
{
621
45.4M
    PyObject *v;
622
45.4M
    _PyUnicodeWriter *writer = &ctx->writer;
623
624
45.4M
    if (ctx->fmtcnt == 0)
625
10.9M
        ctx->writer.overallocate = 0;
626
627
45.4M
    v = unicode_format_getnextarg(ctx);
628
45.4M
    if (v == NULL)
629
0
        return -1;
630
631
632
45.4M
    switch (arg->ch) {
633
34.3M
    case 's':
634
34.3M
    case 'r':
635
34.3M
    case 'a':
636
34.3M
        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
637
            /* Fast path */
638
0
            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
639
0
                return -1;
640
0
            return 1;
641
0
        }
642
643
34.3M
        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
644
34.3M
            *p_str = Py_NewRef(v);
645
34.3M
        }
646
0
        else {
647
0
            if (arg->ch == 's')
648
0
                *p_str = PyObject_Str(v);
649
0
            else if (arg->ch == 'r')
650
0
                *p_str = PyObject_Repr(v);
651
0
            else
652
0
                *p_str = PyObject_ASCII(v);
653
0
        }
654
34.3M
        break;
655
656
0
    case 'i':
657
11.1M
    case 'd':
658
11.1M
    case 'u':
659
11.1M
    case 'o':
660
11.1M
    case 'x':
661
11.1M
    case 'X':
662
11.1M
    {
663
11.1M
        int ret = mainformatlong(v, arg, p_str, writer);
664
11.1M
        if (ret != 0)
665
11.1M
            return ret;
666
1.53k
        arg->sign = 1;
667
1.53k
        break;
668
11.1M
    }
669
670
0
    case 'e':
671
0
    case 'E':
672
0
    case 'f':
673
0
    case 'F':
674
0
    case 'g':
675
0
    case 'G':
676
0
        if (arg->width == -1 && arg->prec == -1
677
0
            && !(arg->flags & (F_SIGN | F_BLANK)))
678
0
        {
679
            /* Fast path */
680
0
            if (formatfloat(v, arg, NULL, writer) == -1)
681
0
                return -1;
682
0
            return 1;
683
0
        }
684
685
0
        arg->sign = 1;
686
0
        if (formatfloat(v, arg, p_str, NULL) == -1)
687
0
            return -1;
688
0
        break;
689
690
0
    case 'c':
691
0
    {
692
0
        Py_UCS4 ch = formatchar(v);
693
0
        if (ch == (Py_UCS4) -1)
694
0
            return -1;
695
0
        if (arg->width == -1 && arg->prec == -1) {
696
            /* Fast path */
697
0
            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
698
0
                return -1;
699
0
            return 1;
700
0
        }
701
0
        *p_str = PyUnicode_FromOrdinal(ch);
702
0
        break;
703
0
    }
704
705
0
    default:
706
0
        PyErr_Format(PyExc_ValueError,
707
0
                     "unsupported format character '%c' (0x%x) "
708
0
                     "at index %zd",
709
0
                     (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?',
710
0
                     (int)arg->ch,
711
0
                     ctx->fmtpos - 1);
712
0
        return -1;
713
45.4M
    }
714
34.3M
    if (*p_str == NULL)
715
0
        return -1;
716
34.3M
    assert (PyUnicode_Check(*p_str));
717
34.3M
    return 0;
718
34.3M
}
719
720
721
static int
722
unicode_format_arg_output(struct unicode_formatter_t *ctx,
723
                          struct unicode_format_arg_t *arg,
724
                          PyObject *str)
725
34.3M
{
726
34.3M
    Py_ssize_t len;
727
34.3M
    int kind;
728
34.3M
    const void *pbuf;
729
34.3M
    Py_ssize_t pindex;
730
34.3M
    Py_UCS4 signchar;
731
34.3M
    Py_ssize_t buflen;
732
34.3M
    Py_UCS4 maxchar;
733
34.3M
    Py_ssize_t sublen;
734
34.3M
    _PyUnicodeWriter *writer = &ctx->writer;
735
34.3M
    Py_UCS4 fill;
736
737
34.3M
    fill = ' ';
738
34.3M
    if (arg->sign && arg->flags & F_ZERO)
739
1.53k
        fill = '0';
740
741
34.3M
    len = PyUnicode_GET_LENGTH(str);
742
34.3M
    if ((arg->width == -1 || arg->width <= len)
743
34.3M
        && (arg->prec == -1 || arg->prec >= len)
744
34.3M
        && !(arg->flags & (F_SIGN | F_BLANK)))
745
34.3M
    {
746
        /* Fast path */
747
34.3M
        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
748
0
            return -1;
749
34.3M
        return 0;
750
34.3M
    }
751
752
    /* Truncate the string for "s", "r" and "a" formats
753
       if the precision is set */
754
96
    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
755
0
        if (arg->prec >= 0 && len > arg->prec)
756
0
            len = arg->prec;
757
0
    }
758
759
    /* Adjust sign and width */
760
96
    kind = PyUnicode_KIND(str);
761
96
    pbuf = PyUnicode_DATA(str);
762
96
    pindex = 0;
763
96
    signchar = '\0';
764
96
    if (arg->sign) {
765
96
        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
766
96
        if (ch == '-' || ch == '+') {
767
0
            signchar = ch;
768
0
            len--;
769
0
            pindex++;
770
0
        }
771
96
        else if (arg->flags & F_SIGN)
772
0
            signchar = '+';
773
96
        else if (arg->flags & F_BLANK)
774
0
            signchar = ' ';
775
96
        else
776
96
            arg->sign = 0;
777
96
    }
778
96
    if (arg->width < len)
779
0
        arg->width = len;
780
781
    /* Prepare the writer */
782
96
    maxchar = writer->maxchar;
783
96
    if (!(arg->flags & F_LJUST)) {
784
96
        if (arg->sign) {
785
0
            if ((arg->width-1) > len)
786
0
                maxchar = Py_MAX(maxchar, fill);
787
0
        }
788
96
        else {
789
96
            if (arg->width > len)
790
96
                maxchar = Py_MAX(maxchar, fill);
791
96
        }
792
96
    }
793
96
    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
794
0
        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
795
0
        maxchar = Py_MAX(maxchar, strmaxchar);
796
0
    }
797
798
96
    buflen = arg->width;
799
96
    if (arg->sign && len == arg->width)
800
0
        buflen++;
801
96
    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
802
0
        return -1;
803
804
    /* Write the sign if needed */
805
96
    if (arg->sign) {
806
0
        if (fill != ' ') {
807
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
808
0
            writer->pos += 1;
809
0
        }
810
0
        if (arg->width > len)
811
0
            arg->width--;
812
0
    }
813
814
    /* Write the numeric prefix for "x", "X" and "o" formats
815
       if the alternate form is used.
816
       For example, write "0x" for the "%#x" format. */
817
96
    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
818
0
        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
819
0
        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
820
0
        if (fill != ' ') {
821
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
822
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
823
0
            writer->pos += 2;
824
0
            pindex += 2;
825
0
        }
826
0
        arg->width -= 2;
827
0
        if (arg->width < 0)
828
0
            arg->width = 0;
829
0
        len -= 2;
830
0
    }
831
832
    /* Pad left with the fill character if needed */
833
96
    if (arg->width > len && !(arg->flags & F_LJUST)) {
834
96
        sublen = arg->width - len;
835
96
        _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
836
96
        writer->pos += sublen;
837
96
        arg->width = len;
838
96
    }
839
840
    /* If padding with spaces: write sign if needed and/or numeric prefix if
841
       the alternate form is used */
842
96
    if (fill == ' ') {
843
0
        if (arg->sign) {
844
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
845
0
            writer->pos += 1;
846
0
        }
847
0
        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
848
0
            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
849
0
            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
850
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
851
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
852
0
            writer->pos += 2;
853
0
            pindex += 2;
854
0
        }
855
0
    }
856
857
    /* Write characters */
858
96
    if (len) {
859
96
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
860
96
                                      str, pindex, len);
861
96
        writer->pos += len;
862
96
    }
863
864
    /* Pad right with the fill character if needed */
865
96
    if (arg->width > len) {
866
0
        sublen = arg->width - len;
867
0
        _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
868
0
        writer->pos += sublen;
869
0
    }
870
96
    return 0;
871
96
}
872
873
874
/* Helper of PyUnicode_Format(): format one arg.
875
   Return 0 on success, raise an exception and return -1 on error. */
876
static int
877
unicode_format_arg(struct unicode_formatter_t *ctx)
878
45.4M
{
879
45.4M
    struct unicode_format_arg_t arg;
880
45.4M
    PyObject *str;
881
45.4M
    int ret;
882
883
45.4M
    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
884
45.4M
    if (arg.ch == '%') {
885
0
        ctx->fmtpos++;
886
0
        ctx->fmtcnt--;
887
0
        if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0)
888
0
            return -1;
889
0
        return 0;
890
0
    }
891
45.4M
    arg.flags = 0;
892
45.4M
    arg.width = -1;
893
45.4M
    arg.prec = -1;
894
45.4M
    arg.sign = 0;
895
45.4M
    str = NULL;
896
897
45.4M
    ret = unicode_format_arg_parse(ctx, &arg);
898
45.4M
    if (ret == -1)
899
0
        return -1;
900
901
45.4M
    ret = unicode_format_arg_format(ctx, &arg, &str);
902
45.4M
    if (ret == -1)
903
4.23M
        return -1;
904
905
41.2M
    if (ret != 1) {
906
34.3M
        ret = unicode_format_arg_output(ctx, &arg, str);
907
34.3M
        Py_DECREF(str);
908
34.3M
        if (ret == -1)
909
0
            return -1;
910
34.3M
    }
911
912
41.2M
    if (ctx->dict && (ctx->argidx < ctx->arglen)) {
913
0
        PyErr_SetString(PyExc_TypeError,
914
0
                        "not all arguments converted during string formatting");
915
0
        return -1;
916
0
    }
917
41.2M
    return 0;
918
41.2M
}
919
920
921
PyObject *
922
PyUnicode_Format(PyObject *format, PyObject *args)
923
23.3M
{
924
23.3M
    struct unicode_formatter_t ctx;
925
926
23.3M
    if (format == NULL || args == NULL) {
927
0
        PyErr_BadInternalCall();
928
0
        return NULL;
929
0
    }
930
931
23.3M
    if (ensure_unicode(format) < 0)
932
0
        return NULL;
933
934
23.3M
    ctx.fmtstr = format;
935
23.3M
    ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
936
23.3M
    ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
937
23.3M
    ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
938
23.3M
    ctx.fmtpos = 0;
939
940
23.3M
    _PyUnicodeWriter_Init(&ctx.writer);
941
23.3M
    ctx.writer.min_length = ctx.fmtcnt + 100;
942
23.3M
    ctx.writer.overallocate = 1;
943
944
23.3M
    if (PyTuple_Check(args)) {
945
5.59M
        ctx.arglen = PyTuple_Size(args);
946
5.59M
        ctx.argidx = 0;
947
5.59M
    }
948
17.7M
    else {
949
17.7M
        ctx.arglen = -1;
950
17.7M
        ctx.argidx = -2;
951
17.7M
    }
952
23.3M
    ctx.args_owned = 0;
953
23.3M
    if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args))
954
11.1k
        ctx.dict = args;
955
23.3M
    else
956
23.3M
        ctx.dict = NULL;
957
23.3M
    ctx.args = args;
958
959
111M
    while (--ctx.fmtcnt >= 0) {
960
92.4M
        if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
961
46.9M
            Py_ssize_t nonfmtpos;
962
963
46.9M
            nonfmtpos = ctx.fmtpos++;
964
458M
            while (ctx.fmtcnt >= 0 &&
965
446M
                   PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
966
411M
                ctx.fmtpos++;
967
411M
                ctx.fmtcnt--;
968
411M
            }
969
46.9M
            if (ctx.fmtcnt < 0) {
970
12.4M
                ctx.fmtpos--;
971
12.4M
                ctx.writer.overallocate = 0;
972
12.4M
            }
973
974
46.9M
            if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
975
46.9M
                                                nonfmtpos, ctx.fmtpos) < 0)
976
0
                goto onError;
977
46.9M
        }
978
45.4M
        else {
979
45.4M
            ctx.fmtpos++;
980
45.4M
            if (unicode_format_arg(&ctx) == -1)
981
4.23M
                goto onError;
982
45.4M
        }
983
92.4M
    }
984
985
19.1M
    if (ctx.argidx < ctx.arglen && !ctx.dict) {
986
0
        PyErr_SetString(PyExc_TypeError,
987
0
                        "not all arguments converted during string formatting");
988
0
        goto onError;
989
0
    }
990
991
19.1M
    if (ctx.args_owned) {
992
11.1k
        Py_DECREF(ctx.args);
993
11.1k
    }
994
19.1M
    return _PyUnicodeWriter_Finish(&ctx.writer);
995
996
4.23M
  onError:
997
4.23M
    _PyUnicodeWriter_Dealloc(&ctx.writer);
998
4.23M
    if (ctx.args_owned) {
999
0
        Py_DECREF(ctx.args);
1000
0
    }
1001
    return NULL;
1002
19.1M
}