Coverage Report

Created: 2026-01-13 06:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Objects/unicode_format.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
// PyUnicode_Format() implementation
42
43
#include "Python.h"
44
#include "pycore_abstract.h"      // _PyIndex_Check()
45
#include "pycore_format.h"        // F_ALT
46
#include "pycore_long.h"          // _PyLong_FormatWriter()
47
#include "pycore_object.h"        // _PyObject_IsUniquelyReferenced()
48
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
49
50
51
0
#define MAX_UNICODE _Py_MAX_UNICODE
52
1.60M
#define ensure_unicode _PyUnicode_EnsureUnicode
53
54
struct unicode_formatter_t {
55
    PyObject *args;
56
    int args_owned;
57
    Py_ssize_t arglen, argidx;
58
    PyObject *dict;
59
60
    int fmtkind;
61
    Py_ssize_t fmtcnt, fmtpos;
62
    const void *fmtdata;
63
    PyObject *fmtstr;
64
65
    _PyUnicodeWriter writer;
66
};
67
68
69
struct unicode_format_arg_t {
70
    Py_UCS4 ch;
71
    int flags;
72
    Py_ssize_t width;
73
    int prec;
74
    int sign;
75
};
76
77
78
static PyObject *
79
unicode_format_getnextarg(struct unicode_formatter_t *ctx)
80
2.94M
{
81
2.94M
    Py_ssize_t argidx = ctx->argidx;
82
83
2.94M
    if (argidx < ctx->arglen) {
84
2.94M
        ctx->argidx++;
85
2.94M
        if (ctx->arglen < 0)
86
280k
            return ctx->args;
87
2.66M
        else
88
2.66M
            return PyTuple_GetItem(ctx->args, argidx);
89
2.94M
    }
90
0
    PyErr_SetString(PyExc_TypeError,
91
0
                    "not enough arguments for format string");
92
0
    return NULL;
93
2.94M
}
94
95
96
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
97
98
/* Format a float into the writer if the writer is not NULL, or into *p_output
99
   otherwise.
100
101
   Return 0 on success, raise an exception and return -1 on error. */
102
static int
103
formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
104
            PyObject **p_output,
105
            _PyUnicodeWriter *writer)
106
0
{
107
0
    char *p;
108
0
    double x;
109
0
    Py_ssize_t len;
110
0
    int prec;
111
0
    int dtoa_flags = 0;
112
113
0
    x = PyFloat_AsDouble(v);
114
0
    if (x == -1.0 && PyErr_Occurred())
115
0
        return -1;
116
117
0
    prec = arg->prec;
118
0
    if (prec < 0)
119
0
        prec = 6;
120
121
0
    if (arg->flags & F_ALT)
122
0
        dtoa_flags |= Py_DTSF_ALT;
123
0
    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
124
0
    if (p == NULL)
125
0
        return -1;
126
0
    len = strlen(p);
127
0
    if (writer) {
128
0
        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
129
0
            PyMem_Free(p);
130
0
            return -1;
131
0
        }
132
0
    }
133
0
    else
134
0
        *p_output = _PyUnicode_FromASCII(p, len);
135
0
    PyMem_Free(p);
136
0
    return 0;
137
0
}
138
139
140
/* formatlong() emulates the format codes d, u, o, x and X, and
141
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
142
 * Python's regular ints.
143
 * Return value:  a new PyUnicodeObject*, or NULL if error.
144
 *     The output string is of the form
145
 *         "-"? ("0x" | "0X")? digit+
146
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
147
 *         set in flags.  The case of hex digits will be correct,
148
 *     There will be at least prec digits, zero-filled on the left if
149
 *         necessary to get that many.
150
 * val          object to be converted
151
 * flags        bitmask of format flags; only F_ALT is looked at
152
 * prec         minimum number of digits; 0-fill on left if needed
153
 * type         a character in [duoxX]; u acts the same as d
154
 *
155
 * CAUTION:  o, x and X conversions on regular ints can never
156
 * produce a '-' sign, but can for Python's unbounded ints.
157
 */
158
PyObject *
159
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
160
0
{
161
0
    PyObject *result = NULL;
162
0
    char *buf;
163
0
    Py_ssize_t i;
164
0
    int sign;           /* 1 if '-', else 0 */
165
0
    int len;            /* number of characters */
166
0
    Py_ssize_t llen;
167
0
    int numdigits;      /* len == numnondigits + numdigits */
168
0
    int numnondigits = 0;
169
170
    /* Avoid exceeding SSIZE_T_MAX */
171
0
    if (prec > INT_MAX-3) {
172
0
        PyErr_SetString(PyExc_OverflowError,
173
0
                        "precision too large");
174
0
        return NULL;
175
0
    }
176
177
0
    assert(PyLong_Check(val));
178
179
0
    switch (type) {
180
0
    default:
181
0
        Py_UNREACHABLE();
182
0
    case 'd':
183
0
    case 'i':
184
0
    case 'u':
185
        /* int and int subclasses should print numerically when a numeric */
186
        /* format code is used (see issue18780) */
187
0
        result = PyNumber_ToBase(val, 10);
188
0
        break;
189
0
    case 'o':
190
0
        numnondigits = 2;
191
0
        result = PyNumber_ToBase(val, 8);
192
0
        break;
193
0
    case 'x':
194
0
    case 'X':
195
0
        numnondigits = 2;
196
0
        result = PyNumber_ToBase(val, 16);
197
0
        break;
198
0
    }
199
0
    if (!result)
200
0
        return NULL;
201
202
0
    assert(_PyUnicode_IsModifiable(result));
203
0
    assert(PyUnicode_IS_ASCII(result));
204
205
    /* To modify the string in-place, there can only be one reference. */
206
0
    if (!_PyObject_IsUniquelyReferenced(result)) {
207
0
        Py_DECREF(result);
208
0
        PyErr_BadInternalCall();
209
0
        return NULL;
210
0
    }
211
0
    buf = PyUnicode_DATA(result);
212
0
    llen = PyUnicode_GET_LENGTH(result);
213
0
    if (llen > INT_MAX) {
214
0
        Py_DECREF(result);
215
0
        PyErr_SetString(PyExc_ValueError,
216
0
                        "string too large in _PyUnicode_FormatLong");
217
0
        return NULL;
218
0
    }
219
0
    len = (int)llen;
220
0
    sign = buf[0] == '-';
221
0
    numnondigits += sign;
222
0
    numdigits = len - numnondigits;
223
0
    assert(numdigits > 0);
224
225
    /* Get rid of base marker unless F_ALT */
226
0
    if (((alt) == 0 &&
227
0
        (type == 'o' || type == 'x' || type == 'X'))) {
228
0
        assert(buf[sign] == '0');
229
0
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
230
0
               buf[sign+1] == 'o');
231
0
        numnondigits -= 2;
232
0
        buf += 2;
233
0
        len -= 2;
234
0
        if (sign)
235
0
            buf[0] = '-';
236
0
        assert(len == numnondigits + numdigits);
237
0
        assert(numdigits > 0);
238
0
    }
239
240
    /* Fill with leading zeroes to meet minimum width. */
241
0
    if (prec > numdigits) {
242
0
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
243
0
                                numnondigits + prec);
244
0
        char *b1;
245
0
        if (!r1) {
246
0
            Py_DECREF(result);
247
0
            return NULL;
248
0
        }
249
0
        b1 = PyBytes_AS_STRING(r1);
250
0
        for (i = 0; i < numnondigits; ++i)
251
0
            *b1++ = *buf++;
252
0
        for (i = 0; i < prec - numdigits; i++)
253
0
            *b1++ = '0';
254
0
        for (i = 0; i < numdigits; i++)
255
0
            *b1++ = *buf++;
256
0
        *b1 = '\0';
257
0
        Py_SETREF(result, r1);
258
0
        buf = PyBytes_AS_STRING(result);
259
0
        len = numnondigits + prec;
260
0
    }
261
262
    /* Fix up case for hex conversions. */
263
0
    if (type == 'X') {
264
        /* Need to convert all lower case letters to upper case.
265
           and need to convert 0x to 0X (and -0x to -0X). */
266
0
        for (i = 0; i < len; i++)
267
0
            if (buf[i] >= 'a' && buf[i] <= 'x')
268
0
                buf[i] -= 'a'-'A';
269
0
    }
270
0
    if (!PyUnicode_Check(result)
271
0
        || buf != PyUnicode_DATA(result)) {
272
0
        PyObject *unicode;
273
0
        unicode = _PyUnicode_FromASCII(buf, len);
274
0
        Py_SETREF(result, unicode);
275
0
    }
276
0
    else if (len != PyUnicode_GET_LENGTH(result)) {
277
0
        if (PyUnicode_Resize(&result, len) < 0)
278
0
            Py_CLEAR(result);
279
0
    }
280
0
    return result;
281
0
}
282
283
284
/* Format an integer or a float as an integer.
285
 * Return 1 if the number has been formatted into the writer,
286
 *        0 if the number has been formatted into *p_output
287
 *       -1 and raise an exception on error */
288
static int
289
mainformatlong(PyObject *v,
290
               struct unicode_format_arg_t *arg,
291
               PyObject **p_output,
292
               _PyUnicodeWriter *writer)
293
1.61M
{
294
1.61M
    PyObject *iobj, *res;
295
1.61M
    char type = (char)arg->ch;
296
297
1.61M
    if (!PyNumber_Check(v))
298
0
        goto wrongtype;
299
300
    /* make sure number is a type of integer for o, x, and X */
301
1.61M
    if (!PyLong_Check(v)) {
302
0
        if (type == 'o' || type == 'x' || type == 'X') {
303
0
            iobj = _PyNumber_Index(v);
304
0
        }
305
0
        else {
306
0
            iobj = PyNumber_Long(v);
307
0
        }
308
0
        if (iobj == NULL ) {
309
0
            if (PyErr_ExceptionMatches(PyExc_TypeError))
310
0
                goto wrongtype;
311
0
            return -1;
312
0
        }
313
0
        assert(PyLong_Check(iobj));
314
0
    }
315
1.61M
    else {
316
1.61M
        iobj = Py_NewRef(v);
317
1.61M
    }
318
319
1.61M
    if (PyLong_CheckExact(v)
320
1.61M
        && arg->width == -1 && arg->prec == -1
321
1.61M
        && !(arg->flags & (F_SIGN | F_BLANK))
322
1.61M
        && type != 'X')
323
1.61M
    {
324
        /* Fast path */
325
1.61M
        int alternate = arg->flags & F_ALT;
326
1.61M
        int base;
327
328
1.61M
        switch(type)
329
1.61M
        {
330
0
            default:
331
0
                Py_UNREACHABLE();
332
1.61M
            case 'd':
333
1.61M
            case 'i':
334
1.61M
            case 'u':
335
1.61M
                base = 10;
336
1.61M
                break;
337
0
            case 'o':
338
0
                base = 8;
339
0
                break;
340
0
            case 'x':
341
0
            case 'X':
342
0
                base = 16;
343
0
                break;
344
1.61M
        }
345
346
1.61M
        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
347
0
            Py_DECREF(iobj);
348
0
            return -1;
349
0
        }
350
1.61M
        Py_DECREF(iobj);
351
1.61M
        return 1;
352
1.61M
    }
353
354
0
    res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
355
0
    Py_DECREF(iobj);
356
0
    if (res == NULL)
357
0
        return -1;
358
0
    *p_output = res;
359
0
    return 0;
360
361
0
wrongtype:
362
0
    switch(type)
363
0
    {
364
0
        case 'o':
365
0
        case 'x':
366
0
        case 'X':
367
0
            PyErr_Format(PyExc_TypeError,
368
0
                    "%%%c format: an integer is required, "
369
0
                    "not %.200s",
370
0
                    type, Py_TYPE(v)->tp_name);
371
0
            break;
372
0
        default:
373
0
            PyErr_Format(PyExc_TypeError,
374
0
                    "%%%c format: a real number is required, "
375
0
                    "not %.200s",
376
0
                    type, Py_TYPE(v)->tp_name);
377
0
            break;
378
0
    }
379
0
    return -1;
380
0
}
381
382
383
static Py_UCS4
384
formatchar(PyObject *v)
385
0
{
386
    /* presume that the buffer is at least 3 characters long */
387
0
    if (PyUnicode_Check(v)) {
388
0
        if (PyUnicode_GET_LENGTH(v) == 1) {
389
0
            return PyUnicode_READ_CHAR(v, 0);
390
0
        }
391
0
        PyErr_Format(PyExc_TypeError,
392
0
                     "%%c requires an int or a unicode character, "
393
0
                     "not a string of length %zd",
394
0
                     PyUnicode_GET_LENGTH(v));
395
0
        return (Py_UCS4) -1;
396
0
    }
397
0
    else {
398
0
        int overflow;
399
0
        long x = PyLong_AsLongAndOverflow(v, &overflow);
400
0
        if (x == -1 && PyErr_Occurred()) {
401
0
            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
402
0
                PyErr_Format(PyExc_TypeError,
403
0
                             "%%c requires an int or a unicode character, not %T",
404
0
                             v);
405
0
                return (Py_UCS4) -1;
406
0
            }
407
0
            return (Py_UCS4) -1;
408
0
        }
409
410
0
        if (x < 0 || x > MAX_UNICODE) {
411
            /* this includes an overflow in converting to C long */
412
0
            PyErr_SetString(PyExc_OverflowError,
413
0
                            "%c arg not in range(0x110000)");
414
0
            return (Py_UCS4) -1;
415
0
        }
416
417
0
        return (Py_UCS4) x;
418
0
    }
419
0
}
420
421
422
/* Parse options of an argument: flags, width, precision.
423
   Handle also "%(name)" syntax.
424
425
   Return 0 if the argument has been formatted into arg->str.
426
   Return 1 if the argument has been written into ctx->writer,
427
   Raise an exception and return -1 on error. */
428
static int
429
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
430
                         struct unicode_format_arg_t *arg)
431
2.94M
{
432
2.94M
#define FORMAT_READ(ctx) \
433
2.94M
        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
434
435
2.94M
    PyObject *v;
436
437
2.94M
    if (arg->ch == '(') {
438
        /* Get argument value from a dictionary. Example: "%(name)s". */
439
0
        Py_ssize_t keystart;
440
0
        Py_ssize_t keylen;
441
0
        PyObject *key;
442
0
        int pcount = 1;
443
444
0
        if (ctx->dict == NULL) {
445
0
            PyErr_SetString(PyExc_TypeError,
446
0
                            "format requires a mapping");
447
0
            return -1;
448
0
        }
449
0
        ++ctx->fmtpos;
450
0
        --ctx->fmtcnt;
451
0
        keystart = ctx->fmtpos;
452
        /* Skip over balanced parentheses */
453
0
        while (pcount > 0 && --ctx->fmtcnt >= 0) {
454
0
            arg->ch = FORMAT_READ(ctx);
455
0
            if (arg->ch == ')')
456
0
                --pcount;
457
0
            else if (arg->ch == '(')
458
0
                ++pcount;
459
0
            ctx->fmtpos++;
460
0
        }
461
0
        keylen = ctx->fmtpos - keystart - 1;
462
0
        if (ctx->fmtcnt < 0 || pcount > 0) {
463
0
            PyErr_SetString(PyExc_ValueError,
464
0
                            "incomplete format key");
465
0
            return -1;
466
0
        }
467
0
        key = PyUnicode_Substring(ctx->fmtstr,
468
0
                                  keystart, keystart + keylen);
469
0
        if (key == NULL)
470
0
            return -1;
471
0
        if (ctx->args_owned) {
472
0
            ctx->args_owned = 0;
473
0
            Py_DECREF(ctx->args);
474
0
        }
475
0
        ctx->args = PyObject_GetItem(ctx->dict, key);
476
0
        Py_DECREF(key);
477
0
        if (ctx->args == NULL)
478
0
            return -1;
479
0
        ctx->args_owned = 1;
480
0
        ctx->arglen = -1;
481
0
        ctx->argidx = -2;
482
0
    }
483
484
    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
485
2.94M
    while (--ctx->fmtcnt >= 0) {
486
2.94M
        arg->ch = FORMAT_READ(ctx);
487
2.94M
        ctx->fmtpos++;
488
2.94M
        switch (arg->ch) {
489
0
        case '-': arg->flags |= F_LJUST; continue;
490
0
        case '+': arg->flags |= F_SIGN; continue;
491
0
        case ' ': arg->flags |= F_BLANK; continue;
492
0
        case '#': arg->flags |= F_ALT; continue;
493
0
        case '0': arg->flags |= F_ZERO; continue;
494
2.94M
        }
495
2.94M
        break;
496
2.94M
    }
497
498
    /* Parse width. Example: "%10s" => width=10 */
499
2.94M
    if (arg->ch == '*') {
500
0
        v = unicode_format_getnextarg(ctx);
501
0
        if (v == NULL)
502
0
            return -1;
503
0
        if (!PyLong_Check(v)) {
504
0
            PyErr_SetString(PyExc_TypeError,
505
0
                            "* wants int");
506
0
            return -1;
507
0
        }
508
0
        arg->width = PyLong_AsSsize_t(v);
509
0
        if (arg->width == -1 && PyErr_Occurred())
510
0
            return -1;
511
0
        if (arg->width < 0) {
512
0
            arg->flags |= F_LJUST;
513
0
            arg->width = -arg->width;
514
0
        }
515
0
        if (--ctx->fmtcnt >= 0) {
516
0
            arg->ch = FORMAT_READ(ctx);
517
0
            ctx->fmtpos++;
518
0
        }
519
0
    }
520
2.94M
    else if (arg->ch >= '0' && arg->ch <= '9') {
521
0
        arg->width = arg->ch - '0';
522
0
        while (--ctx->fmtcnt >= 0) {
523
0
            arg->ch = FORMAT_READ(ctx);
524
0
            ctx->fmtpos++;
525
0
            if (arg->ch < '0' || arg->ch > '9')
526
0
                break;
527
            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
528
               mixing signed and unsigned comparison. Since arg->ch is between
529
               '0' and '9', casting to int is safe. */
530
0
            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
531
0
                PyErr_SetString(PyExc_ValueError,
532
0
                                "width too big");
533
0
                return -1;
534
0
            }
535
0
            arg->width = arg->width*10 + (arg->ch - '0');
536
0
        }
537
0
    }
538
539
    /* Parse precision. Example: "%.3f" => prec=3 */
540
2.94M
    if (arg->ch == '.') {
541
0
        arg->prec = 0;
542
0
        if (--ctx->fmtcnt >= 0) {
543
0
            arg->ch = FORMAT_READ(ctx);
544
0
            ctx->fmtpos++;
545
0
        }
546
0
        if (arg->ch == '*') {
547
0
            v = unicode_format_getnextarg(ctx);
548
0
            if (v == NULL)
549
0
                return -1;
550
0
            if (!PyLong_Check(v)) {
551
0
                PyErr_SetString(PyExc_TypeError,
552
0
                                "* wants int");
553
0
                return -1;
554
0
            }
555
0
            arg->prec = PyLong_AsInt(v);
556
0
            if (arg->prec == -1 && PyErr_Occurred())
557
0
                return -1;
558
0
            if (arg->prec < 0)
559
0
                arg->prec = 0;
560
0
            if (--ctx->fmtcnt >= 0) {
561
0
                arg->ch = FORMAT_READ(ctx);
562
0
                ctx->fmtpos++;
563
0
            }
564
0
        }
565
0
        else if (arg->ch >= '0' && arg->ch <= '9') {
566
0
            arg->prec = arg->ch - '0';
567
0
            while (--ctx->fmtcnt >= 0) {
568
0
                arg->ch = FORMAT_READ(ctx);
569
0
                ctx->fmtpos++;
570
0
                if (arg->ch < '0' || arg->ch > '9')
571
0
                    break;
572
0
                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
573
0
                    PyErr_SetString(PyExc_ValueError,
574
0
                                    "precision too big");
575
0
                    return -1;
576
0
                }
577
0
                arg->prec = arg->prec*10 + (arg->ch - '0');
578
0
            }
579
0
        }
580
0
    }
581
582
    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
583
2.94M
    if (ctx->fmtcnt >= 0) {
584
2.94M
        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
585
0
            if (--ctx->fmtcnt >= 0) {
586
0
                arg->ch = FORMAT_READ(ctx);
587
0
                ctx->fmtpos++;
588
0
            }
589
0
        }
590
2.94M
    }
591
2.94M
    if (ctx->fmtcnt < 0) {
592
0
        PyErr_SetString(PyExc_ValueError,
593
0
                        "incomplete format");
594
0
        return -1;
595
0
    }
596
2.94M
    return 0;
597
598
2.94M
#undef FORMAT_READ
599
2.94M
}
600
601
602
/* Format one argument. Supported conversion specifiers:
603
604
   - "s", "r", "a": any type
605
   - "i", "d", "u": int or float
606
   - "o", "x", "X": int
607
   - "e", "E", "f", "F", "g", "G": float
608
   - "c": int or str (1 character)
609
610
   When possible, the output is written directly into the Unicode writer
611
   (ctx->writer). A string is created when padding is required.
612
613
   Return 0 if the argument has been formatted into *p_str,
614
          1 if the argument has been written into ctx->writer,
615
         -1 on error. */
616
static int
617
unicode_format_arg_format(struct unicode_formatter_t *ctx,
618
                          struct unicode_format_arg_t *arg,
619
                          PyObject **p_str)
620
2.94M
{
621
2.94M
    PyObject *v;
622
2.94M
    _PyUnicodeWriter *writer = &ctx->writer;
623
624
2.94M
    if (ctx->fmtcnt == 0)
625
1.59M
        ctx->writer.overallocate = 0;
626
627
2.94M
    v = unicode_format_getnextarg(ctx);
628
2.94M
    if (v == NULL)
629
0
        return -1;
630
631
632
2.94M
    switch (arg->ch) {
633
1.32M
    case 's':
634
1.32M
    case 'r':
635
1.32M
    case 'a':
636
1.32M
        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
637
            /* Fast path */
638
0
            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
639
0
                return -1;
640
0
            return 1;
641
0
        }
642
643
1.32M
        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
644
1.32M
            *p_str = Py_NewRef(v);
645
1.32M
        }
646
454
        else {
647
454
            if (arg->ch == 's')
648
0
                *p_str = PyObject_Str(v);
649
454
            else if (arg->ch == 'r')
650
152
                *p_str = PyObject_Repr(v);
651
302
            else
652
302
                *p_str = PyObject_ASCII(v);
653
454
        }
654
1.32M
        break;
655
656
0
    case 'i':
657
1.61M
    case 'd':
658
1.61M
    case 'u':
659
1.61M
    case 'o':
660
1.61M
    case 'x':
661
1.61M
    case 'X':
662
1.61M
    {
663
1.61M
        int ret = mainformatlong(v, arg, p_str, writer);
664
1.61M
        if (ret != 0)
665
1.61M
            return ret;
666
0
        arg->sign = 1;
667
0
        break;
668
1.61M
    }
669
670
0
    case 'e':
671
0
    case 'E':
672
0
    case 'f':
673
0
    case 'F':
674
0
    case 'g':
675
0
    case 'G':
676
0
        if (arg->width == -1 && arg->prec == -1
677
0
            && !(arg->flags & (F_SIGN | F_BLANK)))
678
0
        {
679
            /* Fast path */
680
0
            if (formatfloat(v, arg, NULL, writer) == -1)
681
0
                return -1;
682
0
            return 1;
683
0
        }
684
685
0
        arg->sign = 1;
686
0
        if (formatfloat(v, arg, p_str, NULL) == -1)
687
0
            return -1;
688
0
        break;
689
690
0
    case 'c':
691
0
    {
692
0
        Py_UCS4 ch = formatchar(v);
693
0
        if (ch == (Py_UCS4) -1)
694
0
            return -1;
695
0
        if (arg->width == -1 && arg->prec == -1) {
696
            /* Fast path */
697
0
            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
698
0
                return -1;
699
0
            return 1;
700
0
        }
701
0
        *p_str = PyUnicode_FromOrdinal(ch);
702
0
        break;
703
0
    }
704
705
0
    default:
706
0
        PyErr_Format(PyExc_ValueError,
707
0
                     "unsupported format character '%c' (0x%x) "
708
0
                     "at index %zd",
709
0
                     (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?',
710
0
                     (int)arg->ch,
711
0
                     ctx->fmtpos - 1);
712
0
        return -1;
713
2.94M
    }
714
1.32M
    if (*p_str == NULL)
715
0
        return -1;
716
1.32M
    assert (PyUnicode_Check(*p_str));
717
1.32M
    return 0;
718
1.32M
}
719
720
721
static int
722
unicode_format_arg_output(struct unicode_formatter_t *ctx,
723
                          struct unicode_format_arg_t *arg,
724
                          PyObject *str)
725
1.32M
{
726
1.32M
    Py_ssize_t len;
727
1.32M
    int kind;
728
1.32M
    const void *pbuf;
729
1.32M
    Py_ssize_t pindex;
730
1.32M
    Py_UCS4 signchar;
731
1.32M
    Py_ssize_t buflen;
732
1.32M
    Py_UCS4 maxchar;
733
1.32M
    Py_ssize_t sublen;
734
1.32M
    _PyUnicodeWriter *writer = &ctx->writer;
735
1.32M
    Py_UCS4 fill;
736
737
1.32M
    fill = ' ';
738
1.32M
    if (arg->sign && arg->flags & F_ZERO)
739
0
        fill = '0';
740
741
1.32M
    len = PyUnicode_GET_LENGTH(str);
742
1.32M
    if ((arg->width == -1 || arg->width <= len)
743
1.32M
        && (arg->prec == -1 || arg->prec >= len)
744
1.32M
        && !(arg->flags & (F_SIGN | F_BLANK)))
745
1.32M
    {
746
        /* Fast path */
747
1.32M
        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
748
0
            return -1;
749
1.32M
        return 0;
750
1.32M
    }
751
752
    /* Truncate the string for "s", "r" and "a" formats
753
       if the precision is set */
754
0
    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
755
0
        if (arg->prec >= 0 && len > arg->prec)
756
0
            len = arg->prec;
757
0
    }
758
759
    /* Adjust sign and width */
760
0
    kind = PyUnicode_KIND(str);
761
0
    pbuf = PyUnicode_DATA(str);
762
0
    pindex = 0;
763
0
    signchar = '\0';
764
0
    if (arg->sign) {
765
0
        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
766
0
        if (ch == '-' || ch == '+') {
767
0
            signchar = ch;
768
0
            len--;
769
0
            pindex++;
770
0
        }
771
0
        else if (arg->flags & F_SIGN)
772
0
            signchar = '+';
773
0
        else if (arg->flags & F_BLANK)
774
0
            signchar = ' ';
775
0
        else
776
0
            arg->sign = 0;
777
0
    }
778
0
    if (arg->width < len)
779
0
        arg->width = len;
780
781
    /* Prepare the writer */
782
0
    maxchar = writer->maxchar;
783
0
    if (!(arg->flags & F_LJUST)) {
784
0
        if (arg->sign) {
785
0
            if ((arg->width-1) > len)
786
0
                maxchar = Py_MAX(maxchar, fill);
787
0
        }
788
0
        else {
789
0
            if (arg->width > len)
790
0
                maxchar = Py_MAX(maxchar, fill);
791
0
        }
792
0
    }
793
0
    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
794
0
        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
795
0
        maxchar = Py_MAX(maxchar, strmaxchar);
796
0
    }
797
798
0
    buflen = arg->width;
799
0
    if (arg->sign && len == arg->width)
800
0
        buflen++;
801
0
    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
802
0
        return -1;
803
804
    /* Write the sign if needed */
805
0
    if (arg->sign) {
806
0
        if (fill != ' ') {
807
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
808
0
            writer->pos += 1;
809
0
        }
810
0
        if (arg->width > len)
811
0
            arg->width--;
812
0
    }
813
814
    /* Write the numeric prefix for "x", "X" and "o" formats
815
       if the alternate form is used.
816
       For example, write "0x" for the "%#x" format. */
817
0
    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
818
0
        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
819
0
        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
820
0
        if (fill != ' ') {
821
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
822
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
823
0
            writer->pos += 2;
824
0
            pindex += 2;
825
0
        }
826
0
        arg->width -= 2;
827
0
        if (arg->width < 0)
828
0
            arg->width = 0;
829
0
        len -= 2;
830
0
    }
831
832
    /* Pad left with the fill character if needed */
833
0
    if (arg->width > len && !(arg->flags & F_LJUST)) {
834
0
        sublen = arg->width - len;
835
0
        _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
836
0
        writer->pos += sublen;
837
0
        arg->width = len;
838
0
    }
839
840
    /* If padding with spaces: write sign if needed and/or numeric prefix if
841
       the alternate form is used */
842
0
    if (fill == ' ') {
843
0
        if (arg->sign) {
844
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
845
0
            writer->pos += 1;
846
0
        }
847
0
        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
848
0
            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
849
0
            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
850
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
851
0
            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
852
0
            writer->pos += 2;
853
0
            pindex += 2;
854
0
        }
855
0
    }
856
857
    /* Write characters */
858
0
    if (len) {
859
0
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
860
0
                                      str, pindex, len);
861
0
        writer->pos += len;
862
0
    }
863
864
    /* Pad right with the fill character if needed */
865
0
    if (arg->width > len) {
866
0
        sublen = arg->width - len;
867
0
        _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
868
0
        writer->pos += sublen;
869
0
    }
870
0
    return 0;
871
0
}
872
873
874
/* Helper of PyUnicode_Format(): format one arg.
875
   Return 0 on success, raise an exception and return -1 on error. */
876
static int
877
unicode_format_arg(struct unicode_formatter_t *ctx)
878
2.94M
{
879
2.94M
    struct unicode_format_arg_t arg;
880
2.94M
    PyObject *str;
881
2.94M
    int ret;
882
883
2.94M
    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
884
2.94M
    if (arg.ch == '%') {
885
0
        ctx->fmtpos++;
886
0
        ctx->fmtcnt--;
887
0
        if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0)
888
0
            return -1;
889
0
        return 0;
890
0
    }
891
2.94M
    arg.flags = 0;
892
2.94M
    arg.width = -1;
893
2.94M
    arg.prec = -1;
894
2.94M
    arg.sign = 0;
895
2.94M
    str = NULL;
896
897
2.94M
    ret = unicode_format_arg_parse(ctx, &arg);
898
2.94M
    if (ret == -1)
899
0
        return -1;
900
901
2.94M
    ret = unicode_format_arg_format(ctx, &arg, &str);
902
2.94M
    if (ret == -1)
903
0
        return -1;
904
905
2.94M
    if (ret != 1) {
906
1.32M
        ret = unicode_format_arg_output(ctx, &arg, str);
907
1.32M
        Py_DECREF(str);
908
1.32M
        if (ret == -1)
909
0
            return -1;
910
1.32M
    }
911
912
2.94M
    if (ctx->dict && (ctx->argidx < ctx->arglen)) {
913
0
        PyErr_SetString(PyExc_TypeError,
914
0
                        "not all arguments converted during string formatting");
915
0
        return -1;
916
0
    }
917
2.94M
    return 0;
918
2.94M
}
919
920
921
/* Implement printf-style formatting: format % args.

   `args` is a tuple of arguments, a mapping used for "%(name)" keys, or a
   single object.

   Return a new str object, or NULL with an exception set on error. */
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{
    struct unicode_formatter_t ctx;

    if (format == NULL || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    if (ensure_unicode(format) < 0) {
        return NULL;
    }

    ctx.fmtstr = format;
    ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
    ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
    ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
    ctx.fmtpos = 0;

    _PyUnicodeWriter_Init(&ctx.writer);
    ctx.writer.min_length = ctx.fmtcnt + 100;
    ctx.writer.overallocate = 1;

    const int is_tuple = PyTuple_Check(args);
    if (is_tuple) {
        ctx.arglen = PyTuple_Size(args);
        ctx.argidx = 0;
    }
    else {
        /* Single-object mode: exactly one argument can be fetched. */
        ctx.arglen = -1;
        ctx.argidx = -2;
    }
    ctx.args_owned = 0;
    /* Tuples and str are never used for "%(name)" lookups. */
    if (!is_tuple && PyMapping_Check(args) && !PyUnicode_Check(args)) {
        ctx.dict = args;
    }
    else {
        ctx.dict = NULL;
    }
    ctx.args = args;

    while (--ctx.fmtcnt >= 0) {
        if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) == '%') {
            /* Conversion specifier: skip the '%' and format one argument. */
            ctx.fmtpos++;
            if (unicode_format_arg(&ctx) == -1) {
                goto onError;
            }
            continue;
        }

        /* Literal text: copy the whole run up to the next '%' at once. */
        Py_ssize_t literal_start = ctx.fmtpos++;
        while (ctx.fmtcnt >= 0 &&
               PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
            ctx.fmtpos++;
            ctx.fmtcnt--;
        }
        if (ctx.fmtcnt < 0) {
            /* The run reached the end of the format string: step back
               from the position past the end, and stop overallocating
               because this is the last write. */
            ctx.fmtpos--;
            ctx.writer.overallocate = 0;
        }

        if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
                                            literal_start, ctx.fmtpos) < 0) {
            goto onError;
        }
    }

    if (ctx.argidx < ctx.arglen && !ctx.dict) {
        PyErr_SetString(PyExc_TypeError,
                        "not all arguments converted during string formatting");
        goto onError;
    }

    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return _PyUnicodeWriter_Finish(&ctx.writer);

  onError:
    _PyUnicodeWriter_Dealloc(&ctx.writer);
    if (ctx.args_owned) {
        Py_DECREF(ctx.args);
    }
    return NULL;
}