Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Python/formatter_unicode.c
Line
Count
Source (jump to first uncovered line)
1
/* implements the unicode (as opposed to string) version of the
2
   built-in formatters for string, int, float.  that is, the versions
3
   of int.__format__, etc., that take and return unicode objects */
4
5
#include "Python.h"
6
#include "pycore_fileutils.h"     // _Py_GetLocaleconvNumeric()
7
#include "pycore_long.h"          // _PyLong_FormatWriter()
8
#include "pycore_unicodeobject.h" // PyUnicode_MAX_CHAR_VALUE()
9
#include <locale.h>
10
11
/* Raises an exception about an unknown presentation type for this
12
 * type. */
13
14
static void
15
unknown_presentation_type(Py_UCS4 presentation_type,
16
                          const char* type_name)
17
0
{
18
    /* %c might be out-of-range, hence the two cases. */
19
0
    if (presentation_type > 32 && presentation_type < 128)
20
0
        PyErr_Format(PyExc_ValueError,
21
0
                     "Unknown format code '%c' "
22
0
                     "for object of type '%.200s'",
23
0
                     (char)presentation_type,
24
0
                     type_name);
25
0
    else
26
0
        PyErr_Format(PyExc_ValueError,
27
0
                     "Unknown format code '\\x%x' "
28
0
                     "for object of type '%.200s'",
29
0
                     (unsigned int)presentation_type,
30
0
                     type_name);
31
0
}
32
33
static void
34
invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
35
0
{
36
0
    assert(specifier == ',' || specifier == '_');
37
0
    if (presentation_type > 32 && presentation_type < 128)
38
0
        PyErr_Format(PyExc_ValueError,
39
0
                     "Cannot specify '%c' with '%c'.",
40
0
                     specifier, (char)presentation_type);
41
0
    else
42
0
        PyErr_Format(PyExc_ValueError,
43
0
                     "Cannot specify '%c' with '\\x%x'.",
44
0
                     specifier, (unsigned int)presentation_type);
45
0
}
46
47
static void
48
invalid_comma_and_underscore(void)
49
0
{
50
0
    PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
51
0
}
52
53
/*
54
    get_integer consumes 0 or more decimal digit characters from an
55
    input string, updates *result with the corresponding positive
56
    integer, and returns the number of digits consumed.
57
58
    returns -1 on error.
59
*/
60
static int
61
get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
62
                  Py_ssize_t *result)
63
382
{
64
382
    Py_ssize_t accumulator, digitval, pos = *ppos;
65
382
    int numdigits;
66
382
    int kind = PyUnicode_KIND(str);
67
382
    const void *data = PyUnicode_DATA(str);
68
69
382
    accumulator = numdigits = 0;
70
446
    for (; pos < end; pos++, numdigits++) {
71
446
        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
72
446
        if (digitval < 0)
73
382
            break;
74
        /*
75
           Detect possible overflow before it happens:
76
77
              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
78
              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
79
        */
80
64
        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
81
0
            PyErr_Format(PyExc_ValueError,
82
0
                         "Too many decimal digits in format string");
83
0
            *ppos = pos;
84
0
            return -1;
85
0
        }
86
64
        accumulator = accumulator * 10 + digitval;
87
64
    }
88
382
    *ppos = pos;
89
382
    *result = accumulator;
90
382
    return numdigits;
91
382
}
92
93
/************************************************************************/
94
/*********** standard format specifier parsing **************************/
95
/************************************************************************/
96
97
/* returns true if this character is a specifier alignment token */
98
Py_LOCAL_INLINE(int)
99
is_alignment_token(Py_UCS4 c)
100
446
{
101
446
    switch (c) {
102
0
    case '<': case '>': case '=': case '^':
103
0
        return 1;
104
446
    default:
105
446
        return 0;
106
446
    }
107
446
}
108
109
/* returns true if this character is a sign element */
110
Py_LOCAL_INLINE(int)
111
is_sign_element(Py_UCS4 c)
112
382
{
113
382
    switch (c) {
114
0
    case ' ': case '+': case '-':
115
0
        return 1;
116
382
    default:
117
382
        return 0;
118
382
    }
119
382
}
120
121
/* Locale type codes. LT_NO_LOCALE must be zero, because
   parse_internal_render_format_spec() tests the thousands_separators
   field for truth to decide whether grouping was requested.

   LT_DEFAULT_LOCALE and LT_UNDERSCORE_LOCALE have the value of the
   separator character itself; the parser passes them to
   invalid_thousands_separator_type() as that character. */
122
enum LocaleType {
123
    /* No grouping requested: get_locale_info() uses an empty
       thousands separator and the no_grouping table. */
    LT_NO_LOCALE = 0,
124
    /* ',' separator, groups of three. */
    LT_DEFAULT_LOCALE = ',',
125
    /* '_' separator, groups of three. */
    LT_UNDERSCORE_LOCALE = '_',
126
    /* '_' separator, groups of four; the parser substitutes this for
       LT_UNDERSCORE_LOCALE when the type is b, o, x or X. */
    LT_UNDER_FOUR_LOCALE,
127
    /* Separators and grouping taken from localeconv(). */
    LT_CURRENT_LOCALE
128
};
129
130
/* Parsed form of a standard format specifier, as filled in by
   parse_internal_render_format_spec(). */
typedef struct {
131
    /* Padding character; ' ' unless given explicitly, or '0' when the
       specifier uses the leading-zero shortcut. */
    Py_UCS4 fill_char;
132
    /* One of '<', '>', '=', '^', or the caller's default alignment. */
    Py_UCS4 align;
133
    /* 1 if '#' was given, else 0. */
    int alternate;
134
    /* 1 if 'z' (negative zero coercion) was given, else 0. */
    int no_neg_0;
135
    /* ' ', '+' or '-', or '\0' if no sign was given. */
    Py_UCS4 sign;
136
    /* Field width, or -1 if not specified. */
    Py_ssize_t width;
137
    /* Grouping requested for the integer part. */
    enum LocaleType thousands_separators;
138
    /* Precision, or -1 if not specified. */
    Py_ssize_t precision;
139
    /* Grouping requested after the '.' (for the fractional part). */
    enum LocaleType frac_thousands_separator;
140
    /* Presentation type character, or the caller's default type. */
    Py_UCS4 type;
141
} InternalFormatSpec;
142
143
144
/*
145
  ptr points to the start of the format_spec, end points just past its end.
146
  fills in format with the parsed information.
147
  returns 1 on success, 0 on failure.
148
  if failure, sets the exception
149
*/
150
static int
151
parse_internal_render_format_spec(PyObject *obj,
152
                                  PyObject *format_spec,
153
                                  Py_ssize_t start, Py_ssize_t end,
154
                                  InternalFormatSpec *format,
155
                                  char default_type,
156
                                  char default_align)
157
382
{
158
382
    Py_ssize_t pos = start;
159
382
    int kind = PyUnicode_KIND(format_spec);
160
382
    const void *data = PyUnicode_DATA(format_spec);
161
    /* end-pos is used throughout this code to specify the length of
162
       the input string */
163
3.88k
#define READ_spec(index) PyUnicode_READ(kind, data, index)
164
165
382
    Py_ssize_t consumed;
166
382
    int align_specified = 0;
167
382
    int fill_char_specified = 0;
168
169
382
    format->fill_char = ' ';
170
382
    format->align = default_align;
171
382
    format->alternate = 0;
172
382
    format->no_neg_0 = 0;
173
382
    format->sign = '\0';
174
382
    format->width = -1;
175
382
    format->thousands_separators = LT_NO_LOCALE;
176
382
    format->frac_thousands_separator = LT_NO_LOCALE;
177
382
    format->precision = -1;
178
382
    format->type = default_type;
179
180
    /* If the second char is an alignment token,
181
       then parse the fill char */
182
382
    if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
183
0
        format->align = READ_spec(pos+1);
184
0
        format->fill_char = READ_spec(pos);
185
0
        fill_char_specified = 1;
186
0
        align_specified = 1;
187
0
        pos += 2;
188
0
    }
189
382
    else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
190
0
        format->align = READ_spec(pos);
191
0
        align_specified = 1;
192
0
        ++pos;
193
0
    }
194
195
    /* Parse the various sign options */
196
382
    if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
197
0
        format->sign = READ_spec(pos);
198
0
        ++pos;
199
0
    }
200
201
    /* If the next character is z, request coercion of negative 0.
202
       Applies only to floats. */
203
382
    if (end-pos >= 1 && READ_spec(pos) == 'z') {
204
0
        format->no_neg_0 = 1;
205
0
        ++pos;
206
0
    }
207
208
    /* If the next character is #, we're in alternate mode.  This only
209
       applies to integers. */
210
382
    if (end-pos >= 1 && READ_spec(pos) == '#') {
211
0
        format->alternate = 1;
212
0
        ++pos;
213
0
    }
214
215
    /* The special case for 0-padding (backwards compat) */
216
382
    if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
217
64
        format->fill_char = '0';
218
64
        if (!align_specified && default_align == '>') {
219
64
            format->align = '=';
220
64
        }
221
64
        ++pos;
222
64
    }
223
224
382
    consumed = get_integer(format_spec, &pos, end, &format->width);
225
382
    if (consumed == -1)
226
        /* Overflow error. Exception already set. */
227
0
        return 0;
228
229
    /* If consumed is 0, we didn't consume any characters for the
230
       width. In that case, reset the width to -1, because
231
       get_integer() will have set it to zero. -1 is how we record
232
       that the width wasn't specified. */
233
382
    if (consumed == 0)
234
318
        format->width = -1;
235
236
    /* Comma signifies add thousands separators */
237
382
    if (end-pos && READ_spec(pos) == ',') {
238
0
        format->thousands_separators = LT_DEFAULT_LOCALE;
239
0
        ++pos;
240
0
    }
241
    /* Underscore signifies add thousands separators */
242
382
    if (end-pos && READ_spec(pos) == '_') {
243
0
        if (format->thousands_separators != LT_NO_LOCALE) {
244
0
            invalid_comma_and_underscore();
245
0
            return 0;
246
0
        }
247
0
        format->thousands_separators = LT_UNDERSCORE_LOCALE;
248
0
        ++pos;
249
0
    }
250
382
    if (end-pos && READ_spec(pos) == ',') {
251
0
        if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
252
0
            invalid_comma_and_underscore();
253
0
            return 0;
254
0
        }
255
0
    }
256
257
    /* Parse field precision */
258
382
    if (end-pos && READ_spec(pos) == '.') {
259
0
        ++pos;
260
261
0
        consumed = get_integer(format_spec, &pos, end, &format->precision);
262
0
        if (consumed == -1)
263
            /* Overflow error. Exception already set. */
264
0
            return 0;
265
266
0
        if (end-pos && READ_spec(pos) == ',') {
267
0
            if (consumed == 0) {
268
0
                format->precision = -1;
269
0
            }
270
0
            format->frac_thousands_separator = LT_DEFAULT_LOCALE;
271
0
            ++pos;
272
0
            ++consumed;
273
0
        }
274
0
        if (end-pos && READ_spec(pos) == '_') {
275
0
            if (format->frac_thousands_separator != LT_NO_LOCALE) {
276
0
                invalid_comma_and_underscore();
277
0
                return 0;
278
0
            }
279
0
            if (consumed == 0) {
280
0
                format->precision = -1;
281
0
            }
282
0
            format->frac_thousands_separator = LT_UNDERSCORE_LOCALE;
283
0
            ++pos;
284
0
            ++consumed;
285
0
        }
286
0
        if (end-pos && READ_spec(pos) == ',') {
287
0
            if (format->frac_thousands_separator == LT_UNDERSCORE_LOCALE) {
288
0
                invalid_comma_and_underscore();
289
0
                return 0;
290
0
            }
291
0
        }
292
293
        /* Not having a precision or underscore/comma after a dot
294
           is an error. */
295
0
        if (consumed == 0) {
296
0
            PyErr_Format(PyExc_ValueError,
297
0
                         "Format specifier missing precision");
298
0
            return 0;
299
0
        }
300
301
0
    }
302
303
    /* Finally, parse the type field. */
304
305
382
    if (end-pos > 1) {
306
        /* More than one char remains, so this is an invalid format
307
           specifier. */
308
        /* Create a temporary object that contains the format spec we're
309
           operating on.  It's format_spec[start:end] (in Python syntax). */
310
0
        PyObject* actual_format_spec = PyUnicode_FromKindAndData(kind,
311
0
                                         (char*)data + kind*start,
312
0
                                         end-start);
313
0
        if (actual_format_spec != NULL) {
314
0
            PyErr_Format(PyExc_ValueError,
315
0
                "Invalid format specifier '%U' for object of type '%.200s'",
316
0
                actual_format_spec, Py_TYPE(obj)->tp_name);
317
0
            Py_DECREF(actual_format_spec);
318
0
        }
319
0
        return 0;
320
0
    }
321
322
382
    if (end-pos == 1) {
323
382
        format->type = READ_spec(pos);
324
382
        ++pos;
325
382
    }
326
327
    /* Do as much validating as we can, just by looking at the format
328
       specifier.  Do not take into account what type of formatting
329
       we're doing (int, float, string). */
330
331
382
    if (format->thousands_separators) {
332
0
        switch (format->type) {
333
0
        case 'd':
334
0
        case 'e':
335
0
        case 'f':
336
0
        case 'g':
337
0
        case 'E':
338
0
        case 'G':
339
0
        case '%':
340
0
        case 'F':
341
0
        case '\0':
342
            /* These are allowed. See PEP 378.*/
343
0
            break;
344
0
        case 'b':
345
0
        case 'o':
346
0
        case 'x':
347
0
        case 'X':
348
            /* Underscores are allowed in bin/oct/hex. See PEP 515. */
349
0
            if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
350
                /* Every four digits, not every three, in bin/oct/hex. */
351
0
                format->thousands_separators = LT_UNDER_FOUR_LOCALE;
352
0
                break;
353
0
            }
354
0
            _Py_FALLTHROUGH;
355
0
        default:
356
0
            invalid_thousands_separator_type(format->thousands_separators, format->type);
357
0
            return 0;
358
0
        }
359
0
    }
360
361
382
    if (format->type == 'n'
362
382
        && format->frac_thousands_separator != LT_NO_LOCALE)
363
0
    {
364
0
        invalid_thousands_separator_type(format->frac_thousands_separator,
365
0
                                         format->type);
366
0
        return 0;
367
0
    }
368
369
382
    assert (format->align <= 127);
370
382
    assert (format->sign <= 127);
371
382
    return 1;
372
382
}
373
374
/* Calculate the padding needed. */
375
static void
376
calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
377
             Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
378
             Py_ssize_t *n_total)
379
0
{
380
0
    if (width >= 0) {
381
0
        if (nchars > width)
382
0
            *n_total = nchars;
383
0
        else
384
0
            *n_total = width;
385
0
    }
386
0
    else {
387
        /* not specified, use all of the chars and no more */
388
0
        *n_total = nchars;
389
0
    }
390
391
    /* Figure out how much leading space we need, based on the
392
       aligning */
393
0
    if (align == '>')
394
0
        *n_lpadding = *n_total - nchars;
395
0
    else if (align == '^')
396
0
        *n_lpadding = (*n_total - nchars) / 2;
397
0
    else if (align == '<' || align == '=')
398
0
        *n_lpadding = 0;
399
0
    else {
400
        /* We should never have an unspecified alignment. */
401
0
        Py_UNREACHABLE();
402
0
    }
403
404
0
    *n_rpadding = *n_total - nchars - *n_lpadding;
405
0
}
406
407
/* Do the padding, and return a pointer to where the caller-supplied
408
   content goes. */
409
static int
410
fill_padding(_PyUnicodeWriter *writer,
411
             Py_ssize_t nchars,
412
             Py_UCS4 fill_char, Py_ssize_t n_lpadding,
413
             Py_ssize_t n_rpadding)
414
0
{
415
0
    Py_ssize_t pos;
416
417
    /* Pad on left. */
418
0
    if (n_lpadding) {
419
0
        pos = writer->pos;
420
0
        _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
421
0
    }
422
423
    /* Pad on right. */
424
0
    if (n_rpadding) {
425
0
        pos = writer->pos + nchars + n_lpadding;
426
0
        _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
427
0
    }
428
429
    /* Pointer to the user content. */
430
0
    writer->pos += n_lpadding;
431
0
    return 0;
432
0
}
433
434
/************************************************************************/
435
/*********** common routines for numeric formatting *********************/
436
/************************************************************************/
437
438
/* Locale info needed for formatting integers and the part of floats
   before and including the decimal. Note that locales only support
   8-bit chars, not unicode.
   NOTE(review): the separators below are held as Python objects that
   get_locale_info() creates with PyUnicode_FromOrdinal() and similar
   calls, so the "8-bit chars" remark looks stale -- confirm.

   A LocaleInfo is filled in by get_locale_info() and released by
   free_locale_info(). */
441
typedef struct {
442
    /* Decimal point text. */
    PyObject *decimal_point;
443
    /* Separator inserted between groups of integer digits. */
    PyObject *thousands_sep;
444
    /* Separator inserted between groups of fractional digits. */
    PyObject *frac_thousands_sep;
445
    /* Group-size description handed to
       _PyUnicode_InsertThousandsGrouping(); points at a string literal,
       at no_grouping, or at grouping_buffer. */
    const char *grouping;
446
    /* Copy of the localeconv() grouping string, or NULL; released with
       PyMem_Free() in free_locale_info(). */
    char *grouping_buffer;
447
} LocaleInfo;
448
449
382
/* Initializer for an empty LocaleInfo.  Four values are listed for
   five members; in C the remaining member is zero-initialized too. */
#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
450
451
/* describes the layout for an integer, see the comment in
   calc_number_widths() for details.  The output is laid out as
   <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal>
   <remainder> <rpadding>. */
453
typedef struct {
454
    /* Fill chars before the sign. */
    Py_ssize_t n_lpadding;
455
    /* Chars in the prefix (the caller-supplied count). */
    Py_ssize_t n_prefix;
456
    /* Fill chars between the prefix and the digits ('=' alignment). */
    Py_ssize_t n_spadding;
457
    /* Fill chars after everything else. */
    Py_ssize_t n_rpadding;
458
    /* Sign character to write, or '\0' when n_sign is 0. */
    char sign;
459
    Py_ssize_t n_sign;      /* number of chars needed for sign (0/1) */
460
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
                                    any grouping chars. */
462
    Py_ssize_t n_decimal;   /* 0 if only an integer */
463
    Py_ssize_t n_remainder; /* Chars that follow the fractional digits
                               (e.g. an exponent), as counted by
                               parse_number(). */
466
    /* Fractional digits, without grouping chars. */
    Py_ssize_t n_frac;
467
    /* Space taken up by the fractional digits, including any grouping
       chars. */
    Py_ssize_t n_grouped_frac_digits;
468
469
    /* These 2 are not the widths of fields, but are passed on to
       _PyUnicode_InsertThousandsGrouping(). */
471
    Py_ssize_t n_digits;    /* The number of digits before a decimal
                               or exponent. */
473
    Py_ssize_t n_min_width; /* The min_width we used when we computed
                               the n_grouped_digits width. */
475
} NumberFieldWidths;
476
477
478
/* Given a number of the form:
479
   digits[remainder]
480
   where ptr points to the start and end points to the end, find where
481
    the integer part ends. This could be a decimal, an exponent, both,
482
    or neither.
483
   If a decimal point is present, set *has_decimal and increment
484
    remainder beyond it.
485
   Results are undefined (but shouldn't crash) for improperly
486
    formatted strings.
487
*/
488
static void
489
parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
490
             Py_ssize_t *n_remainder, Py_ssize_t *n_frac, int *has_decimal)
491
0
{
492
0
    Py_ssize_t frac;
493
0
    int kind = PyUnicode_KIND(s);
494
0
    const void *data = PyUnicode_DATA(s);
495
496
0
    while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) {
497
0
        ++pos;
498
0
    }
499
0
    frac = pos;
500
501
    /* Does remainder start with a decimal point? */
502
0
    *has_decimal = pos<end && PyUnicode_READ(kind, data, frac) == '.';
503
504
    /* Skip the decimal point. */
505
0
    if (*has_decimal) {
506
0
        frac++;
507
0
        pos++;
508
0
    }
509
510
0
    while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) {
511
0
        ++pos;
512
0
    }
513
514
0
    *n_frac = pos - frac;
515
0
    *n_remainder = end - pos;
516
0
}
517
518
/* not all fields of format are used.  for example, precision is
519
   unused.  should this take discrete params in order to be more clear
520
   about what it does?  or is passing a single format parameter easier
521
   and more efficient enough to justify a little obfuscation?
522
   Return -1 on error. */
523
static Py_ssize_t
524
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
525
                   Py_UCS4 sign_char, Py_ssize_t n_start,
526
                   Py_ssize_t n_end, Py_ssize_t n_remainder, Py_ssize_t n_frac,
527
                   int has_decimal, const LocaleInfo *locale,
528
                   const InternalFormatSpec *format, Py_UCS4 *maxchar)
529
64
{
530
64
    Py_ssize_t n_non_digit_non_padding;
531
64
    Py_ssize_t n_padding;
532
533
64
    spec->n_digits = n_end - n_start - n_frac - n_remainder - (has_decimal?1:0);
534
64
    spec->n_lpadding = 0;
535
64
    spec->n_prefix = n_prefix;
536
64
    spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
537
64
    spec->n_remainder = n_remainder;
538
64
    spec->n_frac = n_frac;
539
64
    spec->n_spadding = 0;
540
64
    spec->n_rpadding = 0;
541
64
    spec->sign = '\0';
542
64
    spec->n_sign = 0;
543
544
    /* the output will look like:
545
       |                                                                                         |
546
       | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
547
       |                                                                                         |
548
549
       sign is computed from format->sign and the actual
550
       sign of the number
551
552
       prefix is given (it's for the '0x' prefix)
553
554
       digits is already known
555
556
       the total width is either given, or computed from the
557
       actual digits
558
559
       only one of lpadding, spadding, and rpadding can be non-zero,
560
       and it's calculated from the width and other fields
561
    */
562
563
    /* compute the various parts we're going to write */
564
64
    switch (format->sign) {
565
0
    case '+':
566
        /* always put a + or - */
567
0
        spec->n_sign = 1;
568
0
        spec->sign = (sign_char == '-' ? '-' : '+');
569
0
        break;
570
0
    case ' ':
571
0
        spec->n_sign = 1;
572
0
        spec->sign = (sign_char == '-' ? '-' : ' ');
573
0
        break;
574
64
    default:
575
        /* Not specified, or the default (-) */
576
64
        if (sign_char == '-') {
577
0
            spec->n_sign = 1;
578
0
            spec->sign = '-';
579
0
        }
580
64
    }
581
582
64
    if (spec->n_frac == 0) {
583
64
        spec->n_grouped_frac_digits = 0;
584
64
    }
585
0
    else {
586
0
        Py_UCS4 grouping_maxchar;
587
0
        spec->n_grouped_frac_digits = _PyUnicode_InsertThousandsGrouping(
588
0
            NULL, 0,
589
0
            NULL, 0, spec->n_frac,
590
0
            spec->n_frac,
591
0
            locale->grouping, locale->frac_thousands_sep, &grouping_maxchar, 1);
592
0
        if (spec->n_grouped_frac_digits == -1) {
593
0
            return -1;
594
0
        }
595
0
        *maxchar = Py_MAX(*maxchar, grouping_maxchar);
596
0
    }
597
598
    /* The number of chars used for non-digits and non-padding. */
599
64
    n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
600
64
        + spec->n_frac + spec->n_remainder;
601
602
    /* min_width can go negative, that's okay. format->width == -1 means
603
       we don't care. */
604
64
    if (format->fill_char == '0' && format->align == '=')
605
64
        spec->n_min_width = (format->width - n_non_digit_non_padding
606
64
                             + spec->n_frac - spec->n_grouped_frac_digits);
607
0
    else
608
0
        spec->n_min_width = 0;
609
610
64
    if (spec->n_digits == 0)
611
        /* This case only occurs when using 'c' formatting, we need
612
           to special case it because the grouping code always wants
613
           to have at least one character. */
614
0
        spec->n_grouped_digits = 0;
615
64
    else {
616
64
        Py_UCS4 grouping_maxchar;
617
64
        spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
618
64
            NULL, 0,
619
64
            NULL, 0, spec->n_digits,
620
64
            spec->n_min_width,
621
64
            locale->grouping, locale->thousands_sep, &grouping_maxchar, 0);
622
64
        if (spec->n_grouped_digits == -1) {
623
0
            return -1;
624
0
        }
625
64
        *maxchar = Py_MAX(*maxchar, grouping_maxchar);
626
64
    }
627
628
    /* Given the desired width and the total of digit and non-digit
629
       space we consume, see if we need any padding. format->width can
630
       be negative (meaning no padding), but this code still works in
631
       that case. */
632
64
    n_padding = format->width -
633
64
                        (n_non_digit_non_padding + spec->n_grouped_digits
634
64
                         + spec->n_grouped_frac_digits - spec->n_frac);
635
64
    if (n_padding > 0) {
636
        /* Some padding is needed. Determine if it's left, space, or right. */
637
0
        switch (format->align) {
638
0
        case '<':
639
0
            spec->n_rpadding = n_padding;
640
0
            break;
641
0
        case '^':
642
0
            spec->n_lpadding = n_padding / 2;
643
0
            spec->n_rpadding = n_padding - spec->n_lpadding;
644
0
            break;
645
0
        case '=':
646
0
            spec->n_spadding = n_padding;
647
0
            break;
648
0
        case '>':
649
0
            spec->n_lpadding = n_padding;
650
0
            break;
651
0
        default:
652
            /* Shouldn't get here */
653
0
            Py_UNREACHABLE();
654
0
        }
655
0
    }
656
657
64
    if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
658
0
        *maxchar = Py_MAX(*maxchar, format->fill_char);
659
660
64
    if (spec->n_decimal)
661
0
        *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
662
663
64
    return spec->n_lpadding + spec->n_sign + spec->n_prefix +
664
64
        spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
665
64
        spec->n_grouped_frac_digits + spec->n_remainder + spec->n_rpadding;
666
64
}
667
668
/* Fill in the digit parts of a number's string representation,
669
   as determined in calc_number_widths().
670
   Return -1 on error, or 0 on success. */
671
static int
672
fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
673
            PyObject *digits, Py_ssize_t d_start,
674
            PyObject *prefix, Py_ssize_t p_start,
675
            Py_UCS4 fill_char,
676
            LocaleInfo *locale, int toupper)
677
64
{
678
    /* Used to keep track of digits, decimal, and remainder. */
679
64
    Py_ssize_t d_pos = d_start;
680
64
    const int kind = writer->kind;
681
64
    const void *data = writer->data;
682
64
    Py_ssize_t r;
683
684
64
    if (spec->n_lpadding) {
685
0
        _PyUnicode_FastFill(writer->buffer,
686
0
                            writer->pos, spec->n_lpadding, fill_char);
687
0
        writer->pos += spec->n_lpadding;
688
0
    }
689
64
    if (spec->n_sign == 1) {
690
0
        PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
691
0
        writer->pos++;
692
0
    }
693
64
    if (spec->n_prefix) {
694
0
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
695
0
                                      prefix, p_start,
696
0
                                      spec->n_prefix);
697
0
        if (toupper) {
698
0
            Py_ssize_t t;
699
0
            for (t = 0; t < spec->n_prefix; t++) {
700
0
                Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
701
0
                c = Py_TOUPPER(c);
702
0
                assert (c <= 127);
703
0
                PyUnicode_WRITE(kind, data, writer->pos + t, c);
704
0
            }
705
0
        }
706
0
        writer->pos += spec->n_prefix;
707
0
    }
708
64
    if (spec->n_spadding) {
709
0
        _PyUnicode_FastFill(writer->buffer,
710
0
                            writer->pos, spec->n_spadding, fill_char);
711
0
        writer->pos += spec->n_spadding;
712
0
    }
713
714
    /* Only for type 'c' special case, it has no digits. */
715
64
    if (spec->n_digits != 0) {
716
        /* Fill the digits with InsertThousandsGrouping. */
717
64
        r = _PyUnicode_InsertThousandsGrouping(
718
64
                writer, spec->n_grouped_digits,
719
64
                digits, d_pos, spec->n_digits,
720
64
                spec->n_min_width,
721
64
                locale->grouping, locale->thousands_sep, NULL, 0);
722
64
        if (r == -1)
723
0
            return -1;
724
64
        assert(r == spec->n_grouped_digits);
725
64
        d_pos += spec->n_digits;
726
64
    }
727
64
    if (toupper) {
728
0
        Py_ssize_t t;
729
0
        for (t = 0; t < spec->n_grouped_digits; t++) {
730
0
            Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
731
0
            c = Py_TOUPPER(c);
732
0
            if (c > 127) {
733
0
                PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
734
0
                return -1;
735
0
            }
736
0
            PyUnicode_WRITE(kind, data, writer->pos + t, c);
737
0
        }
738
0
    }
739
64
    writer->pos += spec->n_grouped_digits;
740
741
64
    if (spec->n_decimal) {
742
0
        _PyUnicode_FastCopyCharacters(
743
0
            writer->buffer, writer->pos,
744
0
            locale->decimal_point, 0, spec->n_decimal);
745
0
        writer->pos += spec->n_decimal;
746
0
        d_pos += 1;
747
0
    }
748
749
64
    if (spec->n_frac) {
750
0
        r = _PyUnicode_InsertThousandsGrouping(
751
0
                writer, spec->n_grouped_frac_digits,
752
0
                digits, d_pos, spec->n_frac, spec->n_frac,
753
0
                locale->grouping, locale->frac_thousands_sep, NULL, 1);
754
0
        if (r == -1) {
755
0
            return -1;
756
0
        }
757
0
        assert(r == spec->n_grouped_frac_digits);
758
0
        d_pos += spec->n_frac;
759
0
        writer->pos += spec->n_grouped_frac_digits;
760
0
    }
761
762
64
    if (spec->n_remainder) {
763
0
        _PyUnicode_FastCopyCharacters(
764
0
            writer->buffer, writer->pos,
765
0
            digits, d_pos, spec->n_remainder);
766
0
        writer->pos += spec->n_remainder;
767
        /* d_pos += spec->n_remainder; */
768
0
    }
769
770
64
    if (spec->n_rpadding) {
771
0
        _PyUnicode_FastFill(writer->buffer,
772
0
                            writer->pos, spec->n_rpadding,
773
0
                            fill_char);
774
0
        writer->pos += spec->n_rpadding;
775
0
    }
776
64
    return 0;
777
64
}
778
779
/* Grouping description used when no grouping is wanted.  In an
   lconv-style grouping string CHAR_MAX means that no further grouping
   is performed.  get_locale_info() also compares against this array by
   address to detect the "no grouping" case. */
static const char no_grouping[1] = {CHAR_MAX};
780
781
/* Find the decimal point character(s?), thousands_separator(s?), and
782
   grouping description, either for the current locale if type is
783
   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
784
   LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
785
static int
786
get_locale_info(enum LocaleType type, enum LocaleType frac_type,
787
                LocaleInfo *locale_info)
788
64
{
789
64
    switch (type) {
790
0
    case LT_CURRENT_LOCALE: {
791
0
        struct lconv *lc = localeconv();
792
0
        if (_Py_GetLocaleconvNumeric(lc,
793
0
                                     &locale_info->decimal_point,
794
0
                                     &locale_info->thousands_sep) < 0) {
795
0
            return -1;
796
0
        }
797
798
        /* localeconv() grouping can become a dangling pointer or point
799
           to a different string if another thread calls localeconv() during
800
           the string formatting. Copy the string to avoid this risk. */
801
0
        locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
802
0
        if (locale_info->grouping_buffer == NULL) {
803
0
            PyErr_NoMemory();
804
0
            return -1;
805
0
        }
806
0
        locale_info->grouping = locale_info->grouping_buffer;
807
0
        break;
808
0
    }
809
0
    case LT_DEFAULT_LOCALE:
810
0
    case LT_UNDERSCORE_LOCALE:
811
0
    case LT_UNDER_FOUR_LOCALE:
812
0
        locale_info->decimal_point = PyUnicode_FromOrdinal('.');
813
0
        locale_info->thousands_sep = PyUnicode_FromOrdinal(
814
0
            type == LT_DEFAULT_LOCALE ? ',' : '_');
815
0
        if (!locale_info->decimal_point || !locale_info->thousands_sep)
816
0
            return -1;
817
0
        if (type != LT_UNDER_FOUR_LOCALE)
818
0
            locale_info->grouping = "\3"; /* Group every 3 characters.  The
819
                                         (implicit) trailing 0 means repeat
820
                                         infinitely. */
821
0
        else
822
0
            locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
823
0
        break;
824
64
    case LT_NO_LOCALE:
825
64
        locale_info->decimal_point = PyUnicode_FromOrdinal('.');
826
64
        locale_info->thousands_sep = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
827
64
        if (!locale_info->decimal_point || !locale_info->thousands_sep)
828
0
            return -1;
829
64
        locale_info->grouping = no_grouping;
830
64
        break;
831
64
    }
832
64
    if (frac_type != LT_NO_LOCALE) {
833
0
        locale_info->frac_thousands_sep = PyUnicode_FromOrdinal(
834
0
            frac_type == LT_DEFAULT_LOCALE ? ',' : '_');
835
0
        if (!locale_info->frac_thousands_sep) {
836
0
            return -1;
837
0
        }
838
0
        if (locale_info->grouping == no_grouping) {
839
0
            locale_info->grouping = "\3";
840
0
        }
841
0
    }
842
64
    else {
843
64
        locale_info->frac_thousands_sep = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
844
64
    }
845
64
    return 0;
846
64
}
847
848
static void
849
free_locale_info(LocaleInfo *locale_info)
850
64
{
851
64
    Py_XDECREF(locale_info->decimal_point);
852
64
    Py_XDECREF(locale_info->thousands_sep);
853
64
    Py_XDECREF(locale_info->frac_thousands_sep);
854
64
    PyMem_Free(locale_info->grouping_buffer);
855
64
}
856
857
/************************************************************************/
858
/*********** string formatting ******************************************/
859
/************************************************************************/
860
861
static int
862
format_string_internal(PyObject *value, const InternalFormatSpec *format,
863
                       _PyUnicodeWriter *writer)
864
0
{
865
0
    Py_ssize_t lpad;
866
0
    Py_ssize_t rpad;
867
0
    Py_ssize_t total;
868
0
    Py_ssize_t len;
869
0
    int result = -1;
870
0
    Py_UCS4 maxchar;
871
872
0
    len = PyUnicode_GET_LENGTH(value);
873
874
    /* sign is not allowed on strings */
875
0
    if (format->sign != '\0') {
876
0
        if (format->sign == ' ') {
877
0
            PyErr_SetString(PyExc_ValueError,
878
0
                "Space not allowed in string format specifier");
879
0
        }
880
0
        else {
881
0
            PyErr_SetString(PyExc_ValueError,
882
0
                "Sign not allowed in string format specifier");
883
0
        }
884
0
        goto done;
885
0
    }
886
887
    /* negative 0 coercion is not allowed on strings */
888
0
    if (format->no_neg_0) {
889
0
        PyErr_SetString(PyExc_ValueError,
890
0
                        "Negative zero coercion (z) not allowed in string format "
891
0
                        "specifier");
892
0
        goto done;
893
0
    }
894
895
    /* alternate is not allowed on strings */
896
0
    if (format->alternate) {
897
0
        PyErr_SetString(PyExc_ValueError,
898
0
                        "Alternate form (#) not allowed in string format "
899
0
                        "specifier");
900
0
        goto done;
901
0
    }
902
903
    /* '=' alignment not allowed on strings */
904
0
    if (format->align == '=') {
905
0
        PyErr_SetString(PyExc_ValueError,
906
0
                        "'=' alignment not allowed "
907
0
                        "in string format specifier");
908
0
        goto done;
909
0
    }
910
911
0
    if ((format->width == -1 || format->width <= len)
912
0
        && (format->precision == -1 || format->precision >= len)) {
913
        /* Fast path */
914
0
        return _PyUnicodeWriter_WriteStr(writer, value);
915
0
    }
916
917
    /* if precision is specified, output no more that format.precision
918
       characters */
919
0
    if (format->precision >= 0 && len >= format->precision) {
920
0
        len = format->precision;
921
0
    }
922
923
0
    calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
924
925
0
    maxchar = writer->maxchar;
926
0
    if (lpad != 0 || rpad != 0)
927
0
        maxchar = Py_MAX(maxchar, format->fill_char);
928
0
    if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
929
0
        Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
930
0
        maxchar = Py_MAX(maxchar, valmaxchar);
931
0
    }
932
933
    /* allocate the resulting string */
934
0
    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
935
0
        goto done;
936
937
    /* Write into that space. First the padding. */
938
0
    result = fill_padding(writer, len, format->fill_char, lpad, rpad);
939
0
    if (result == -1)
940
0
        goto done;
941
942
    /* Then the source string. */
943
0
    if (len) {
944
0
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
945
0
                                      value, 0, len);
946
0
    }
947
0
    writer->pos += (len + rpad);
948
0
    result = 0;
949
950
0
done:
951
0
    return result;
952
0
}
953
954
955
/************************************************************************/
956
/*********** long formatting ********************************************/
957
/************************************************************************/
958
959
static int
960
format_long_internal(PyObject *value, const InternalFormatSpec *format,
961
                     _PyUnicodeWriter *writer)
962
382
{
963
382
    int result = -1;
964
382
    Py_UCS4 maxchar = 127;
965
382
    PyObject *tmp = NULL;
966
382
    Py_ssize_t inumeric_chars;
967
382
    Py_UCS4 sign_char = '\0';
968
382
    Py_ssize_t n_digits;       /* count of digits need from the computed
969
                                  string */
970
382
    Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
971
                                   produces non-digits */
972
382
    Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
973
382
    Py_ssize_t n_total;
974
382
    Py_ssize_t prefix = 0;
975
382
    NumberFieldWidths spec;
976
382
    long x;
977
978
    /* Locale settings, either from the actual locale or
979
       from a hard-code pseudo-locale */
980
382
    LocaleInfo locale = LocaleInfo_STATIC_INIT;
981
982
    /* no precision allowed on integers */
983
382
    if (format->precision != -1) {
984
0
        PyErr_SetString(PyExc_ValueError,
985
0
                        "Precision not allowed in integer format specifier");
986
0
        goto done;
987
0
    }
988
    /* no negative zero coercion on integers */
989
382
    if (format->no_neg_0) {
990
0
        PyErr_SetString(PyExc_ValueError,
991
0
                        "Negative zero coercion (z) not allowed in integer"
992
0
                        " format specifier");
993
0
        goto done;
994
0
    }
995
996
    /* special case for character formatting */
997
382
    if (format->type == 'c') {
998
        /* error to specify a sign */
999
0
        if (format->sign != '\0') {
1000
0
            PyErr_SetString(PyExc_ValueError,
1001
0
                            "Sign not allowed with integer"
1002
0
                            " format specifier 'c'");
1003
0
            goto done;
1004
0
        }
1005
        /* error to request alternate format */
1006
0
        if (format->alternate) {
1007
0
            PyErr_SetString(PyExc_ValueError,
1008
0
                            "Alternate form (#) not allowed with integer"
1009
0
                            " format specifier 'c'");
1010
0
            goto done;
1011
0
        }
1012
1013
        /* taken from unicodeobject.c formatchar() */
1014
        /* Integer input truncated to a character */
1015
0
        x = PyLong_AsLong(value);
1016
0
        if (x == -1 && PyErr_Occurred())
1017
0
            goto done;
1018
0
        if (x < 0 || x > 0x10ffff) {
1019
0
            PyErr_SetString(PyExc_OverflowError,
1020
0
                            "%c arg not in range(0x110000)");
1021
0
            goto done;
1022
0
        }
1023
0
        tmp = PyUnicode_FromOrdinal(x);
1024
0
        inumeric_chars = 0;
1025
0
        n_digits = 1;
1026
0
        maxchar = Py_MAX(maxchar, (Py_UCS4)x);
1027
1028
        /* As a sort-of hack, we tell calc_number_widths that we only
1029
           have "remainder" characters. calc_number_widths thinks
1030
           these are characters that don't get formatted, only copied
1031
           into the output string. We do this for 'c' formatting,
1032
           because the characters are likely to be non-digits. */
1033
0
        n_remainder = 1;
1034
0
    }
1035
382
    else {
1036
382
        int base;
1037
382
        int leading_chars_to_skip = 0;  /* Number of characters added by
1038
                                           PyNumber_ToBase that we want to
1039
                                           skip over. */
1040
1041
        /* Compute the base and how many characters will be added by
1042
           PyNumber_ToBase */
1043
382
        switch (format->type) {
1044
0
        case 'b':
1045
0
            base = 2;
1046
0
            leading_chars_to_skip = 2; /* 0b */
1047
0
            break;
1048
0
        case 'o':
1049
0
            base = 8;
1050
0
            leading_chars_to_skip = 2; /* 0o */
1051
0
            break;
1052
382
        case 'x':
1053
382
        case 'X':
1054
382
            base = 16;
1055
382
            leading_chars_to_skip = 2; /* 0x */
1056
382
            break;
1057
0
        default:  /* shouldn't be needed, but stops a compiler warning */
1058
0
        case 'd':
1059
0
        case 'n':
1060
0
            base = 10;
1061
0
            break;
1062
382
        }
1063
1064
382
        if (format->sign != '+' && format->sign != ' '
1065
382
            && format->width == -1
1066
382
            && format->type != 'X' && format->type != 'n'
1067
382
            && !format->thousands_separators
1068
382
            && PyLong_CheckExact(value))
1069
318
        {
1070
            /* Fast path */
1071
318
            return _PyLong_FormatWriter(writer, value, base, format->alternate);
1072
318
        }
1073
1074
        /* The number of prefix chars is the same as the leading
1075
           chars to skip */
1076
64
        if (format->alternate)
1077
0
            n_prefix = leading_chars_to_skip;
1078
1079
        /* Do the hard part, converting to a string in a given base */
1080
64
        tmp = _PyLong_Format(value, base);
1081
64
        if (tmp == NULL)
1082
0
            goto done;
1083
1084
64
        inumeric_chars = 0;
1085
64
        n_digits = PyUnicode_GET_LENGTH(tmp);
1086
1087
64
        prefix = inumeric_chars;
1088
1089
        /* Is a sign character present in the output?  If so, remember it
1090
           and skip it */
1091
64
        if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
1092
0
            sign_char = '-';
1093
0
            ++prefix;
1094
0
            ++leading_chars_to_skip;
1095
0
        }
1096
1097
        /* Skip over the leading chars (0x, 0b, etc.) */
1098
64
        n_digits -= leading_chars_to_skip;
1099
64
        inumeric_chars += leading_chars_to_skip;
1100
64
    }
1101
1102
    /* Determine the grouping, separator, and decimal point, if any. */
1103
64
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1104
64
                        format->thousands_separators, 0,
1105
64
                        &locale) == -1)
1106
0
        goto done;
1107
1108
    /* Calculate how much memory we'll need. */
1109
64
    n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
1110
64
                                 inumeric_chars + n_digits, n_remainder, 0, 0,
1111
64
                                 &locale, format, &maxchar);
1112
64
    if (n_total == -1) {
1113
0
        goto done;
1114
0
    }
1115
1116
    /* Allocate the memory. */
1117
64
    if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1118
0
        goto done;
1119
1120
    /* Populate the memory. */
1121
64
    result = fill_number(writer, &spec,
1122
64
                         tmp, inumeric_chars,
1123
64
                         tmp, prefix, format->fill_char,
1124
64
                         &locale, format->type == 'X');
1125
1126
64
done:
1127
64
    Py_XDECREF(tmp);
1128
64
    free_locale_info(&locale);
1129
64
    return result;
1130
64
}
1131
1132
/************************************************************************/
1133
/*********** float formatting *******************************************/
1134
/************************************************************************/
1135
1136
/* much of this is taken from unicodeobject.c */
1137
static int
1138
format_float_internal(PyObject *value,
1139
                      const InternalFormatSpec *format,
1140
                      _PyUnicodeWriter *writer)
1141
0
{
1142
0
    char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
1143
0
    Py_ssize_t n_digits;
1144
0
    Py_ssize_t n_remainder;
1145
0
    Py_ssize_t n_frac;
1146
0
    Py_ssize_t n_total;
1147
0
    int has_decimal;
1148
0
    double val;
1149
0
    int precision, default_precision = 6;
1150
0
    Py_UCS4 type = format->type;
1151
0
    int add_pct = 0;
1152
0
    Py_ssize_t index;
1153
0
    NumberFieldWidths spec;
1154
0
    int flags = 0;
1155
0
    int result = -1;
1156
0
    Py_UCS4 maxchar = 127;
1157
0
    Py_UCS4 sign_char = '\0';
1158
0
    int float_type; /* Used to see if we have a nan, inf, or regular float. */
1159
0
    PyObject *unicode_tmp = NULL;
1160
1161
    /* Locale settings, either from the actual locale or
1162
       from a hard-code pseudo-locale */
1163
0
    LocaleInfo locale = LocaleInfo_STATIC_INIT;
1164
1165
0
    if (format->precision > INT_MAX) {
1166
0
        PyErr_SetString(PyExc_ValueError, "precision too big");
1167
0
        goto done;
1168
0
    }
1169
0
    precision = (int)format->precision;
1170
1171
0
    if (format->alternate)
1172
0
        flags |= Py_DTSF_ALT;
1173
0
    if (format->no_neg_0)
1174
0
        flags |= Py_DTSF_NO_NEG_0;
1175
1176
0
    if (type == '\0') {
1177
        /* Omitted type specifier.  Behaves in the same way as repr(x)
1178
           and str(x) if no precision is given, else like 'g', but with
1179
           at least one digit after the decimal point. */
1180
0
        flags |= Py_DTSF_ADD_DOT_0;
1181
0
        type = 'r';
1182
0
        default_precision = 0;
1183
0
    }
1184
1185
0
    if (type == 'n')
1186
        /* 'n' is the same as 'g', except for the locale used to
1187
           format the result. We take care of that later. */
1188
0
        type = 'g';
1189
1190
0
    val = PyFloat_AsDouble(value);
1191
0
    if (val == -1.0 && PyErr_Occurred())
1192
0
        goto done;
1193
1194
0
    if (type == '%') {
1195
0
        type = 'f';
1196
0
        val *= 100;
1197
0
        add_pct = 1;
1198
0
    }
1199
1200
0
    if (precision < 0)
1201
0
        precision = default_precision;
1202
0
    else if (type == 'r')
1203
0
        type = 'g';
1204
1205
    /* Cast "type", because if we're in unicode we need to pass an
1206
       8-bit char. This is safe, because we've restricted what "type"
1207
       can be. */
1208
0
    buf = PyOS_double_to_string(val, (char)type, precision, flags,
1209
0
                                &float_type);
1210
0
    if (buf == NULL)
1211
0
        goto done;
1212
0
    n_digits = strlen(buf);
1213
1214
0
    if (add_pct) {
1215
        /* We know that buf has a trailing zero (since we just called
1216
           strlen() on it), and we don't use that fact any more. So we
1217
           can just write over the trailing zero. */
1218
0
        buf[n_digits] = '%';
1219
0
        n_digits += 1;
1220
0
    }
1221
1222
0
    if (format->sign != '+' && format->sign != ' '
1223
0
        && format->width == -1
1224
0
        && format->type != 'n'
1225
0
        && !format->thousands_separators
1226
0
        && !format->frac_thousands_separator)
1227
0
    {
1228
        /* Fast path */
1229
0
        result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1230
0
        PyMem_Free(buf);
1231
0
        return result;
1232
0
    }
1233
1234
    /* Since there is no unicode version of PyOS_double_to_string,
1235
       just use the 8 bit version and then convert to unicode. */
1236
0
    unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1237
0
    PyMem_Free(buf);
1238
0
    if (unicode_tmp == NULL)
1239
0
        goto done;
1240
1241
    /* Is a sign character present in the output?  If so, remember it
1242
       and skip it */
1243
0
    index = 0;
1244
0
    if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1245
0
        sign_char = '-';
1246
0
        ++index;
1247
0
        --n_digits;
1248
0
    }
1249
1250
    /* Determine if we have any "remainder" (after the digits, might include
1251
       decimal or exponent or both (or neither)) */
1252
0
    parse_number(unicode_tmp, index, index + n_digits,
1253
0
                 &n_remainder, &n_frac, &has_decimal);
1254
1255
    /* Determine the grouping, separator, and decimal point, if any. */
1256
0
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1257
0
                        format->thousands_separators,
1258
0
                        format->frac_thousands_separator,
1259
0
                        &locale) == -1)
1260
0
        goto done;
1261
1262
    /* Calculate how much memory we'll need. */
1263
0
    n_total = calc_number_widths(&spec, 0, sign_char, index,
1264
0
                                 index + n_digits, n_remainder, n_frac,
1265
0
                                 has_decimal, &locale, format, &maxchar);
1266
0
    if (n_total == -1) {
1267
0
        goto done;
1268
0
    }
1269
1270
    /* Allocate the memory. */
1271
0
    if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1272
0
        goto done;
1273
1274
    /* Populate the memory. */
1275
0
    result = fill_number(writer, &spec,
1276
0
                         unicode_tmp, index,
1277
0
                         NULL, 0, format->fill_char,
1278
0
                         &locale, 0);
1279
1280
0
done:
1281
0
    Py_XDECREF(unicode_tmp);
1282
0
    free_locale_info(&locale);
1283
0
    return result;
1284
0
}
1285
1286
/************************************************************************/
1287
/*********** complex formatting *****************************************/
1288
/************************************************************************/
1289
1290
static int
1291
format_complex_internal(PyObject *value,
1292
                        const InternalFormatSpec *format,
1293
                        _PyUnicodeWriter *writer)
1294
0
{
1295
0
    double re;
1296
0
    double im;
1297
0
    char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1298
0
    char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1299
1300
0
    InternalFormatSpec tmp_format = *format;
1301
0
    Py_ssize_t n_re_digits;
1302
0
    Py_ssize_t n_im_digits;
1303
0
    Py_ssize_t n_re_remainder;
1304
0
    Py_ssize_t n_im_remainder;
1305
0
    Py_ssize_t n_re_frac;
1306
0
    Py_ssize_t n_im_frac;
1307
0
    Py_ssize_t n_re_total;
1308
0
    Py_ssize_t n_im_total;
1309
0
    int re_has_decimal;
1310
0
    int im_has_decimal;
1311
0
    int precision, default_precision = 6;
1312
0
    Py_UCS4 type = format->type;
1313
0
    Py_ssize_t i_re;
1314
0
    Py_ssize_t i_im;
1315
0
    NumberFieldWidths re_spec;
1316
0
    NumberFieldWidths im_spec;
1317
0
    int flags = 0;
1318
0
    int result = -1;
1319
0
    Py_UCS4 maxchar = 127;
1320
0
    int rkind;
1321
0
    void *rdata;
1322
0
    Py_UCS4 re_sign_char = '\0';
1323
0
    Py_UCS4 im_sign_char = '\0';
1324
0
    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1325
0
    int im_float_type;
1326
0
    int add_parens = 0;
1327
0
    int skip_re = 0;
1328
0
    Py_ssize_t lpad;
1329
0
    Py_ssize_t rpad;
1330
0
    Py_ssize_t total;
1331
0
    PyObject *re_unicode_tmp = NULL;
1332
0
    PyObject *im_unicode_tmp = NULL;
1333
1334
    /* Locale settings, either from the actual locale or
1335
       from a hard-code pseudo-locale */
1336
0
    LocaleInfo locale = LocaleInfo_STATIC_INIT;
1337
1338
0
    if (format->precision > INT_MAX) {
1339
0
        PyErr_SetString(PyExc_ValueError, "precision too big");
1340
0
        goto done;
1341
0
    }
1342
0
    precision = (int)format->precision;
1343
1344
    /* Zero padding is not allowed. */
1345
0
    if (format->fill_char == '0') {
1346
0
        PyErr_SetString(PyExc_ValueError,
1347
0
                        "Zero padding is not allowed in complex format "
1348
0
                        "specifier");
1349
0
        goto done;
1350
0
    }
1351
1352
    /* Neither is '=' alignment . */
1353
0
    if (format->align == '=') {
1354
0
        PyErr_SetString(PyExc_ValueError,
1355
0
                        "'=' alignment flag is not allowed in complex format "
1356
0
                        "specifier");
1357
0
        goto done;
1358
0
    }
1359
1360
0
    re = PyComplex_RealAsDouble(value);
1361
0
    if (re == -1.0 && PyErr_Occurred())
1362
0
        goto done;
1363
0
    im = PyComplex_ImagAsDouble(value);
1364
0
    if (im == -1.0 && PyErr_Occurred())
1365
0
        goto done;
1366
1367
0
    if (format->alternate)
1368
0
        flags |= Py_DTSF_ALT;
1369
0
    if (format->no_neg_0)
1370
0
        flags |= Py_DTSF_NO_NEG_0;
1371
1372
0
    if (type == '\0') {
1373
        /* Omitted type specifier. Should be like str(self). */
1374
0
        type = 'r';
1375
0
        default_precision = 0;
1376
0
        if (re == 0.0 && copysign(1.0, re) == 1.0)
1377
0
            skip_re = 1;
1378
0
        else
1379
0
            add_parens = 1;
1380
0
    }
1381
1382
0
    if (type == 'n')
1383
        /* 'n' is the same as 'g', except for the locale used to
1384
           format the result. We take care of that later. */
1385
0
        type = 'g';
1386
1387
0
    if (precision < 0)
1388
0
        precision = default_precision;
1389
0
    else if (type == 'r')
1390
0
        type = 'g';
1391
1392
    /* Cast "type", because if we're in unicode we need to pass an
1393
       8-bit char. This is safe, because we've restricted what "type"
1394
       can be. */
1395
0
    re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1396
0
                                   &re_float_type);
1397
0
    if (re_buf == NULL)
1398
0
        goto done;
1399
0
    im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1400
0
                                   &im_float_type);
1401
0
    if (im_buf == NULL)
1402
0
        goto done;
1403
1404
0
    n_re_digits = strlen(re_buf);
1405
0
    n_im_digits = strlen(im_buf);
1406
1407
    /* Since there is no unicode version of PyOS_double_to_string,
1408
       just use the 8 bit version and then convert to unicode. */
1409
0
    re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1410
0
    if (re_unicode_tmp == NULL)
1411
0
        goto done;
1412
0
    i_re = 0;
1413
1414
0
    im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1415
0
    if (im_unicode_tmp == NULL)
1416
0
        goto done;
1417
0
    i_im = 0;
1418
1419
    /* Is a sign character present in the output?  If so, remember it
1420
       and skip it */
1421
0
    if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1422
0
        re_sign_char = '-';
1423
0
        ++i_re;
1424
0
        --n_re_digits;
1425
0
    }
1426
0
    if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1427
0
        im_sign_char = '-';
1428
0
        ++i_im;
1429
0
        --n_im_digits;
1430
0
    }
1431
1432
    /* Determine if we have any "remainder" (after the digits, might include
1433
       decimal or exponent or both (or neither)) */
1434
0
    parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1435
0
                 &n_re_remainder, &n_re_frac, &re_has_decimal);
1436
0
    parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1437
0
                 &n_im_remainder, &n_im_frac, &im_has_decimal);
1438
1439
    /* Determine the grouping, separator, and decimal point, if any. */
1440
0
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1441
0
                        format->thousands_separators,
1442
0
                        format->frac_thousands_separator,
1443
0
                        &locale) == -1)
1444
0
        goto done;
1445
1446
    /* Turn off any padding. We'll do it later after we've composed
1447
       the numbers without padding. */
1448
0
    tmp_format.fill_char = '\0';
1449
0
    tmp_format.align = '<';
1450
0
    tmp_format.width = -1;
1451
1452
    /* Calculate how much memory we'll need. */
1453
0
    n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
1454
0
                                    i_re, i_re + n_re_digits, n_re_remainder,
1455
0
                                    n_re_frac, re_has_decimal, &locale,
1456
0
                                    &tmp_format, &maxchar);
1457
0
    if (n_re_total == -1) {
1458
0
        goto done;
1459
0
    }
1460
1461
    /* Same formatting, but always include a sign, unless the real part is
1462
     * going to be omitted, in which case we use whatever sign convention was
1463
     * requested by the original format. */
1464
0
    if (!skip_re)
1465
0
        tmp_format.sign = '+';
1466
0
    n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
1467
0
                                    i_im, i_im + n_im_digits, n_im_remainder,
1468
0
                                    n_im_frac, im_has_decimal, &locale,
1469
0
                                    &tmp_format, &maxchar);
1470
0
    if (n_im_total == -1) {
1471
0
        goto done;
1472
0
    }
1473
1474
0
    if (skip_re)
1475
0
        n_re_total = 0;
1476
1477
    /* Add 1 for the 'j', and optionally 2 for parens. */
1478
0
    calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1479
0
                 format->width, format->align, &lpad, &rpad, &total);
1480
1481
0
    if (lpad || rpad)
1482
0
        maxchar = Py_MAX(maxchar, format->fill_char);
1483
1484
0
    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1485
0
        goto done;
1486
0
    rkind = writer->kind;
1487
0
    rdata = writer->data;
1488
1489
    /* Populate the memory. First, the padding. */
1490
0
    result = fill_padding(writer,
1491
0
                          n_re_total + n_im_total + 1 + add_parens * 2,
1492
0
                          format->fill_char, lpad, rpad);
1493
0
    if (result == -1)
1494
0
        goto done;
1495
1496
0
    if (add_parens) {
1497
0
        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1498
0
        writer->pos++;
1499
0
    }
1500
1501
0
    if (!skip_re) {
1502
0
        result = fill_number(writer, &re_spec,
1503
0
                             re_unicode_tmp, i_re,
1504
0
                             NULL, 0,
1505
0
                             0,
1506
0
                             &locale, 0);
1507
0
        if (result == -1)
1508
0
            goto done;
1509
0
    }
1510
0
    result = fill_number(writer, &im_spec,
1511
0
                         im_unicode_tmp, i_im,
1512
0
                         NULL, 0,
1513
0
                         0,
1514
0
                         &locale, 0);
1515
0
    if (result == -1)
1516
0
        goto done;
1517
0
    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1518
0
    writer->pos++;
1519
1520
0
    if (add_parens) {
1521
0
        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1522
0
        writer->pos++;
1523
0
    }
1524
1525
0
    writer->pos += rpad;
1526
1527
0
done:
1528
0
    PyMem_Free(re_buf);
1529
0
    PyMem_Free(im_buf);
1530
0
    Py_XDECREF(re_unicode_tmp);
1531
0
    Py_XDECREF(im_unicode_tmp);
1532
0
    free_locale_info(&locale);
1533
0
    return result;
1534
0
}
1535
1536
/************************************************************************/
1537
/*********** built in formatters ****************************************/
1538
/************************************************************************/
1539
static int
1540
format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1541
0
{
1542
0
    PyObject *str;
1543
0
    int err;
1544
1545
0
    str = PyObject_Str(obj);
1546
0
    if (str == NULL)
1547
0
        return -1;
1548
0
    err = _PyUnicodeWriter_WriteStr(writer, str);
1549
0
    Py_DECREF(str);
1550
0
    return err;
1551
0
}
1552
1553
int
1554
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1555
                                PyObject *obj,
1556
                                PyObject *format_spec,
1557
                                Py_ssize_t start, Py_ssize_t end)
1558
16.2M
{
1559
16.2M
    InternalFormatSpec format;
1560
1561
16.2M
    assert(PyUnicode_Check(obj));
1562
1563
    /* check for the special case of zero length format spec, make
1564
       it equivalent to str(obj) */
1565
16.2M
    if (start == end) {
1566
16.2M
        if (PyUnicode_CheckExact(obj))
1567
16.2M
            return _PyUnicodeWriter_WriteStr(writer, obj);
1568
0
        else
1569
0
            return format_obj(obj, writer);
1570
16.2M
    }
1571
1572
    /* parse the format_spec */
1573
0
    if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1574
0
                                           &format, 's', '<'))
1575
0
        return -1;
1576
1577
    /* type conversion? */
1578
0
    switch (format.type) {
1579
0
    case 's':
1580
        /* no type conversion needed, already a string.  do the formatting */
1581
0
        return format_string_internal(obj, &format, writer);
1582
0
    default:
1583
        /* unknown */
1584
0
        unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1585
0
        return -1;
1586
0
    }
1587
0
}
1588
1589
int
1590
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1591
                             PyObject *obj,
1592
                             PyObject *format_spec,
1593
                             Py_ssize_t start, Py_ssize_t end)
1594
79.7k
{
1595
79.7k
    PyObject *tmp = NULL;
1596
79.7k
    InternalFormatSpec format;
1597
79.7k
    int result = -1;
1598
1599
    /* check for the special case of zero length format spec, make
1600
       it equivalent to str(obj) */
1601
79.7k
    if (start == end) {
1602
79.3k
        if (PyLong_CheckExact(obj))
1603
79.3k
            return _PyLong_FormatWriter(writer, obj, 10, 0);
1604
0
        else
1605
0
            return format_obj(obj, writer);
1606
79.3k
    }
1607
1608
    /* parse the format_spec */
1609
382
    if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1610
382
                                           &format, 'd', '>'))
1611
0
        goto done;
1612
1613
    /* type conversion? */
1614
382
    switch (format.type) {
1615
0
    case 'b':
1616
0
    case 'c':
1617
0
    case 'd':
1618
0
    case 'o':
1619
382
    case 'x':
1620
382
    case 'X':
1621
382
    case 'n':
1622
        /* no type conversion needed, already an int.  do the formatting */
1623
382
        result = format_long_internal(obj, &format, writer);
1624
382
        break;
1625
1626
0
    case 'e':
1627
0
    case 'E':
1628
0
    case 'f':
1629
0
    case 'F':
1630
0
    case 'g':
1631
0
    case 'G':
1632
0
    case '%':
1633
        /* convert to float */
1634
0
        tmp = PyNumber_Float(obj);
1635
0
        if (tmp == NULL)
1636
0
            goto done;
1637
0
        result = format_float_internal(tmp, &format, writer);
1638
0
        break;
1639
1640
0
    default:
1641
        /* unknown */
1642
0
        unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1643
0
        goto done;
1644
382
    }
1645
1646
382
done:
1647
382
    Py_XDECREF(tmp);
1648
382
    return result;
1649
382
}
1650
1651
int
1652
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1653
                              PyObject *obj,
1654
                              PyObject *format_spec,
1655
                              Py_ssize_t start, Py_ssize_t end)
1656
0
{
1657
0
    InternalFormatSpec format;
1658
1659
    /* check for the special case of zero length format spec, make
1660
       it equivalent to str(obj) */
1661
0
    if (start == end)
1662
0
        return format_obj(obj, writer);
1663
1664
    /* parse the format_spec */
1665
0
    if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1666
0
                                           &format, '\0', '>'))
1667
0
        return -1;
1668
1669
    /* type conversion? */
1670
0
    switch (format.type) {
1671
0
    case '\0': /* No format code: like 'g', but with at least one decimal. */
1672
0
    case 'e':
1673
0
    case 'E':
1674
0
    case 'f':
1675
0
    case 'F':
1676
0
    case 'g':
1677
0
    case 'G':
1678
0
    case 'n':
1679
0
    case '%':
1680
        /* no conversion, already a float.  do the formatting */
1681
0
        return format_float_internal(obj, &format, writer);
1682
1683
0
    default:
1684
        /* unknown */
1685
0
        unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1686
0
        return -1;
1687
0
    }
1688
0
}
1689
1690
int
1691
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1692
                                PyObject *obj,
1693
                                PyObject *format_spec,
1694
                                Py_ssize_t start, Py_ssize_t end)
1695
0
{
1696
0
    InternalFormatSpec format;
1697
1698
    /* check for the special case of zero length format spec, make
1699
       it equivalent to str(obj) */
1700
0
    if (start == end)
1701
0
        return format_obj(obj, writer);
1702
1703
    /* parse the format_spec */
1704
0
    if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1705
0
                                           &format, '\0', '>'))
1706
0
        return -1;
1707
1708
    /* type conversion? */
1709
0
    switch (format.type) {
1710
0
    case '\0': /* No format code: like 'g', but with at least one decimal. */
1711
0
    case 'e':
1712
0
    case 'E':
1713
0
    case 'f':
1714
0
    case 'F':
1715
0
    case 'g':
1716
0
    case 'G':
1717
0
    case 'n':
1718
        /* no conversion, already a complex.  do the formatting */
1719
0
        return format_complex_internal(obj, &format, writer);
1720
1721
0
    default:
1722
        /* unknown */
1723
0
        unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1724
0
        return -1;
1725
0
    }
1726
0
}