Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Python/formatter_unicode.c
Line
Count
Source (jump to first uncovered line)
1
/* implements the unicode (as opposed to string) version of the
2
   built-in formatters for string, int, float.  that is, the versions
3
   of int.__format__, etc., that take and return unicode objects */
4
5
#include "Python.h"
6
#include "pycore_fileutils.h"
7
#include <locale.h>
8
9
/* Raises an exception about an unknown presentation type for this
10
 * type. */
11
12
static void
13
unknown_presentation_type(Py_UCS4 presentation_type,
14
                          const char* type_name)
15
0
{
16
    /* %c might be out-of-range, hence the two cases. */
17
0
    if (presentation_type > 32 && presentation_type < 128)
18
0
        PyErr_Format(PyExc_ValueError,
19
0
                     "Unknown format code '%c' "
20
0
                     "for object of type '%.200s'",
21
0
                     (char)presentation_type,
22
0
                     type_name);
23
0
    else
24
0
        PyErr_Format(PyExc_ValueError,
25
0
                     "Unknown format code '\\x%x' "
26
0
                     "for object of type '%.200s'",
27
0
                     (unsigned int)presentation_type,
28
0
                     type_name);
29
0
}
30
31
static void
32
invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
33
0
{
34
0
    assert(specifier == ',' || specifier == '_');
35
0
    if (presentation_type > 32 && presentation_type < 128)
36
0
        PyErr_Format(PyExc_ValueError,
37
0
                     "Cannot specify '%c' with '%c'.",
38
0
                     specifier, (char)presentation_type);
39
0
    else
40
0
        PyErr_Format(PyExc_ValueError,
41
0
                     "Cannot specify '%c' with '\\x%x'.",
42
0
                     specifier, (unsigned int)presentation_type);
43
0
}
44
45
static void
46
invalid_comma_and_underscore(void)
47
0
{
48
0
    PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49
0
}
50
51
/*
52
    get_integer consumes 0 or more decimal digit characters from an
53
    input string, updates *result with the corresponding positive
54
    integer, and returns the number of digits consumed.
55
56
    returns -1 on error.
57
*/
58
static int
59
get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
60
                  Py_ssize_t *result)
61
0
{
62
0
    Py_ssize_t accumulator, digitval, pos = *ppos;
63
0
    int numdigits;
64
0
    int kind = PyUnicode_KIND(str);
65
0
    void *data = PyUnicode_DATA(str);
66
67
0
    accumulator = numdigits = 0;
68
0
    for (; pos < end; pos++, numdigits++) {
69
0
        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
70
0
        if (digitval < 0)
71
0
            break;
72
        /*
73
           Detect possible overflow before it happens:
74
75
              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76
              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
77
        */
78
0
        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
79
0
            PyErr_Format(PyExc_ValueError,
80
0
                         "Too many decimal digits in format string");
81
0
            *ppos = pos;
82
0
            return -1;
83
0
        }
84
0
        accumulator = accumulator * 10 + digitval;
85
0
    }
86
0
    *ppos = pos;
87
0
    *result = accumulator;
88
0
    return numdigits;
89
0
}
90
91
/************************************************************************/
92
/*********** standard format specifier parsing **************************/
93
/************************************************************************/
94
95
/* returns true if this character is a specifier alignment token */
96
Py_LOCAL_INLINE(int)
97
is_alignment_token(Py_UCS4 c)
98
0
{
99
0
    switch (c) {
100
0
    case '<': case '>': case '=': case '^':
101
0
        return 1;
102
0
    default:
103
0
        return 0;
104
0
    }
105
0
}
106
107
/* returns true if this character is a sign element */
108
Py_LOCAL_INLINE(int)
109
is_sign_element(Py_UCS4 c)
110
0
{
111
0
    switch (c) {
112
0
    case ' ': case '+': case '-':
113
0
        return 1;
114
0
    default:
115
0
        return 0;
116
0
    }
117
0
}
118
119
/* Locale type codes: where the digit-grouping rules for a number come
   from.  LT_NO_LOCALE must be zero, because the format-spec parser
   tests the field for truth to decide whether grouping was requested. */
enum LocaleType {
    /* No grouping requested. */
    LT_NO_LOCALE = 0,
    /* ',' in the format spec: comma separator, groups of three. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' in the format spec: underscore separator, groups of three. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' combined with type b/o/x/X: underscore separator, groups of
       four. */
    LT_UNDER_FOUR_LOCALE = LT_UNDERSCORE_LOCALE + 1,
    /* Take the separator and grouping from localeconv(). */
    LT_CURRENT_LOCALE = LT_UNDER_FOUR_LOCALE + 1
};
127
128
typedef struct {
129
    Py_UCS4 fill_char;
130
    Py_UCS4 align;
131
    int alternate;
132
    Py_UCS4 sign;
133
    Py_ssize_t width;
134
    enum LocaleType thousands_separators;
135
    Py_ssize_t precision;
136
    Py_UCS4 type;
137
} InternalFormatSpec;
138
139
#if 0
/* Occasionally useful for debugging. Should normally be compiled out.
   Prints every field of *format to stdout, one per line, followed by a
   blank line. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n"
           "internal format spec: align %d\n"
           "internal format spec: alternate %d\n"
           "internal format spec: sign %d\n"
           "internal format spec: width %zd\n"
           "internal format spec: thousands_separators %d\n"
           "internal format spec: precision %zd\n"
           "internal format spec: type %c\n"
           "\n",
           format->fill_char,
           format->align,
           format->alternate,
           format->sign,
           format->width,
           format->thousands_separators,
           format->precision,
           format->type);
}
#endif
156
157
158
/*
159
  ptr points to the start of the format_spec, end points just past its end.
160
  fills in format with the parsed information.
161
  returns 1 on success, 0 on failure.
162
  if failure, sets the exception
163
*/
164
static int
165
parse_internal_render_format_spec(PyObject *format_spec,
166
                                  Py_ssize_t start, Py_ssize_t end,
167
                                  InternalFormatSpec *format,
168
                                  char default_type,
169
                                  char default_align)
170
0
{
171
0
    Py_ssize_t pos = start;
172
0
    int kind = PyUnicode_KIND(format_spec);
173
0
    void *data = PyUnicode_DATA(format_spec);
174
    /* end-pos is used throughout this code to specify the length of
175
       the input string */
176
0
#define READ_spec(index) PyUnicode_READ(kind, data, index)
177
178
0
    Py_ssize_t consumed;
179
0
    int align_specified = 0;
180
0
    int fill_char_specified = 0;
181
182
0
    format->fill_char = ' ';
183
0
    format->align = default_align;
184
0
    format->alternate = 0;
185
0
    format->sign = '\0';
186
0
    format->width = -1;
187
0
    format->thousands_separators = LT_NO_LOCALE;
188
0
    format->precision = -1;
189
0
    format->type = default_type;
190
191
    /* If the second char is an alignment token,
192
       then parse the fill char */
193
0
    if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
194
0
        format->align = READ_spec(pos+1);
195
0
        format->fill_char = READ_spec(pos);
196
0
        fill_char_specified = 1;
197
0
        align_specified = 1;
198
0
        pos += 2;
199
0
    }
200
0
    else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
201
0
        format->align = READ_spec(pos);
202
0
        align_specified = 1;
203
0
        ++pos;
204
0
    }
205
206
    /* Parse the various sign options */
207
0
    if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
208
0
        format->sign = READ_spec(pos);
209
0
        ++pos;
210
0
    }
211
212
    /* If the next character is #, we're in alternate mode.  This only
213
       applies to integers. */
214
0
    if (end-pos >= 1 && READ_spec(pos) == '#') {
215
0
        format->alternate = 1;
216
0
        ++pos;
217
0
    }
218
219
    /* The special case for 0-padding (backwards compat) */
220
0
    if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
221
0
        format->fill_char = '0';
222
0
        if (!align_specified) {
223
0
            format->align = '=';
224
0
        }
225
0
        ++pos;
226
0
    }
227
228
0
    consumed = get_integer(format_spec, &pos, end, &format->width);
229
0
    if (consumed == -1)
230
        /* Overflow error. Exception already set. */
231
0
        return 0;
232
233
    /* If consumed is 0, we didn't consume any characters for the
234
       width. In that case, reset the width to -1, because
235
       get_integer() will have set it to zero. -1 is how we record
236
       that the width wasn't specified. */
237
0
    if (consumed == 0)
238
0
        format->width = -1;
239
240
    /* Comma signifies add thousands separators */
241
0
    if (end-pos && READ_spec(pos) == ',') {
242
0
        format->thousands_separators = LT_DEFAULT_LOCALE;
243
0
        ++pos;
244
0
    }
245
    /* Underscore signifies add thousands separators */
246
0
    if (end-pos && READ_spec(pos) == '_') {
247
0
        if (format->thousands_separators != LT_NO_LOCALE) {
248
0
            invalid_comma_and_underscore();
249
0
            return 0;
250
0
        }
251
0
        format->thousands_separators = LT_UNDERSCORE_LOCALE;
252
0
        ++pos;
253
0
    }
254
0
    if (end-pos && READ_spec(pos) == ',') {
255
0
        invalid_comma_and_underscore();
256
0
        return 0;
257
0
    }
258
259
    /* Parse field precision */
260
0
    if (end-pos && READ_spec(pos) == '.') {
261
0
        ++pos;
262
263
0
        consumed = get_integer(format_spec, &pos, end, &format->precision);
264
0
        if (consumed == -1)
265
            /* Overflow error. Exception already set. */
266
0
            return 0;
267
268
        /* Not having a precision after a dot is an error. */
269
0
        if (consumed == 0) {
270
0
            PyErr_Format(PyExc_ValueError,
271
0
                         "Format specifier missing precision");
272
0
            return 0;
273
0
        }
274
275
0
    }
276
277
    /* Finally, parse the type field. */
278
279
0
    if (end-pos > 1) {
280
        /* More than one char remain, invalid format specifier. */
281
0
        PyErr_Format(PyExc_ValueError, "Invalid format specifier");
282
0
        return 0;
283
0
    }
284
285
0
    if (end-pos == 1) {
286
0
        format->type = READ_spec(pos);
287
0
        ++pos;
288
0
    }
289
290
    /* Do as much validating as we can, just by looking at the format
291
       specifier.  Do not take into account what type of formatting
292
       we're doing (int, float, string). */
293
294
0
    if (format->thousands_separators) {
295
0
        switch (format->type) {
296
0
        case 'd':
297
0
        case 'e':
298
0
        case 'f':
299
0
        case 'g':
300
0
        case 'E':
301
0
        case 'G':
302
0
        case '%':
303
0
        case 'F':
304
0
        case '\0':
305
            /* These are allowed. See PEP 378.*/
306
0
            break;
307
0
        case 'b':
308
0
        case 'o':
309
0
        case 'x':
310
0
        case 'X':
311
            /* Underscores are allowed in bin/oct/hex. See PEP 515. */
312
0
            if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
313
                /* Every four digits, not every three, in bin/oct/hex. */
314
0
                format->thousands_separators = LT_UNDER_FOUR_LOCALE;
315
0
                break;
316
0
            }
317
            /* fall through */
318
0
        default:
319
0
            invalid_thousands_separator_type(format->thousands_separators, format->type);
320
0
            return 0;
321
0
        }
322
0
    }
323
324
0
    assert (format->align <= 127);
325
0
    assert (format->sign <= 127);
326
0
    return 1;
327
0
}
328
329
/* Calculate the padding needed. */
330
static void
331
calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
332
             Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
333
             Py_ssize_t *n_total)
334
0
{
335
0
    if (width >= 0) {
336
0
        if (nchars > width)
337
0
            *n_total = nchars;
338
0
        else
339
0
            *n_total = width;
340
0
    }
341
0
    else {
342
        /* not specified, use all of the chars and no more */
343
0
        *n_total = nchars;
344
0
    }
345
346
    /* Figure out how much leading space we need, based on the
347
       aligning */
348
0
    if (align == '>')
349
0
        *n_lpadding = *n_total - nchars;
350
0
    else if (align == '^')
351
0
        *n_lpadding = (*n_total - nchars) / 2;
352
0
    else if (align == '<' || align == '=')
353
0
        *n_lpadding = 0;
354
0
    else {
355
        /* We should never have an unspecified alignment. */
356
0
        Py_UNREACHABLE();
357
0
    }
358
359
0
    *n_rpadding = *n_total - nchars - *n_lpadding;
360
0
}
361
362
/* Do the padding, and return a pointer to where the caller-supplied
363
   content goes. */
364
static int
365
fill_padding(_PyUnicodeWriter *writer,
366
             Py_ssize_t nchars,
367
             Py_UCS4 fill_char, Py_ssize_t n_lpadding,
368
             Py_ssize_t n_rpadding)
369
0
{
370
0
    Py_ssize_t pos;
371
372
    /* Pad on left. */
373
0
    if (n_lpadding) {
374
0
        pos = writer->pos;
375
0
        _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
376
0
    }
377
378
    /* Pad on right. */
379
0
    if (n_rpadding) {
380
0
        pos = writer->pos + nchars + n_lpadding;
381
0
        _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
382
0
    }
383
384
    /* Pointer to the user content. */
385
0
    writer->pos += n_lpadding;
386
0
    return 0;
387
0
}
388
389
/************************************************************************/
390
/*********** common routines for numeric formatting *********************/
391
/************************************************************************/
392
393
/* Locale info needed for formatting integers and the part of floats
394
   before and including the decimal. Note that locales only support
395
   8-bit chars, not unicode. */
396
typedef struct {
397
    PyObject *decimal_point;
398
    PyObject *thousands_sep;
399
    const char *grouping;
400
    char *grouping_buffer;
401
} LocaleInfo;
402
403
0
#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
404
405
/* describes the layout for an integer, see the comment in
406
   calc_number_widths() for details */
407
typedef struct {
408
    Py_ssize_t n_lpadding;
409
    Py_ssize_t n_prefix;
410
    Py_ssize_t n_spadding;
411
    Py_ssize_t n_rpadding;
412
    char sign;
413
    Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
414
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
415
                                    any grouping chars. */
416
    Py_ssize_t n_decimal;   /* 0 if only an integer */
417
    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
418
                               excluding the decimal itself, if
419
                               present. */
420
421
    /* These 2 are not the widths of fields, but are needed by
422
       STRINGLIB_GROUPING. */
423
    Py_ssize_t n_digits;    /* The number of digits before a decimal
424
                               or exponent. */
425
    Py_ssize_t n_min_width; /* The min_width we used when we computed
426
                               the n_grouped_digits width. */
427
} NumberFieldWidths;
428
429
430
/* Given a number of the form:
431
   digits[remainder]
432
   where ptr points to the start and end points to the end, find where
433
    the integer part ends. This could be a decimal, an exponent, both,
434
    or neither.
435
   If a decimal point is present, set *has_decimal and increment
436
    remainder beyond it.
437
   Results are undefined (but shouldn't crash) for improperly
438
    formatted strings.
439
*/
440
static void
441
parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
442
             Py_ssize_t *n_remainder, int *has_decimal)
443
0
{
444
0
    Py_ssize_t remainder;
445
0
    int kind = PyUnicode_KIND(s);
446
0
    void *data = PyUnicode_DATA(s);
447
448
0
    while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
449
0
        ++pos;
450
0
    remainder = pos;
451
452
    /* Does remainder start with a decimal point? */
453
0
    *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
454
455
    /* Skip the decimal point. */
456
0
    if (*has_decimal)
457
0
        remainder++;
458
459
0
    *n_remainder = end - remainder;
460
0
}
461
462
/* not all fields of format are used.  for example, precision is
463
   unused.  should this take discrete params in order to be more clear
464
   about what it does?  or is passing a single format parameter easier
465
   and more efficient enough to justify a little obfuscation?
466
   Return -1 on error. */
467
static Py_ssize_t
468
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
469
                   Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
470
                   Py_ssize_t n_end, Py_ssize_t n_remainder,
471
                   int has_decimal, const LocaleInfo *locale,
472
                   const InternalFormatSpec *format, Py_UCS4 *maxchar)
473
0
{
474
0
    Py_ssize_t n_non_digit_non_padding;
475
0
    Py_ssize_t n_padding;
476
477
0
    spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
478
0
    spec->n_lpadding = 0;
479
0
    spec->n_prefix = n_prefix;
480
0
    spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
481
0
    spec->n_remainder = n_remainder;
482
0
    spec->n_spadding = 0;
483
0
    spec->n_rpadding = 0;
484
0
    spec->sign = '\0';
485
0
    spec->n_sign = 0;
486
487
    /* the output will look like:
488
       |                                                                                         |
489
       | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
490
       |                                                                                         |
491
492
       sign is computed from format->sign and the actual
493
       sign of the number
494
495
       prefix is given (it's for the '0x' prefix)
496
497
       digits is already known
498
499
       the total width is either given, or computed from the
500
       actual digits
501
502
       only one of lpadding, spadding, and rpadding can be non-zero,
503
       and it's calculated from the width and other fields
504
    */
505
506
    /* compute the various parts we're going to write */
507
0
    switch (format->sign) {
508
0
    case '+':
509
        /* always put a + or - */
510
0
        spec->n_sign = 1;
511
0
        spec->sign = (sign_char == '-' ? '-' : '+');
512
0
        break;
513
0
    case ' ':
514
0
        spec->n_sign = 1;
515
0
        spec->sign = (sign_char == '-' ? '-' : ' ');
516
0
        break;
517
0
    default:
518
        /* Not specified, or the default (-) */
519
0
        if (sign_char == '-') {
520
0
            spec->n_sign = 1;
521
0
            spec->sign = '-';
522
0
        }
523
0
    }
524
525
    /* The number of chars used for non-digits and non-padding. */
526
0
    n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
527
0
        spec->n_remainder;
528
529
    /* min_width can go negative, that's okay. format->width == -1 means
530
       we don't care. */
531
0
    if (format->fill_char == '0' && format->align == '=')
532
0
        spec->n_min_width = format->width - n_non_digit_non_padding;
533
0
    else
534
0
        spec->n_min_width = 0;
535
536
0
    if (spec->n_digits == 0)
537
        /* This case only occurs when using 'c' formatting, we need
538
           to special case it because the grouping code always wants
539
           to have at least one character. */
540
0
        spec->n_grouped_digits = 0;
541
0
    else {
542
0
        Py_UCS4 grouping_maxchar;
543
0
        spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
544
0
            NULL, 0,
545
0
            NULL, 0, spec->n_digits,
546
0
            spec->n_min_width,
547
0
            locale->grouping, locale->thousands_sep, &grouping_maxchar);
548
0
        if (spec->n_grouped_digits == -1) {
549
0
            return -1;
550
0
        }
551
0
        *maxchar = Py_MAX(*maxchar, grouping_maxchar);
552
0
    }
553
554
    /* Given the desired width and the total of digit and non-digit
555
       space we consume, see if we need any padding. format->width can
556
       be negative (meaning no padding), but this code still works in
557
       that case. */
558
0
    n_padding = format->width -
559
0
                        (n_non_digit_non_padding + spec->n_grouped_digits);
560
0
    if (n_padding > 0) {
561
        /* Some padding is needed. Determine if it's left, space, or right. */
562
0
        switch (format->align) {
563
0
        case '<':
564
0
            spec->n_rpadding = n_padding;
565
0
            break;
566
0
        case '^':
567
0
            spec->n_lpadding = n_padding / 2;
568
0
            spec->n_rpadding = n_padding - spec->n_lpadding;
569
0
            break;
570
0
        case '=':
571
0
            spec->n_spadding = n_padding;
572
0
            break;
573
0
        case '>':
574
0
            spec->n_lpadding = n_padding;
575
0
            break;
576
0
        default:
577
            /* Shouldn't get here, but treat it as '>' */
578
0
            Py_UNREACHABLE();
579
0
        }
580
0
    }
581
582
0
    if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
583
0
        *maxchar = Py_MAX(*maxchar, format->fill_char);
584
585
0
    if (spec->n_decimal)
586
0
        *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
587
588
0
    return spec->n_lpadding + spec->n_sign + spec->n_prefix +
589
0
        spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
590
0
        spec->n_remainder + spec->n_rpadding;
591
0
}
592
593
/* Fill in the digit parts of a number's string representation,
594
   as determined in calc_number_widths().
595
   Return -1 on error, or 0 on success. */
596
static int
597
fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
598
            PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
599
            PyObject *prefix, Py_ssize_t p_start,
600
            Py_UCS4 fill_char,
601
            LocaleInfo *locale, int toupper)
602
0
{
603
    /* Used to keep track of digits, decimal, and remainder. */
604
0
    Py_ssize_t d_pos = d_start;
605
0
    const unsigned int kind = writer->kind;
606
0
    const void *data = writer->data;
607
0
    Py_ssize_t r;
608
609
0
    if (spec->n_lpadding) {
610
0
        _PyUnicode_FastFill(writer->buffer,
611
0
                            writer->pos, spec->n_lpadding, fill_char);
612
0
        writer->pos += spec->n_lpadding;
613
0
    }
614
0
    if (spec->n_sign == 1) {
615
0
        PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
616
0
        writer->pos++;
617
0
    }
618
0
    if (spec->n_prefix) {
619
0
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
620
0
                                      prefix, p_start,
621
0
                                      spec->n_prefix);
622
0
        if (toupper) {
623
0
            Py_ssize_t t;
624
0
            for (t = 0; t < spec->n_prefix; t++) {
625
0
                Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
626
0
                c = Py_TOUPPER(c);
627
0
                assert (c <= 127);
628
0
                PyUnicode_WRITE(kind, data, writer->pos + t, c);
629
0
            }
630
0
        }
631
0
        writer->pos += spec->n_prefix;
632
0
    }
633
0
    if (spec->n_spadding) {
634
0
        _PyUnicode_FastFill(writer->buffer,
635
0
                            writer->pos, spec->n_spadding, fill_char);
636
0
        writer->pos += spec->n_spadding;
637
0
    }
638
639
    /* Only for type 'c' special case, it has no digits. */
640
0
    if (spec->n_digits != 0) {
641
        /* Fill the digits with InsertThousandsGrouping. */
642
0
        r = _PyUnicode_InsertThousandsGrouping(
643
0
                writer, spec->n_grouped_digits,
644
0
                digits, d_pos, spec->n_digits,
645
0
                spec->n_min_width,
646
0
                locale->grouping, locale->thousands_sep, NULL);
647
0
        if (r == -1)
648
0
            return -1;
649
0
        assert(r == spec->n_grouped_digits);
650
0
        d_pos += spec->n_digits;
651
0
    }
652
0
    if (toupper) {
653
0
        Py_ssize_t t;
654
0
        for (t = 0; t < spec->n_grouped_digits; t++) {
655
0
            Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
656
0
            c = Py_TOUPPER(c);
657
0
            if (c > 127) {
658
0
                PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
659
0
                return -1;
660
0
            }
661
0
            PyUnicode_WRITE(kind, data, writer->pos + t, c);
662
0
        }
663
0
    }
664
0
    writer->pos += spec->n_grouped_digits;
665
666
0
    if (spec->n_decimal) {
667
0
        _PyUnicode_FastCopyCharacters(
668
0
            writer->buffer, writer->pos,
669
0
            locale->decimal_point, 0, spec->n_decimal);
670
0
        writer->pos += spec->n_decimal;
671
0
        d_pos += 1;
672
0
    }
673
674
0
    if (spec->n_remainder) {
675
0
        _PyUnicode_FastCopyCharacters(
676
0
            writer->buffer, writer->pos,
677
0
            digits, d_pos, spec->n_remainder);
678
0
        writer->pos += spec->n_remainder;
679
        /* d_pos += spec->n_remainder; */
680
0
    }
681
682
0
    if (spec->n_rpadding) {
683
0
        _PyUnicode_FastFill(writer->buffer,
684
0
                            writer->pos, spec->n_rpadding,
685
0
                            fill_char);
686
0
        writer->pos += spec->n_rpadding;
687
0
    }
688
0
    return 0;
689
0
}
690
691
static const char no_grouping[1] = {CHAR_MAX};
692
693
/* Find the decimal point character(s?), thousands_separator(s?), and
694
   grouping description, either for the current locale if type is
695
   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
696
   LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
697
static int
698
get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
699
0
{
700
0
    switch (type) {
701
0
    case LT_CURRENT_LOCALE: {
702
0
        struct lconv *lc = localeconv();
703
0
        if (_Py_GetLocaleconvNumeric(lc,
704
0
                                     &locale_info->decimal_point,
705
0
                                     &locale_info->thousands_sep) < 0) {
706
0
            return -1;
707
0
        }
708
709
        /* localeconv() grouping can become a dangling pointer or point
710
           to a different string if another thread calls localeconv() during
711
           the string formatting. Copy the string to avoid this risk. */
712
0
        locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
713
0
        if (locale_info->grouping_buffer == NULL) {
714
0
            PyErr_NoMemory();
715
0
            return -1;
716
0
        }
717
0
        locale_info->grouping = locale_info->grouping_buffer;
718
0
        break;
719
0
    }
720
0
    case LT_DEFAULT_LOCALE:
721
0
    case LT_UNDERSCORE_LOCALE:
722
0
    case LT_UNDER_FOUR_LOCALE:
723
0
        locale_info->decimal_point = PyUnicode_FromOrdinal('.');
724
0
        locale_info->thousands_sep = PyUnicode_FromOrdinal(
725
0
            type == LT_DEFAULT_LOCALE ? ',' : '_');
726
0
        if (!locale_info->decimal_point || !locale_info->thousands_sep)
727
0
            return -1;
728
0
        if (type != LT_UNDER_FOUR_LOCALE)
729
0
            locale_info->grouping = "\3"; /* Group every 3 characters.  The
730
                                         (implicit) trailing 0 means repeat
731
                                         infinitely. */
732
0
        else
733
0
            locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
734
0
        break;
735
0
    case LT_NO_LOCALE:
736
0
        locale_info->decimal_point = PyUnicode_FromOrdinal('.');
737
0
        locale_info->thousands_sep = PyUnicode_New(0, 0);
738
0
        if (!locale_info->decimal_point || !locale_info->thousands_sep)
739
0
            return -1;
740
0
        locale_info->grouping = no_grouping;
741
0
        break;
742
0
    }
743
0
    return 0;
744
0
}
745
746
static void
747
free_locale_info(LocaleInfo *locale_info)
748
0
{
749
0
    Py_XDECREF(locale_info->decimal_point);
750
0
    Py_XDECREF(locale_info->thousands_sep);
751
0
    PyMem_Free(locale_info->grouping_buffer);
752
0
}
753
754
/************************************************************************/
755
/*********** string formatting ******************************************/
756
/************************************************************************/
757
758
static int
759
format_string_internal(PyObject *value, const InternalFormatSpec *format,
760
                       _PyUnicodeWriter *writer)
761
0
{
762
0
    Py_ssize_t lpad;
763
0
    Py_ssize_t rpad;
764
0
    Py_ssize_t total;
765
0
    Py_ssize_t len;
766
0
    int result = -1;
767
0
    Py_UCS4 maxchar;
768
769
0
    assert(PyUnicode_IS_READY(value));
770
0
    len = PyUnicode_GET_LENGTH(value);
771
772
    /* sign is not allowed on strings */
773
0
    if (format->sign != '\0') {
774
0
        PyErr_SetString(PyExc_ValueError,
775
0
                        "Sign not allowed in string format specifier");
776
0
        goto done;
777
0
    }
778
779
    /* alternate is not allowed on strings */
780
0
    if (format->alternate) {
781
0
        PyErr_SetString(PyExc_ValueError,
782
0
                        "Alternate form (#) not allowed in string format "
783
0
                        "specifier");
784
0
        goto done;
785
0
    }
786
787
    /* '=' alignment not allowed on strings */
788
0
    if (format->align == '=') {
789
0
        PyErr_SetString(PyExc_ValueError,
790
0
                        "'=' alignment not allowed "
791
0
                        "in string format specifier");
792
0
        goto done;
793
0
    }
794
795
0
    if ((format->width == -1 || format->width <= len)
796
0
        && (format->precision == -1 || format->precision >= len)) {
797
        /* Fast path */
798
0
        return _PyUnicodeWriter_WriteStr(writer, value);
799
0
    }
800
801
    /* if precision is specified, output no more that format.precision
802
       characters */
803
0
    if (format->precision >= 0 && len >= format->precision) {
804
0
        len = format->precision;
805
0
    }
806
807
0
    calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
808
809
0
    maxchar = writer->maxchar;
810
0
    if (lpad != 0 || rpad != 0)
811
0
        maxchar = Py_MAX(maxchar, format->fill_char);
812
0
    if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
813
0
        Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
814
0
        maxchar = Py_MAX(maxchar, valmaxchar);
815
0
    }
816
817
    /* allocate the resulting string */
818
0
    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
819
0
        goto done;
820
821
    /* Write into that space. First the padding. */
822
0
    result = fill_padding(writer, len, format->fill_char, lpad, rpad);
823
0
    if (result == -1)
824
0
        goto done;
825
826
    /* Then the source string. */
827
0
    if (len) {
828
0
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
829
0
                                      value, 0, len);
830
0
    }
831
0
    writer->pos += (len + rpad);
832
0
    result = 0;
833
834
0
done:
835
0
    return result;
836
0
}
837
838
839
/************************************************************************/
840
/*********** long formatting ********************************************/
841
/************************************************************************/
842
843
static int
844
format_long_internal(PyObject *value, const InternalFormatSpec *format,
845
                     _PyUnicodeWriter *writer)
846
0
{
847
0
    int result = -1;
848
0
    Py_UCS4 maxchar = 127;
849
0
    PyObject *tmp = NULL;
850
0
    Py_ssize_t inumeric_chars;
851
0
    Py_UCS4 sign_char = '\0';
852
0
    Py_ssize_t n_digits;       /* count of digits need from the computed
853
                                  string */
854
0
    Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
855
                                   produces non-digits */
856
0
    Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
857
0
    Py_ssize_t n_total;
858
0
    Py_ssize_t prefix = 0;
859
0
    NumberFieldWidths spec;
860
0
    long x;
861
862
    /* Locale settings, either from the actual locale or
863
       from a hard-code pseudo-locale */
864
0
    LocaleInfo locale = LocaleInfo_STATIC_INIT;
865
866
    /* no precision allowed on integers */
867
0
    if (format->precision != -1) {
868
0
        PyErr_SetString(PyExc_ValueError,
869
0
                        "Precision not allowed in integer format specifier");
870
0
        goto done;
871
0
    }
872
873
    /* special case for character formatting */
874
0
    if (format->type == 'c') {
875
        /* error to specify a sign */
876
0
        if (format->sign != '\0') {
877
0
            PyErr_SetString(PyExc_ValueError,
878
0
                            "Sign not allowed with integer"
879
0
                            " format specifier 'c'");
880
0
            goto done;
881
0
        }
882
        /* error to request alternate format */
883
0
        if (format->alternate) {
884
0
            PyErr_SetString(PyExc_ValueError,
885
0
                            "Alternate form (#) not allowed with integer"
886
0
                            " format specifier 'c'");
887
0
            goto done;
888
0
        }
889
890
        /* taken from unicodeobject.c formatchar() */
891
        /* Integer input truncated to a character */
892
0
        x = PyLong_AsLong(value);
893
0
        if (x == -1 && PyErr_Occurred())
894
0
            goto done;
895
0
        if (x < 0 || x > 0x10ffff) {
896
0
            PyErr_SetString(PyExc_OverflowError,
897
0
                            "%c arg not in range(0x110000)");
898
0
            goto done;
899
0
        }
900
0
        tmp = PyUnicode_FromOrdinal(x);
901
0
        inumeric_chars = 0;
902
0
        n_digits = 1;
903
0
        maxchar = Py_MAX(maxchar, (Py_UCS4)x);
904
905
        /* As a sort-of hack, we tell calc_number_widths that we only
906
           have "remainder" characters. calc_number_widths thinks
907
           these are characters that don't get formatted, only copied
908
           into the output string. We do this for 'c' formatting,
909
           because the characters are likely to be non-digits. */
910
0
        n_remainder = 1;
911
0
    }
912
0
    else {
913
0
        int base;
914
0
        int leading_chars_to_skip = 0;  /* Number of characters added by
915
                                           PyNumber_ToBase that we want to
916
                                           skip over. */
917
918
        /* Compute the base and how many characters will be added by
919
           PyNumber_ToBase */
920
0
        switch (format->type) {
921
0
        case 'b':
922
0
            base = 2;
923
0
            leading_chars_to_skip = 2; /* 0b */
924
0
            break;
925
0
        case 'o':
926
0
            base = 8;
927
0
            leading_chars_to_skip = 2; /* 0o */
928
0
            break;
929
0
        case 'x':
930
0
        case 'X':
931
0
            base = 16;
932
0
            leading_chars_to_skip = 2; /* 0x */
933
0
            break;
934
0
        default:  /* shouldn't be needed, but stops a compiler warning */
935
0
        case 'd':
936
0
        case 'n':
937
0
            base = 10;
938
0
            break;
939
0
        }
940
941
0
        if (format->sign != '+' && format->sign != ' '
942
0
            && format->width == -1
943
0
            && format->type != 'X' && format->type != 'n'
944
0
            && !format->thousands_separators
945
0
            && PyLong_CheckExact(value))
946
0
        {
947
            /* Fast path */
948
0
            return _PyLong_FormatWriter(writer, value, base, format->alternate);
949
0
        }
950
951
        /* The number of prefix chars is the same as the leading
952
           chars to skip */
953
0
        if (format->alternate)
954
0
            n_prefix = leading_chars_to_skip;
955
956
        /* Do the hard part, converting to a string in a given base */
957
0
        tmp = _PyLong_Format(value, base);
958
0
        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
959
0
            goto done;
960
961
0
        inumeric_chars = 0;
962
0
        n_digits = PyUnicode_GET_LENGTH(tmp);
963
964
0
        prefix = inumeric_chars;
965
966
        /* Is a sign character present in the output?  If so, remember it
967
           and skip it */
968
0
        if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
969
0
            sign_char = '-';
970
0
            ++prefix;
971
0
            ++leading_chars_to_skip;
972
0
        }
973
974
        /* Skip over the leading chars (0x, 0b, etc.) */
975
0
        n_digits -= leading_chars_to_skip;
976
0
        inumeric_chars += leading_chars_to_skip;
977
0
    }
978
979
    /* Determine the grouping, separator, and decimal point, if any. */
980
0
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
981
0
                        format->thousands_separators,
982
0
                        &locale) == -1)
983
0
        goto done;
984
985
    /* Calculate how much memory we'll need. */
986
0
    n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
987
0
                                 inumeric_chars + n_digits, n_remainder, 0,
988
0
                                 &locale, format, &maxchar);
989
0
    if (n_total == -1) {
990
0
        goto done;
991
0
    }
992
993
    /* Allocate the memory. */
994
0
    if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
995
0
        goto done;
996
997
    /* Populate the memory. */
998
0
    result = fill_number(writer, &spec,
999
0
                         tmp, inumeric_chars, inumeric_chars + n_digits,
1000
0
                         tmp, prefix, format->fill_char,
1001
0
                         &locale, format->type == 'X');
1002
1003
0
done:
1004
0
    Py_XDECREF(tmp);
1005
0
    free_locale_info(&locale);
1006
0
    return result;
1007
0
}
1008
1009
/************************************************************************/
1010
/*********** float formatting *******************************************/
1011
/************************************************************************/
1012
1013
/* much of this is taken from unicodeobject.c */
1014
static int
1015
format_float_internal(PyObject *value,
1016
                      const InternalFormatSpec *format,
1017
                      _PyUnicodeWriter *writer)
1018
0
{
1019
0
    char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
1020
0
    Py_ssize_t n_digits;
1021
0
    Py_ssize_t n_remainder;
1022
0
    Py_ssize_t n_total;
1023
0
    int has_decimal;
1024
0
    double val;
1025
0
    int precision, default_precision = 6;
1026
0
    Py_UCS4 type = format->type;
1027
0
    int add_pct = 0;
1028
0
    Py_ssize_t index;
1029
0
    NumberFieldWidths spec;
1030
0
    int flags = 0;
1031
0
    int result = -1;
1032
0
    Py_UCS4 maxchar = 127;
1033
0
    Py_UCS4 sign_char = '\0';
1034
0
    int float_type; /* Used to see if we have a nan, inf, or regular float. */
1035
0
    PyObject *unicode_tmp = NULL;
1036
1037
    /* Locale settings, either from the actual locale or
1038
       from a hard-code pseudo-locale */
1039
0
    LocaleInfo locale = LocaleInfo_STATIC_INIT;
1040
1041
0
    if (format->precision > INT_MAX) {
1042
0
        PyErr_SetString(PyExc_ValueError, "precision too big");
1043
0
        goto done;
1044
0
    }
1045
0
    precision = (int)format->precision;
1046
1047
0
    if (format->alternate)
1048
0
        flags |= Py_DTSF_ALT;
1049
1050
0
    if (type == '\0') {
1051
        /* Omitted type specifier.  Behaves in the same way as repr(x)
1052
           and str(x) if no precision is given, else like 'g', but with
1053
           at least one digit after the decimal point. */
1054
0
        flags |= Py_DTSF_ADD_DOT_0;
1055
0
        type = 'r';
1056
0
        default_precision = 0;
1057
0
    }
1058
1059
0
    if (type == 'n')
1060
        /* 'n' is the same as 'g', except for the locale used to
1061
           format the result. We take care of that later. */
1062
0
        type = 'g';
1063
1064
0
    val = PyFloat_AsDouble(value);
1065
0
    if (val == -1.0 && PyErr_Occurred())
1066
0
        goto done;
1067
1068
0
    if (type == '%') {
1069
0
        type = 'f';
1070
0
        val *= 100;
1071
0
        add_pct = 1;
1072
0
    }
1073
1074
0
    if (precision < 0)
1075
0
        precision = default_precision;
1076
0
    else if (type == 'r')
1077
0
        type = 'g';
1078
1079
    /* Cast "type", because if we're in unicode we need to pass an
1080
       8-bit char. This is safe, because we've restricted what "type"
1081
       can be. */
1082
0
    buf = PyOS_double_to_string(val, (char)type, precision, flags,
1083
0
                                &float_type);
1084
0
    if (buf == NULL)
1085
0
        goto done;
1086
0
    n_digits = strlen(buf);
1087
1088
0
    if (add_pct) {
1089
        /* We know that buf has a trailing zero (since we just called
1090
           strlen() on it), and we don't use that fact any more. So we
1091
           can just write over the trailing zero. */
1092
0
        buf[n_digits] = '%';
1093
0
        n_digits += 1;
1094
0
    }
1095
1096
0
    if (format->sign != '+' && format->sign != ' '
1097
0
        && format->width == -1
1098
0
        && format->type != 'n'
1099
0
        && !format->thousands_separators)
1100
0
    {
1101
        /* Fast path */
1102
0
        result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1103
0
        PyMem_Free(buf);
1104
0
        return result;
1105
0
    }
1106
1107
    /* Since there is no unicode version of PyOS_double_to_string,
1108
       just use the 8 bit version and then convert to unicode. */
1109
0
    unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1110
0
    PyMem_Free(buf);
1111
0
    if (unicode_tmp == NULL)
1112
0
        goto done;
1113
1114
    /* Is a sign character present in the output?  If so, remember it
1115
       and skip it */
1116
0
    index = 0;
1117
0
    if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1118
0
        sign_char = '-';
1119
0
        ++index;
1120
0
        --n_digits;
1121
0
    }
1122
1123
    /* Determine if we have any "remainder" (after the digits, might include
1124
       decimal or exponent or both (or neither)) */
1125
0
    parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1126
1127
    /* Determine the grouping, separator, and decimal point, if any. */
1128
0
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1129
0
                        format->thousands_separators,
1130
0
                        &locale) == -1)
1131
0
        goto done;
1132
1133
    /* Calculate how much memory we'll need. */
1134
0
    n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
1135
0
                                 index + n_digits, n_remainder, has_decimal,
1136
0
                                 &locale, format, &maxchar);
1137
0
    if (n_total == -1) {
1138
0
        goto done;
1139
0
    }
1140
1141
    /* Allocate the memory. */
1142
0
    if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1143
0
        goto done;
1144
1145
    /* Populate the memory. */
1146
0
    result = fill_number(writer, &spec,
1147
0
                         unicode_tmp, index, index + n_digits,
1148
0
                         NULL, 0, format->fill_char,
1149
0
                         &locale, 0);
1150
1151
0
done:
1152
0
    Py_XDECREF(unicode_tmp);
1153
0
    free_locale_info(&locale);
1154
0
    return result;
1155
0
}
1156
1157
/************************************************************************/
1158
/*********** complex formatting *****************************************/
1159
/************************************************************************/
1160
1161
static int
1162
format_complex_internal(PyObject *value,
1163
                        const InternalFormatSpec *format,
1164
                        _PyUnicodeWriter *writer)
1165
0
{
1166
0
    double re;
1167
0
    double im;
1168
0
    char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1169
0
    char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1170
1171
0
    InternalFormatSpec tmp_format = *format;
1172
0
    Py_ssize_t n_re_digits;
1173
0
    Py_ssize_t n_im_digits;
1174
0
    Py_ssize_t n_re_remainder;
1175
0
    Py_ssize_t n_im_remainder;
1176
0
    Py_ssize_t n_re_total;
1177
0
    Py_ssize_t n_im_total;
1178
0
    int re_has_decimal;
1179
0
    int im_has_decimal;
1180
0
    int precision, default_precision = 6;
1181
0
    Py_UCS4 type = format->type;
1182
0
    Py_ssize_t i_re;
1183
0
    Py_ssize_t i_im;
1184
0
    NumberFieldWidths re_spec;
1185
0
    NumberFieldWidths im_spec;
1186
0
    int flags = 0;
1187
0
    int result = -1;
1188
0
    Py_UCS4 maxchar = 127;
1189
0
    enum PyUnicode_Kind rkind;
1190
0
    void *rdata;
1191
0
    Py_UCS4 re_sign_char = '\0';
1192
0
    Py_UCS4 im_sign_char = '\0';
1193
0
    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1194
0
    int im_float_type;
1195
0
    int add_parens = 0;
1196
0
    int skip_re = 0;
1197
0
    Py_ssize_t lpad;
1198
0
    Py_ssize_t rpad;
1199
0
    Py_ssize_t total;
1200
0
    PyObject *re_unicode_tmp = NULL;
1201
0
    PyObject *im_unicode_tmp = NULL;
1202
1203
    /* Locale settings, either from the actual locale or
1204
       from a hard-code pseudo-locale */
1205
0
    LocaleInfo locale = LocaleInfo_STATIC_INIT;
1206
1207
0
    if (format->precision > INT_MAX) {
1208
0
        PyErr_SetString(PyExc_ValueError, "precision too big");
1209
0
        goto done;
1210
0
    }
1211
0
    precision = (int)format->precision;
1212
1213
    /* Zero padding is not allowed. */
1214
0
    if (format->fill_char == '0') {
1215
0
        PyErr_SetString(PyExc_ValueError,
1216
0
                        "Zero padding is not allowed in complex format "
1217
0
                        "specifier");
1218
0
        goto done;
1219
0
    }
1220
1221
    /* Neither is '=' alignment . */
1222
0
    if (format->align == '=') {
1223
0
        PyErr_SetString(PyExc_ValueError,
1224
0
                        "'=' alignment flag is not allowed in complex format "
1225
0
                        "specifier");
1226
0
        goto done;
1227
0
    }
1228
1229
0
    re = PyComplex_RealAsDouble(value);
1230
0
    if (re == -1.0 && PyErr_Occurred())
1231
0
        goto done;
1232
0
    im = PyComplex_ImagAsDouble(value);
1233
0
    if (im == -1.0 && PyErr_Occurred())
1234
0
        goto done;
1235
1236
0
    if (format->alternate)
1237
0
        flags |= Py_DTSF_ALT;
1238
1239
0
    if (type == '\0') {
1240
        /* Omitted type specifier. Should be like str(self). */
1241
0
        type = 'r';
1242
0
        default_precision = 0;
1243
0
        if (re == 0.0 && copysign(1.0, re) == 1.0)
1244
0
            skip_re = 1;
1245
0
        else
1246
0
            add_parens = 1;
1247
0
    }
1248
1249
0
    if (type == 'n')
1250
        /* 'n' is the same as 'g', except for the locale used to
1251
           format the result. We take care of that later. */
1252
0
        type = 'g';
1253
1254
0
    if (precision < 0)
1255
0
        precision = default_precision;
1256
0
    else if (type == 'r')
1257
0
        type = 'g';
1258
1259
    /* Cast "type", because if we're in unicode we need to pass an
1260
       8-bit char. This is safe, because we've restricted what "type"
1261
       can be. */
1262
0
    re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1263
0
                                   &re_float_type);
1264
0
    if (re_buf == NULL)
1265
0
        goto done;
1266
0
    im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1267
0
                                   &im_float_type);
1268
0
    if (im_buf == NULL)
1269
0
        goto done;
1270
1271
0
    n_re_digits = strlen(re_buf);
1272
0
    n_im_digits = strlen(im_buf);
1273
1274
    /* Since there is no unicode version of PyOS_double_to_string,
1275
       just use the 8 bit version and then convert to unicode. */
1276
0
    re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1277
0
    if (re_unicode_tmp == NULL)
1278
0
        goto done;
1279
0
    i_re = 0;
1280
1281
0
    im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1282
0
    if (im_unicode_tmp == NULL)
1283
0
        goto done;
1284
0
    i_im = 0;
1285
1286
    /* Is a sign character present in the output?  If so, remember it
1287
       and skip it */
1288
0
    if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1289
0
        re_sign_char = '-';
1290
0
        ++i_re;
1291
0
        --n_re_digits;
1292
0
    }
1293
0
    if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1294
0
        im_sign_char = '-';
1295
0
        ++i_im;
1296
0
        --n_im_digits;
1297
0
    }
1298
1299
    /* Determine if we have any "remainder" (after the digits, might include
1300
       decimal or exponent or both (or neither)) */
1301
0
    parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1302
0
                 &n_re_remainder, &re_has_decimal);
1303
0
    parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1304
0
                 &n_im_remainder, &im_has_decimal);
1305
1306
    /* Determine the grouping, separator, and decimal point, if any. */
1307
0
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1308
0
                        format->thousands_separators,
1309
0
                        &locale) == -1)
1310
0
        goto done;
1311
1312
    /* Turn off any padding. We'll do it later after we've composed
1313
       the numbers without padding. */
1314
0
    tmp_format.fill_char = '\0';
1315
0
    tmp_format.align = '<';
1316
0
    tmp_format.width = -1;
1317
1318
    /* Calculate how much memory we'll need. */
1319
0
    n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1320
0
                                    i_re, i_re + n_re_digits, n_re_remainder,
1321
0
                                    re_has_decimal, &locale, &tmp_format,
1322
0
                                    &maxchar);
1323
0
    if (n_re_total == -1) {
1324
0
        goto done;
1325
0
    }
1326
1327
    /* Same formatting, but always include a sign, unless the real part is
1328
     * going to be omitted, in which case we use whatever sign convention was
1329
     * requested by the original format. */
1330
0
    if (!skip_re)
1331
0
        tmp_format.sign = '+';
1332
0
    n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1333
0
                                    i_im, i_im + n_im_digits, n_im_remainder,
1334
0
                                    im_has_decimal, &locale, &tmp_format,
1335
0
                                    &maxchar);
1336
0
    if (n_im_total == -1) {
1337
0
        goto done;
1338
0
    }
1339
1340
0
    if (skip_re)
1341
0
        n_re_total = 0;
1342
1343
    /* Add 1 for the 'j', and optionally 2 for parens. */
1344
0
    calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1345
0
                 format->width, format->align, &lpad, &rpad, &total);
1346
1347
0
    if (lpad || rpad)
1348
0
        maxchar = Py_MAX(maxchar, format->fill_char);
1349
1350
0
    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1351
0
        goto done;
1352
0
    rkind = writer->kind;
1353
0
    rdata = writer->data;
1354
1355
    /* Populate the memory. First, the padding. */
1356
0
    result = fill_padding(writer,
1357
0
                          n_re_total + n_im_total + 1 + add_parens * 2,
1358
0
                          format->fill_char, lpad, rpad);
1359
0
    if (result == -1)
1360
0
        goto done;
1361
1362
0
    if (add_parens) {
1363
0
        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1364
0
        writer->pos++;
1365
0
    }
1366
1367
0
    if (!skip_re) {
1368
0
        result = fill_number(writer, &re_spec,
1369
0
                             re_unicode_tmp, i_re, i_re + n_re_digits,
1370
0
                             NULL, 0,
1371
0
                             0,
1372
0
                             &locale, 0);
1373
0
        if (result == -1)
1374
0
            goto done;
1375
0
    }
1376
0
    result = fill_number(writer, &im_spec,
1377
0
                         im_unicode_tmp, i_im, i_im + n_im_digits,
1378
0
                         NULL, 0,
1379
0
                         0,
1380
0
                         &locale, 0);
1381
0
    if (result == -1)
1382
0
        goto done;
1383
0
    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1384
0
    writer->pos++;
1385
1386
0
    if (add_parens) {
1387
0
        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1388
0
        writer->pos++;
1389
0
    }
1390
1391
0
    writer->pos += rpad;
1392
1393
0
done:
1394
0
    PyMem_Free(re_buf);
1395
0
    PyMem_Free(im_buf);
1396
0
    Py_XDECREF(re_unicode_tmp);
1397
0
    Py_XDECREF(im_unicode_tmp);
1398
0
    free_locale_info(&locale);
1399
0
    return result;
1400
0
}
1401
1402
/************************************************************************/
1403
/*********** built in formatters ****************************************/
1404
/************************************************************************/
1405
static int
1406
format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1407
0
{
1408
0
    PyObject *str;
1409
0
    int err;
1410
1411
0
    str = PyObject_Str(obj);
1412
0
    if (str == NULL)
1413
0
        return -1;
1414
0
    err = _PyUnicodeWriter_WriteStr(writer, str);
1415
0
    Py_DECREF(str);
1416
0
    return err;
1417
0
}
1418
1419
int
1420
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1421
                                PyObject *obj,
1422
                                PyObject *format_spec,
1423
                                Py_ssize_t start, Py_ssize_t end)
1424
147
{
1425
147
    InternalFormatSpec format;
1426
1427
147
    assert(PyUnicode_Check(obj));
1428
1429
    /* check for the special case of zero length format spec, make
1430
       it equivalent to str(obj) */
1431
147
    if (start == end) {
1432
147
        if (PyUnicode_CheckExact(obj))
1433
147
            return _PyUnicodeWriter_WriteStr(writer, obj);
1434
0
        else
1435
0
            return format_obj(obj, writer);
1436
147
    }
1437
1438
    /* parse the format_spec */
1439
0
    if (!parse_internal_render_format_spec(format_spec, start, end,
1440
0
                                           &format, 's', '<'))
1441
0
        return -1;
1442
1443
    /* type conversion? */
1444
0
    switch (format.type) {
1445
0
    case 's':
1446
        /* no type conversion needed, already a string.  do the formatting */
1447
0
        return format_string_internal(obj, &format, writer);
1448
0
    default:
1449
        /* unknown */
1450
0
        unknown_presentation_type(format.type, obj->ob_type->tp_name);
1451
0
        return -1;
1452
0
    }
1453
0
}
1454
1455
int
1456
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1457
                             PyObject *obj,
1458
                             PyObject *format_spec,
1459
                             Py_ssize_t start, Py_ssize_t end)
1460
28
{
1461
28
    PyObject *tmp = NULL, *str = NULL;
1462
28
    InternalFormatSpec format;
1463
28
    int result = -1;
1464
1465
    /* check for the special case of zero length format spec, make
1466
       it equivalent to str(obj) */
1467
28
    if (start == end) {
1468
28
        if (PyLong_CheckExact(obj))
1469
28
            return _PyLong_FormatWriter(writer, obj, 10, 0);
1470
0
        else
1471
0
            return format_obj(obj, writer);
1472
28
    }
1473
1474
    /* parse the format_spec */
1475
0
    if (!parse_internal_render_format_spec(format_spec, start, end,
1476
0
                                           &format, 'd', '>'))
1477
0
        goto done;
1478
1479
    /* type conversion? */
1480
0
    switch (format.type) {
1481
0
    case 'b':
1482
0
    case 'c':
1483
0
    case 'd':
1484
0
    case 'o':
1485
0
    case 'x':
1486
0
    case 'X':
1487
0
    case 'n':
1488
        /* no type conversion needed, already an int.  do the formatting */
1489
0
        result = format_long_internal(obj, &format, writer);
1490
0
        break;
1491
1492
0
    case 'e':
1493
0
    case 'E':
1494
0
    case 'f':
1495
0
    case 'F':
1496
0
    case 'g':
1497
0
    case 'G':
1498
0
    case '%':
1499
        /* convert to float */
1500
0
        tmp = PyNumber_Float(obj);
1501
0
        if (tmp == NULL)
1502
0
            goto done;
1503
0
        result = format_float_internal(tmp, &format, writer);
1504
0
        break;
1505
1506
0
    default:
1507
        /* unknown */
1508
0
        unknown_presentation_type(format.type, obj->ob_type->tp_name);
1509
0
        goto done;
1510
0
    }
1511
1512
0
done:
1513
0
    Py_XDECREF(tmp);
1514
0
    Py_XDECREF(str);
1515
0
    return result;
1516
0
}
1517
1518
int
1519
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1520
                              PyObject *obj,
1521
                              PyObject *format_spec,
1522
                              Py_ssize_t start, Py_ssize_t end)
1523
0
{
1524
0
    InternalFormatSpec format;
1525
1526
    /* check for the special case of zero length format spec, make
1527
       it equivalent to str(obj) */
1528
0
    if (start == end)
1529
0
        return format_obj(obj, writer);
1530
1531
    /* parse the format_spec */
1532
0
    if (!parse_internal_render_format_spec(format_spec, start, end,
1533
0
                                           &format, '\0', '>'))
1534
0
        return -1;
1535
1536
    /* type conversion? */
1537
0
    switch (format.type) {
1538
0
    case '\0': /* No format code: like 'g', but with at least one decimal. */
1539
0
    case 'e':
1540
0
    case 'E':
1541
0
    case 'f':
1542
0
    case 'F':
1543
0
    case 'g':
1544
0
    case 'G':
1545
0
    case 'n':
1546
0
    case '%':
1547
        /* no conversion, already a float.  do the formatting */
1548
0
        return format_float_internal(obj, &format, writer);
1549
1550
0
    default:
1551
        /* unknown */
1552
0
        unknown_presentation_type(format.type, obj->ob_type->tp_name);
1553
0
        return -1;
1554
0
    }
1555
0
}
1556
1557
int
1558
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1559
                                PyObject *obj,
1560
                                PyObject *format_spec,
1561
                                Py_ssize_t start, Py_ssize_t end)
1562
0
{
1563
0
    InternalFormatSpec format;
1564
1565
    /* check for the special case of zero length format spec, make
1566
       it equivalent to str(obj) */
1567
0
    if (start == end)
1568
0
        return format_obj(obj, writer);
1569
1570
    /* parse the format_spec */
1571
0
    if (!parse_internal_render_format_spec(format_spec, start, end,
1572
0
                                           &format, '\0', '>'))
1573
0
        return -1;
1574
1575
    /* type conversion? */
1576
0
    switch (format.type) {
1577
0
    case '\0': /* No format code: like 'g', but with at least one decimal. */
1578
0
    case 'e':
1579
0
    case 'E':
1580
0
    case 'f':
1581
0
    case 'F':
1582
0
    case 'g':
1583
0
    case 'G':
1584
0
    case 'n':
1585
        /* no conversion, already a complex.  do the formatting */
1586
0
        return format_complex_internal(obj, &format, writer);
1587
1588
0
    default:
1589
        /* unknown */
1590
0
        unknown_presentation_type(format.type, obj->ob_type->tp_name);
1591
0
        return -1;
1592
0
    }
1593
0
}