Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Python/pystrtod.c
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C; c-file-style: "python" -*- */
2
3
#include <Python.h>
4
#include "pycore_dtoa.h"          // _Py_dg_strtod()
5
#include "pycore_pymath.h"        // _PY_SHORT_FLOAT_REPR
6
7
#include <locale.h>               // localeconv()
8
9
/* Test whether the string s begins with t, ignoring the case of the
   characters in s.  t must already be lower-case.  Used for nan and inf
   detection.  Returns 1 if every character of t was matched, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        if (Py_TOLOWER(*s) != *t) {
            /* Mismatch before t was exhausted (this also covers s ending
               early, since its terminating nul never equals a non-nul *t). */
            return 0;
        }
    }
    return 1;
}
21
22
/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
23
   "infinity", with an optional leading sign of "+" or "-".  On success,
24
   return the NaN or Infinity as a double and set *endptr to point just beyond
25
   the successfully parsed portion of the string.  On failure, return -1.0 and
26
   set *endptr to point to the start of the string. */
27
double
28
_Py_parse_inf_or_nan(const char *p, char **endptr)
29
23
{
30
23
    double retval;
31
23
    const char *s;
32
23
    int negate = 0;
33
34
23
    s = p;
35
23
    if (*s == '-') {
36
2
        negate = 1;
37
2
        s++;
38
2
    }
39
21
    else if (*s == '+') {
40
0
        s++;
41
0
    }
42
23
    if (case_insensitive_match(s, "inf")) {
43
6
        s += 3;
44
6
        if (case_insensitive_match(s, "inity"))
45
0
            s += 5;
46
6
        retval = negate ? -Py_INFINITY : Py_INFINITY;
47
6
    }
48
17
    else if (case_insensitive_match(s, "nan")) {
49
2
        s += 3;
50
2
        retval = negate ? -fabs(Py_NAN) : fabs(Py_NAN);
51
2
    }
52
15
    else {
53
15
        s = p;
54
15
        retval = -1.0;
55
15
    }
56
23
    *endptr = (char *)s;
57
23
    return retval;
58
23
}
59
60
61
/**
62
 * _PyOS_ascii_strtod:
63
 * @nptr:    the string to convert to a numeric value.
64
 * @endptr:  if non-%NULL, it returns the character after
65
 *           the last character used in the conversion.
66
 *
67
 * Converts a string to a #gdouble value.
68
 * This function behaves like the standard strtod() function
69
 * does in the C locale. It does this without actually
70
 * changing the current locale, since that would not be
71
 * thread-safe.
72
 *
73
 * This function is typically used when reading configuration
74
 * files or other non-user input that should be locale independent.
75
 * To handle input from the user you should normally use the
76
 * locale-sensitive system strtod() function.
77
 *
78
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
79
 * is returned (according to the sign of the value), and %ERANGE is
80
 * stored in %errno. If the correct value would cause underflow,
81
 * zero is returned and %ERANGE is stored in %errno.
82
 * If memory allocation fails, %ENOMEM is stored in %errno.
83
 *
84
 * This function resets %errno before calling strtod() so that
85
 * you can reliably detect overflow and underflow.
86
 *
87
 * Return value: the #gdouble value.
88
 **/
89
90
#if _PY_SHORT_FLOAT_REPR == 1
91
92
static double
93
_PyOS_ascii_strtod(const char *nptr, char **endptr)
94
636k
{
95
636k
    double result;
96
636k
    _Py_SET_53BIT_PRECISION_HEADER;
97
98
636k
    assert(nptr != NULL);
99
    /* Set errno to zero, so that we can distinguish zero results
100
       and underflows */
101
636k
    errno = 0;
102
103
636k
    _Py_SET_53BIT_PRECISION_START;
104
636k
    result = _Py_dg_strtod(nptr, endptr);
105
636k
    _Py_SET_53BIT_PRECISION_END;
106
107
636k
    if (*endptr == nptr)
108
        /* string might represent an inf or nan */
109
23
        result = _Py_parse_inf_or_nan(nptr, endptr);
110
111
636k
    return result;
112
113
636k
}
114
115
#else
116
117
/*
   Use system strtod;  since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   On success, returns the converted value and sets *endptr to point just
   past the parsed text.  If nptr does not start with a valid number, sets
   *endptr to nptr, sets errno to EINVAL and returns -1.0.  If the temporary
   copy of the input cannot be allocated, sets *endptr to nptr, sets errno to
   ENOMEM and returns -1.0.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans.  On failure val is left at -1.0, which is
       also what the ENOMEM path below returns. */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr) {
        return val;
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725)  */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X')) {
        goto invalid_string;
    }

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.') {
        goto invalid_string;
    }

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p)) {
            p++;
        }

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p)) {
                p++;
            }

            if (*p == 'e' || *p == 'E') {
                p++;
            }
            if (*p == '+' || *p == '-') {
                p++;
            }
            while (Py_ISDIGIT(*p)) {
                p++;
            }
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0) {
            /* Python bug #1417699 */
            goto invalid_string;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_Malloc(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from the copy back to the
               caller's string.  fail_pos points into `copy` while
               decimal_point_pos points into the input, so the two must be
               compared as offsets from the start of their own buffers;
               comparing the raw pointers is undefined and gives the wrong
               answer whenever the locale's decimal point is longer than
               one byte. */
            if (fail_pos - copy > decimal_point_pos - digits_pos) {
                /* Stopped after the decimal point: undo the extra length
                   of the locale-specific separator. */
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            }
            else {
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
            }
        }

        PyMem_Free(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos) {
        goto invalid_string;
    }

    if (negate && fail_pos != nptr) {
        val = -val;
    }
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
271
272
#endif
273
274
/* PyOS_string_to_double converts a null-terminated byte string s (interpreted
275
   as a string of ASCII characters) to a float.  The string should not have
276
   leading or trailing whitespace.  The conversion is independent of the
277
   current locale.
278
279
   If endptr is NULL, try to convert the whole string.  Raise ValueError and
280
   return -1.0 if the string is not a valid representation of a floating-point
281
   number.
282
283
   If endptr is non-NULL, try to convert as much of the string as possible.
284
   If no initial segment of the string is the valid representation of a
285
   floating-point number then *endptr is set to point to the beginning of the
286
   string, -1.0 is returned and again ValueError is raised.
287
288
   On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
289
   if overflow_exception is NULL then +-Py_INFINITY is returned, and no Python
290
   exception is raised.  Otherwise, overflow_exception should point to
291
   a Python exception, this exception will be raised, -1.0 will be returned,
292
   and *endptr will point just past the end of the converted value.
293
294
   If any other failure occurs (for example lack of memory), -1.0 is returned
295
   and the appropriate Python exception will have been set.
296
*/
297
298
double
299
PyOS_string_to_double(const char *s,
300
                      char **endptr,
301
                      PyObject *overflow_exception)
302
636k
{
303
636k
    double x, result=-1.0;
304
636k
    char *fail_pos;
305
306
636k
    errno = 0;
307
636k
    x = _PyOS_ascii_strtod(s, &fail_pos);
308
309
636k
    if (errno == ENOMEM) {
310
0
        PyErr_NoMemory();
311
0
        fail_pos = (char *)s;
312
0
    }
313
636k
    else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
314
14
        PyErr_Format(PyExc_ValueError,
315
14
                      "could not convert string to float: "
316
14
                      "'%.200s'", s);
317
636k
    else if (fail_pos == s)
318
1
        PyErr_Format(PyExc_ValueError,
319
1
                      "could not convert string to float: "
320
1
                      "'%.200s'", s);
321
636k
    else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
322
0
        PyErr_Format(overflow_exception,
323
0
                      "value too large to convert to float: "
324
0
                      "'%.200s'", s);
325
636k
    else
326
636k
        result = x;
327
328
636k
    if (endptr != NULL)
329
585k
        *endptr = fail_pos;
330
636k
    return result;
331
636k
}
332
333
/* Remove underscores that follow the underscore placement rule from
334
   the string and then call the `innerfunc` function on the result.
335
   It should return a new object or NULL on exception.
336
337
   `what` is used for the error message emitted when underscores are detected
338
   that don't follow the rule. `arg` is an opaque pointer passed to the inner
339
   function.
340
341
   This is used to implement underscore-agnostic conversion for floats
342
   and complex numbers.
343
*/
344
PyObject *
345
_Py_string_to_number_with_underscores(
346
    const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
347
    PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
348
575k
{
349
575k
    char prev;
350
575k
    const char *p, *last;
351
575k
    char *dup, *end;
352
575k
    PyObject *result;
353
354
575k
    assert(s[orig_len] == '\0');
355
356
575k
    if (strchr(s, '_') == NULL) {
357
575k
        return innerfunc(s, orig_len, arg);
358
575k
    }
359
360
0
    dup = PyMem_Malloc(orig_len + 1);
361
0
    if (dup == NULL) {
362
0
        return PyErr_NoMemory();
363
0
    }
364
0
    end = dup;
365
0
    prev = '\0';
366
0
    last = s + orig_len;
367
0
    for (p = s; *p; p++) {
368
0
        if (*p == '_') {
369
            /* Underscores are only allowed after digits. */
370
0
            if (!(prev >= '0' && prev <= '9')) {
371
0
                goto error;
372
0
            }
373
0
        }
374
0
        else {
375
0
            *end++ = *p;
376
            /* Underscores are only allowed before digits. */
377
0
            if (prev == '_' && !(*p >= '0' && *p <= '9')) {
378
0
                goto error;
379
0
            }
380
0
        }
381
0
        prev = *p;
382
0
    }
383
    /* Underscores are not allowed at the end. */
384
0
    if (prev == '_') {
385
0
        goto error;
386
0
    }
387
    /* No embedded NULs allowed. */
388
0
    if (p != last) {
389
0
        goto error;
390
0
    }
391
0
    *end = '\0';
392
0
    result = innerfunc(dup, end - dup, arg);
393
0
    PyMem_Free(dup);
394
0
    return result;
395
396
0
  error:
397
0
    PyMem_Free(dup);
398
0
    PyErr_Format(PyExc_ValueError,
399
0
                 "could not convert string to %s: "
400
0
                 "%R", what, obj);
401
0
    return NULL;
402
0
}
403
404
#if _PY_SHORT_FLOAT_REPR == 0
405
406
/* Given a string that may have a decimal point in the current
407
   locale, change it back to a dot.  Since the string cannot get
408
   longer, no need for a maximum buffer size parameter. */
409
Py_LOCAL_INLINE(void)
410
change_decimal_from_locale_to_dot(char* buffer)
411
{
412
    struct lconv *locale_data = localeconv();
413
    const char *decimal_point = locale_data->decimal_point;
414
415
    if (decimal_point[0] != '.' || decimal_point[1] != 0) {
416
        size_t decimal_point_len = strlen(decimal_point);
417
418
        if (*buffer == '+' || *buffer == '-')
419
            buffer++;
420
        while (Py_ISDIGIT(*buffer))
421
            buffer++;
422
        if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
423
            *buffer = '.';
424
            buffer++;
425
            if (decimal_point_len > 1) {
426
                /* buffer needs to get smaller */
427
                size_t rest_len = strlen(buffer +
428
                                     (decimal_point_len - 1));
429
                memmove(buffer,
430
                    buffer + (decimal_point_len - 1),
431
                    rest_len);
432
                buffer[rest_len] = 0;
433
            }
434
        }
435
    }
436
}
437
438
439
/* From the C99 standard, section 7.19.6:
440
The exponent always contains at least two digits, and only as many more digits
441
as necessary to represent the exponent.
442
*/
443
#define MIN_EXPONENT_DIGITS 2
444
445
/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
446
   in length. */
447
Py_LOCAL_INLINE(void)
448
ensure_minimum_exponent_length(char* buffer, size_t buf_size)
449
{
450
    char *p = strpbrk(buffer, "eE");
451
    if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
452
        char *start = p + 2;
453
        int exponent_digit_cnt = 0;
454
        int leading_zero_cnt = 0;
455
        int in_leading_zeros = 1;
456
        int significant_digit_cnt;
457
458
        /* Skip over the exponent and the sign. */
459
        p += 2;
460
461
        /* Find the end of the exponent, keeping track of leading
462
           zeros. */
463
        while (*p && Py_ISDIGIT(*p)) {
464
            if (in_leading_zeros && *p == '0')
465
                ++leading_zero_cnt;
466
            if (*p != '0')
467
                in_leading_zeros = 0;
468
            ++p;
469
            ++exponent_digit_cnt;
470
        }
471
472
        significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
473
        if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
474
            /* If there are 2 exactly digits, we're done,
475
               regardless of what they contain */
476
        }
477
        else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
478
            int extra_zeros_cnt;
479
480
            /* There are more than 2 digits in the exponent.  See
481
               if we can delete some of the leading zeros */
482
            if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
483
                significant_digit_cnt = MIN_EXPONENT_DIGITS;
484
            extra_zeros_cnt = exponent_digit_cnt -
485
                significant_digit_cnt;
486
487
            /* Delete extra_zeros_cnt worth of characters from the
488
               front of the exponent */
489
            assert(extra_zeros_cnt >= 0);
490
491
            /* Add one to significant_digit_cnt to copy the
492
               trailing 0 byte, thus setting the length */
493
            memmove(start,
494
                start + extra_zeros_cnt,
495
                significant_digit_cnt + 1);
496
        }
497
        else {
498
            /* If there are fewer than 2 digits, add zeros
499
               until there are 2, if there's enough room */
500
            int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
501
            if (start + zeros + exponent_digit_cnt + 1
502
                  < buffer + buf_size) {
503
                memmove(start + zeros, start,
504
                    exponent_digit_cnt + 1);
505
                memset(start, '0', zeros);
506
            }
507
        }
508
    }
509
}
510
511
/* Remove trailing zeros after the decimal point from a numeric string; also
512
   remove the decimal point if all digits following it are zero.  The numeric
513
   string must end in '\0', and should not have any leading or trailing
514
   whitespace.  Assumes that the decimal point is '.'. */
515
Py_LOCAL_INLINE(void)
516
remove_trailing_zeros(char *buffer)
517
{
518
    char *old_fraction_end, *new_fraction_end, *end, *p;
519
520
    p = buffer;
521
    if (*p == '-' || *p == '+')
522
        /* Skip leading sign, if present */
523
        ++p;
524
    while (Py_ISDIGIT(*p))
525
        ++p;
526
527
    /* if there's no decimal point there's nothing to do */
528
    if (*p++ != '.')
529
        return;
530
531
    /* scan any digits after the point */
532
    while (Py_ISDIGIT(*p))
533
        ++p;
534
    old_fraction_end = p;
535
536
    /* scan up to ending '\0' */
537
    while (*p != '\0')
538
        p++;
539
    /* +1 to make sure that we move the null byte as well */
540
    end = p+1;
541
542
    /* scan back from fraction_end, looking for removable zeros */
543
    p = old_fraction_end;
544
    while (*(p-1) == '0')
545
        --p;
546
    /* and remove point if we've got that far */
547
    if (*(p-1) == '.')
548
        --p;
549
    new_fraction_end = p;
550
551
    memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
552
}
553
554
/* Ensure that buffer has a decimal point in it.  The decimal point will not
555
   be in the current locale, it will always be '.'. Don't add a decimal point
556
   if an exponent is present.  Also, convert to exponential notation where
557
   adding a '.0' would produce too many significant digits (see issue 5864).
558
559
   Returns a pointer to the fixed buffer, or NULL on failure.
560
*/
561
Py_LOCAL_INLINE(char *)
562
ensure_decimal_point(char* buffer, size_t buf_size, int precision)
563
{
564
    int digit_count, insert_count = 0, convert_to_exp = 0;
565
    const char *chars_to_insert;
566
    char *digits_start;
567
568
    /* search for the first non-digit character */
569
    char *p = buffer;
570
    if (*p == '-' || *p == '+')
571
        /* Skip leading sign, if present.  I think this could only
572
           ever be '-', but it can't hurt to check for both. */
573
        ++p;
574
    digits_start = p;
575
    while (*p && Py_ISDIGIT(*p))
576
        ++p;
577
    digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
578
579
    if (*p == '.') {
580
        if (Py_ISDIGIT(*(p+1))) {
581
            /* Nothing to do, we already have a decimal
582
               point and a digit after it */
583
        }
584
        else {
585
            /* We have a decimal point, but no following
586
               digit.  Insert a zero after the decimal. */
587
            /* can't ever get here via PyOS_double_to_string */
588
            assert(precision == -1);
589
            ++p;
590
            chars_to_insert = "0";
591
            insert_count = 1;
592
        }
593
    }
594
    else if (!(*p == 'e' || *p == 'E')) {
595
        /* Don't add ".0" if we have an exponent. */
596
        if (digit_count == precision) {
597
            /* issue 5864: don't add a trailing .0 in the case
598
               where the '%g'-formatted result already has as many
599
               significant digits as were requested.  Switch to
600
               exponential notation instead. */
601
            convert_to_exp = 1;
602
            /* no exponent, no point, and we shouldn't land here
603
               for infs and nans, so we must be at the end of the
604
               string. */
605
            assert(*p == '\0');
606
        }
607
        else {
608
            assert(precision == -1 || digit_count < precision);
609
            chars_to_insert = ".0";
610
            insert_count = 2;
611
        }
612
    }
613
    if (insert_count) {
614
        size_t buf_len = strlen(buffer);
615
        if (buf_len + insert_count + 1 >= buf_size) {
616
            /* If there is not enough room in the buffer
617
               for the additional text, just skip it.  It's
618
               not worth generating an error over. */
619
        }
620
        else {
621
            memmove(p + insert_count, p,
622
                buffer + strlen(buffer) - p + 1);
623
            memcpy(p, chars_to_insert, insert_count);
624
        }
625
    }
626
    if (convert_to_exp) {
627
        int written;
628
        size_t buf_avail;
629
        p = digits_start;
630
        /* insert decimal point */
631
        assert(digit_count >= 1);
632
        memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
633
        p[1] = '.';
634
        p += digit_count+1;
635
        assert(p <= buf_size+buffer);
636
        buf_avail = buf_size+buffer-p;
637
        if (buf_avail == 0)
638
            return NULL;
639
        /* Add exponent.  It's okay to use lower case 'e': we only
640
           arrive here as a result of using the empty format code or
641
           repr/str builtins and those never want an upper case 'E' */
642
        written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
643
        if (!(0 <= written &&
644
              written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
645
            /* output truncated, or something else bad happened */
646
            return NULL;
647
        remove_trailing_zeros(buffer);
648
    }
649
    return buffer;
650
}
651
652
/* see FORMATBUFLEN in unicodeobject.c */
653
#define FLOAT_FORMATBUFLEN 120
654
655
/**
656
 * _PyOS_ascii_formatd:
657
 * @buffer: A buffer to place the resulting string in
658
 * @buf_size: The length of the buffer.
659
 * @format: The printf()-style format to use for the
660
 *          code to use for converting.
661
 * @d: The #gdouble to convert
662
 * @precision: The precision to use when formatting.
663
 *
664
 * Converts a #gdouble to a string, using the '.' as
665
 * decimal point. To format the number you pass in
666
 * a printf()-style format string. Allowed conversion
667
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
668
 *
669
 * 'Z' is the same as 'g', except it always has a decimal and
670
 *     at least one digit after the decimal.
671
 *
672
 * Return value: The pointer to the buffer with the converted string.
673
 * On failure returns NULL but does not set any Python exception.
674
 **/
675
static char *
676
_PyOS_ascii_formatd(char       *buffer,
677
                   size_t      buf_size,
678
                   const char *format,
679
                   double      d,
680
                   int         precision)
681
{
682
    char format_char;
683
    size_t format_len = strlen(format);
684
685
    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
686
       also with at least one character past the decimal. */
687
    char tmp_format[FLOAT_FORMATBUFLEN];
688
689
    /* The last character in the format string must be the format char */
690
    format_char = format[format_len - 1];
691
692
    if (format[0] != '%')
693
        return NULL;
694
695
    /* I'm not sure why this test is here.  It's ensuring that the format
696
       string after the first character doesn't have a single quote, a
697
       lowercase l, or a percent. This is the reverse of the commented-out
698
       test about 10 lines ago. */
699
    if (strpbrk(format + 1, "'l%"))
700
        return NULL;
701
702
    /* Also curious about this function is that it accepts format strings
703
       like "%xg", which are invalid for floats.  In general, the
704
       interface to this function is not very good, but changing it is
705
       difficult because it's a public API. */
706
707
    if (!(format_char == 'e' || format_char == 'E' ||
708
          format_char == 'f' || format_char == 'F' ||
709
          format_char == 'g' || format_char == 'G' ||
710
          format_char == 'Z'))
711
        return NULL;
712
713
    /* Map 'Z' format_char to 'g', by copying the format string and
714
       replacing the final char with a 'g' */
715
    if (format_char == 'Z') {
716
        if (format_len + 1 >= sizeof(tmp_format)) {
717
            /* The format won't fit in our copy.  Error out.  In
718
               practice, this will never happen and will be
719
               detected by returning NULL */
720
            return NULL;
721
        }
722
        strcpy(tmp_format, format);
723
        tmp_format[format_len - 1] = 'g';
724
        format = tmp_format;
725
    }
726
727
728
    /* Have PyOS_snprintf do the hard work */
729
    PyOS_snprintf(buffer, buf_size, format, d);
730
731
    /* Do various fixups on the return string */
732
733
    /* Get the current locale, and find the decimal point string.
734
       Convert that string back to a dot. */
735
    change_decimal_from_locale_to_dot(buffer);
736
737
    /* If an exponent exists, ensure that the exponent is at least
738
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
739
       for the extra zeros.  Also, if there are more than
740
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
741
       back to MIN_EXPONENT_DIGITS */
742
    ensure_minimum_exponent_length(buffer, buf_size);
743
744
    /* If format_char is 'Z', make sure we have at least one character
745
       after the decimal point (and make sure we have a decimal point);
746
       also switch to exponential notation in some edge cases where the
747
       extra character would produce more significant digits that we
748
       really want. */
749
    if (format_char == 'Z')
750
        buffer = ensure_decimal_point(buffer, buf_size, precision);
751
752
    return buffer;
753
}
754
755
/* The fallback code to use if _Py_dg_dtoa is not available. */
756
757
char * PyOS_double_to_string(double val,
758
                                         char format_code,
759
                                         int precision,
760
                                         int flags,
761
                                         int *type)
762
{
763
    char format[32];
764
    Py_ssize_t bufsize;
765
    char *buf;
766
    int t, exp;
767
    int upper = 0;
768
769
    /* Validate format_code, and map upper and lower case */
770
    switch (format_code) {
771
    case 'e':          /* exponent */
772
    case 'f':          /* fixed */
773
    case 'g':          /* general */
774
        break;
775
    case 'E':
776
        upper = 1;
777
        format_code = 'e';
778
        break;
779
    case 'F':
780
        upper = 1;
781
        format_code = 'f';
782
        break;
783
    case 'G':
784
        upper = 1;
785
        format_code = 'g';
786
        break;
787
    case 'r':          /* repr format */
788
        /* Supplied precision is unused, must be 0. */
789
        if (precision != 0) {
790
            PyErr_BadInternalCall();
791
            return NULL;
792
        }
793
        /* The repr() precision (17 significant decimal digits) is the
794
           minimal number that is guaranteed to have enough precision
795
           so that if the number is read back in the exact same binary
796
           value is recreated.  This is true for IEEE floating point
797
           by design, and also happens to work for all other modern
798
           hardware. */
799
        precision = 17;
800
        format_code = 'g';
801
        break;
802
    default:
803
        PyErr_BadInternalCall();
804
        return NULL;
805
    }
806
807
    /* Here's a quick-and-dirty calculation to figure out how big a buffer
808
       we need.  In general, for a finite float we need:
809
810
         1 byte for each digit of the decimal significand, and
811
812
         1 for a possible sign
813
         1 for a possible decimal point
814
         2 for a possible [eE][+-]
815
         1 for each digit of the exponent;  if we allow 19 digits
816
           total then we're safe up to exponents of 2**63.
817
         1 for the trailing nul byte
818
819
       This gives a total of 24 + the number of digits in the significand,
820
       and the number of digits in the significand is:
821
822
         for 'g' format: at most precision, except possibly
823
           when precision == 0, when it's 1.
824
         for 'e' format: precision+1
825
         for 'f' format: precision digits after the point, at least 1
826
           before.  To figure out how many digits appear before the point
827
           we have to examine the size of the number.  If fabs(val) < 1.0
828
           then there will be only one digit before the point.  If
829
           fabs(val) >= 1.0, then there are at most
830
831
         1+floor(log10(ceiling(fabs(val))))
832
833
           digits before the point (where the 'ceiling' allows for the
834
           possibility that the rounding rounds the integer part of val
835
           up).  A safe upper bound for the above quantity is
836
           1+floor(exp/3), where exp is the unique integer such that 0.5
837
           <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
838
           frexp.
839
840
       So we allow room for precision+1 digits for all formats, plus an
841
       extra floor(exp/3) digits for 'f' format.
842
843
    */
844
845
    if (isnan(val) || isinf(val))
846
        /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
847
        bufsize = 5;
848
    else {
849
        bufsize = 25 + precision;
850
        if (format_code == 'f' && fabs(val) >= 1.0) {
851
            frexp(val, &exp);
852
            bufsize += exp/3;
853
        }
854
    }
855
856
    buf = PyMem_Malloc(bufsize);
857
    if (buf == NULL) {
858
        PyErr_NoMemory();
859
        return NULL;
860
    }
861
862
    /* Handle nan and inf. */
863
    if (isnan(val)) {
864
        strcpy(buf, "nan");
865
        t = Py_DTST_NAN;
866
    } else if (isinf(val)) {
867
        if (copysign(1., val) == 1.)
868
            strcpy(buf, "inf");
869
        else
870
            strcpy(buf, "-inf");
871
        t = Py_DTST_INFINITE;
872
    } else {
873
        t = Py_DTST_FINITE;
874
        if (flags & Py_DTSF_ADD_DOT_0)
875
            format_code = 'Z';
876
877
        PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
878
                      (flags & Py_DTSF_ALT ? "#" : ""), precision,
879
                      format_code);
880
        _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
881
882
        if (flags & Py_DTSF_NO_NEG_0 && buf[0] == '-') {
883
            char *buf2 = buf + 1;
884
            while (*buf2 == '0' || *buf2 == '.') {
885
                ++buf2;
886
            }
887
            if (*buf2 == 0 || *buf2 == 'e') {
888
                size_t len = buf2 - buf + strlen(buf2);
889
                assert(buf[len] == 0);
890
                memmove(buf, buf+1, len);
891
            }
892
        }
893
    }
894
895
    /* Add sign when requested.  It's convenient (esp. when formatting
896
     complex numbers) to include a sign even for inf and nan. */
897
    if (flags & Py_DTSF_SIGN && buf[0] != '-') {
898
        size_t len = strlen(buf);
899
        /* the bufsize calculations above should ensure that we've got
900
           space to add a sign */
901
        assert((size_t)bufsize >= len+2);
902
        memmove(buf+1, buf, len+1);
903
        buf[0] = '+';
904
    }
905
    if (upper) {
906
        /* Convert to upper case. */
907
        char *p1;
908
        for (p1 = buf; *p1; p1++)
909
            *p1 = Py_TOUPPER(*p1);
910
    }
911
912
    if (type)
913
        *type = t;
914
    return buf;
915
}
916
917
#else  // _PY_SHORT_FLOAT_REPR == 1
918
919
/* _Py_dg_dtoa is available. */
920
921
/* The spellings of "inf", "nan" and the exponent marker are kept in two
   parallel lookup tables (lower case and upper case) so that no
   locale-independent upper-casing routine has to be written.  The OFS_*
   macros are the indexes into either table. */

#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them */
static const char * const lc_float_strings[] = {
    [OFS_INF] = "inf",
    [OFS_NAN] = "nan",
    [OFS_E]   = "e",
};
static const char * const uc_float_strings[] = {
    [OFS_INF] = "INF",
    [OFS_NAN] = "NAN",
    [OFS_E]   = "E",
};
938
939
940
/* Convert a double d to a string, and return a PyMem_Malloc'd block of
941
   memory contain the resulting string.
942
943
   Arguments:
944
     d is the double to be converted
945
     format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
946
       correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
947
     mode is one of '0', '2' or '3', and is completely determined by
948
       format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
949
     precision is the desired precision
950
     always_add_sign is nonzero if a '+' sign should be included for positive
951
       numbers
952
     add_dot_0_if_integer is nonzero if integers in non-exponential form
953
       should have ".0" added.  Only applies to format codes 'r' and 'g'.
954
     use_alt_formatting is nonzero if alternative formatting should be
955
       used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
956
       at most one of use_alt_formatting and add_dot_0_if_integer should
957
       be nonzero.
958
     type, if non-NULL, will be set to one of these constants to identify
959
       the type of the 'd' argument:
960
     Py_DTST_FINITE
961
     Py_DTST_INFINITE
962
     Py_DTST_NAN
963
964
   Returns a PyMem_Malloc'd block of memory containing the resulting string,
965
    or NULL on error. If NULL is returned, the Python error has been set.
966
 */
967
968
static char *
969
format_float_short(double d, char format_code,
970
                   int mode, int precision,
971
                   int always_add_sign, int add_dot_0_if_integer,
972
                   int use_alt_formatting, int no_negative_zero,
973
                   const char * const *float_strings, int *type)
974
55.1k
{
975
55.1k
    char *buf = NULL;
976
55.1k
    char *p = NULL;
977
55.1k
    Py_ssize_t bufsize = 0;
978
55.1k
    char *digits, *digits_end;
979
55.1k
    int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
980
55.1k
    Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
981
55.1k
    _Py_SET_53BIT_PRECISION_HEADER;
982
983
    /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
984
       Must be matched by a call to _Py_dg_freedtoa. */
985
55.1k
    _Py_SET_53BIT_PRECISION_START;
986
55.1k
    digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
987
55.1k
                         &digits_end);
988
55.1k
    _Py_SET_53BIT_PRECISION_END;
989
990
55.1k
    decpt = (Py_ssize_t)decpt_as_int;
991
55.1k
    if (digits == NULL) {
992
        /* The only failure mode is no memory. */
993
0
        PyErr_NoMemory();
994
0
        goto exit;
995
0
    }
996
55.1k
    assert(digits_end != NULL && digits_end >= digits);
997
55.1k
    digits_len = digits_end - digits;
998
999
55.1k
    if (no_negative_zero && sign == 1 &&
1000
55.1k
            (digits_len == 0 || (digits_len == 1 && digits[0] == '0'))) {
1001
0
        sign = 0;
1002
0
    }
1003
1004
55.1k
    if (digits_len && !Py_ISDIGIT(digits[0])) {
1005
        /* Infinities and nans here; adapt Gay's output,
1006
           so convert Infinity to inf and NaN to nan, and
1007
           ignore sign of nan. Then return. */
1008
1009
        /* ignore the actual sign of a nan */
1010
692
        if (digits[0] == 'n' || digits[0] == 'N')
1011
0
            sign = 0;
1012
1013
        /* We only need 5 bytes to hold the result "+inf\0" . */
1014
692
        bufsize = 5; /* Used later in an assert. */
1015
692
        buf = (char *)PyMem_Malloc(bufsize);
1016
692
        if (buf == NULL) {
1017
0
            PyErr_NoMemory();
1018
0
            goto exit;
1019
0
        }
1020
692
        p = buf;
1021
1022
692
        if (sign == 1) {
1023
0
            *p++ = '-';
1024
0
        }
1025
692
        else if (always_add_sign) {
1026
0
            *p++ = '+';
1027
0
        }
1028
692
        if (digits[0] == 'i' || digits[0] == 'I') {
1029
692
            strncpy(p, float_strings[OFS_INF], 3);
1030
692
            p += 3;
1031
1032
692
            if (type)
1033
0
                *type = Py_DTST_INFINITE;
1034
692
        }
1035
0
        else if (digits[0] == 'n' || digits[0] == 'N') {
1036
0
            strncpy(p, float_strings[OFS_NAN], 3);
1037
0
            p += 3;
1038
1039
0
            if (type)
1040
0
                *type = Py_DTST_NAN;
1041
0
        }
1042
0
        else {
1043
            /* shouldn't get here: Gay's code should always return
1044
               something starting with a digit, an 'I',  or 'N' */
1045
0
            Py_UNREACHABLE();
1046
0
        }
1047
692
        goto exit;
1048
692
    }
1049
1050
    /* The result must be finite (not inf or nan). */
1051
54.4k
    if (type)
1052
0
        *type = Py_DTST_FINITE;
1053
1054
1055
    /* We got digits back, format them.  We may need to pad 'digits'
1056
       either on the left or right (or both) with extra zeros, so in
1057
       general the resulting string has the form
1058
1059
         [<sign>]<zeros><digits><zeros>[<exponent>]
1060
1061
       where either of the <zeros> pieces could be empty, and there's a
1062
       decimal point that could appear either in <digits> or in the
1063
       leading or trailing <zeros>.
1064
1065
       Imagine an infinite 'virtual' string vdigits, consisting of the
1066
       string 'digits' (starting at index 0) padded on both the left and
1067
       right with infinite strings of zeros.  We want to output a slice
1068
1069
         vdigits[vdigits_start : vdigits_end]
1070
1071
       of this virtual string.  Thus if vdigits_start < 0 then we'll end
1072
       up producing some leading zeros; if vdigits_end > digits_len there
1073
       will be trailing zeros in the output.  The next section of code
1074
       determines whether to use an exponent or not, figures out the
1075
       position 'decpt' of the decimal point, and computes 'vdigits_start'
1076
       and 'vdigits_end'. */
1077
54.4k
    vdigits_end = digits_len;
1078
54.4k
    switch (format_code) {
1079
0
    case 'e':
1080
0
        use_exp = 1;
1081
0
        vdigits_end = precision;
1082
0
        break;
1083
0
    case 'f':
1084
0
        vdigits_end = decpt + precision;
1085
0
        break;
1086
0
    case 'g':
1087
0
        if (decpt <= -4 || decpt >
1088
0
            (add_dot_0_if_integer ? precision-1 : precision))
1089
0
            use_exp = 1;
1090
0
        if (use_alt_formatting)
1091
0
            vdigits_end = precision;
1092
0
        break;
1093
54.4k
    case 'r':
1094
        /* convert to exponential format at 1e16.  We used to convert
1095
           at 1e17, but that gives odd-looking results for some values
1096
           when a 16-digit 'shortest' repr is padded with bogus zeros.
1097
           For example, repr(2e16+8) would give 20000000000000010.0;
1098
           the true value is 20000000000000008.0. */
1099
54.4k
        if (decpt <= -4 || decpt > 16)
1100
23.4k
            use_exp = 1;
1101
54.4k
        break;
1102
0
    default:
1103
0
        PyErr_BadInternalCall();
1104
0
        goto exit;
1105
54.4k
    }
1106
1107
    /* if using an exponent, reset decimal point position to 1 and adjust
1108
       exponent accordingly.*/
1109
54.4k
    if (use_exp) {
1110
23.4k
        exp = (int)decpt - 1;
1111
23.4k
        decpt = 1;
1112
23.4k
    }
1113
    /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1114
       decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1115
54.4k
    vdigits_start = decpt <= 0 ? decpt-1 : 0;
1116
54.4k
    if (!use_exp && add_dot_0_if_integer)
1117
28.7k
        vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1118
25.7k
    else
1119
25.7k
        vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1120
1121
    /* double check inequalities */
1122
54.4k
    assert(vdigits_start <= 0 &&
1123
54.4k
           0 <= digits_len &&
1124
54.4k
           digits_len <= vdigits_end);
1125
    /* decimal point should be in (vdigits_start, vdigits_end] */
1126
54.4k
    assert(vdigits_start < decpt && decpt <= vdigits_end);
1127
1128
    /* Compute an upper bound how much memory we need. This might be a few
1129
       chars too long, but no big deal. */
1130
54.4k
    bufsize =
1131
        /* sign, decimal point and trailing 0 byte */
1132
54.4k
        3 +
1133
1134
        /* total digit count (including zero padding on both sides) */
1135
54.4k
        (vdigits_end - vdigits_start) +
1136
1137
        /* exponent "e+100", max 3 numerical digits */
1138
54.4k
        (use_exp ? 5 : 0);
1139
1140
    /* Now allocate the memory and initialize p to point to the start of
1141
       it. */
1142
54.4k
    buf = (char *)PyMem_Malloc(bufsize);
1143
54.4k
    if (buf == NULL) {
1144
0
        PyErr_NoMemory();
1145
0
        goto exit;
1146
0
    }
1147
54.4k
    p = buf;
1148
1149
    /* Add a negative sign if negative, and a plus sign if non-negative
1150
       and always_add_sign is true. */
1151
54.4k
    if (sign == 1)
1152
14.1k
        *p++ = '-';
1153
40.3k
    else if (always_add_sign)
1154
0
        *p++ = '+';
1155
1156
    /* note that exactly one of the three 'if' conditions is true,
1157
       so we include exactly one decimal point */
1158
    /* Zero padding on left of digit string */
1159
54.4k
    if (decpt <= 0) {
1160
6.80k
        memset(p, '0', decpt-vdigits_start);
1161
6.80k
        p += decpt - vdigits_start;
1162
6.80k
        *p++ = '.';
1163
6.80k
        memset(p, '0', 0-decpt);
1164
6.80k
        p += 0-decpt;
1165
6.80k
    }
1166
47.6k
    else {
1167
47.6k
        memset(p, '0', 0-vdigits_start);
1168
47.6k
        p += 0 - vdigits_start;
1169
47.6k
    }
1170
1171
    /* Digits, with included decimal point */
1172
54.4k
    if (0 < decpt && decpt <= digits_len) {
1173
37.5k
        strncpy(p, digits, decpt-0);
1174
37.5k
        p += decpt-0;
1175
37.5k
        *p++ = '.';
1176
37.5k
        strncpy(p, digits+decpt, digits_len-decpt);
1177
37.5k
        p += digits_len-decpt;
1178
37.5k
    }
1179
16.9k
    else {
1180
16.9k
        strncpy(p, digits, digits_len);
1181
16.9k
        p += digits_len;
1182
16.9k
    }
1183
1184
    /* And zeros on the right */
1185
54.4k
    if (digits_len < decpt) {
1186
10.0k
        memset(p, '0', decpt-digits_len);
1187
10.0k
        p += decpt-digits_len;
1188
10.0k
        *p++ = '.';
1189
10.0k
        memset(p, '0', vdigits_end-decpt);
1190
10.0k
        p += vdigits_end-decpt;
1191
10.0k
    }
1192
44.3k
    else {
1193
44.3k
        memset(p, '0', vdigits_end-digits_len);
1194
44.3k
        p += vdigits_end-digits_len;
1195
44.3k
    }
1196
1197
    /* Delete a trailing decimal pt unless using alternative formatting. */
1198
54.4k
    if (p[-1] == '.' && !use_alt_formatting)
1199
18.0k
        p--;
1200
1201
    /* Now that we've done zero padding, add an exponent if needed. */
1202
54.4k
    if (use_exp) {
1203
23.4k
        *p++ = float_strings[OFS_E][0];
1204
23.4k
        exp_len = sprintf(p, "%+.02d", exp);
1205
23.4k
        p += exp_len;
1206
23.4k
    }
1207
55.1k
  exit:
1208
55.1k
    if (buf) {
1209
55.1k
        *p = '\0';
1210
        /* It's too late if this fails, as we've already stepped on
1211
           memory that isn't ours. But it's an okay debugging test. */
1212
55.1k
        assert(p-buf < bufsize);
1213
55.1k
    }
1214
55.1k
    if (digits)
1215
55.1k
        _Py_dg_freedtoa(digits);
1216
1217
55.1k
    return buf;
1218
54.4k
}
1219
1220
1221
char * PyOS_double_to_string(double val,
1222
                                         char format_code,
1223
                                         int precision,
1224
                                         int flags,
1225
                                         int *type)
1226
55.1k
{
1227
55.1k
    const char * const *float_strings = lc_float_strings;
1228
55.1k
    int mode;
1229
1230
    /* Validate format_code, and map upper and lower case. Compute the
1231
       mode and make any adjustments as needed. */
1232
55.1k
    switch (format_code) {
1233
    /* exponent */
1234
0
    case 'E':
1235
0
        float_strings = uc_float_strings;
1236
0
        format_code = 'e';
1237
0
        _Py_FALLTHROUGH;
1238
0
    case 'e':
1239
0
        mode = 2;
1240
0
        precision++;
1241
0
        break;
1242
1243
    /* fixed */
1244
0
    case 'F':
1245
0
        float_strings = uc_float_strings;
1246
0
        format_code = 'f';
1247
0
        _Py_FALLTHROUGH;
1248
0
    case 'f':
1249
0
        mode = 3;
1250
0
        break;
1251
1252
    /* general */
1253
0
    case 'G':
1254
0
        float_strings = uc_float_strings;
1255
0
        format_code = 'g';
1256
0
        _Py_FALLTHROUGH;
1257
0
    case 'g':
1258
0
        mode = 2;
1259
        /* precision 0 makes no sense for 'g' format; interpret as 1 */
1260
0
        if (precision == 0)
1261
0
            precision = 1;
1262
0
        break;
1263
1264
    /* repr format */
1265
55.1k
    case 'r':
1266
55.1k
        mode = 0;
1267
        /* Supplied precision is unused, must be 0. */
1268
55.1k
        if (precision != 0) {
1269
0
            PyErr_BadInternalCall();
1270
0
            return NULL;
1271
0
        }
1272
55.1k
        break;
1273
1274
55.1k
    default:
1275
0
        PyErr_BadInternalCall();
1276
0
        return NULL;
1277
55.1k
    }
1278
1279
55.1k
    return format_float_short(val, format_code, mode, precision,
1280
55.1k
                              flags & Py_DTSF_SIGN,
1281
55.1k
                              flags & Py_DTSF_ADD_DOT_0,
1282
55.1k
                              flags & Py_DTSF_ALT,
1283
55.1k
                              flags & Py_DTSF_NO_NEG_0,
1284
55.1k
                              float_strings, type);
1285
55.1k
}
1286
#endif  // _PY_SHORT_FLOAT_REPR == 1