Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Python/pystrtod.c
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C; c-file-style: "python" -*- */
2
3
#include <Python.h>
4
#include <locale.h>
5
6
/* Compare the start of s against the lower-case pattern t, ignoring the
   case of the characters in s.  Used for nan and inf detection.

   Returns 1 if every character of t was matched, 0 otherwise.  Only as
   many characters of s as are needed to decide are examined. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        /* t is already lower-case, so only s needs folding. */
        if (Py_TOLOWER(*s) != *t) {
            return 0;
        }
    }
    return 1;
}
18
19
/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
20
   "infinity", with an optional leading sign of "+" or "-".  On success,
21
   return the NaN or Infinity as a double and set *endptr to point just beyond
22
   the successfully parsed portion of the string.  On failure, return -1.0 and
23
   set *endptr to point to the start of the string. */
24
25
#ifndef PY_NO_SHORT_FLOAT_REPR
26
27
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int is_negative = 0;
    double value;

    /* Consume an optional leading sign. */
    switch (*cursor) {
    case '-':
        is_negative = 1;
        cursor++;
        break;
    case '+':
        cursor++;
        break;
    default:
        break;
    }

    if (case_insensitive_match(cursor, "inf")) {
        /* Accept both the short "inf" and the long "infinity" spelling. */
        cursor += 3;
        if (case_insensitive_match(cursor, "inity")) {
            cursor += 5;
        }
        value = _Py_dg_infinity(is_negative);
    }
    else if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        value = _Py_dg_stdnan(is_negative);
    }
    else {
        /* Nothing recognised: rewind so that *endptr == p signals failure. */
        cursor = p;
        value = -1.0;
    }

    *endptr = (char *)cursor;
    return value;
}
59
60
#else
61
62
double
63
_Py_parse_inf_or_nan(const char *p, char **endptr)
64
{
65
    double retval;
66
    const char *s;
67
    int negate = 0;
68
69
    s = p;
70
    if (*s == '-') {
71
        negate = 1;
72
        s++;
73
    }
74
    else if (*s == '+') {
75
        s++;
76
    }
77
    if (case_insensitive_match(s, "inf")) {
78
        s += 3;
79
        if (case_insensitive_match(s, "inity"))
80
            s += 5;
81
        retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
82
    }
83
#ifdef Py_NAN
84
    else if (case_insensitive_match(s, "nan")) {
85
        s += 3;
86
        retval = negate ? -Py_NAN : Py_NAN;
87
    }
88
#endif
89
    else {
90
        s = p;
91
        retval = -1.0;
92
    }
93
    *endptr = (char *)s;
94
    return retval;
95
}
96
97
#endif
98
99
/**
100
 * _PyOS_ascii_strtod:
101
 * @nptr:    the string to convert to a numeric value.
102
 * @endptr:  if non-%NULL, it returns the character after
103
 *           the last character used in the conversion.
104
 *
105
 * Converts a string to a #gdouble value.
106
 * This function behaves like the standard strtod() function
107
 * does in the C locale. It does this without actually
108
 * changing the current locale, since that would not be
109
 * thread-safe.
110
 *
111
 * This function is typically used when reading configuration
112
 * files or other non-user input that should be locale independent.
113
 * To handle input from the user you should normally use the
114
 * locale-sensitive system strtod() function.
115
 *
116
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
117
 * is returned (according to the sign of the value), and %ERANGE is
118
 * stored in %errno. If the correct value would cause underflow,
119
 * zero is returned and %ERANGE is stored in %errno.
120
 * If memory allocation fails, %ENOMEM is stored in %errno.
121
 *
122
 * This function resets %errno before calling strtod() so that
123
 * you can reliably detect overflow and underflow.
124
 *
125
 * Return value: the #gdouble value.
126
 **/
127
128
#ifndef PY_NO_SHORT_FLOAT_REPR
129
130
static double
131
_PyOS_ascii_strtod(const char *nptr, char **endptr)
132
2
{
133
2
    double result;
134
2
    _Py_SET_53BIT_PRECISION_HEADER;
135
136
2
    assert(nptr != NULL);
137
    /* Set errno to zero, so that we can distinguish zero results
138
       and underflows */
139
2
    errno = 0;
140
141
2
    _Py_SET_53BIT_PRECISION_START;
142
2
    result = _Py_dg_strtod(nptr, endptr);
143
2
    _Py_SET_53BIT_PRECISION_END;
144
145
2
    if (*endptr == nptr)
146
        /* string might represent an inf or nan */
147
0
        result = _Py_parse_inf_or_nan(nptr, endptr);
148
149
2
    return result;
150
151
2
}
152
153
#else
154
155
/*
156
   Use system strtod;  since strtod is locale aware, we may
157
   have to first fix the decimal separator.
158
159
   Note that unlike _Py_dg_strtod, the system strtod may not always give
160
   correctly rounded results.
161
*/
162
163
/* Locale-independent strtod built on top of the system strtod.

   nptr must be non-NULL; endptr must be non-NULL and receives a pointer
   just past the parsed text, or nptr itself if nothing could be parsed.
   Infinities and nans are handled by _Py_parse_inf_or_nan.  Hex floats
   and input that does not start (after an optional sign) with a digit
   or '.' are rejected with errno set to EINVAL and a return of -1.0.
   If the temporary copy cannot be allocated, errno is set to ENOMEM and
   *endptr is set to nptr. */
static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725)  */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* fail_pos points into `copy` while decimal_point_pos points
               into the caller's string, so the two must be compared as
               offsets from the start of their own buffers; comparing the
               raw pointers is meaningless.  If parsing stopped beyond
               the decimal point, compensate for the locale's decimal
               point possibly being longer than the single '.' it
               replaced. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* strtod consumed nothing after the (optional) sign */
    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309
310
#endif
311
312
/* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313
   as a string of ASCII characters) to a float.  The string should not have
314
   leading or trailing whitespace.  The conversion is independent of the
315
   current locale.
316
317
   If endptr is NULL, try to convert the whole string.  Raise ValueError and
318
   return -1.0 if the string is not a valid representation of a floating-point
319
   number.
320
321
   If endptr is non-NULL, try to convert as much of the string as possible.
322
   If no initial segment of the string is the valid representation of a
323
   floating-point number then *endptr is set to point to the beginning of the
324
   string, -1.0 is returned and again ValueError is raised.
325
326
   On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327
   if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
328
   exception is raised.  Otherwise, overflow_exception should point to
329
   a Python exception, this exception will be raised, -1.0 will be returned,
330
   and *endptr will point just past the end of the converted value.
331
332
   If any other failure occurs (for example lack of memory), -1.0 is returned
333
   and the appropriate Python exception will have been set.
334
*/
335
336
double
337
PyOS_string_to_double(const char *s,
338
                      char **endptr,
339
                      PyObject *overflow_exception)
340
2
{
341
2
    double x, result=-1.0;
342
2
    char *fail_pos;
343
344
2
    errno = 0;
345
2
    PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
346
2
    x = _PyOS_ascii_strtod(s, &fail_pos);
347
2
    PyFPE_END_PROTECT(x)
348
349
2
    if (errno == ENOMEM) {
350
0
        PyErr_NoMemory();
351
0
        fail_pos = (char *)s;
352
0
    }
353
2
    else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
354
0
        PyErr_Format(PyExc_ValueError,
355
0
                      "could not convert string to float: "
356
0
                      "'%.200s'", s);
357
2
    else if (fail_pos == s)
358
0
        PyErr_Format(PyExc_ValueError,
359
0
                      "could not convert string to float: "
360
0
                      "'%.200s'", s);
361
2
    else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
362
0
        PyErr_Format(overflow_exception,
363
0
                      "value too large to convert to float: "
364
0
                      "'%.200s'", s);
365
2
    else
366
2
        result = x;
367
368
2
    if (endptr != NULL)
369
0
        *endptr = fail_pos;
370
2
    return result;
371
2
}
372
373
/* Remove underscores that follow the underscore placement rule from
374
   the string and then call the `innerfunc` function on the result.
375
   It should return a new object or NULL on exception.
376
377
   `what` is used for the error message emitted when underscores are detected
378
   that don't follow the rule. `arg` is an opaque pointer passed to the inner
379
   function.
380
381
   This is used to implement underscore-agnostic conversion for floats
382
   and complex numbers.
383
*/
384
PyObject *
385
_Py_string_to_number_with_underscores(
386
    const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
387
    PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
388
0
{
389
0
    char prev;
390
0
    const char *p, *last;
391
0
    char *dup, *end;
392
0
    PyObject *result;
393
394
0
    assert(s[orig_len] == '\0');
395
396
0
    if (strchr(s, '_') == NULL) {
397
0
        return innerfunc(s, orig_len, arg);
398
0
    }
399
400
0
    dup = PyMem_Malloc(orig_len + 1);
401
0
    if (dup == NULL) {
402
0
        return PyErr_NoMemory();
403
0
    }
404
0
    end = dup;
405
0
    prev = '\0';
406
0
    last = s + orig_len;
407
0
    for (p = s; *p; p++) {
408
0
        if (*p == '_') {
409
            /* Underscores are only allowed after digits. */
410
0
            if (!(prev >= '0' && prev <= '9')) {
411
0
                goto error;
412
0
            }
413
0
        }
414
0
        else {
415
0
            *end++ = *p;
416
            /* Underscores are only allowed before digits. */
417
0
            if (prev == '_' && !(*p >= '0' && *p <= '9')) {
418
0
                goto error;
419
0
            }
420
0
        }
421
0
        prev = *p;
422
0
    }
423
    /* Underscores are not allowed at the end. */
424
0
    if (prev == '_') {
425
0
        goto error;
426
0
    }
427
    /* No embedded NULs allowed. */
428
0
    if (p != last) {
429
0
        goto error;
430
0
    }
431
0
    *end = '\0';
432
0
    result = innerfunc(dup, end - dup, arg);
433
0
    PyMem_Free(dup);
434
0
    return result;
435
436
0
  error:
437
0
    PyMem_Free(dup);
438
0
    PyErr_Format(PyExc_ValueError,
439
0
                 "could not convert string to %s: "
440
0
                 "%R", what, obj);
441
0
    return NULL;
442
0
}
443
444
#ifdef PY_NO_SHORT_FLOAT_REPR
445
446
/* Given a string that may have a decimal point in the current
447
   locale, change it back to a dot.  Since the string cannot get
448
   longer, no need for a maximum buffer size parameter. */
449
Py_LOCAL_INLINE(void)
450
change_decimal_from_locale_to_dot(char* buffer)
451
{
452
    struct lconv *locale_data = localeconv();
453
    const char *decimal_point = locale_data->decimal_point;
454
455
    if (decimal_point[0] != '.' || decimal_point[1] != 0) {
456
        size_t decimal_point_len = strlen(decimal_point);
457
458
        if (*buffer == '+' || *buffer == '-')
459
            buffer++;
460
        while (Py_ISDIGIT(*buffer))
461
            buffer++;
462
        if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
463
            *buffer = '.';
464
            buffer++;
465
            if (decimal_point_len > 1) {
466
                /* buffer needs to get smaller */
467
                size_t rest_len = strlen(buffer +
468
                                     (decimal_point_len - 1));
469
                memmove(buffer,
470
                    buffer + (decimal_point_len - 1),
471
                    rest_len);
472
                buffer[rest_len] = 0;
473
            }
474
        }
475
    }
476
}
477
478
479
/* From the C99 standard, section 7.19.6:
480
The exponent always contains at least two digits, and only as many more digits
481
as necessary to represent the exponent.
482
*/
483
#define MIN_EXPONENT_DIGITS 2
484
485
/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
486
   in length. */
487
Py_LOCAL_INLINE(void)
488
ensure_minimum_exponent_length(char* buffer, size_t buf_size)
489
{
490
    char *p = strpbrk(buffer, "eE");
491
    if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
492
        char *start = p + 2;
493
        int exponent_digit_cnt = 0;
494
        int leading_zero_cnt = 0;
495
        int in_leading_zeros = 1;
496
        int significant_digit_cnt;
497
498
        /* Skip over the exponent and the sign. */
499
        p += 2;
500
501
        /* Find the end of the exponent, keeping track of leading
502
           zeros. */
503
        while (*p && Py_ISDIGIT(*p)) {
504
            if (in_leading_zeros && *p == '0')
505
                ++leading_zero_cnt;
506
            if (*p != '0')
507
                in_leading_zeros = 0;
508
            ++p;
509
            ++exponent_digit_cnt;
510
        }
511
512
        significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
513
        if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
514
            /* If there are 2 exactly digits, we're done,
515
               regardless of what they contain */
516
        }
517
        else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
518
            int extra_zeros_cnt;
519
520
            /* There are more than 2 digits in the exponent.  See
521
               if we can delete some of the leading zeros */
522
            if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
523
                significant_digit_cnt = MIN_EXPONENT_DIGITS;
524
            extra_zeros_cnt = exponent_digit_cnt -
525
                significant_digit_cnt;
526
527
            /* Delete extra_zeros_cnt worth of characters from the
528
               front of the exponent */
529
            assert(extra_zeros_cnt >= 0);
530
531
            /* Add one to significant_digit_cnt to copy the
532
               trailing 0 byte, thus setting the length */
533
            memmove(start,
534
                start + extra_zeros_cnt,
535
                significant_digit_cnt + 1);
536
        }
537
        else {
538
            /* If there are fewer than 2 digits, add zeros
539
               until there are 2, if there's enough room */
540
            int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
541
            if (start + zeros + exponent_digit_cnt + 1
542
                  < buffer + buf_size) {
543
                memmove(start + zeros, start,
544
                    exponent_digit_cnt + 1);
545
                memset(start, '0', zeros);
546
            }
547
        }
548
    }
549
}
550
551
/* Remove trailing zeros after the decimal point from a numeric string; also
552
   remove the decimal point if all digits following it are zero.  The numeric
553
   string must end in '\0', and should not have any leading or trailing
554
   whitespace.  Assumes that the decimal point is '.'. */
555
Py_LOCAL_INLINE(void)
556
remove_trailing_zeros(char *buffer)
557
{
558
    char *old_fraction_end, *new_fraction_end, *end, *p;
559
560
    p = buffer;
561
    if (*p == '-' || *p == '+')
562
        /* Skip leading sign, if present */
563
        ++p;
564
    while (Py_ISDIGIT(*p))
565
        ++p;
566
567
    /* if there's no decimal point there's nothing to do */
568
    if (*p++ != '.')
569
        return;
570
571
    /* scan any digits after the point */
572
    while (Py_ISDIGIT(*p))
573
        ++p;
574
    old_fraction_end = p;
575
576
    /* scan up to ending '\0' */
577
    while (*p != '\0')
578
        p++;
579
    /* +1 to make sure that we move the null byte as well */
580
    end = p+1;
581
582
    /* scan back from fraction_end, looking for removable zeros */
583
    p = old_fraction_end;
584
    while (*(p-1) == '0')
585
        --p;
586
    /* and remove point if we've got that far */
587
    if (*(p-1) == '.')
588
        --p;
589
    new_fraction_end = p;
590
591
    memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
592
}
593
594
/* Ensure that buffer has a decimal point in it.  The decimal point will not
595
   be in the current locale, it will always be '.'. Don't add a decimal point
596
   if an exponent is present.  Also, convert to exponential notation where
597
   adding a '.0' would produce too many significant digits (see issue 5864).
598
599
   Returns a pointer to the fixed buffer, or NULL on failure.
600
*/
601
Py_LOCAL_INLINE(char *)
602
ensure_decimal_point(char* buffer, size_t buf_size, int precision)
603
{
604
    int digit_count, insert_count = 0, convert_to_exp = 0;
605
    const char *chars_to_insert;
606
    char *digits_start;
607
608
    /* search for the first non-digit character */
609
    char *p = buffer;
610
    if (*p == '-' || *p == '+')
611
        /* Skip leading sign, if present.  I think this could only
612
           ever be '-', but it can't hurt to check for both. */
613
        ++p;
614
    digits_start = p;
615
    while (*p && Py_ISDIGIT(*p))
616
        ++p;
617
    digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
618
619
    if (*p == '.') {
620
        if (Py_ISDIGIT(*(p+1))) {
621
            /* Nothing to do, we already have a decimal
622
               point and a digit after it */
623
        }
624
        else {
625
            /* We have a decimal point, but no following
626
               digit.  Insert a zero after the decimal. */
627
            /* can't ever get here via PyOS_double_to_string */
628
            assert(precision == -1);
629
            ++p;
630
            chars_to_insert = "0";
631
            insert_count = 1;
632
        }
633
    }
634
    else if (!(*p == 'e' || *p == 'E')) {
635
        /* Don't add ".0" if we have an exponent. */
636
        if (digit_count == precision) {
637
            /* issue 5864: don't add a trailing .0 in the case
638
               where the '%g'-formatted result already has as many
639
               significant digits as were requested.  Switch to
640
               exponential notation instead. */
641
            convert_to_exp = 1;
642
            /* no exponent, no point, and we shouldn't land here
643
               for infs and nans, so we must be at the end of the
644
               string. */
645
            assert(*p == '\0');
646
        }
647
        else {
648
            assert(precision == -1 || digit_count < precision);
649
            chars_to_insert = ".0";
650
            insert_count = 2;
651
        }
652
    }
653
    if (insert_count) {
654
        size_t buf_len = strlen(buffer);
655
        if (buf_len + insert_count + 1 >= buf_size) {
656
            /* If there is not enough room in the buffer
657
               for the additional text, just skip it.  It's
658
               not worth generating an error over. */
659
        }
660
        else {
661
            memmove(p + insert_count, p,
662
                buffer + strlen(buffer) - p + 1);
663
            memcpy(p, chars_to_insert, insert_count);
664
        }
665
    }
666
    if (convert_to_exp) {
667
        int written;
668
        size_t buf_avail;
669
        p = digits_start;
670
        /* insert decimal point */
671
        assert(digit_count >= 1);
672
        memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
673
        p[1] = '.';
674
        p += digit_count+1;
675
        assert(p <= buf_size+buffer);
676
        buf_avail = buf_size+buffer-p;
677
        if (buf_avail == 0)
678
            return NULL;
679
        /* Add exponent.  It's okay to use lower case 'e': we only
680
           arrive here as a result of using the empty format code or
681
           repr/str builtins and those never want an upper case 'E' */
682
        written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
683
        if (!(0 <= written &&
684
              written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
685
            /* output truncated, or something else bad happened */
686
            return NULL;
687
        remove_trailing_zeros(buffer);
688
    }
689
    return buffer;
690
}
691
692
/* see FORMATBUFLEN in unicodeobject.c */
693
#define FLOAT_FORMATBUFLEN 120
694
695
/**
696
 * _PyOS_ascii_formatd:
697
 * @buffer: A buffer to place the resulting string in
698
 * @buf_size: The length of the buffer.
699
 * @format: The printf()-style format to use for the
700
 *          conversion.
701
 * @d: The #gdouble to convert
702
 * @precision: The precision to use when formatting.
703
 *
704
 * Converts a #gdouble to a string, using the '.' as
705
 * decimal point. To format the number you pass in
706
 * a printf()-style format string. Allowed conversion
707
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
708
 *
709
 * 'Z' is the same as 'g', except it always has a decimal and
710
 *     at least one digit after the decimal.
711
 *
712
 * Return value: The pointer to the buffer with the converted string.
713
 * On failure returns NULL but does not set any Python exception.
714
 **/
715
static char *
716
_PyOS_ascii_formatd(char       *buffer,
717
                   size_t      buf_size,
718
                   const char *format,
719
                   double      d,
720
                   int         precision)
721
{
722
    char format_char;
723
    size_t format_len = strlen(format);
724
725
    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
726
       also with at least one character past the decimal. */
727
    char tmp_format[FLOAT_FORMATBUFLEN];
728
729
    /* The last character in the format string must be the format char */
730
    format_char = format[format_len - 1];
731
732
    if (format[0] != '%')
733
        return NULL;
734
735
    /* I'm not sure why this test is here.  It's ensuring that the format
736
       string after the first character doesn't have a single quote, a
737
       lowercase l, or a percent. This is the reverse of the commented-out
738
       test about 10 lines ago. */
739
    if (strpbrk(format + 1, "'l%"))
740
        return NULL;
741
742
    /* Also curious about this function is that it accepts format strings
743
       like "%xg", which are invalid for floats.  In general, the
744
       interface to this function is not very good, but changing it is
745
       difficult because it's a public API. */
746
747
    if (!(format_char == 'e' || format_char == 'E' ||
748
          format_char == 'f' || format_char == 'F' ||
749
          format_char == 'g' || format_char == 'G' ||
750
          format_char == 'Z'))
751
        return NULL;
752
753
    /* Map 'Z' format_char to 'g', by copying the format string and
754
       replacing the final char with a 'g' */
755
    if (format_char == 'Z') {
756
        if (format_len + 1 >= sizeof(tmp_format)) {
757
            /* The format won't fit in our copy.  Error out.  In
758
               practice, this will never happen and will be
759
               detected by returning NULL */
760
            return NULL;
761
        }
762
        strcpy(tmp_format, format);
763
        tmp_format[format_len - 1] = 'g';
764
        format = tmp_format;
765
    }
766
767
768
    /* Have PyOS_snprintf do the hard work */
769
    PyOS_snprintf(buffer, buf_size, format, d);
770
771
    /* Do various fixups on the return string */
772
773
    /* Get the current locale, and find the decimal point string.
774
       Convert that string back to a dot. */
775
    change_decimal_from_locale_to_dot(buffer);
776
777
    /* If an exponent exists, ensure that the exponent is at least
778
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
779
       for the extra zeros.  Also, if there are more than
780
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
781
       back to MIN_EXPONENT_DIGITS */
782
    ensure_minimum_exponent_length(buffer, buf_size);
783
784
    /* If format_char is 'Z', make sure we have at least one character
785
       after the decimal point (and make sure we have a decimal point);
786
       also switch to exponential notation in some edge cases where the
787
       extra character would produce more significant digits that we
788
       really want. */
789
    if (format_char == 'Z')
790
        buffer = ensure_decimal_point(buffer, buf_size, precision);
791
792
    return buffer;
793
}
794
795
/* The fallback code to use if _Py_dg_dtoa is not available. */
796
797
char * PyOS_double_to_string(double val,
798
                                         char format_code,
799
                                         int precision,
800
                                         int flags,
801
                                         int *type)
802
{
803
    char format[32];
804
    Py_ssize_t bufsize;
805
    char *buf;
806
    int t, exp;
807
    int upper = 0;
808
809
    /* Validate format_code, and map upper and lower case */
810
    switch (format_code) {
811
    case 'e':          /* exponent */
812
    case 'f':          /* fixed */
813
    case 'g':          /* general */
814
        break;
815
    case 'E':
816
        upper = 1;
817
        format_code = 'e';
818
        break;
819
    case 'F':
820
        upper = 1;
821
        format_code = 'f';
822
        break;
823
    case 'G':
824
        upper = 1;
825
        format_code = 'g';
826
        break;
827
    case 'r':          /* repr format */
828
        /* Supplied precision is unused, must be 0. */
829
        if (precision != 0) {
830
            PyErr_BadInternalCall();
831
            return NULL;
832
        }
833
        /* The repr() precision (17 significant decimal digits) is the
834
           minimal number that is guaranteed to have enough precision
835
           so that if the number is read back in the exact same binary
836
           value is recreated.  This is true for IEEE floating point
837
           by design, and also happens to work for all other modern
838
           hardware. */
839
        precision = 17;
840
        format_code = 'g';
841
        break;
842
    default:
843
        PyErr_BadInternalCall();
844
        return NULL;
845
    }
846
847
    /* Here's a quick-and-dirty calculation to figure out how big a buffer
848
       we need.  In general, for a finite float we need:
849
850
         1 byte for each digit of the decimal significand, and
851
852
         1 for a possible sign
853
         1 for a possible decimal point
854
         2 for a possible [eE][+-]
855
         1 for each digit of the exponent;  if we allow 19 digits
856
           total then we're safe up to exponents of 2**63.
857
         1 for the trailing nul byte
858
859
       This gives a total of 24 + the number of digits in the significand,
860
       and the number of digits in the significand is:
861
862
         for 'g' format: at most precision, except possibly
863
           when precision == 0, when it's 1.
864
         for 'e' format: precision+1
865
         for 'f' format: precision digits after the point, at least 1
866
           before.  To figure out how many digits appear before the point
867
           we have to examine the size of the number.  If fabs(val) < 1.0
868
           then there will be only one digit before the point.  If
869
           fabs(val) >= 1.0, then there are at most
870
871
         1+floor(log10(ceiling(fabs(val))))
872
873
           digits before the point (where the 'ceiling' allows for the
874
           possibility that the rounding rounds the integer part of val
875
           up).  A safe upper bound for the above quantity is
876
           1+floor(exp/3), where exp is the unique integer such that 0.5
877
           <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
878
           frexp.
879
880
       So we allow room for precision+1 digits for all formats, plus an
881
       extra floor(exp/3) digits for 'f' format.
882
883
    */
884
885
    if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
886
        /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
887
        bufsize = 5;
888
    else {
889
        bufsize = 25 + precision;
890
        if (format_code == 'f' && fabs(val) >= 1.0) {
891
            frexp(val, &exp);
892
            bufsize += exp/3;
893
        }
894
    }
895
896
    buf = PyMem_Malloc(bufsize);
897
    if (buf == NULL) {
898
        PyErr_NoMemory();
899
        return NULL;
900
    }
901
902
    /* Handle nan and inf. */
903
    if (Py_IS_NAN(val)) {
904
        strcpy(buf, "nan");
905
        t = Py_DTST_NAN;
906
    } else if (Py_IS_INFINITY(val)) {
907
        if (copysign(1., val) == 1.)
908
            strcpy(buf, "inf");
909
        else
910
            strcpy(buf, "-inf");
911
        t = Py_DTST_INFINITE;
912
    } else {
913
        t = Py_DTST_FINITE;
914
        if (flags & Py_DTSF_ADD_DOT_0)
915
            format_code = 'Z';
916
917
        PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
918
                      (flags & Py_DTSF_ALT ? "#" : ""), precision,
919
                      format_code);
920
        _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
921
    }
922
923
    /* Add sign when requested.  It's convenient (esp. when formatting
924
     complex numbers) to include a sign even for inf and nan. */
925
    if (flags & Py_DTSF_SIGN && buf[0] != '-') {
926
        size_t len = strlen(buf);
927
        /* the bufsize calculations above should ensure that we've got
928
           space to add a sign */
929
        assert((size_t)bufsize >= len+2);
930
        memmove(buf+1, buf, len+1);
931
        buf[0] = '+';
932
    }
933
    if (upper) {
934
        /* Convert to upper case. */
935
        char *p1;
936
        for (p1 = buf; *p1; p1++)
937
            *p1 = Py_TOUPPER(*p1);
938
    }
939
940
    if (type)
941
        *type = t;
942
    return buf;
943
}
944
945
#else
946
947
/* _Py_dg_dtoa is available. */
948
949
/* Upper-casing is done by table lookup rather than by a character
   conversion routine, so the result never depends on the current locale.
   OFS_* are the indices shared by both tables below. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The formatting code hard-codes the lengths of these strings (3, 3 and 1
   characters respectively), so don't change them. */
static const char * const lc_float_strings[] = {
    [OFS_INF] = "inf",
    [OFS_NAN] = "nan",
    [OFS_E] = "e",
};
static const char * const uc_float_strings[] = {
    [OFS_INF] = "INF",
    [OFS_NAN] = "NAN",
    [OFS_E] = "E",
};
966
967
968
/* Convert a double d to a string, and return a PyMem_Malloc'd block of
969
   memory contain the resulting string.
970
971
   Arguments:
972
     d is the double to be converted
973
     format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
974
       correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
975
     mode is one of '0', '2' or '3', and is completely determined by
976
       format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
977
     precision is the desired precision
978
     always_add_sign is nonzero if a '+' sign should be included for positive
979
       numbers
980
     add_dot_0_if_integer is nonzero if integers in non-exponential form
981
       should have ".0" added.  Only applies to format codes 'r' and 'g'.
982
     use_alt_formatting is nonzero if alternative formatting should be
983
       used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
984
       at most one of use_alt_formatting and add_dot_0_if_integer should
985
       be nonzero.
986
     type, if non-NULL, will be set to one of these constants to identify
987
       the type of the 'd' argument:
988
     Py_DTST_FINITE
989
     Py_DTST_INFINITE
990
     Py_DTST_NAN
991
992
   Returns a PyMem_Malloc'd block of memory containing the resulting string,
993
    or NULL on error. If NULL is returned, the Python error has been set.
994
 */
995
996
static char *
997
format_float_short(double d, char format_code,
998
                   int mode, int precision,
999
                   int always_add_sign, int add_dot_0_if_integer,
1000
                   int use_alt_formatting, const char * const *float_strings,
1001
                   int *type)
1002
0
{
1003
0
    char *buf = NULL;
1004
0
    char *p = NULL;
1005
0
    Py_ssize_t bufsize = 0;
1006
0
    char *digits, *digits_end;
1007
0
    int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1008
0
    Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1009
0
    _Py_SET_53BIT_PRECISION_HEADER;
1010
1011
    /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1012
       Must be matched by a call to _Py_dg_freedtoa. */
1013
0
    _Py_SET_53BIT_PRECISION_START;
1014
0
    digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1015
0
                         &digits_end);
1016
0
    _Py_SET_53BIT_PRECISION_END;
1017
1018
0
    decpt = (Py_ssize_t)decpt_as_int;
1019
0
    if (digits == NULL) {
1020
        /* The only failure mode is no memory. */
1021
0
        PyErr_NoMemory();
1022
0
        goto exit;
1023
0
    }
1024
0
    assert(digits_end != NULL && digits_end >= digits);
1025
0
    digits_len = digits_end - digits;
1026
1027
0
    if (digits_len && !Py_ISDIGIT(digits[0])) {
1028
        /* Infinities and nans here; adapt Gay's output,
1029
           so convert Infinity to inf and NaN to nan, and
1030
           ignore sign of nan. Then return. */
1031
1032
        /* ignore the actual sign of a nan */
1033
0
        if (digits[0] == 'n' || digits[0] == 'N')
1034
0
            sign = 0;
1035
1036
        /* We only need 5 bytes to hold the result "+inf\0" . */
1037
0
        bufsize = 5; /* Used later in an assert. */
1038
0
        buf = (char *)PyMem_Malloc(bufsize);
1039
0
        if (buf == NULL) {
1040
0
            PyErr_NoMemory();
1041
0
            goto exit;
1042
0
        }
1043
0
        p = buf;
1044
1045
0
        if (sign == 1) {
1046
0
            *p++ = '-';
1047
0
        }
1048
0
        else if (always_add_sign) {
1049
0
            *p++ = '+';
1050
0
        }
1051
0
        if (digits[0] == 'i' || digits[0] == 'I') {
1052
0
            strncpy(p, float_strings[OFS_INF], 3);
1053
0
            p += 3;
1054
1055
0
            if (type)
1056
0
                *type = Py_DTST_INFINITE;
1057
0
        }
1058
0
        else if (digits[0] == 'n' || digits[0] == 'N') {
1059
0
            strncpy(p, float_strings[OFS_NAN], 3);
1060
0
            p += 3;
1061
1062
0
            if (type)
1063
0
                *type = Py_DTST_NAN;
1064
0
        }
1065
0
        else {
1066
            /* shouldn't get here: Gay's code should always return
1067
               something starting with a digit, an 'I',  or 'N' */
1068
0
            Py_UNREACHABLE();
1069
0
        }
1070
0
        goto exit;
1071
0
    }
1072
1073
    /* The result must be finite (not inf or nan). */
1074
0
    if (type)
1075
0
        *type = Py_DTST_FINITE;
1076
1077
1078
    /* We got digits back, format them.  We may need to pad 'digits'
1079
       either on the left or right (or both) with extra zeros, so in
1080
       general the resulting string has the form
1081
1082
         [<sign>]<zeros><digits><zeros>[<exponent>]
1083
1084
       where either of the <zeros> pieces could be empty, and there's a
1085
       decimal point that could appear either in <digits> or in the
1086
       leading or trailing <zeros>.
1087
1088
       Imagine an infinite 'virtual' string vdigits, consisting of the
1089
       string 'digits' (starting at index 0) padded on both the left and
1090
       right with infinite strings of zeros.  We want to output a slice
1091
1092
         vdigits[vdigits_start : vdigits_end]
1093
1094
       of this virtual string.  Thus if vdigits_start < 0 then we'll end
1095
       up producing some leading zeros; if vdigits_end > digits_len there
1096
       will be trailing zeros in the output.  The next section of code
1097
       determines whether to use an exponent or not, figures out the
1098
       position 'decpt' of the decimal point, and computes 'vdigits_start'
1099
       and 'vdigits_end'. */
1100
0
    vdigits_end = digits_len;
1101
0
    switch (format_code) {
1102
0
    case 'e':
1103
0
        use_exp = 1;
1104
0
        vdigits_end = precision;
1105
0
        break;
1106
0
    case 'f':
1107
0
        vdigits_end = decpt + precision;
1108
0
        break;
1109
0
    case 'g':
1110
0
        if (decpt <= -4 || decpt >
1111
0
            (add_dot_0_if_integer ? precision-1 : precision))
1112
0
            use_exp = 1;
1113
0
        if (use_alt_formatting)
1114
0
            vdigits_end = precision;
1115
0
        break;
1116
0
    case 'r':
1117
        /* convert to exponential format at 1e16.  We used to convert
1118
           at 1e17, but that gives odd-looking results for some values
1119
           when a 16-digit 'shortest' repr is padded with bogus zeros.
1120
           For example, repr(2e16+8) would give 20000000000000010.0;
1121
           the true value is 20000000000000008.0. */
1122
0
        if (decpt <= -4 || decpt > 16)
1123
0
            use_exp = 1;
1124
0
        break;
1125
0
    default:
1126
0
        PyErr_BadInternalCall();
1127
0
        goto exit;
1128
0
    }
1129
1130
    /* if using an exponent, reset decimal point position to 1 and adjust
1131
       exponent accordingly.*/
1132
0
    if (use_exp) {
1133
0
        exp = (int)decpt - 1;
1134
0
        decpt = 1;
1135
0
    }
1136
    /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1137
       decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1138
0
    vdigits_start = decpt <= 0 ? decpt-1 : 0;
1139
0
    if (!use_exp && add_dot_0_if_integer)
1140
0
        vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1141
0
    else
1142
0
        vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1143
1144
    /* double check inequalities */
1145
0
    assert(vdigits_start <= 0 &&
1146
0
           0 <= digits_len &&
1147
0
           digits_len <= vdigits_end);
1148
    /* decimal point should be in (vdigits_start, vdigits_end] */
1149
0
    assert(vdigits_start < decpt && decpt <= vdigits_end);
1150
1151
    /* Compute an upper bound how much memory we need. This might be a few
1152
       chars too long, but no big deal. */
1153
0
    bufsize =
1154
        /* sign, decimal point and trailing 0 byte */
1155
0
        3 +
1156
1157
        /* total digit count (including zero padding on both sides) */
1158
0
        (vdigits_end - vdigits_start) +
1159
1160
        /* exponent "e+100", max 3 numerical digits */
1161
0
        (use_exp ? 5 : 0);
1162
1163
    /* Now allocate the memory and initialize p to point to the start of
1164
       it. */
1165
0
    buf = (char *)PyMem_Malloc(bufsize);
1166
0
    if (buf == NULL) {
1167
0
        PyErr_NoMemory();
1168
0
        goto exit;
1169
0
    }
1170
0
    p = buf;
1171
1172
    /* Add a negative sign if negative, and a plus sign if non-negative
1173
       and always_add_sign is true. */
1174
0
    if (sign == 1)
1175
0
        *p++ = '-';
1176
0
    else if (always_add_sign)
1177
0
        *p++ = '+';
1178
1179
    /* note that exactly one of the three 'if' conditions is true,
1180
       so we include exactly one decimal point */
1181
    /* Zero padding on left of digit string */
1182
0
    if (decpt <= 0) {
1183
0
        memset(p, '0', decpt-vdigits_start);
1184
0
        p += decpt - vdigits_start;
1185
0
        *p++ = '.';
1186
0
        memset(p, '0', 0-decpt);
1187
0
        p += 0-decpt;
1188
0
    }
1189
0
    else {
1190
0
        memset(p, '0', 0-vdigits_start);
1191
0
        p += 0 - vdigits_start;
1192
0
    }
1193
1194
    /* Digits, with included decimal point */
1195
0
    if (0 < decpt && decpt <= digits_len) {
1196
0
        strncpy(p, digits, decpt-0);
1197
0
        p += decpt-0;
1198
0
        *p++ = '.';
1199
0
        strncpy(p, digits+decpt, digits_len-decpt);
1200
0
        p += digits_len-decpt;
1201
0
    }
1202
0
    else {
1203
0
        strncpy(p, digits, digits_len);
1204
0
        p += digits_len;
1205
0
    }
1206
1207
    /* And zeros on the right */
1208
0
    if (digits_len < decpt) {
1209
0
        memset(p, '0', decpt-digits_len);
1210
0
        p += decpt-digits_len;
1211
0
        *p++ = '.';
1212
0
        memset(p, '0', vdigits_end-decpt);
1213
0
        p += vdigits_end-decpt;
1214
0
    }
1215
0
    else {
1216
0
        memset(p, '0', vdigits_end-digits_len);
1217
0
        p += vdigits_end-digits_len;
1218
0
    }
1219
1220
    /* Delete a trailing decimal pt unless using alternative formatting. */
1221
0
    if (p[-1] == '.' && !use_alt_formatting)
1222
0
        p--;
1223
1224
    /* Now that we've done zero padding, add an exponent if needed. */
1225
0
    if (use_exp) {
1226
0
        *p++ = float_strings[OFS_E][0];
1227
0
        exp_len = sprintf(p, "%+.02d", exp);
1228
0
        p += exp_len;
1229
0
    }
1230
0
  exit:
1231
0
    if (buf) {
1232
0
        *p = '\0';
1233
        /* It's too late if this fails, as we've already stepped on
1234
           memory that isn't ours. But it's an okay debugging test. */
1235
0
        assert(p-buf < bufsize);
1236
0
    }
1237
0
    if (digits)
1238
0
        _Py_dg_freedtoa(digits);
1239
1240
0
    return buf;
1241
0
}
1242
1243
1244
char * PyOS_double_to_string(double val,
1245
                                         char format_code,
1246
                                         int precision,
1247
                                         int flags,
1248
                                         int *type)
1249
0
{
1250
0
    const char * const *float_strings = lc_float_strings;
1251
0
    int mode;
1252
1253
    /* Validate format_code, and map upper and lower case. Compute the
1254
       mode and make any adjustments as needed. */
1255
0
    switch (format_code) {
1256
    /* exponent */
1257
0
    case 'E':
1258
0
        float_strings = uc_float_strings;
1259
0
        format_code = 'e';
1260
        /* Fall through. */
1261
0
    case 'e':
1262
0
        mode = 2;
1263
0
        precision++;
1264
0
        break;
1265
1266
    /* fixed */
1267
0
    case 'F':
1268
0
        float_strings = uc_float_strings;
1269
0
        format_code = 'f';
1270
        /* Fall through. */
1271
0
    case 'f':
1272
0
        mode = 3;
1273
0
        break;
1274
1275
    /* general */
1276
0
    case 'G':
1277
0
        float_strings = uc_float_strings;
1278
0
        format_code = 'g';
1279
        /* Fall through. */
1280
0
    case 'g':
1281
0
        mode = 2;
1282
        /* precision 0 makes no sense for 'g' format; interpret as 1 */
1283
0
        if (precision == 0)
1284
0
            precision = 1;
1285
0
        break;
1286
1287
    /* repr format */
1288
0
    case 'r':
1289
0
        mode = 0;
1290
        /* Supplied precision is unused, must be 0. */
1291
0
        if (precision != 0) {
1292
0
            PyErr_BadInternalCall();
1293
0
            return NULL;
1294
0
        }
1295
0
        break;
1296
1297
0
    default:
1298
0
        PyErr_BadInternalCall();
1299
0
        return NULL;
1300
0
    }
1301
1302
0
    return format_float_short(val, format_code, mode, precision,
1303
0
                              flags & Py_DTSF_SIGN,
1304
0
                              flags & Py_DTSF_ADD_DOT_0,
1305
0
                              flags & Py_DTSF_ALT,
1306
0
                              float_strings, type);
1307
0
}
1308
#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */