Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Python/mystrtoul.c
Line
Count
Source (jump to first uncovered line)
1
// strtol() and strtoul(), renamed to avoid conflicts.
2
//
3
// API:
4
//
5
// - PyOS_strtol(): convert string to C long integer.
6
// - PyOS_strtoul(): convert string to C unsigned long integer.
7
8
#include "Python.h"
9
#include "pycore_long.h"          // _PyLong_DigitValue
10
11
#if defined(__sgi) && !defined(_SGI_MP_SOURCE)
12
#  define _SGI_MP_SOURCE
13
#endif
14
15
/* strtol and strtoul, renamed to avoid conflicts */
16
17
18
#ifdef HAVE_ERRNO_H
19
#  include <errno.h>              // errno
20
#endif
21
22
/* Overflow guard table, indexed directly by base (2 through 36).
 * smallmax[base] is the largest unsigned long value that can still be
 * multiplied by base without overflowing unsigned long.
 * Slots 0 and 1 are placeholders: those bases are invalid.
 */
static const unsigned long smallmax[] = {
    /*  0 -  1 */
    0, 0,
    /*  2 -  6 */
    ULONG_MAX / 2,  ULONG_MAX / 3,  ULONG_MAX / 4,  ULONG_MAX / 5,
    ULONG_MAX / 6,
    /*  7 - 11 */
    ULONG_MAX / 7,  ULONG_MAX / 8,  ULONG_MAX / 9,  ULONG_MAX / 10,
    ULONG_MAX / 11,
    /* 12 - 16 */
    ULONG_MAX / 12, ULONG_MAX / 13, ULONG_MAX / 14, ULONG_MAX / 15,
    ULONG_MAX / 16,
    /* 17 - 21 */
    ULONG_MAX / 17, ULONG_MAX / 18, ULONG_MAX / 19, ULONG_MAX / 20,
    ULONG_MAX / 21,
    /* 22 - 26 */
    ULONG_MAX / 22, ULONG_MAX / 23, ULONG_MAX / 24, ULONG_MAX / 25,
    ULONG_MAX / 26,
    /* 27 - 31 */
    ULONG_MAX / 27, ULONG_MAX / 28, ULONG_MAX / 29, ULONG_MAX / 30,
    ULONG_MAX / 31,
    /* 32 - 36 */
    ULONG_MAX / 32, ULONG_MAX / 33, ULONG_MAX / 34, ULONG_MAX / 35,
    ULONG_MAX / 36,
};
65
66
/* digitlimit[base] is the maximum number of digits in that base which can
 * never overflow, for bases 2 through 36 (slots 0 and 1 are unused
 * placeholders).  The values are
 *     [int(math.floor(math.log(2**N, i))) for i in range(2, 37)]
 * with N = 32 when SIZEOF_LONG == 4 and N = 64 when SIZEOF_LONG == 8.
 * Any other SIZEOF_LONG is a compile-time error.
 */
static const int digitlimit[] = {
#if SIZEOF_LONG == 4
    /*  0 -  9 */  0,  0, 32, 20, 16, 13, 12, 11, 10, 10,
    /* 10 - 19 */  9,  9,  8,  8,  8,  8,  8,  7,  7,  7,
    /* 20 - 29 */  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,
    /* 30 - 36 */  6,  6,  6,  6,  6,  6,  6
#elif SIZEOF_LONG == 8
    /*  0 -  9 */  0,  0, 64, 40, 32, 27, 24, 22, 21, 20,
    /* 10 - 19 */ 19, 18, 17, 17, 16, 16, 16, 15, 15, 15,
    /* 20 - 29 */ 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
    /* 30 - 36 */ 13, 12, 12, 12, 12, 12, 12
#else
#  error "Need table for SIZEOF_LONG"
#endif
};
86
87
/*
88
**      strtoul
89
**              This is a general purpose routine for converting
90
**              an ascii string to an integer in an arbitrary base.
91
**              Leading white space is ignored.  If 'base' is zero
92
**              it looks for a leading 0b, 0o or 0x to tell which
93
**              base.  If these are absent it defaults to 10.
94
**              Base must be 0 or between 2 and 36 (inclusive).
95
**              If 'ptr' is non-NULL it will contain a pointer to
96
**              the end of the scan.
97
**              Errors due to bad pointers will probably result in
98
**              exceptions - we don't check for them.
99
*/
100
unsigned long
101
PyOS_strtoul(const char *str, char **ptr, int base)
102
244k
{
103
244k
    unsigned long result = 0; /* return value of the function */
104
244k
    int c;             /* current input character */
105
244k
    int ovlimit;       /* required digits to overflow */
106
107
    /* skip leading white space */
108
244k
    while (*str && Py_ISSPACE(*str))
109
0
        ++str;
110
111
    /* check for leading 0b, 0o or 0x for auto-base or base 16 */
112
244k
    switch (base) {
113
244k
    case 0:             /* look for leading 0b, 0o or 0x */
114
244k
        if (*str == '0') {
115
72.0k
            ++str;
116
72.0k
            if (*str == 'x' || *str == 'X') {
117
                /* there must be at least one digit after 0x */
118
33.2k
                if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 16) {
119
0
                    if (ptr)
120
0
                        *ptr = (char *)str;
121
0
                    return 0;
122
0
                }
123
33.2k
                ++str;
124
33.2k
                base = 16;
125
38.8k
            } else if (*str == 'o' || *str == 'O') {
126
                /* there must be at least one digit after 0o */
127
1.78k
                if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 8) {
128
0
                    if (ptr)
129
0
                        *ptr = (char *)str;
130
0
                    return 0;
131
0
                }
132
1.78k
                ++str;
133
1.78k
                base = 8;
134
37.0k
            } else if (*str == 'b' || *str == 'B') {
135
                /* there must be at least one digit after 0b */
136
995
                if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 2) {
137
0
                    if (ptr)
138
0
                        *ptr = (char *)str;
139
0
                    return 0;
140
0
                }
141
995
                ++str;
142
995
                base = 2;
143
36.0k
            } else {
144
                /* skip all zeroes... */
145
42.6k
                while (*str == '0')
146
6.60k
                    ++str;
147
36.0k
                while (Py_ISSPACE(*str))
148
0
                    ++str;
149
36.0k
                if (ptr)
150
36.0k
                    *ptr = (char *)str;
151
36.0k
                return 0;
152
36.0k
            }
153
72.0k
        }
154
172k
        else
155
172k
            base = 10;
156
208k
        break;
157
158
    /* even with explicit base, skip leading 0? prefix */
159
208k
    case 16:
160
0
        if (*str == '0') {
161
0
            ++str;
162
0
            if (*str == 'x' || *str == 'X') {
163
                /* there must be at least one digit after 0x */
164
0
                if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 16) {
165
0
                    if (ptr)
166
0
                        *ptr = (char *)str;
167
0
                    return 0;
168
0
                }
169
0
                ++str;
170
0
            }
171
0
        }
172
0
        break;
173
0
    case 8:
174
0
        if (*str == '0') {
175
0
            ++str;
176
0
            if (*str == 'o' || *str == 'O') {
177
                /* there must be at least one digit after 0o */
178
0
                if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 8) {
179
0
                    if (ptr)
180
0
                        *ptr = (char *)str;
181
0
                    return 0;
182
0
                }
183
0
                ++str;
184
0
            }
185
0
        }
186
0
        break;
187
0
    case 2:
188
0
        if(*str == '0') {
189
0
            ++str;
190
0
            if (*str == 'b' || *str == 'B') {
191
                /* there must be at least one digit after 0b */
192
0
                if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 2) {
193
0
                    if (ptr)
194
0
                        *ptr = (char *)str;
195
0
                    return 0;
196
0
                }
197
0
                ++str;
198
0
            }
199
0
        }
200
0
        break;
201
244k
    }
202
203
    /* catch silly bases */
204
208k
    if (base < 2 || base > 36) {
205
0
        if (ptr)
206
0
            *ptr = (char *)str;
207
0
        return 0;
208
0
    }
209
210
    /* skip leading zeroes */
211
263k
    while (*str == '0')
212
55.3k
        ++str;
213
214
    /* base is guaranteed to be in [2, 36] at this point */
215
208k
    ovlimit = digitlimit[base];
216
217
    /* do the conversion until non-digit character encountered */
218
751k
    while ((c = _PyLong_DigitValue[Py_CHARMASK(*str)]) < base) {
219
548k
        if (ovlimit > 0) /* no overflow check required */
220
541k
            result = result * base + c;
221
7.47k
        else { /* requires overflow check */
222
7.47k
            unsigned long temp_result;
223
224
7.47k
            if (ovlimit < 0) /* guaranteed overflow */
225
1.08k
                goto overflowed;
226
227
            /* there could be an overflow */
228
            /* check overflow just from shifting */
229
6.39k
            if (result > smallmax[base])
230
4.73k
                goto overflowed;
231
232
1.65k
            result *= base;
233
234
            /* check overflow from the digit's value */
235
1.65k
            temp_result = result + c;
236
1.65k
            if (temp_result < result)
237
244
                goto overflowed;
238
239
1.41k
            result = temp_result;
240
1.41k
        }
241
242
542k
        ++str;
243
542k
        --ovlimit;
244
542k
    }
245
246
    /* set pointer to point to the last character scanned */
247
202k
    if (ptr)
248
202k
        *ptr = (char *)str;
249
250
202k
    return result;
251
252
6.06k
overflowed:
253
6.06k
    if (ptr) {
254
        /* spool through remaining digit characters */
255
185k
        while (_PyLong_DigitValue[Py_CHARMASK(*str)] < base)
256
179k
            ++str;
257
6.06k
        *ptr = (char *)str;
258
6.06k
    }
259
6.06k
    errno = ERANGE;
260
6.06k
    return (unsigned long)-1;
261
208k
}
262
263
/* Magnitude of LONG_MIN as an unsigned long.  It is computed in unsigned
 * arithmetic because -LONG_MIN cannot be formed as a long; see the comments
 * about PY_ABS_LONG_MIN in longobject.c.
 */
#define PY_ABS_LONG_MIN         (0-(unsigned long)LONG_MIN)

/* Convert a string to a C long.
 *
 * Leading white space is skipped and one optional '+' or '-' is consumed;
 * the rest is parsed by PyOS_strtoul(str, ptr, base), which also fills in
 * *ptr.  A magnitude up to LONG_MAX is returned with the sign applied, and
 * a '-' with magnitude PY_ABS_LONG_MIN returns LONG_MIN.  Anything else is
 * out of range: errno is set to ERANGE and LONG_MAX is returned, whatever
 * the sign.
 */
long
PyOS_strtol(const char *str, char **ptr, int base)
{
    unsigned long magnitude;
    char lead;
    int negative;

    for (; *str && Py_ISSPACE(*str); str++) {
        /* skip leading white space */
    }

    lead = *str;
    negative = (lead == '-');
    if (negative || lead == '+') {
        str++;
    }

    magnitude = PyOS_strtoul(str, ptr, base);

    if (magnitude <= (unsigned long)LONG_MAX) {
        long value = (long)magnitude;
        return negative ? -value : value;
    }

    if (negative && magnitude == PY_ABS_LONG_MIN) {
        return LONG_MIN;
    }

    errno = ERANGE;
    return LONG_MAX;
}