/src/cpython/Python/mystrtoul.c
Line | Count | Source (jump to first uncovered line) |
1 | | // strtol() and strtoul(), renamed to avoid conflicts. |
2 | | // |
3 | | // API: |
4 | | // |
5 | | // - PyOS_strtol(): convert string to C long integer. |
6 | | // - PyOS_strtoul(): convert string to C unsigned long integer. |
7 | | |
8 | | #include "Python.h" |
9 | | #include "pycore_long.h" // _PyLong_DigitValue |
10 | | |
11 | | #if defined(__sgi) && !defined(_SGI_MP_SOURCE) |
12 | | # define _SGI_MP_SOURCE |
13 | | #endif |
14 | | |
15 | | /* strtol and strtoul, renamed to avoid conflicts */ |
16 | | |
17 | | |
18 | | #ifdef HAVE_ERRNO_H |
19 | | # include <errno.h> // errno |
20 | | #endif |
21 | | |
/* Overflow guard for the "multiply by base" step, indexed by base.
 * smallmax[base] == ULONG_MAX / base, i.e. the largest unsigned long
 * value that can still be multiplied by `base` without wrapping.
 * Slots 0 and 1 are zero because those bases are invalid.
 */
static const unsigned long smallmax[] = {
    /* bases 0 - 1 (invalid) */
    0, 0,
    /* bases 2 - 8 */
    ULONG_MAX / 2,  ULONG_MAX / 3,  ULONG_MAX / 4,  ULONG_MAX / 5,
    ULONG_MAX / 6,  ULONG_MAX / 7,  ULONG_MAX / 8,
    /* bases 9 - 15 */
    ULONG_MAX / 9,  ULONG_MAX / 10, ULONG_MAX / 11, ULONG_MAX / 12,
    ULONG_MAX / 13, ULONG_MAX / 14, ULONG_MAX / 15,
    /* bases 16 - 22 */
    ULONG_MAX / 16, ULONG_MAX / 17, ULONG_MAX / 18, ULONG_MAX / 19,
    ULONG_MAX / 20, ULONG_MAX / 21, ULONG_MAX / 22,
    /* bases 23 - 29 */
    ULONG_MAX / 23, ULONG_MAX / 24, ULONG_MAX / 25, ULONG_MAX / 26,
    ULONG_MAX / 27, ULONG_MAX / 28, ULONG_MAX / 29,
    /* bases 30 - 36 */
    ULONG_MAX / 30, ULONG_MAX / 31, ULONG_MAX / 32, ULONG_MAX / 33,
    ULONG_MAX / 34, ULONG_MAX / 35, ULONG_MAX / 36,
};
65 | | |
66 | | /* maximum digits that can't ever overflow for bases 2 through 36, |
67 | | * calculated by [int(math.floor(math.log(2**32, i))) for i in range(2, 37)]. |
68 | | * Note that this is pessimistic if sizeof(long) > 4. |
69 | | */ |
70 | | #if SIZEOF_LONG == 4 |
71 | | static const int digitlimit[] = { |
72 | | 0, 0, 32, 20, 16, 13, 12, 11, 10, 10, /* 0 - 9 */ |
73 | | 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, /* 10 - 19 */ |
74 | | 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, /* 20 - 29 */ |
75 | | 6, 6, 6, 6, 6, 6, 6}; /* 30 - 36 */ |
76 | | #elif SIZEOF_LONG == 8 |
77 | | /* [int(math.floor(math.log(2**64, i))) for i in range(2, 37)] */ |
78 | | static const int digitlimit[] = { |
79 | | 0, 0, 64, 40, 32, 27, 24, 22, 21, 20, /* 0 - 9 */ |
80 | | 19, 18, 17, 17, 16, 16, 16, 15, 15, 15, /* 10 - 19 */ |
81 | | 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, /* 20 - 29 */ |
82 | | 13, 12, 12, 12, 12, 12, 12}; /* 30 - 36 */ |
83 | | #else |
84 | | # error "Need table for SIZEOF_LONG" |
85 | | #endif |
86 | | |
87 | | /* |
88 | | ** strtoul |
89 | | ** This is a general purpose routine for converting |
90 | | ** an ascii string to an integer in an arbitrary base. |
91 | | ** Leading white space is ignored. If 'base' is zero |
92 | | ** it looks for a leading 0b, 0o or 0x to tell which |
93 | | ** base. If these are absent it defaults to 10. |
94 | | ** Base must be 0 or between 2 and 36 (inclusive). |
95 | | ** If 'ptr' is non-NULL it will contain a pointer to |
96 | | ** the end of the scan. |
97 | | ** Errors due to bad pointers will probably result in |
98 | | ** exceptions - we don't check for them. |
99 | | */ |
100 | | unsigned long |
101 | | PyOS_strtoul(const char *str, char **ptr, int base) |
102 | 244k | { |
103 | 244k | unsigned long result = 0; /* return value of the function */ |
104 | 244k | int c; /* current input character */ |
105 | 244k | int ovlimit; /* required digits to overflow */ |
106 | | |
107 | | /* skip leading white space */ |
108 | 244k | while (*str && Py_ISSPACE(*str)) |
109 | 0 | ++str; |
110 | | |
111 | | /* check for leading 0b, 0o or 0x for auto-base or base 16 */ |
112 | 244k | switch (base) { |
113 | 244k | case 0: /* look for leading 0b, 0o or 0x */ |
114 | 244k | if (*str == '0') { |
115 | 72.0k | ++str; |
116 | 72.0k | if (*str == 'x' || *str == 'X') { |
117 | | /* there must be at least one digit after 0x */ |
118 | 33.2k | if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 16) { |
119 | 0 | if (ptr) |
120 | 0 | *ptr = (char *)str; |
121 | 0 | return 0; |
122 | 0 | } |
123 | 33.2k | ++str; |
124 | 33.2k | base = 16; |
125 | 38.8k | } else if (*str == 'o' || *str == 'O') { |
126 | | /* there must be at least one digit after 0o */ |
127 | 1.78k | if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 8) { |
128 | 0 | if (ptr) |
129 | 0 | *ptr = (char *)str; |
130 | 0 | return 0; |
131 | 0 | } |
132 | 1.78k | ++str; |
133 | 1.78k | base = 8; |
134 | 37.0k | } else if (*str == 'b' || *str == 'B') { |
135 | | /* there must be at least one digit after 0b */ |
136 | 995 | if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 2) { |
137 | 0 | if (ptr) |
138 | 0 | *ptr = (char *)str; |
139 | 0 | return 0; |
140 | 0 | } |
141 | 995 | ++str; |
142 | 995 | base = 2; |
143 | 36.0k | } else { |
144 | | /* skip all zeroes... */ |
145 | 42.6k | while (*str == '0') |
146 | 6.60k | ++str; |
147 | 36.0k | while (Py_ISSPACE(*str)) |
148 | 0 | ++str; |
149 | 36.0k | if (ptr) |
150 | 36.0k | *ptr = (char *)str; |
151 | 36.0k | return 0; |
152 | 36.0k | } |
153 | 72.0k | } |
154 | 172k | else |
155 | 172k | base = 10; |
156 | 208k | break; |
157 | | |
158 | | /* even with explicit base, skip leading 0? prefix */ |
159 | 208k | case 16: |
160 | 0 | if (*str == '0') { |
161 | 0 | ++str; |
162 | 0 | if (*str == 'x' || *str == 'X') { |
163 | | /* there must be at least one digit after 0x */ |
164 | 0 | if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 16) { |
165 | 0 | if (ptr) |
166 | 0 | *ptr = (char *)str; |
167 | 0 | return 0; |
168 | 0 | } |
169 | 0 | ++str; |
170 | 0 | } |
171 | 0 | } |
172 | 0 | break; |
173 | 0 | case 8: |
174 | 0 | if (*str == '0') { |
175 | 0 | ++str; |
176 | 0 | if (*str == 'o' || *str == 'O') { |
177 | | /* there must be at least one digit after 0o */ |
178 | 0 | if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 8) { |
179 | 0 | if (ptr) |
180 | 0 | *ptr = (char *)str; |
181 | 0 | return 0; |
182 | 0 | } |
183 | 0 | ++str; |
184 | 0 | } |
185 | 0 | } |
186 | 0 | break; |
187 | 0 | case 2: |
188 | 0 | if(*str == '0') { |
189 | 0 | ++str; |
190 | 0 | if (*str == 'b' || *str == 'B') { |
191 | | /* there must be at least one digit after 0b */ |
192 | 0 | if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 2) { |
193 | 0 | if (ptr) |
194 | 0 | *ptr = (char *)str; |
195 | 0 | return 0; |
196 | 0 | } |
197 | 0 | ++str; |
198 | 0 | } |
199 | 0 | } |
200 | 0 | break; |
201 | 244k | } |
202 | | |
203 | | /* catch silly bases */ |
204 | 208k | if (base < 2 || base > 36) { |
205 | 0 | if (ptr) |
206 | 0 | *ptr = (char *)str; |
207 | 0 | return 0; |
208 | 0 | } |
209 | | |
210 | | /* skip leading zeroes */ |
211 | 263k | while (*str == '0') |
212 | 55.3k | ++str; |
213 | | |
214 | | /* base is guaranteed to be in [2, 36] at this point */ |
215 | 208k | ovlimit = digitlimit[base]; |
216 | | |
217 | | /* do the conversion until non-digit character encountered */ |
218 | 751k | while ((c = _PyLong_DigitValue[Py_CHARMASK(*str)]) < base) { |
219 | 548k | if (ovlimit > 0) /* no overflow check required */ |
220 | 541k | result = result * base + c; |
221 | 7.47k | else { /* requires overflow check */ |
222 | 7.47k | unsigned long temp_result; |
223 | | |
224 | 7.47k | if (ovlimit < 0) /* guaranteed overflow */ |
225 | 1.08k | goto overflowed; |
226 | | |
227 | | /* there could be an overflow */ |
228 | | /* check overflow just from shifting */ |
229 | 6.39k | if (result > smallmax[base]) |
230 | 4.73k | goto overflowed; |
231 | | |
232 | 1.65k | result *= base; |
233 | | |
234 | | /* check overflow from the digit's value */ |
235 | 1.65k | temp_result = result + c; |
236 | 1.65k | if (temp_result < result) |
237 | 244 | goto overflowed; |
238 | | |
239 | 1.41k | result = temp_result; |
240 | 1.41k | } |
241 | | |
242 | 542k | ++str; |
243 | 542k | --ovlimit; |
244 | 542k | } |
245 | | |
246 | | /* set pointer to point to the last character scanned */ |
247 | 202k | if (ptr) |
248 | 202k | *ptr = (char *)str; |
249 | | |
250 | 202k | return result; |
251 | | |
252 | 6.06k | overflowed: |
253 | 6.06k | if (ptr) { |
254 | | /* spool through remaining digit characters */ |
255 | 185k | while (_PyLong_DigitValue[Py_CHARMASK(*str)] < base) |
256 | 179k | ++str; |
257 | 6.06k | *ptr = (char *)str; |
258 | 6.06k | } |
259 | 6.06k | errno = ERANGE; |
260 | 6.06k | return (unsigned long)-1; |
261 | 208k | } |
262 | | |
/* Magnitude of LONG_MIN as an unsigned long.  Detecting overflow in
 * PyOS_strtol is awkward; see the comments about PY_ABS_LONG_MIN in
 * longobject.c.
 */
#define PY_ABS_LONG_MIN         (0-(unsigned long)LONG_MIN)

/* Convert a string to a C long.
 *
 * Leading white space and one optional '+' or '-' sign are consumed here;
 * the magnitude is then parsed by PyOS_strtoul() with the same 'ptr' and
 * 'base'.  If the magnitude does not fit in a long for the given sign,
 * errno is set to ERANGE and LONG_MAX is returned.
 */
long
PyOS_strtol(const char *str, char **ptr, int base)
{
    unsigned long magnitude;
    int negative;
    char sign;

    while (*str && Py_ISSPACE(*str)) {
        str++;
    }

    sign = *str;
    if (sign == '+' || sign == '-') {
        str++;
    }
    negative = (sign == '-');

    magnitude = PyOS_strtoul(str, ptr, base);

    if (magnitude <= (unsigned long)LONG_MAX) {
        long value = (long)magnitude;
        return negative ? -value : value;
    }

    /* LONG_MIN has no positive counterpart, so it is matched explicitly */
    if (negative && magnitude == PY_ABS_LONG_MIN) {
        return LONG_MIN;
    }

    errno = ERANGE;
    return LONG_MAX;
}