/src/tor/src/lib/string/scanf.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (c) 2003-2004, Roger Dingledine |
2 | | * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. |
3 | | * Copyright (c) 2007-2021, The Tor Project, Inc. */ |
4 | | /* See LICENSE for licensing information */ |
5 | | |
6 | | /** |
7 | | * \file scanf.c |
8 | | * \brief Locale-independent minimal implementation of sscanf(). |
9 | | **/ |
10 | | |
11 | | #include "lib/string/scanf.h" |
12 | | #include "lib/string/compat_ctype.h" |
13 | | #include "lib/cc/torint.h" |
14 | | #include "lib/err/torerr.h" |
15 | | |
16 | | #include <stdlib.h> |
17 | | |
/** Largest field width accepted in a pattern, and the width substituted by
 * the numeric scanners when the caller passes a negative ("unlimited")
 * width. */
#define MAX_SCANF_WIDTH 9999
19 | | |
/** Helper: convert the ASCII decimal digit <b>d</b> into the integer it
 * denotes (0 through 9).
 *
 * NOTE: the caller must pass a character in the range '0'..'9'; anything
 * else trips the raw_assert() below. */
static int
digit_to_num(char d)
{
  const int num = (int)d - (int)'0';

  raw_assert(num <= 9 && num >= 0);
  return num;
}
29 | | |
30 | | /** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b> |
31 | | * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On |
32 | | * success, store the result in <b>out</b>, advance bufp to the next |
33 | | * character, and return 0. On failure, return -1. */ |
34 | | static int |
35 | | scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base) |
36 | 3.74M | { |
37 | 3.74M | unsigned long result = 0; |
38 | 3.74M | int scanned_so_far = 0; |
39 | 3.74M | const int hex = base==16; |
40 | 3.74M | raw_assert(base == 10 || base == 16); |
41 | 3.74M | if (!bufp || !*bufp || !out) |
42 | 0 | return -1; |
43 | 3.74M | if (width<0) |
44 | 57.2k | width=MAX_SCANF_WIDTH; |
45 | | |
46 | 9.25M | while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp)) |
47 | 9.25M | && scanned_so_far < width) { |
48 | 5.51M | unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++); |
49 | | // Check for overflow beforehand, without actually causing any overflow |
50 | | // This preserves functionality on compilers that don't wrap overflow |
51 | | // (i.e. that trap or optimise away overflow) |
52 | | // result * base + digit > ULONG_MAX |
53 | | // result * base > ULONG_MAX - digit |
54 | 5.51M | if (result > (ULONG_MAX - digit)/base) |
55 | 304 | return -1; /* Processing this digit would overflow */ |
56 | 5.51M | result = result * base + digit; |
57 | 5.51M | ++scanned_so_far; |
58 | 5.51M | } |
59 | | |
60 | 3.74M | if (!scanned_so_far) /* No actual digits scanned */ |
61 | 220k | return -1; |
62 | | |
63 | 3.52M | *out = result; |
64 | 3.52M | return 0; |
65 | 3.74M | } |
66 | | |
67 | | /** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b> |
68 | | * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On |
69 | | * success, store the result in <b>out</b>, advance bufp to the next |
70 | | * character, and return 0. On failure, return -1. */ |
71 | | static int |
72 | | scan_signed(const char **bufp, long *out, int width) |
73 | 0 | { |
74 | 0 | int neg = 0; |
75 | 0 | unsigned long result = 0; |
76 | |
|
77 | 0 | if (!bufp || !*bufp || !out) |
78 | 0 | return -1; |
79 | 0 | if (width<0) |
80 | 0 | width=MAX_SCANF_WIDTH; |
81 | |
|
82 | 0 | if (**bufp == '-') { |
83 | 0 | neg = 1; |
84 | 0 | ++*bufp; |
85 | 0 | --width; |
86 | 0 | } |
87 | |
|
88 | 0 | if (scan_unsigned(bufp, &result, width, 10) < 0) |
89 | 0 | return -1; |
90 | | |
91 | 0 | if (neg && result > 0) { |
92 | 0 | if (result > ((unsigned long)LONG_MAX) + 1) |
93 | 0 | return -1; /* Underflow */ |
94 | 0 | else if (result == ((unsigned long)LONG_MAX) + 1) |
95 | 0 | *out = LONG_MIN; |
96 | 0 | else { |
97 | | /* We once had a far more clever no-overflow conversion here, but |
98 | | * some versions of GCC apparently ran it into the ground. Now |
99 | | * we just check for LONG_MIN explicitly. |
100 | | */ |
101 | 0 | *out = -(long)result; |
102 | 0 | } |
103 | 0 | } else { |
104 | 0 | if (result > LONG_MAX) |
105 | 0 | return -1; /* Overflow */ |
106 | 0 | *out = (long)result; |
107 | 0 | } |
108 | | |
109 | 0 | return 0; |
110 | 0 | } |
111 | | |
112 | | /** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to |
113 | | * <b>width</b> characters. (Handle arbitrary width if <b>width</b> is less |
114 | | * than 0.) On success, store the result in <b>out</b>, advance bufp to the |
115 | | * next character, and return 0. On failure, return -1. */ |
116 | | static int |
117 | | scan_double(const char **bufp, double *out, int width) |
118 | 0 | { |
119 | 0 | int neg = 0; |
120 | 0 | double result = 0; |
121 | 0 | int scanned_so_far = 0; |
122 | |
|
123 | 0 | if (!bufp || !*bufp || !out) |
124 | 0 | return -1; |
125 | 0 | if (width<0) |
126 | 0 | width=MAX_SCANF_WIDTH; |
127 | |
|
128 | 0 | if (**bufp == '-') { |
129 | 0 | neg = 1; |
130 | 0 | ++*bufp; |
131 | 0 | } |
132 | |
|
133 | 0 | while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { |
134 | 0 | const int digit = digit_to_num(*(*bufp)++); |
135 | 0 | result = result * 10 + digit; |
136 | 0 | ++scanned_so_far; |
137 | 0 | } |
138 | 0 | if (**bufp == '.') { |
139 | 0 | double fracval = 0, denominator = 1; |
140 | 0 | ++*bufp; |
141 | 0 | ++scanned_so_far; |
142 | 0 | while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { |
143 | 0 | const int digit = digit_to_num(*(*bufp)++); |
144 | 0 | fracval = fracval * 10 + digit; |
145 | 0 | denominator *= 10; |
146 | 0 | ++scanned_so_far; |
147 | 0 | } |
148 | 0 | result += fracval / denominator; |
149 | 0 | } |
150 | |
|
151 | 0 | if (!scanned_so_far) /* No actual digits scanned */ |
152 | 0 | return -1; |
153 | | |
154 | 0 | *out = neg ? -result : result; |
155 | 0 | return 0; |
156 | 0 | } |
157 | | |
/** Helper: copy up to <b>width</b> non-space characters from *<b>bufp</b>
 * into <b>out</b> and NUL-terminate <b>out</b>. The caller must therefore
 * provide room for <b>width</b>+1 bytes in <b>out</b>.
 *
 * Copying stops at the first space character, at the end of the input, or
 * once <b>width</b> characters have been copied; *<b>bufp</b> is left
 * pointing at that position.
 *
 * Return 0 on success (even if zero characters were copied). Return -1 if
 * <b>bufp</b>, *<b>bufp</b> or <b>out</b> is NULL, or if <b>width</b> is
 * negative. */
static int
scan_string(const char **bufp, char *out, int width)
{
  int scanned_so_far = 0;
  /* Check *bufp as well, like the other scan_* helpers, so that a NULL
   * input pointer is reported instead of dereferenced. */
  if (!bufp || !*bufp || !out || width < 0)
    return -1;
  while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
    *out++ = *(*bufp)++;
    ++scanned_so_far;
  }
  *out = '\0';
  return 0;
}
174 | | |
175 | | /** Locale-independent, minimal, no-surprises scanf variant, accepting only a |
176 | | * restricted pattern format. For more info on what it supports, see |
177 | | * tor_sscanf() documentation. */ |
178 | | int |
179 | | tor_vsscanf(const char *buf, const char *pattern, va_list ap) |
180 | 1.38M | { |
181 | 1.38M | int n_matched = 0; |
182 | | |
183 | 7.56M | while (*pattern) { |
184 | 7.55M | if (*pattern != '%') { |
185 | 2.63M | if (*buf == *pattern) { |
186 | 2.59M | ++buf; |
187 | 2.59M | ++pattern; |
188 | 2.59M | continue; |
189 | 2.59M | } else { |
190 | 43.0k | return n_matched; |
191 | 43.0k | } |
192 | 4.92M | } else { |
193 | 4.92M | int width = -1; |
194 | 4.92M | int longmod = 0; |
195 | 4.92M | ++pattern; |
196 | 4.92M | if (TOR_ISDIGIT(*pattern)) { |
197 | 3.96M | width = digit_to_num(*pattern++); |
198 | 3.96M | while (TOR_ISDIGIT(*pattern)) { |
199 | 0 | width *= 10; |
200 | 0 | width += digit_to_num(*pattern++); |
201 | 0 | if (width > MAX_SCANF_WIDTH) |
202 | 0 | return -1; |
203 | 0 | } |
204 | 3.96M | if (!width) /* No zero-width things. */ |
205 | 0 | return -1; |
206 | 3.96M | } |
207 | 4.92M | if (*pattern == 'l') { |
208 | 0 | longmod = 1; |
209 | 0 | ++pattern; |
210 | 0 | } |
211 | 4.92M | if (*pattern == 'u' || *pattern == 'x') { |
212 | 4.02M | unsigned long u; |
213 | 4.02M | const int base = (*pattern == 'u') ? 10 : 16; |
214 | 4.02M | if (!*buf) |
215 | 276k | return n_matched; |
216 | 3.74M | if (scan_unsigned(&buf, &u, width, base)<0) |
217 | 220k | return n_matched; |
218 | 3.52M | if (longmod) { |
219 | 0 | unsigned long *out = va_arg(ap, unsigned long *); |
220 | 0 | *out = u; |
221 | 3.52M | } else { |
222 | 3.52M | unsigned *out = va_arg(ap, unsigned *); |
223 | 3.52M | if (u > UINT_MAX) |
224 | 448 | return n_matched; |
225 | 3.52M | *out = (unsigned) u; |
226 | 3.52M | } |
227 | 3.52M | ++pattern; |
228 | 3.52M | ++n_matched; |
229 | 3.52M | } else if (*pattern == 'f') { |
230 | 0 | double *d = va_arg(ap, double *); |
231 | 0 | if (!longmod) |
232 | 0 | return -1; /* float not supported */ |
233 | 0 | if (!*buf) |
234 | 0 | return n_matched; |
235 | 0 | if (scan_double(&buf, d, width)<0) |
236 | 0 | return n_matched; |
237 | 0 | ++pattern; |
238 | 0 | ++n_matched; |
239 | 898k | } else if (*pattern == 'd') { |
240 | 0 | long lng=0; |
241 | 0 | if (scan_signed(&buf, &lng, width)<0) |
242 | 0 | return n_matched; |
243 | 0 | if (longmod) { |
244 | 0 | long *out = va_arg(ap, long *); |
245 | 0 | *out = lng; |
246 | 0 | } else { |
247 | 0 | int *out = va_arg(ap, int *); |
248 | 0 | #if LONG_MAX > INT_MAX |
249 | 0 | if (lng < INT_MIN || lng > INT_MAX) |
250 | 0 | return n_matched; |
251 | 0 | #endif |
252 | 0 | *out = (int)lng; |
253 | 0 | } |
254 | 0 | ++pattern; |
255 | 0 | ++n_matched; |
256 | 898k | } else if (*pattern == 's') { |
257 | 0 | char *s = va_arg(ap, char *); |
258 | 0 | if (longmod) |
259 | 0 | return -1; |
260 | 0 | if (width < 0) |
261 | 0 | return -1; |
262 | 0 | if (scan_string(&buf, s, width)<0) |
263 | 0 | return n_matched; |
264 | 0 | ++pattern; |
265 | 0 | ++n_matched; |
266 | 898k | } else if (*pattern == 'c') { |
267 | 898k | char *ch = va_arg(ap, char *); |
268 | 898k | if (longmod) |
269 | 0 | return -1; |
270 | 898k | if (width != -1) |
271 | 0 | return -1; |
272 | 898k | if (!*buf) |
273 | 840k | return n_matched; |
274 | 57.5k | *ch = *buf++; |
275 | 57.5k | ++pattern; |
276 | 57.5k | ++n_matched; |
277 | 57.5k | } else if (*pattern == '%') { |
278 | 0 | if (*buf != '%') |
279 | 0 | return n_matched; |
280 | 0 | if (longmod) |
281 | 0 | return -1; |
282 | 0 | ++buf; |
283 | 0 | ++pattern; |
284 | 0 | } else { |
285 | 0 | return -1; /* Unrecognized pattern component. */ |
286 | 0 | } |
287 | 4.92M | } |
288 | 7.55M | } |
289 | | |
290 | 2.55k | return n_matched; |
291 | 1.38M | } |
292 | | |
/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
 * and store the results through the pointer arguments that follow. This
 * is the variadic front end for tor_vsscanf(), whose return value it
 * passes through unchanged. Differs from sscanf in that:
 * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, %c,
 *         and a literal %%.
 *     <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1)
 *     <li>It does not handle arbitrarily long widths.
 *     <li>Numeric conversions (including %u and %x) do not consume any
 *         space characters.
 *     <li>It is locale-independent.
 *     <li>It returns -1 on malformed patterns.</ul>
 *
 * (As with other locale-independent functions, we need this to parse data
 * that is in ASCII without worrying that the C library's locale-handling
 * will make miscellaneous characters look like numbers, spaces, and so on.)
 */
int
tor_sscanf(const char *buf, const char *pattern, ...)
{
  va_list args;
  int n_matched;

  va_start(args, pattern);
  n_matched = tor_vsscanf(buf, pattern, args);
  va_end(args);

  return n_matched;
}