Coverage Report

Created: 2025-08-24 06:20

/src/tor/src/lib/string/scanf.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2003-2004, Roger Dingledine
2
 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3
 * Copyright (c) 2007-2021, The Tor Project, Inc. */
4
/* See LICENSE for licensing information */
5
6
/**
7
 * \file scanf.c
8
 * \brief Locale-independent minimal implementation of sscanf().
9
 **/
10
11
#include "lib/string/scanf.h"
12
#include "lib/string/compat_ctype.h"
13
#include "lib/cc/torint.h"
14
#include "lib/err/torerr.h"
15
16
#include <stdlib.h>
17
18
57.2k
/** Largest field width this file will work with.  tor_vsscanf() rejects a
 * pattern whose explicit width exceeds this value, and the numeric scan_*
 * helpers substitute it when they are handed a negative ("no limit")
 * width. */
#define MAX_SCANF_WIDTH 9999
19
20
/** Helper: convert the ASCII decimal digit <b>d</b> into the integer it
 * denotes (0 through 9).
 *
 * NOTE: the caller must already have verified that <b>d</b> is a decimal
 * digit; any other input trips the assertion below. */
static int
digit_to_num(char d)
{
  /* The decimal digits are contiguous in ASCII, so the distance from '0'
   * is the digit's value. */
  const int num = d - '0';

  raw_assert(num <= 9 && num >= 0);

  return num;
}
29
30
/** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b>
31
 * characters.  (Handle arbitrary width if <b>width</b> is less than 0.)  On
32
 * success, store the result in <b>out</b>, advance bufp to the next
33
 * character, and return 0.  On failure, return -1. */
34
static int
35
scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
36
3.74M
{
37
3.74M
  unsigned long result = 0;
38
3.74M
  int scanned_so_far = 0;
39
3.74M
  const int hex = base==16;
40
3.74M
  raw_assert(base == 10 || base == 16);
41
3.74M
  if (!bufp || !*bufp || !out)
42
0
    return -1;
43
3.74M
  if (width<0)
44
57.2k
    width=MAX_SCANF_WIDTH;
45
46
9.25M
  while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
47
9.25M
         && scanned_so_far < width) {
48
5.51M
    unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
49
    // Check for overflow beforehand, without actually causing any overflow
50
    // This preserves functionality on compilers that don't wrap overflow
51
    // (i.e. that trap or optimise away overflow)
52
    // result * base + digit > ULONG_MAX
53
    // result * base > ULONG_MAX - digit
54
5.51M
    if (result > (ULONG_MAX - digit)/base)
55
304
      return -1; /* Processing this digit would overflow */
56
5.51M
    result = result * base + digit;
57
5.51M
    ++scanned_so_far;
58
5.51M
  }
59
60
3.74M
  if (!scanned_so_far) /* No actual digits scanned */
61
220k
    return -1;
62
63
3.52M
  *out = result;
64
3.52M
  return 0;
65
3.74M
}
66
67
/** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b>
68
 * characters.  (Handle arbitrary width if <b>width</b> is less than 0.)  On
69
 * success, store the result in <b>out</b>, advance bufp to the next
70
 * character, and return 0.  On failure, return -1. */
71
static int
72
scan_signed(const char **bufp, long *out, int width)
73
0
{
74
0
  int neg = 0;
75
0
  unsigned long result = 0;
76
77
0
  if (!bufp || !*bufp || !out)
78
0
    return -1;
79
0
  if (width<0)
80
0
    width=MAX_SCANF_WIDTH;
81
82
0
  if (**bufp == '-') {
83
0
    neg = 1;
84
0
    ++*bufp;
85
0
    --width;
86
0
  }
87
88
0
  if (scan_unsigned(bufp, &result, width, 10) < 0)
89
0
    return -1;
90
91
0
  if (neg && result > 0) {
92
0
    if (result > ((unsigned long)LONG_MAX) + 1)
93
0
      return -1; /* Underflow */
94
0
    else if (result == ((unsigned long)LONG_MAX) + 1)
95
0
      *out = LONG_MIN;
96
0
    else {
97
      /* We once had a far more clever no-overflow conversion here, but
98
       * some versions of GCC apparently ran it into the ground.  Now
99
       * we just check for LONG_MIN explicitly.
100
       */
101
0
      *out = -(long)result;
102
0
    }
103
0
  } else {
104
0
    if (result > LONG_MAX)
105
0
      return -1; /* Overflow */
106
0
    *out = (long)result;
107
0
  }
108
109
0
  return 0;
110
0
}
111
112
/** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
113
 * <b>width</b> characters.  (Handle arbitrary width if <b>width</b> is less
114
 * than 0.)  On success, store the result in <b>out</b>, advance bufp to the
115
 * next character, and return 0.  On failure, return -1. */
116
static int
117
scan_double(const char **bufp, double *out, int width)
118
0
{
119
0
  int neg = 0;
120
0
  double result = 0;
121
0
  int scanned_so_far = 0;
122
123
0
  if (!bufp || !*bufp || !out)
124
0
    return -1;
125
0
  if (width<0)
126
0
    width=MAX_SCANF_WIDTH;
127
128
0
  if (**bufp == '-') {
129
0
    neg = 1;
130
0
    ++*bufp;
131
0
  }
132
133
0
  while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
134
0
    const int digit = digit_to_num(*(*bufp)++);
135
0
    result = result * 10 + digit;
136
0
    ++scanned_so_far;
137
0
  }
138
0
  if (**bufp == '.') {
139
0
    double fracval = 0, denominator = 1;
140
0
    ++*bufp;
141
0
    ++scanned_so_far;
142
0
    while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
143
0
      const int digit = digit_to_num(*(*bufp)++);
144
0
      fracval = fracval * 10 + digit;
145
0
      denominator *= 10;
146
0
      ++scanned_so_far;
147
0
    }
148
0
    result += fracval / denominator;
149
0
  }
150
151
0
  if (!scanned_so_far) /* No actual digits scanned */
152
0
    return -1;
153
154
0
  *out = neg ? -result : result;
155
0
  return 0;
156
0
}
157
158
/** Helper: copy characters from *<b>bufp</b> into <b>out</b> until reaching
 * a space character, the end of the input, or <b>width</b> copied
 * characters, whichever comes first, and then NUL-terminate <b>out</b>.
 * The caller must therefore provide room for <b>width</b>+1 bytes in
 * <b>out</b>.
 *
 * Leading space is not skipped, so the copied string may be empty.  On
 * return, *<b>bufp</b> points at the first character that was not copied.
 *
 * Return 0 on success, or -1 (touching nothing) if any pointer argument is
 * NULL or <b>width</b> is negative. */
static int
scan_string(const char **bufp, char *out, int width)
{
  int scanned_so_far = 0;

  /* Check *bufp as well, like the other scan_* helpers do: it is
   * dereferenced immediately below. */
  if (!bufp || !*bufp || !out || width < 0)
    return -1;

  while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
    *out++ = *(*bufp)++;
    ++scanned_so_far;
  }
  *out = '\0';
  return 0;
}
174
175
/** Locale-independent, minimal, no-surprises scanf variant, accepting only a
176
 * restricted pattern format.  For more info on what it supports, see
177
 * tor_sscanf() documentation.  */
178
int
179
tor_vsscanf(const char *buf, const char *pattern, va_list ap)
180
1.38M
{
181
1.38M
  int n_matched = 0;
182
183
7.56M
  while (*pattern) {
184
7.55M
    if (*pattern != '%') {
185
2.63M
      if (*buf == *pattern) {
186
2.59M
        ++buf;
187
2.59M
        ++pattern;
188
2.59M
        continue;
189
2.59M
      } else {
190
43.0k
        return n_matched;
191
43.0k
      }
192
4.92M
    } else {
193
4.92M
      int width = -1;
194
4.92M
      int longmod = 0;
195
4.92M
      ++pattern;
196
4.92M
      if (TOR_ISDIGIT(*pattern)) {
197
3.96M
        width = digit_to_num(*pattern++);
198
3.96M
        while (TOR_ISDIGIT(*pattern)) {
199
0
          width *= 10;
200
0
          width += digit_to_num(*pattern++);
201
0
          if (width > MAX_SCANF_WIDTH)
202
0
            return -1;
203
0
        }
204
3.96M
        if (!width) /* No zero-width things. */
205
0
          return -1;
206
3.96M
      }
207
4.92M
      if (*pattern == 'l') {
208
0
        longmod = 1;
209
0
        ++pattern;
210
0
      }
211
4.92M
      if (*pattern == 'u' || *pattern == 'x') {
212
4.02M
        unsigned long u;
213
4.02M
        const int base = (*pattern == 'u') ? 10 : 16;
214
4.02M
        if (!*buf)
215
276k
          return n_matched;
216
3.74M
        if (scan_unsigned(&buf, &u, width, base)<0)
217
220k
          return n_matched;
218
3.52M
        if (longmod) {
219
0
          unsigned long *out = va_arg(ap, unsigned long *);
220
0
          *out = u;
221
3.52M
        } else {
222
3.52M
          unsigned *out = va_arg(ap, unsigned *);
223
3.52M
          if (u > UINT_MAX)
224
448
            return n_matched;
225
3.52M
          *out = (unsigned) u;
226
3.52M
        }
227
3.52M
        ++pattern;
228
3.52M
        ++n_matched;
229
3.52M
      } else if (*pattern == 'f') {
230
0
        double *d = va_arg(ap, double *);
231
0
        if (!longmod)
232
0
          return -1; /* float not supported */
233
0
        if (!*buf)
234
0
          return n_matched;
235
0
        if (scan_double(&buf, d, width)<0)
236
0
          return n_matched;
237
0
        ++pattern;
238
0
        ++n_matched;
239
898k
      } else if (*pattern == 'd') {
240
0
        long lng=0;
241
0
        if (scan_signed(&buf, &lng, width)<0)
242
0
          return n_matched;
243
0
        if (longmod) {
244
0
          long *out = va_arg(ap, long *);
245
0
          *out = lng;
246
0
        } else {
247
0
          int *out = va_arg(ap, int *);
248
0
#if LONG_MAX > INT_MAX
249
0
          if (lng < INT_MIN || lng > INT_MAX)
250
0
            return n_matched;
251
0
#endif
252
0
          *out = (int)lng;
253
0
        }
254
0
        ++pattern;
255
0
        ++n_matched;
256
898k
      } else if (*pattern == 's') {
257
0
        char *s = va_arg(ap, char *);
258
0
        if (longmod)
259
0
          return -1;
260
0
        if (width < 0)
261
0
          return -1;
262
0
        if (scan_string(&buf, s, width)<0)
263
0
          return n_matched;
264
0
        ++pattern;
265
0
        ++n_matched;
266
898k
      } else if (*pattern == 'c') {
267
898k
        char *ch = va_arg(ap, char *);
268
898k
        if (longmod)
269
0
          return -1;
270
898k
        if (width != -1)
271
0
          return -1;
272
898k
        if (!*buf)
273
840k
          return n_matched;
274
57.5k
        *ch = *buf++;
275
57.5k
        ++pattern;
276
57.5k
        ++n_matched;
277
57.5k
      } else if (*pattern == '%') {
278
0
        if (*buf != '%')
279
0
          return n_matched;
280
0
        if (longmod)
281
0
          return -1;
282
0
        ++buf;
283
0
        ++pattern;
284
0
      } else {
285
0
        return -1; /* Unrecognized pattern component. */
286
0
      }
287
4.92M
    }
288
7.55M
  }
289
290
2.55k
  return n_matched;
291
1.38M
}
292
293
/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>,
 * storing each converted value through the corresponding pointer argument,
 * and return whatever tor_vsscanf() returns for the same inputs.  Differs
 * from sscanf in that:
 * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, %c, and
 *         the literal %%.
 *     <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1)
 *     <li>It does not handle arbitrarily long widths.
 *     <li>Numeric conversions (including %u and %x) do not consume any
 *         space characters.
 *     <li>It is locale-independent.
 *     <li>It returns -1 on malformed patterns.</ul>
 *
 * (As with other locale-independent functions, we need this to parse data
 * that is in ASCII without worrying that the C library's locale-handling
 * will make miscellaneous characters look like numbers, spaces, and so on.)
 */
int
tor_sscanf(const char *buf, const char *pattern, ...)
{
  va_list args;
  int n_matched;

  va_start(args, pattern);
  n_matched = tor_vsscanf(buf, pattern, args);
  va_end(args);

  return n_matched;
}