Coverage Report

Created: 2025-04-24 07:09

/src/botan/src/lib/utils/parsing.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Various string utils and parsing functions
3
* (C) 1999-2007,2013,2014,2015,2018 Jack Lloyd
4
* (C) 2015 Simon Warta (Kullo GmbH)
5
* (C) 2017 René Korthaus, Rohde & Schwarz Cybersecurity
6
*
7
* Botan is released under the Simplified BSD License (see license.txt)
8
*/
9
10
#include <botan/internal/parsing.h>
11
12
#include <botan/exceptn.h>
13
#include <botan/internal/fmt.h>
14
#include <botan/internal/loadstor.h>
15
#include <algorithm>
16
#include <cctype>
17
#include <limits>
18
#include <sstream>
19
20
namespace Botan {
21
22
0
uint16_t to_uint16(std::string_view str) {
23
0
   const uint32_t x = to_u32bit(str);
24
25
0
   if(x >> 16) {
26
0
      throw Invalid_Argument("Integer value exceeds 16 bit range");
27
0
   }
28
29
0
   return static_cast<uint16_t>(x);
30
0
}
31
32
80
uint32_t to_u32bit(std::string_view str_view) {
33
80
   const std::string str(str_view);
34
35
   // std::stoul is not strict enough. Ensure that str is digit only [0-9]*
36
120
   for(const char chr : str) {
37
120
      if(chr < '0' || chr > '9') {
38
0
         throw Invalid_Argument("to_u32bit invalid decimal string '" + str + "'");
39
0
      }
40
120
   }
41
42
80
   const unsigned long int x = std::stoul(str);
43
44
80
   if constexpr(sizeof(unsigned long int) > 4) {
45
      // x might be uint64
46
80
      if(x > std::numeric_limits<uint32_t>::max()) {
47
0
         throw Invalid_Argument("Integer value of " + str + " exceeds 32 bit range");
48
0
      }
49
80
   }
50
51
80
   return static_cast<uint32_t>(x);
52
80
}
53
54
/*
55
* Parse a SCAN-style algorithm name
56
*/
57
0
std::vector<std::string> parse_algorithm_name(std::string_view namex) {
58
0
   if(namex.find('(') == std::string::npos && namex.find(')') == std::string::npos) {
59
0
      return {std::string(namex)};
60
0
   }
61
62
0
   std::string name(namex);
63
0
   std::string substring;
64
0
   std::vector<std::string> elems;
65
0
   size_t level = 0;
66
67
0
   elems.push_back(name.substr(0, name.find('(')));
68
0
   name = name.substr(name.find('('));
69
70
0
   for(auto i = name.begin(); i != name.end(); ++i) {
71
0
      char c = *i;
72
73
0
      if(c == '(') {
74
0
         ++level;
75
0
      }
76
0
      if(c == ')') {
77
0
         if(level == 1 && i == name.end() - 1) {
78
0
            if(elems.size() == 1) {
79
0
               elems.push_back(substring.substr(1));
80
0
            } else {
81
0
               elems.push_back(substring);
82
0
            }
83
0
            return elems;
84
0
         }
85
86
0
         if(level == 0 || (level == 1 && i != name.end() - 1)) {
87
0
            throw Invalid_Algorithm_Name(namex);
88
0
         }
89
0
         --level;
90
0
      }
91
92
0
      if(c == ',' && level == 1) {
93
0
         if(elems.size() == 1) {
94
0
            elems.push_back(substring.substr(1));
95
0
         } else {
96
0
            elems.push_back(substring);
97
0
         }
98
0
         substring.clear();
99
0
      } else {
100
0
         substring += c;
101
0
      }
102
0
   }
103
104
0
   if(!substring.empty()) {
105
0
      throw Invalid_Algorithm_Name(namex);
106
0
   }
107
108
0
   return elems;
109
0
}
110
111
0
std::vector<std::string> split_on(std::string_view str, char delim) {
112
0
   std::vector<std::string> elems;
113
0
   if(str.empty()) {
114
0
      return elems;
115
0
   }
116
117
0
   std::string substr;
118
0
   for(auto i = str.begin(); i != str.end(); ++i) {
119
0
      if(*i == delim) {
120
0
         if(!substr.empty()) {
121
0
            elems.push_back(substr);
122
0
         }
123
0
         substr.clear();
124
0
      } else {
125
0
         substr += *i;
126
0
      }
127
0
   }
128
129
0
   if(substr.empty()) {
130
0
      throw Invalid_Argument(fmt("Unable to split string '{}", str));
131
0
   }
132
0
   elems.push_back(substr);
133
134
0
   return elems;
135
0
}
136
137
/*
* Join a list of strings
*
* Concatenates the elements of strs in order, placing delim between each
* adjacent pair. An empty list yields an empty string.
*/
std::string string_join(const std::vector<std::string>& strs, char delim) {
   std::string joined;

   bool first = true;
   for(const auto& piece : strs) {
      // The delimiter goes between elements only, never before the first
      if(!first) {
         joined += delim;
      }
      first = false;

      joined += piece;
   }

   return joined;
}
152
153
/*
154
* Convert a decimal-dotted string to binary IP
155
*/
156
0
std::optional<uint32_t> string_to_ipv4(std::string_view str) {
157
   // At least 3 dots + 4 1-digit integers
158
   // At most 3 dots + 4 3-digit integers
159
0
   if(str.size() < 3 + 4 * 1 || str.size() > 3 + 4 * 3) {
160
0
      return {};
161
0
   }
162
163
   // the final result
164
0
   uint32_t ip = 0;
165
   // the number of '.' seen so far
166
0
   size_t dots = 0;
167
   // accumulates one quad (range 0-255)
168
0
   uint32_t accum = 0;
169
   // # of digits pushed to accum since last dot
170
0
   size_t cur_digits = 0;
171
172
0
   for(char c : str) {
173
0
      if(c == '.') {
174
         // . without preceding digit is invalid
175
0
         if(cur_digits == 0) {
176
0
            return {};
177
0
         }
178
0
         dots += 1;
179
         // too many dots
180
0
         if(dots > 3) {
181
0
            return {};
182
0
         }
183
184
0
         cur_digits = 0;
185
0
         ip = (ip << 8) | accum;
186
0
         accum = 0;
187
0
      } else if(c >= '0' && c <= '9') {
188
0
         const auto d = static_cast<uint8_t>(c - '0');
189
190
         // prohibit leading zero in quad (used for octal)
191
0
         if(cur_digits > 0 && accum == 0) {
192
0
            return {};
193
0
         }
194
0
         accum = (accum * 10) + d;
195
196
0
         if(accum > 255) {
197
0
            return {};
198
0
         }
199
200
0
         cur_digits++;
201
0
         BOTAN_ASSERT_NOMSG(cur_digits <= 3);
202
0
      } else {
203
0
         return {};
204
0
      }
205
0
   }
206
207
   // no trailing digits?
208
0
   if(cur_digits == 0) {
209
0
      return {};
210
0
   }
211
212
   // insufficient # of dots
213
0
   if(dots != 3) {
214
0
      return {};
215
0
   }
216
217
0
   ip = (ip << 8) | accum;
218
219
0
   return ip;
220
0
}
221
222
/*
223
* Convert an IP address to decimal-dotted string
224
*/
225
0
std::string ipv4_to_string(uint32_t ip) {
226
0
   uint8_t bits[4];
227
0
   store_be(ip, bits);
228
229
0
   std::string str;
230
231
0
   for(size_t i = 0; i != 4; ++i) {
232
0
      if(i > 0) {
233
0
         str += ".";
234
0
      }
235
0
      str += std::to_string(bits[i]);
236
0
   }
237
238
0
   return str;
239
0
}
240
241
0
/*
* Return a copy of the input with alphabetic characters lowercased
*
* Each byte is classified with std::isalpha and converted with
* std::tolower; bytes that are not alphabetic are copied unchanged.
*/
std::string tolower_string(std::string_view in) {
   std::string lowered(in);

   for(char& ch : lowered) {
      // The <cctype> functions require a value representable as unsigned char
      const int code = static_cast<unsigned char>(ch);

      if(std::isalpha(code) != 0) {
         ch = static_cast<char>(std::tolower(code));
      }
   }

   return lowered;
}
251
252
0
bool host_wildcard_match(std::string_view issued_, std::string_view host_) {
253
0
   const std::string issued = tolower_string(issued_);
254
0
   const std::string host = tolower_string(host_);
255
256
0
   if(host.empty() || issued.empty()) {
257
0
      return false;
258
0
   }
259
260
   /*
261
   If there are embedded nulls in your issued name
262
   Well I feel bad for you son
263
   */
264
0
   if(std::count(issued.begin(), issued.end(), char(0)) > 0) {
265
0
      return false;
266
0
   }
267
268
   // If more than one wildcard, then issued name is invalid
269
0
   const size_t stars = std::count(issued.begin(), issued.end(), '*');
270
0
   if(stars > 1) {
271
0
      return false;
272
0
   }
273
274
   // '*' is not a valid character in DNS names so should not appear on the host side
275
0
   if(std::count(host.begin(), host.end(), '*') != 0) {
276
0
      return false;
277
0
   }
278
279
   // Similarly a DNS name can't end in .
280
0
   if(host[host.size() - 1] == '.') {
281
0
      return false;
282
0
   }
283
284
   // And a host can't have an empty name component, so reject that
285
0
   if(host.find("..") != std::string::npos) {
286
0
      return false;
287
0
   }
288
289
   // Exact match: accept
290
0
   if(issued == host) {
291
0
      return true;
292
0
   }
293
294
   /*
295
   Otherwise it might be a wildcard
296
297
   If the issued size is strictly longer than the hostname size it
298
   couldn't possibly be a match, even if the issued value is a
299
   wildcard. The only exception is when the wildcard ends up empty
300
   (eg www.example.com matches www*.example.com)
301
   */
302
0
   if(issued.size() > host.size() + 1) {
303
0
      return false;
304
0
   }
305
306
   // If no * at all then not a wildcard, and so not a match
307
0
   if(stars != 1) {
308
0
      return false;
309
0
   }
310
311
   /*
312
   Now walk through the issued string, making sure every character
313
   matches. When we come to the (singular) '*', jump forward in the
314
   hostname by the corresponding amount. We know exactly how much
315
   space the wildcard takes because it must be exactly `len(host) -
316
   len(issued) + 1 chars`.
317
318
   We also verify that the '*' comes in the leftmost component, and
319
   doesn't skip over any '.' in the hostname.
320
   */
321
0
   size_t dots_seen = 0;
322
0
   size_t host_idx = 0;
323
324
0
   for(size_t i = 0; i != issued.size(); ++i) {
325
0
      dots_seen += (issued[i] == '.');
326
327
0
      if(issued[i] == '*') {
328
         // Fail: wildcard can only come in leftmost component
329
0
         if(dots_seen > 0) {
330
0
            return false;
331
0
         }
332
333
         /*
334
         Since there is only one * we know the tail of the issued and
335
         hostname must be an exact match. In this case advance host_idx
336
         to match.
337
         */
338
0
         const size_t advance = (host.size() - issued.size() + 1);
339
340
0
         if(host_idx + advance > host.size()) {  // shouldn't happen
341
0
            return false;
342
0
         }
343
344
         // Can't be any intervening .s that we would have skipped
345
0
         if(std::count(host.begin() + host_idx, host.begin() + host_idx + advance, '.') != 0) {
346
0
            return false;
347
0
         }
348
349
0
         host_idx += advance;
350
0
      } else {
351
0
         if(issued[i] != host[host_idx]) {
352
0
            return false;
353
0
         }
354
355
0
         host_idx += 1;
356
0
      }
357
0
   }
358
359
   // Wildcard issued name must have at least 3 components
360
0
   if(dots_seen < 2) {
361
0
      return false;
362
0
   }
363
364
0
   return true;
365
0
}
366
367
0
std::string check_and_canonicalize_dns_name(std::string_view name) {
368
0
   if(name.size() > 255) {
369
0
      throw Decoding_Error("DNS name exceeds maximum allowed length");
370
0
   }
371
372
0
   if(name.empty()) {
373
0
      throw Decoding_Error("DNS name cannot be empty");
374
0
   }
375
376
0
   if(name.starts_with(".")) {
377
0
      throw Decoding_Error("DNS name cannot start with a dot");
378
0
   }
379
380
   /*
381
   * Table mapping uppercase to lowercase and only including values for valid DNS names
382
   * namely A-Z, a-z, 0-9, hypen, and dot, plus '*' for wildcarding.
383
   */
384
   // clang-format off
385
0
   constexpr uint8_t DNS_CHAR_MAPPING[128] = {
386
0
      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
387
0
      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
388
0
      '\0', '\0', '\0', '\0',  '*', '\0', '\0',  '-',  '.', '\0',  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',  '8',
389
0
       '9', '\0', '\0', '\0', '\0', '\0', '\0', '\0',  'a',  'b',  'c',  'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',
390
0
       'l',  'm',  'n',  'o',  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',  'x',  'y',  'z', '\0', '\0', '\0', '\0',
391
0
      '\0', '\0',  'a',  'b',  'c',  'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',  'p',  'q',
392
0
       'r',  's',  't',  'u',  'v',  'w',  'x',  'y',  'z', '\0', '\0', '\0', '\0', '\0',
393
0
   };
394
   // clang-format on
395
396
0
   std::string canon;
397
0
   canon.reserve(name.size());
398
399
0
   for(size_t i = 0; i != name.size(); ++i) {
400
0
      char c = name[i];
401
402
0
      if(c == '.') {
403
0
         if(name[i - 1] == '.') {
404
0
            throw Decoding_Error("DNS name contains sequential period chars");
405
0
         }
406
0
         if(i == name.size() - 1) {
407
0
            throw Decoding_Error("DNS name cannot end in a period");
408
0
         }
409
0
      }
410
411
0
      const uint8_t cu = static_cast<uint8_t>(c);
412
0
      if(cu >= 128) {
413
0
         throw Decoding_Error("DNS name must not contain any extended ASCII code points");
414
0
      }
415
0
      const uint8_t mapped = DNS_CHAR_MAPPING[cu];
416
0
      if(mapped == 0) {
417
0
         throw Decoding_Error("DNS name includes invalid character");
418
0
      }
419
      // TODO check label lengths
420
0
      canon.push_back(static_cast<char>(mapped));
421
0
   }
422
423
0
   return canon;
424
0
}
425
426
}  // namespace Botan