Coverage Report

Created: 2026-02-07 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/botan/src/lib/utils/parsing.cpp
Line
Count
Source
1
/*
2
* Various string utils and parsing functions
3
* (C) 1999-2007,2013,2014,2015,2018 Jack Lloyd
4
* (C) 2015 Simon Warta (Kullo GmbH)
5
* (C) 2017 René Korthaus, Rohde & Schwarz Cybersecurity
6
*
7
* Botan is released under the Simplified BSD License (see license.txt)
8
*/
9
10
#include <botan/internal/parsing.h>
11
12
#include <botan/exceptn.h>
13
#include <botan/internal/fmt.h>
14
#include <botan/internal/loadstor.h>
15
#include <algorithm>
16
#include <cctype>
17
#include <limits>
18
#include <sstream>
19
20
namespace Botan {
21
22
0
uint16_t to_uint16(std::string_view str) {
23
0
   const uint32_t x = to_u32bit(str);
24
25
0
   if(x != static_cast<uint16_t>(x)) {
26
0
      throw Invalid_Argument("Integer value exceeds 16 bit range");
27
0
   }
28
29
0
   return static_cast<uint16_t>(x);
30
0
}
31
32
2.53k
uint32_t to_u32bit(std::string_view str_view) {
33
2.53k
   const std::string str(str_view);
34
35
   // std::stoul is not strict enough. Ensure that str is digit only [0-9]*
36
5.93k
   for(const char chr : str) {
37
5.93k
      if(chr < '0' || chr > '9') {
38
0
         throw Invalid_Argument("to_u32bit invalid decimal string '" + str + "'");
39
0
      }
40
5.93k
   }
41
42
2.53k
   const unsigned long int x = std::stoul(str);
43
44
2.53k
   if constexpr(sizeof(unsigned long int) > 4) {
45
      // x might be uint64
46
2.53k
      if(x > std::numeric_limits<uint32_t>::max()) {
47
0
         throw Invalid_Argument("Integer value of " + str + " exceeds 32 bit range");
48
0
      }
49
2.53k
   }
50
51
2.53k
   return static_cast<uint32_t>(x);
52
2.53k
}
53
54
/*
55
* Parse a SCAN-style algorithm name
56
*/
57
944
std::vector<std::string> parse_algorithm_name(std::string_view scan_name) {
58
944
   if(scan_name.find('(') == std::string::npos && scan_name.find(')') == std::string::npos) {
59
0
      return {std::string(scan_name)};
60
0
   }
61
62
944
   std::string name(scan_name);
63
944
   std::string substring;
64
944
   std::vector<std::string> elems;
65
944
   size_t level = 0;
66
67
944
   elems.push_back(name.substr(0, name.find('(')));
68
944
   name = name.substr(name.find('('));
69
70
3.42k
   for(auto i = name.begin(); i != name.end(); ++i) {
71
3.42k
      const char c = *i;
72
73
3.42k
      if(c == '(') {
74
944
         ++level;
75
944
      }
76
3.42k
      if(c == ')') {
77
944
         if(level == 1 && i == name.end() - 1) {
78
944
            if(elems.size() == 1) {
79
944
               elems.push_back(substring.substr(1));
80
944
            } else {
81
0
               elems.push_back(substring);
82
0
            }
83
944
            return elems;
84
944
         }
85
86
0
         if(level == 0 || (level == 1 && i != name.end() - 1)) {
87
0
            throw Invalid_Algorithm_Name(scan_name);
88
0
         }
89
0
         --level;
90
0
      }
91
92
2.47k
      if(c == ',' && level == 1) {
93
0
         if(elems.size() == 1) {
94
0
            elems.push_back(substring.substr(1));
95
0
         } else {
96
0
            elems.push_back(substring);
97
0
         }
98
0
         substring.clear();
99
2.47k
      } else {
100
2.47k
         substring += c;
101
2.47k
      }
102
2.47k
   }
103
104
0
   if(!substring.empty()) {
105
0
      throw Invalid_Algorithm_Name(scan_name);
106
0
   }
107
108
0
   return elems;
109
0
}
110
111
944
std::vector<std::string> split_on(std::string_view str, char delim) {
112
944
   std::vector<std::string> elems;
113
944
   if(str.empty()) {
114
0
      return elems;
115
0
   }
116
117
944
   std::string substr;
118
13.8k
   for(const char c : str) {
119
13.8k
      if(c == delim) {
120
944
         if(!substr.empty()) {
121
944
            elems.push_back(substr);
122
944
         }
123
944
         substr.clear();
124
12.8k
      } else {
125
12.8k
         substr += c;
126
12.8k
      }
127
13.8k
   }
128
129
944
   if(substr.empty()) {
130
0
      throw Invalid_Argument(fmt("Unable to split string '{}", str));
131
0
   }
132
944
   elems.push_back(substr);
133
134
944
   return elems;
135
944
}
136
137
/*
138
* Join a string
139
*/
140
0
/*
* Concatenate the given strings, placing delim between consecutive
* elements. No delimiter is emitted before the first or after the last
* element; an empty vector produces an empty string.
*/
std::string string_join(const std::vector<std::string>& strs, char delim) {
   std::ostringstream joined;

   bool first = true;
   for(const auto& piece : strs) {
      if(!first) {
         joined << delim;
      }
      joined << piece;
      first = false;
   }

   return joined.str();
}
152
153
/*
154
* Convert a decimal-dotted string to binary IP
155
*/
156
0
std::optional<uint32_t> string_to_ipv4(std::string_view str) {
157
   // At least 3 dots + 4 1-digit integers
158
   // At most 3 dots + 4 3-digit integers
159
0
   if(str.size() < 3 + 4 * 1 || str.size() > 3 + 4 * 3) {
160
0
      return {};
161
0
   }
162
163
   // the final result
164
0
   uint32_t ip = 0;
165
   // the number of '.' seen so far
166
0
   size_t dots = 0;
167
   // accumulates one quad (range 0-255)
168
0
   uint32_t accum = 0;
169
   // # of digits pushed to accum since last dot
170
0
   size_t cur_digits = 0;
171
172
0
   for(const char c : str) {
173
0
      if(c == '.') {
174
         // . without preceding digit is invalid
175
0
         if(cur_digits == 0) {
176
0
            return {};
177
0
         }
178
0
         dots += 1;
179
         // too many dots
180
0
         if(dots > 3) {
181
0
            return {};
182
0
         }
183
184
0
         cur_digits = 0;
185
0
         ip = (ip << 8) | accum;
186
0
         accum = 0;
187
0
      } else if(c >= '0' && c <= '9') {
188
0
         const auto d = static_cast<uint8_t>(c - '0');
189
190
         // prohibit leading zero in quad (used for octal)
191
0
         if(cur_digits > 0 && accum == 0) {
192
0
            return {};
193
0
         }
194
0
         accum = (accum * 10) + d;
195
196
0
         if(accum > 255) {
197
0
            return {};
198
0
         }
199
200
0
         cur_digits++;
201
0
         BOTAN_ASSERT_NOMSG(cur_digits <= 3);
202
0
      } else {
203
0
         return {};
204
0
      }
205
0
   }
206
207
   // no trailing digits?
208
0
   if(cur_digits == 0) {
209
0
      return {};
210
0
   }
211
212
   // insufficient # of dots
213
0
   if(dots != 3) {
214
0
      return {};
215
0
   }
216
217
0
   ip = (ip << 8) | accum;
218
219
0
   return ip;
220
0
}
221
222
/*
223
* Convert an IP address to decimal-dotted string
224
*/
225
0
/*
* Convert a 32-bit IPv4 address to dotted-decimal notation. The most
* significant byte of ip becomes the first quad of the output.
*/
std::string ipv4_to_string(uint32_t ip) {
   std::string dotted;

   // Walk the four bytes from most to least significant
   for(int shift = 24; shift >= 0; shift -= 8) {
      if(shift != 24) {
         dotted += ".";
      }
      const uint8_t quad = static_cast<uint8_t>((ip >> shift) & 0xFF);
      dotted += std::to_string(quad);
   }

   return dotted;
}
240
241
0
/*
* Return a copy of str in which every character that std::isalpha
* classifies as alphabetic is replaced by its std::tolower result.
* All other characters are copied unchanged.
*/
std::string tolower_string(std::string_view str) {
   std::string result;
   result.reserve(str.size());

   for(const char ch : str) {
      // The <cctype> functions require a value representable as unsigned char
      const int code = static_cast<unsigned char>(ch);
      const bool alphabetic = (std::isalpha(code) != 0);
      result.push_back(alphabetic ? static_cast<char>(std::tolower(code)) : ch);
   }

   return result;
}
251
252
0
bool host_wildcard_match(std::string_view issued_, std::string_view host_) {
253
0
   const std::string issued = tolower_string(issued_);
254
0
   const std::string host = tolower_string(host_);
255
256
0
   if(host.empty() || issued.empty()) {
257
0
      return false;
258
0
   }
259
260
   /*
261
   If there are embedded nulls in your issued name
262
   Well I feel bad for you son
263
   */
264
0
   if(std::count(issued.begin(), issued.end(), char(0)) > 0) {
265
0
      return false;
266
0
   }
267
268
   // If more than one wildcard, then issued name is invalid
269
0
   const size_t stars = std::count(issued.begin(), issued.end(), '*');
270
0
   if(stars > 1) {
271
0
      return false;
272
0
   }
273
274
   // '*' is not a valid character in DNS names so should not appear on the host side
275
0
   if(std::count(host.begin(), host.end(), '*') != 0) {
276
0
      return false;
277
0
   }
278
279
   // Similarly a DNS name can't end in .
280
0
   if(host[host.size() - 1] == '.') {
281
0
      return false;
282
0
   }
283
284
   // And a host can't have an empty name component, so reject that
285
0
   if(host.find("..") != std::string::npos) {
286
0
      return false;
287
0
   }
288
289
   // Exact match: accept
290
0
   if(issued == host) {
291
0
      return true;
292
0
   }
293
294
   /*
295
   Otherwise it might be a wildcard
296
297
   If the issued size is strictly longer than the hostname size it
298
   couldn't possibly be a match, even if the issued value is a
299
   wildcard. The only exception is when the wildcard ends up empty
300
   (eg www.example.com matches www*.example.com)
301
   */
302
0
   if(issued.size() > host.size() + 1) {
303
0
      return false;
304
0
   }
305
306
   // If no * at all then not a wildcard, and so not a match
307
0
   if(stars != 1) {
308
0
      return false;
309
0
   }
310
311
   /*
312
   Now walk through the issued string, making sure every character
313
   matches. When we come to the (singular) '*', jump forward in the
314
   hostname by the corresponding amount. We know exactly how much
315
   space the wildcard takes because it must be exactly `len(host) -
316
   len(issued) + 1 chars`.
317
318
   We also verify that the '*' comes in the leftmost component, and
319
   doesn't skip over any '.' in the hostname.
320
   */
321
0
   size_t dots_seen = 0;
322
0
   size_t host_idx = 0;
323
324
0
   for(size_t i = 0; i != issued.size(); ++i) {
325
0
      if(issued[i] == '.') {
326
0
         dots_seen += 1;
327
0
      }
328
329
0
      if(issued[i] == '*') {
330
         // Fail: wildcard can only come in leftmost component
331
0
         if(dots_seen > 0) {
332
0
            return false;
333
0
         }
334
335
         /*
336
         Since there is only one * we know the tail of the issued and
337
         hostname must be an exact match. In this case advance host_idx
338
         to match.
339
         */
340
0
         const size_t advance = (host.size() - issued.size() + 1);
341
342
0
         if(host_idx + advance > host.size()) {  // shouldn't happen
343
0
            return false;
344
0
         }
345
346
         // Can't be any intervening .s that we would have skipped
347
0
         if(std::count(host.begin() + host_idx, host.begin() + host_idx + advance, '.') != 0) {
348
0
            return false;
349
0
         }
350
351
0
         host_idx += advance;
352
0
      } else {
353
0
         if(issued[i] != host[host_idx]) {
354
0
            return false;
355
0
         }
356
357
0
         host_idx += 1;
358
0
      }
359
0
   }
360
361
   // Wildcard issued name must have at least 3 components
362
0
   if(dots_seen < 2) {
363
0
      return false;
364
0
   }
365
366
0
   return true;
367
0
}
368
369
0
std::string check_and_canonicalize_dns_name(std::string_view name) {
370
0
   if(name.size() > 255) {
371
0
      throw Decoding_Error("DNS name exceeds maximum allowed length");
372
0
   }
373
374
0
   if(name.empty()) {
375
0
      throw Decoding_Error("DNS name cannot be empty");
376
0
   }
377
378
0
   if(name.starts_with(".") || name.ends_with(".")) {
379
0
      throw Decoding_Error("DNS name cannot start or end with a dot");
380
0
   }
381
382
   /*
383
   * Table mapping uppercase to lowercase and only including values for valid DNS names
384
   * namely A-Z, a-z, 0-9, hyphen, and dot, plus '*' for wildcarding. (RFC 1035)
385
   */
386
   // clang-format off
387
0
   constexpr uint8_t DNS_CHAR_MAPPING[128] = {
388
0
      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
389
0
      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
390
0
      '\0', '\0', '\0', '\0',  '*', '\0', '\0',  '-',  '.', '\0',  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',  '8',
391
0
       '9', '\0', '\0', '\0', '\0', '\0', '\0', '\0',  'a',  'b',  'c',  'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',
392
0
       'l',  'm',  'n',  'o',  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',  'x',  'y',  'z', '\0', '\0', '\0', '\0',
393
0
      '\0', '\0',  'a',  'b',  'c',  'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',  'p',  'q',
394
0
       'r',  's',  't',  'u',  'v',  'w',  'x',  'y',  'z', '\0', '\0', '\0', '\0', '\0',
395
0
   };
396
   // clang-format on
397
398
0
   std::string canon;
399
0
   canon.reserve(name.size());
400
401
   // RFC 1035: DNS labels must not exceed 63 characters
402
0
   size_t current_label_length = 0;
403
404
0
   for(size_t i = 0; i != name.size(); ++i) {
405
0
      const char c = name[i];
406
407
0
      if(c == '.') {
408
0
         if(i > 0 && name[i - 1] == '.') {
409
0
            throw Decoding_Error("DNS name contains sequential period chars");
410
0
         }
411
412
0
         if(current_label_length == 0) {
413
0
            throw Decoding_Error("DNS name contains empty label");
414
0
         }
415
0
         current_label_length = 0;  // Reset for next label
416
0
      } else {
417
0
         current_label_length++;
418
419
0
         if(current_label_length > 63) {  // RFC 1035 Maximum DNS label length
420
0
            throw Decoding_Error("DNS name label exceeds maximum length of 63 characters");
421
0
         }
422
0
      }
423
424
0
      const uint8_t cu = static_cast<uint8_t>(c);
425
0
      if(cu >= 128) {
426
0
         throw Decoding_Error("DNS name must not contain any extended ASCII code points");
427
0
      }
428
0
      const uint8_t mapped = DNS_CHAR_MAPPING[cu];
429
0
      if(mapped == 0) {
430
0
         throw Decoding_Error("DNS name includes invalid character");
431
0
      }
432
433
0
      if(mapped == '-') {
434
0
         if(i == 0 || (i > 0 && name[i - 1] == '.')) {
435
0
            throw Decoding_Error("DNS name has label with leading hyphen");
436
0
         } else if(i == name.size() - 1 || (i < name.size() - 1 && name[i + 1] == '.')) {
437
0
            throw Decoding_Error("DNS name has label with trailing hyphen");
438
0
         }
439
0
      }
440
0
      canon.push_back(static_cast<char>(mapped));
441
0
   }
442
443
0
   if(current_label_length == 0) {
444
0
      throw Decoding_Error("DNS name contains empty label");
445
0
   }
446
0
   return canon;
447
0
}
448
449
}  // namespace Botan