/src/wireshark/epan/strutil.c
Line | Count | Source |
1 | | /* strutil.c |
2 | | * String utility routines |
3 | | * |
4 | | * Wireshark - Network traffic analyzer |
5 | | * By Gerald Combs <gerald@wireshark.org> |
6 | | * Copyright 1998 Gerald Combs |
7 | | * |
8 | | * SPDX-License-Identifier: GPL-2.0-or-later |
9 | | */ |
10 | | |
11 | | #include "config.h" |
12 | | |
13 | | #include <stdlib.h> |
14 | | #include <string.h> |
15 | | #include <glib.h> |
16 | | #include "strutil.h" |
17 | | |
18 | | #include <wsutil/str_util.h> |
19 | | #include <wsutil/unicode-utils.h> |
20 | | #include <epan/proto.h> |
21 | | |
22 | | #ifdef _WIN32 |
23 | | #include <windows.h> |
24 | | #include <tchar.h> |
25 | | #include <wchar.h> |
26 | | #endif |
27 | | |
28 | | |
/*
 * Locate the end of the (putative) line that starts at "data" in a buffer
 * whose one-past-the-end position is "dataend".
 *
 * On return, "*eol" points at the first end-of-line character (the CR of a
 * CR-LF pair, otherwise the LF), or at "dataend" if the buffer holds no LF.
 * The return value points just past the line terminator: past the LF, or
 * past the CR of an LF-CR pair, or at "dataend" when no LF was found.
 */
const unsigned char *
find_line_end(const unsigned char *data, const unsigned char *dataend, const unsigned char **eol)
{
    const unsigned char *lf = memchr(data, '\n', dataend - data);

    if (lf == NULL) {
        /* No LF - the line is probably continued in the next TCP segment. */
        *eol = dataend;
        return dataend;
    }

    if (lf == data) {
        /* The LF is the very first octet, so nothing can precede it. */
        *eol = lf;
        return lf + 1;
    }

    if (lf[-1] == '\r') {
        /* CR-LF: the EOL starts at the CR. */
        *eol = lf - 1;
        return lf + 1;
    }

    /* Bare LF: the EOL starts at the LF itself. */
    *eol = lf;

    /*
     * LF-CR line endings have reportedly been seen in HTTP traffic, so if
     * a CR directly follows the LF, count it as part of the terminator.
     */
    if (lf < (dataend - 1) && lf[1] == '\r') {
        return lf + 2;
    }
    return lf + 1;
}
93 | | |
/*
 * Measure the token that starts at "linep" and find where the next one
 * begins.  A token ends at a blank, a CR, an LF, or "lineend".
 *
 * "*next_token" is set to the first position after the token and any
 * blanks that follow it.  Returns the token length, which is 0 when there
 * is no token at "linep".
 */
int
get_token_len(const unsigned char *linep, const unsigned char *lineend,
              const unsigned char **next_token)
{
    const unsigned char *cursor;
    int length;

    /* Advance to the first blank, CR or LF, or to the end of the buffer. */
    for (cursor = linep; cursor < lineend; cursor++) {
        if (*cursor == ' ' || *cursor == '\r' || *cursor == '\n') {
            break;
        }
    }
    length = (int) (cursor - linep);

    /* Step over the blanks that trail the token. */
    while (cursor < lineend && *cursor == ' ') {
        cursor++;
    }

    *next_token = cursor;
    return length;
}
125 | | |
/* Tell whether "c" is one of the byte separators '-', ':' or '.'. */
static bool
is_byte_sep(uint8_t c)
{
    switch (c) {
    case '-':
    case ':':
    case '.':
        return true;
    default:
        return false;
    }
}
131 | | |
132 | | /* Turn a string of hex digits with optional separators (defined by |
133 | | * is_byte_sep() into a byte array. |
134 | | * |
135 | | * XXX - This function is perhaps too generous in what it accepts. |
136 | | * It allows the separator to change from one character to another, |
137 | | * or to and from no separator if force_separators is false. |
138 | | */ |
139 | | bool |
140 | | hex_str_to_bytes(const char *hex_str, GByteArray *bytes, bool force_separators) |
141 | 515 | { |
142 | 515 | uint8_t val; |
143 | 515 | const char *p, *q, *r, *s, *punct; |
144 | 515 | char four_digits_first_half[3]; |
145 | 515 | char four_digits_second_half[3]; |
146 | 515 | char two_digits[3]; |
147 | 515 | char one_digit[2]; |
148 | | |
149 | 515 | if (! hex_str || ! bytes) { |
150 | 0 | return false; |
151 | 0 | } |
152 | 515 | g_byte_array_set_size(bytes, 0); |
153 | 515 | p = hex_str; |
154 | 3.51k | while (*p) { |
155 | 3.00k | q = p+1; |
156 | 3.00k | r = p+2; |
157 | 3.00k | s = p+3; |
158 | | |
159 | 3.00k | if (*q && *r |
160 | 3.00k | && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q) && |
161 | 3.00k | g_ascii_isxdigit(*r)) { |
162 | | |
163 | | /* |
164 | | * Three hex bytes in a row, followed by a non hex byte |
165 | | * (possibly the end of the string). We don't accept an |
166 | | * odd number of hex digits except for single digits |
167 | | * by themselves or after a separator. |
168 | | */ |
169 | 0 | if (!g_ascii_isxdigit(*s)) { |
170 | 0 | return false; |
171 | 0 | } |
172 | 0 | four_digits_first_half[0] = *p; |
173 | 0 | four_digits_first_half[1] = *q; |
174 | 0 | four_digits_first_half[2] = '\0'; |
175 | 0 | four_digits_second_half[0] = *r; |
176 | 0 | four_digits_second_half[1] = *s; |
177 | 0 | four_digits_second_half[2] = '\0'; |
178 | | |
179 | | /* |
180 | | * Four or more hex digits in a row. |
181 | | */ |
182 | 0 | val = (uint8_t) strtoul(four_digits_first_half, NULL, 16); |
183 | 0 | g_byte_array_append(bytes, &val, 1); |
184 | 0 | val = (uint8_t) strtoul(four_digits_second_half, NULL, 16); |
185 | 0 | g_byte_array_append(bytes, &val, 1); |
186 | |
|
187 | 0 | punct = s + 1; |
188 | 0 | if (*punct) { |
189 | | /* |
190 | | * Make sure the character after |
191 | | * the fourth hex digit is a byte |
192 | | * separator, i.e. that we don't have |
193 | | * more than four hex digits, or a |
194 | | * bogus character. |
195 | | */ |
196 | 0 | if (is_byte_sep(*punct)) { |
197 | 0 | p = punct + 1; |
198 | 0 | continue; |
199 | 0 | } |
200 | 0 | else if (force_separators) { |
201 | 0 | return false; |
202 | 0 | } |
203 | 0 | } |
204 | 0 | p = punct; |
205 | 0 | continue; |
206 | 0 | } |
207 | 3.00k | else if (*q && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q)) { |
208 | 3.00k | two_digits[0] = *p; |
209 | 3.00k | two_digits[1] = *q; |
210 | 3.00k | two_digits[2] = '\0'; |
211 | | |
212 | | /* |
213 | | * Two hex digits in a row. |
214 | | */ |
215 | 3.00k | val = (uint8_t) strtoul(two_digits, NULL, 16); |
216 | 3.00k | g_byte_array_append(bytes, &val, 1); |
217 | 3.00k | punct = q + 1; |
218 | 3.00k | if (*punct) { |
219 | | /* |
220 | | * Make sure the character after |
221 | | * the second hex digit is a byte |
222 | | * separator, i.e. that we don't have |
223 | | * more than two hex digits, or a |
224 | | * bogus character. |
225 | | */ |
226 | 2.50k | if (is_byte_sep(*punct)) { |
227 | 2.50k | p = punct + 1; |
228 | 2.50k | continue; |
229 | 2.50k | } |
230 | 0 | else if (force_separators) { |
231 | 0 | return false; |
232 | 0 | } |
233 | 2.50k | } |
234 | 500 | p = punct; |
235 | 500 | continue; |
236 | 3.00k | } |
237 | 0 | else if (*q && g_ascii_isxdigit(*p) && is_byte_sep(*q)) { |
238 | 0 | one_digit[0] = *p; |
239 | 0 | one_digit[1] = '\0'; |
240 | | |
241 | | /* |
242 | | * Only one hex digit (not at the end of the string) |
243 | | */ |
244 | 0 | val = (uint8_t) strtoul(one_digit, NULL, 16); |
245 | 0 | g_byte_array_append(bytes, &val, 1); |
246 | 0 | p = q + 1; |
247 | 0 | continue; |
248 | 0 | } |
249 | 0 | else if (!*q && g_ascii_isxdigit(*p)) { |
250 | 0 | one_digit[0] = *p; |
251 | 0 | one_digit[1] = '\0'; |
252 | | |
253 | | /* |
254 | | * Only one hex digit (at the end of the string) |
255 | | */ |
256 | 0 | val = (uint8_t) strtoul(one_digit, NULL, 16); |
257 | 0 | g_byte_array_append(bytes, &val, 1); |
258 | 0 | p = q; |
259 | 0 | continue; |
260 | 0 | } |
261 | 0 | else { |
262 | 0 | return false; |
263 | 0 | } |
264 | 3.00k | } |
265 | 515 | return true; |
266 | 515 | } |
267 | | |
268 | | static inline char |
269 | | get_valid_byte_sep(char c, const unsigned encoding) |
270 | 0 | { |
271 | 0 | char retval = -1; /* -1 means failure */ |
272 | |
|
273 | 0 | switch (c) { |
274 | 0 | case ':': |
275 | 0 | if (encoding & ENC_SEP_COLON) |
276 | 0 | retval = c; |
277 | 0 | break; |
278 | 0 | case '-': |
279 | 0 | if (encoding & ENC_SEP_DASH) |
280 | 0 | retval = c; |
281 | 0 | break; |
282 | 0 | case '.': |
283 | 0 | if (encoding & ENC_SEP_DOT) |
284 | 0 | retval = c; |
285 | 0 | break; |
286 | 0 | case ' ': |
287 | 0 | if (encoding & ENC_SEP_SPACE) |
288 | 0 | retval = c; |
289 | 0 | break; |
290 | 0 | case '\0': |
291 | | /* we were given the end of the string, so it's fine */ |
292 | 0 | retval = 0; |
293 | 0 | break; |
294 | 0 | default: |
295 | 0 | if (g_ascii_isxdigit(c) && (encoding & ENC_SEP_NONE)) |
296 | 0 | retval = 0; |
297 | | /* anything else means we've got a failure */ |
298 | 0 | break; |
299 | 0 | } |
300 | | |
301 | 0 | return retval; |
302 | 0 | } |
303 | | |
304 | | /* Turn a string of hex digits with optional separators (defined by is_byte_sep()) |
305 | | * into a byte array. Unlike hex_str_to_bytes(), this will read as many hex-char |
306 | | * pairs as possible and not error if it hits a non-hex-char; instead it just ends |
307 | | * there. (i.e., like strtol()/atoi()/etc.) Unless fail_if_partial is true. |
308 | | * |
309 | | * The **endptr, if not NULL, is set to the char after the last hex character. |
310 | | */ |
311 | | bool |
312 | | hex_str_to_bytes_encoding(const char *hex_str, GByteArray *bytes, const char **endptr, |
313 | | const unsigned encoding, const bool fail_if_partial) |
314 | 0 | { |
315 | 0 | int8_t c, d; |
316 | 0 | uint8_t val; |
317 | 0 | const char *end = hex_str; |
318 | 0 | bool retval = false; |
319 | 0 | char sep = -1; |
320 | | |
321 | | /* a map from ASCII hex chars to their value */ |
322 | 0 | static const int8_t str_to_nibble[256] = { |
323 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
324 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
325 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
326 | 0 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, |
327 | 0 | -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
328 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
329 | 0 | -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
330 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
331 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
332 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
333 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
334 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
335 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
336 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
337 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, |
338 | 0 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 |
339 | 0 | }; |
340 | | |
341 | | /* we must see two hex chars at the beginning, or fail */ |
342 | 0 | if (bytes && *end && g_ascii_isxdigit(*end) && g_ascii_isxdigit(*(end+1))) { |
343 | 0 | retval = true; |
344 | | |
345 | | /* set the separator character we'll allow; if this returns a -1, it means something's |
346 | | * invalid after the hex, but we'll let the while-loop grab the first hex-pair anyway |
347 | | */ |
348 | 0 | sep = get_valid_byte_sep(*(end+2), encoding); |
349 | |
|
350 | 0 | while (*end) { |
351 | 0 | c = str_to_nibble[(unsigned char)*end]; |
352 | 0 | if (c < 0) { |
353 | 0 | if (fail_if_partial) retval = false; |
354 | 0 | break; |
355 | 0 | } |
356 | | |
357 | 0 | d = str_to_nibble[(unsigned char)*(end+1)]; |
358 | 0 | if (d < 0) { |
359 | 0 | if (fail_if_partial) retval = false; |
360 | 0 | break; |
361 | 0 | } |
362 | 0 | val = ((uint8_t)c * 16) + d; |
363 | 0 | g_byte_array_append(bytes, &val, 1); |
364 | 0 | end += 2; |
365 | | |
366 | | /* check for separator and peek at next char to make sure we should keep going */ |
367 | 0 | if (sep > 0 && *end == sep && str_to_nibble[(unsigned char)*(end+1)] > -1) { |
368 | | /* yes, it's the right sep and followed by more hex, so skip the sep */ |
369 | 0 | ++end; |
370 | 0 | } else if (sep != 0 && *end) { |
371 | | /* we either need a separator, but we don't see one; or the get_valid_byte_sep() |
372 | | earlier didn't find a valid one to begin with */ |
373 | 0 | if (fail_if_partial) retval = false; |
374 | 0 | break; |
375 | 0 | } |
376 | | /* otherwise, either no separator allowed, or *end is null, or *end is an invalid |
377 | | * sep, or *end is a valid sep but after it is not a hex char - in all those |
378 | | * cases, just loop back up and let it fail later naturally. |
379 | | */ |
380 | 0 | } |
381 | 0 | } |
382 | |
|
383 | 0 | if (!retval) { |
384 | 0 | if (bytes) g_byte_array_set_size(bytes, 0); |
385 | 0 | end = hex_str; |
386 | 0 | } |
387 | |
|
388 | 0 | if (endptr) *endptr = end; |
389 | |
|
390 | 0 | return retval; |
391 | 0 | } |
392 | | |
393 | | /* |
394 | | * Turn an RFC 3986 percent-encoded array of characters, not |
395 | | * necessarily null-terminated, into a byte array. |
396 | | * XXX - We don't check for reserved characters. |
397 | | * XXX - g_uri_unescape_bytes is superior, but limited to |
398 | | * glib >= 2.66 |
399 | | */ |
400 | | #define HEX_DIGIT_BUF_LEN 3 |
401 | | bool |
402 | | uri_to_bytes(const char *uri_str, GByteArray *bytes, size_t len) |
403 | 0 | { |
404 | 0 | uint8_t val; |
405 | 0 | const char *p; |
406 | 0 | const char *uri_end = uri_str + len; |
407 | 0 | char hex_digit[HEX_DIGIT_BUF_LEN]; |
408 | |
|
409 | 0 | g_byte_array_set_size(bytes, 0); |
410 | 0 | if (! uri_str) { |
411 | 0 | return false; |
412 | 0 | } |
413 | | |
414 | 0 | p = uri_str; |
415 | |
|
416 | 0 | while (p < uri_end) { |
417 | 0 | if (!g_ascii_isprint(*p)) |
418 | 0 | return false; |
419 | 0 | if (*p == '%') { |
420 | 0 | p++; |
421 | 0 | if (*p == '\0') return false; |
422 | 0 | hex_digit[0] = *p; |
423 | 0 | p++; |
424 | 0 | if (*p == '\0') return false; |
425 | 0 | hex_digit[1] = *p; |
426 | 0 | hex_digit[2] = '\0'; |
427 | 0 | if (! g_ascii_isxdigit(hex_digit[0]) || ! g_ascii_isxdigit(hex_digit[1])) |
428 | 0 | return false; |
429 | 0 | val = (uint8_t) strtoul(hex_digit, NULL, 16); |
430 | 0 | g_byte_array_append(bytes, &val, 1); |
431 | 0 | } else { |
432 | 0 | g_byte_array_append(bytes, (const uint8_t *) p, 1); |
433 | 0 | } |
434 | 0 | p++; |
435 | |
|
436 | 0 | } |
437 | 0 | return true; |
438 | 0 | } |
439 | | |
440 | | /* |
441 | | * Turn an RFC 3986 percent-encoded string into a byte array. |
442 | | * XXX - We don't check for reserved characters. |
443 | | * XXX - Just use g_uri_unescape_string instead? |
444 | | */ |
445 | | bool |
446 | | uri_str_to_bytes(const char *uri_str, GByteArray *bytes) |
447 | 0 | { |
448 | 0 | return uri_to_bytes(uri_str, bytes, strlen(uri_str)); |
449 | 0 | } |
450 | | |
451 | | /** |
452 | | * Create a copy of a GByteArray |
453 | | * |
454 | | * @param ba The byte array to be copied. |
455 | | * @return If ba exists, a freshly allocated copy. NULL otherwise. |
456 | | * |
457 | | */ |
458 | | GByteArray * |
459 | | byte_array_dup(const GByteArray *ba) |
460 | 0 | { |
461 | 0 | GByteArray *new_ba; |
462 | |
|
463 | 0 | if (!ba) |
464 | 0 | return NULL; |
465 | | |
466 | 0 | new_ba = g_byte_array_new(); |
467 | 0 | g_byte_array_append(new_ba, ba->data, ba->len); |
468 | 0 | return new_ba; |
469 | 0 | } |
470 | | |
471 | 0 | #define SUBID_BUF_LEN 5 |
472 | | bool |
473 | | oid_str_to_bytes(const char *oid_str, GByteArray *bytes) |
474 | 0 | { |
475 | 0 | return rel_oid_str_to_bytes(oid_str, bytes, true); |
476 | 0 | } |
477 | | bool |
478 | | rel_oid_str_to_bytes(const char *oid_str, GByteArray *bytes, bool is_absolute) |
479 | 0 | { |
480 | 0 | uint32_t subid0, subid, sicnt, i; |
481 | 0 | const char *p, *dot; |
482 | 0 | uint8_t buf[SUBID_BUF_LEN]; |
483 | |
|
484 | 0 | g_byte_array_set_size(bytes, 0); |
485 | | |
486 | | /* check syntax */ |
487 | 0 | p = oid_str; |
488 | 0 | dot = NULL; |
489 | 0 | while (*p) { |
490 | 0 | if (!g_ascii_isdigit(*p) && (*p != '.')) return false; |
491 | 0 | if (*p == '.') { |
492 | 0 | if (p == oid_str && is_absolute) return false; |
493 | 0 | if (!*(p+1)) return false; |
494 | 0 | if ((p-1) == dot) return false; |
495 | 0 | dot = p; |
496 | 0 | } |
497 | 0 | p++; |
498 | 0 | } |
499 | 0 | if (!dot) return false; |
500 | | |
501 | 0 | p = oid_str; |
502 | 0 | sicnt = is_absolute ? 0 : 2; |
503 | 0 | if (!is_absolute) p++; |
504 | 0 | subid0 = 0; /* squelch GCC complaints */ |
505 | 0 | while (*p) { |
506 | 0 | subid = 0; |
507 | 0 | while (g_ascii_isdigit(*p)) { |
508 | 0 | subid *= 10; |
509 | 0 | subid += *p - '0'; |
510 | 0 | p++; |
511 | 0 | } |
512 | 0 | if (sicnt == 0) { |
513 | 0 | subid0 = subid; |
514 | 0 | if (subid0 > 2) return false; |
515 | 0 | } else if (sicnt == 1) { |
516 | 0 | if ((subid0 < 2) && (subid > 39)) return false; |
517 | 0 | subid += 40 * subid0; |
518 | 0 | } |
519 | 0 | if (sicnt) { |
520 | 0 | i = SUBID_BUF_LEN; |
521 | 0 | do { |
522 | 0 | i--; |
523 | 0 | buf[i] = 0x80 | (subid % 0x80); |
524 | 0 | subid >>= 7; |
525 | 0 | } while (subid && i); |
526 | 0 | buf[SUBID_BUF_LEN-1] &= 0x7F; |
527 | 0 | g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i); |
528 | 0 | } |
529 | 0 | sicnt++; |
530 | 0 | if (*p) p++; |
531 | 0 | } |
532 | | |
533 | 0 | return true; |
534 | 0 | } |
535 | | |
536 | | /** |
537 | | * Compare the contents of two GByteArrays |
538 | | * |
539 | | * @param ba1 A byte array |
540 | | * @param ba2 A byte array |
541 | | * @return If both arrays are non-NULL and their lengths are equal and |
542 | | * their contents are equal, returns true. Otherwise, returns |
543 | | * false. |
544 | | * |
545 | | * XXX - Should this be in strutil.c? |
546 | | */ |
547 | | bool |
548 | | byte_array_equal(GByteArray *ba1, GByteArray *ba2) |
549 | 0 | { |
550 | 0 | if (!ba1 || !ba2) |
551 | 0 | return false; |
552 | | |
553 | 0 | if (ba1->len != ba2->len) |
554 | 0 | return false; |
555 | | |
556 | 0 | if (memcmp(ba1->data, ba2->data, ba1->len) != 0) |
557 | 0 | return false; |
558 | | |
559 | 0 | return true; |
560 | 0 | } |
561 | | |
562 | | |
563 | | /* Return a XML escaped representation of the unescaped string. |
564 | | * The returned string must be freed when no longer in use. */ |
565 | | char * |
566 | | xml_escape(const char *unescaped) |
567 | 0 | { |
568 | 0 | GString *buffer = g_string_sized_new(128); |
569 | 0 | const char *p; |
570 | 0 | char c; |
571 | |
|
572 | 0 | p = unescaped; |
573 | 0 | while ( (c = *p++) ) { |
574 | 0 | switch (c) { |
575 | 0 | case '<': |
576 | 0 | g_string_append(buffer, "<"); |
577 | 0 | break; |
578 | 0 | case '>': |
579 | 0 | g_string_append(buffer, ">"); |
580 | 0 | break; |
581 | 0 | case '&': |
582 | 0 | g_string_append(buffer, "&"); |
583 | 0 | break; |
584 | 0 | case '\'': |
585 | 0 | g_string_append(buffer, "'"); |
586 | 0 | break; |
587 | 0 | case '"': |
588 | 0 | g_string_append(buffer, """); |
589 | 0 | break; |
590 | 0 | case '\t': |
591 | 0 | case '\n': |
592 | 0 | case '\r': |
593 | 0 | g_string_append_c(buffer, c); |
594 | 0 | break; |
595 | 0 | default: |
596 | | /* XML 1.0 doesn't allow ASCII control characters, except |
597 | | * for the three whitespace ones above (which do *not* |
598 | | * include '\v' and '\f', so not the same group as isspace), |
599 | | * even as character references. |
600 | | * There's no official way to escape them, so we'll do this. */ |
601 | 0 | if (g_ascii_iscntrl(c)) { |
602 | 0 | g_string_append_printf(buffer, "\\x%x", c); |
603 | 0 | } else { |
604 | 0 | g_string_append_c(buffer, c); |
605 | 0 | } |
606 | 0 | break; |
607 | 0 | } |
608 | 0 | } |
609 | | /* Return the string value contained within the GString |
610 | | * after getting rid of the GString structure. |
611 | | * This is the way to do this, see the GLib reference. */ |
612 | 0 | return g_string_free(buffer, FALSE); |
613 | 0 | } |
614 | | |
/*
 * Convert a textual hex string into a newly allocated byte array.
 *
 * White space and the characters ':', '.' and '-' are skipped wherever a
 * byte could start; every byte must be written as two adjacent hex digits.
 * Returns NULL if anything else is found or if the string holds no bytes
 * at all.  Otherwise returns the array, allocated with g_malloc(), and
 * stores its length in "*nbytes" ("*nbytes" is not written on failure).
 */
uint8_t *
convert_string_to_hex(const char *string, size_t *nbytes)
{
    size_t count = 0;
    const char *src;
    uint8_t *result, *dst;
    char ch;

    /* First pass: validate the text and count the bytes it describes. */
    src = string;
    while ((ch = *src++) != '\0') {
        if (g_ascii_isspace(ch) || ch == ':' || ch == '.' || ch == '-') {
            continue;   /* filler between bytes */
        }
        if (!g_ascii_isxdigit(ch)) {
            return NULL;    /* not a valid hex digit */
        }

        /*
         * Only whole bytes can be matched, never nibbles, so the very
         * next character has to be a hex digit as well.
         */
        ch = *src++;
        if (!g_ascii_isxdigit(ch)) {
            return NULL;
        }
        count++;
    }

    /* No hex digits at all? */
    if (count == 0) {
        return NULL;
    }

    /* Second pass: the text is known to be valid, so just convert it. */
    result = (uint8_t *)g_malloc(count);
    dst = result;
    src = string;
    while ((ch = *src++) != '\0') {
        uint8_t octet;

        if (g_ascii_isspace(ch) || ch == ':' || ch == '.' || ch == '-') {
            continue;
        }

        /* High nibble first, then the digit directly after it. */
        octet = ws_xton(ch);
        octet <<= 4;
        ch = *src++;
        octet |= ws_xton(ch);

        *dst++ = octet;
    }

    *nbytes = count;
    return result;
}
690 | | |
/*
 * Return a newly allocated copy of "string": an exact duplicate for a
 * case-sensitive search, or an upper-cased one (via g_utf8_strup()) for a
 * case-insensitive search.
 */
char *
convert_string_case(const char *string, bool case_insensitive)
{
    return case_insensitive ? g_utf8_strup(string, -1) : g_strdup(string);
}
705 | | |
706 | 0 | #define GN_CHAR_ALPHABET_SIZE 128 |
707 | | |
708 | | static const gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = { |
709 | | |
710 | | /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */ |
711 | | |
712 | | '?', '?', '?', '?', '?', '?', '?', '?', |
713 | | '?', '?', '?', '?', '?', '?', '?', '?', |
714 | | '?', '?', '?', '?', '?', '?', '?', '?', |
715 | | '?', '?', '?', '?', '?', '?', '?', '?', |
716 | | ' ', '!', '\"','#', '$', '%', '&', '\'', |
717 | | '(', ')', '*', '+', ',', '-', '.', '/', |
718 | | '0', '1', '2', '3', '4', '5', '6', '7', |
719 | | '8', '9', ':', ';', '<', '=', '>', '?', |
720 | | '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', |
721 | | 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', |
722 | | 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', |
723 | | 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', |
724 | | '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', |
725 | | 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', |
726 | | 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', |
727 | | 'x', 'y', 'z', '{', '|', '}', '~', '?' |
728 | | }; |
729 | | |
730 | | static gunichar |
731 | | char_def_ia5_alphabet_decode(unsigned char value) |
732 | 0 | { |
733 | 0 | if (value < GN_CHAR_ALPHABET_SIZE) { |
734 | 0 | return IA5_default_alphabet[value]; |
735 | 0 | } |
736 | 0 | else { |
737 | 0 | return '?'; |
738 | 0 | } |
739 | 0 | } |
740 | | |
741 | | void |
742 | | IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len) |
743 | 0 | { |
744 | 0 | int i, j; |
745 | 0 | gunichar buf; |
746 | |
|
747 | 0 | for (i = 0, j = 0; j < len; j++) { |
748 | 0 | buf = char_def_ia5_alphabet_decode(src[j]); |
749 | 0 | i += g_unichar_to_utf8(buf,(char*)&(dest[i])); |
750 | 0 | } |
751 | 0 | dest[i]=0; |
752 | 0 | } |
753 | | |
/* chars allowed: lower case letters, digits, '-', "_", and ".". */
static
const uint8_t module_valid_chars_lower_case[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2F '-', '.'      */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3F '0'-'9'       */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50-0x5F '_'           */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F 'a'-'o'       */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7F 'p'-'z'       */
    /* upper 128 empty-initialized to 0 */
};

/* chars allowed: alphanumerics, '-', "_", and ".". */
static
const uint8_t module_valid_chars[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2F '-', '.'      */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3F '0'-'9'       */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F 'A'-'O'       */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5F 'P'-'Z', '_'  */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F 'a'-'o'       */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7F 'p'-'z'       */
    /* upper 128 empty-initialized to 0 */
};

/*
 * Check a module name against the permitted character set (the lower-case
 * table when lower_only is true, the alphanumeric table otherwise).
 *
 * Returns 0 ('\0') when the scan reaches the end of the name without a
 * problem.  Returns '-' when the name starts with '-'.  Returns '.' when
 * the character scanned just before the scan stopped was a '.', or when
 * the scan stopped on its very first character; that covers a leading
 * '.', a "..", a trailing '.', and an empty name.  Otherwise returns the
 * character that stopped the scan.
 */
unsigned char
module_check_valid_name(const char *name, bool lower_only)
{
    const uint8_t *allowed = lower_only ? module_valid_chars_lower_case
                                        : module_valid_chars;
    const char *cursor = name;
    unsigned char prev;
    /* Pretend a '.' precedes the name so that a leading '.' looks like "..". */
    unsigned char cur = '.';

    /* First character cannot be '-'. */
    if (*name == '-')
        return '-';

    for (;;) {
        prev = cur;
        cur = *(cursor++);
        /* Leading '.' or substring ".." are disallowed. */
        if (cur == '.' && prev == '.')
            break;
        /* Stops on any disallowed character, including the final '\0'. */
        if (!allowed[cur])
            break;
    }

    /* Trailing '.' is disallowed. */
    return (prev == '.') ? '.' : cur;
}
813 | | |
814 | | static const char _hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', |
815 | | '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; |
816 | | |
817 | | /* |
818 | | * Copy byte by byte without UTF-8 truncation (assume valid UTF-8 input). |
819 | | * Return byte size written, or that would have been |
820 | | * written with enough space. |
821 | | */ |
822 | | size_t |
823 | | ws_label_strcpy(char *label_str, size_t buf_size, size_t pos, |
824 | | const uint8_t *str, int flags) |
825 | 30.4M | { |
826 | 30.4M | if (pos >= buf_size) |
827 | 3.99k | return pos; |
828 | | |
829 | 30.4M | uint8_t r = 0; |
830 | 30.4M | ssize_t chlen; |
831 | 30.4M | ssize_t idx, src_len; |
832 | 30.4M | ssize_t free_len; |
833 | | |
834 | 30.4M | label_str[pos] = '\0'; |
835 | | |
836 | 30.4M | ws_return_val_if(str == NULL, pos); |
837 | 30.4M | idx = 0; |
838 | 30.4M | src_len = strlen((const char*)str); |
839 | 30.4M | free_len = buf_size - pos - 1; |
840 | | |
841 | 655M | while (idx < src_len) { |
842 | 624M | chlen = ws_utf8_char_len(str[idx]); |
843 | 624M | if (chlen <= 0) { |
844 | | /* We were passed invalid UTF-8. This is an error. Complain and do... something. */ |
845 | 0 | ws_log_utf8((char*)str, -1, NULL); |
846 | | /* |
847 | | * XXX If we are going to return here instead of trying to recover maybe the log level should |
848 | | * be higher than DEBUG. |
849 | | */ |
850 | 0 | return pos; |
851 | 0 | } |
852 | | |
853 | | /* ASCII */ |
854 | 624M | if (chlen == 1) { |
855 | 624M | if (flags & FORMAT_LABEL_REPLACE_SPACE && g_ascii_isspace(str[idx])) { |
856 | 68.1k | if (free_len >= 1) { |
857 | 68.1k | label_str[pos] = ' '; |
858 | 68.1k | label_str[pos+1] = '\0'; |
859 | 68.1k | } |
860 | 68.1k | pos++; |
861 | 68.1k | idx++; |
862 | 68.1k | free_len--; |
863 | 68.1k | continue; |
864 | 68.1k | } |
865 | | |
866 | 624M | r = 0; |
867 | 624M | switch (str[idx]) { |
868 | 1.28k | case '\a': r = 'a'; break; |
869 | 1.23k | case '\b': r = 'b'; break; |
870 | 3.50k | case '\f': r = 'f'; break; |
871 | 3.76k | case '\n': r = 'n'; break; |
872 | 320 | case '\r': r = 'r'; break; |
873 | 1.01k | case '\t': r = 't'; break; |
874 | 379 | case '\v': r = 'v'; break; |
875 | 624M | } |
876 | 624M | if (r != 0) { |
877 | 11.4k | if (free_len >= 2) { |
878 | 10.2k | label_str[pos] = '\\'; |
879 | 10.2k | label_str[pos+1] = r; |
880 | 10.2k | label_str[pos+2] = '\0'; |
881 | 10.2k | } |
882 | 11.4k | pos += 2; |
883 | 11.4k | idx += 1; |
884 | 11.4k | free_len -= 2; |
885 | 11.4k | continue; |
886 | 11.4k | } |
887 | | |
888 | 624M | if (g_ascii_isprint(str[idx])) { |
889 | 624M | if (free_len >= 1) { |
890 | 624M | label_str[pos] = str[idx]; |
891 | 624M | label_str[pos+1] = '\0'; |
892 | 624M | } |
893 | 624M | pos++; |
894 | 624M | idx++; |
895 | 624M | free_len--; |
896 | 624M | continue; |
897 | 624M | } |
898 | | |
899 | 30.4k | if (free_len >= 4) { |
900 | 23.7k | label_str[pos+0] = '\\'; |
901 | 23.7k | label_str[pos+1] = 'x'; |
902 | | |
903 | 23.7k | uint8_t ch = str[idx]; |
904 | 23.7k | label_str[pos+2] = _hex[ch >> 4]; |
905 | 23.7k | label_str[pos+3] = _hex[ch & 0x0F]; |
906 | 23.7k | label_str[pos+4] = '\0'; |
907 | 23.7k | } |
908 | 30.4k | pos += 4; |
909 | 30.4k | idx += chlen; |
910 | 30.4k | free_len -= 4; |
911 | 30.4k | continue; |
912 | 624M | } |
913 | | |
914 | | /* UTF-8 multibyte */ |
915 | 109k | if (chlen == 2 && str[idx] == 0xC2 && |
916 | 2.40k | str[idx+1] >= 0x80 && str[idx+1] <= 0x9F) { |
917 | | /* |
918 | | * Escape the C1 control codes. C0 (covered above) and C1 are |
919 | | * inband signalling and transparent to Unicode. |
920 | | * Anything else probably has text semantics should not be removed. |
921 | | */ |
922 | | /* |
923 | | * Special case: The second UTF-8 byte is the same as the Unicode |
924 | | * code point for range U+0080 - U+009F. |
925 | | */ |
926 | 496 | if (free_len >= 6) { |
927 | 397 | label_str[pos+0] = '\\'; |
928 | 397 | label_str[pos+1] = 'u'; |
929 | 397 | label_str[pos+2] = '0'; |
930 | 397 | label_str[pos+3] = '0'; |
931 | | |
932 | 397 | uint8_t ch = str[idx+1]; |
933 | 397 | label_str[pos+4] = _hex[ch >> 4]; |
934 | 397 | label_str[pos+5] = _hex[ch & 0x0F]; |
935 | 397 | label_str[pos+6] = '\0'; |
936 | 397 | } |
937 | 496 | pos += 6; |
938 | 496 | idx += chlen; |
939 | 496 | free_len -= 6; |
940 | 496 | continue; |
941 | 496 | } |
942 | | |
943 | | /* Just copy */ |
944 | 109k | if (free_len >= chlen) { |
945 | 352k | for (ssize_t j = 0; j < chlen; j++) { |
946 | 262k | label_str[pos+j] = str[idx+j]; |
947 | 262k | } |
948 | 90.3k | label_str[pos+chlen] = '\0'; |
949 | 90.3k | } |
950 | 109k | pos += chlen; |
951 | 109k | idx += chlen; |
952 | 109k | free_len -= chlen; |
953 | 109k | } |
954 | | |
955 | 30.4M | return pos; |
956 | 30.4M | } |
957 | | |
/*
 * Append "str" to the null-terminated string already in "label_str":
 * calls ws_label_strcpy() with the current string length as the start
 * position and returns its result.
 */
size_t
ws_label_strcat(char *label_str, size_t bufsize, const uint8_t *str, int flags)
{
    const size_t start = strlen(label_str);

    return ws_label_strcpy(label_str, bufsize, start, str, flags);
}
963 | | |
964 | | /* |
965 | | * Editor modelines - https://www.wireshark.org/tools/modelines.html |
966 | | * |
967 | | * Local variables: |
968 | | * c-basic-offset: 4 |
969 | | * tab-width: 8 |
970 | | * indent-tabs-mode: nil |
971 | | * End: |
972 | | * |
973 | | * vi: set shiftwidth=4 tabstop=8 expandtab: |
974 | | * :indentSize=4:tabSize=8:noTabs=true: |
975 | | */ |