Coverage Report

Created: 2026-05-14 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wireshark/epan/strutil.c
Line
Count
Source
1
/* strutil.c
2
 * String utility routines
3
 *
4
 * Wireshark - Network traffic analyzer
5
 * By Gerald Combs <gerald@wireshark.org>
6
 * Copyright 1998 Gerald Combs
7
 *
8
 * SPDX-License-Identifier: GPL-2.0-or-later
9
 */
10
11
#include "config.h"
12
13
#include <stdlib.h>
14
#include <string.h>
15
#include <glib.h>
16
#include "strutil.h"
17
18
#include <wsutil/str_util.h>
19
#include <wsutil/unicode-utils.h>
20
#include <epan/proto.h>
21
22
#ifdef _WIN32
23
#include <windows.h>
24
#include <tchar.h>
25
#include <wchar.h>
26
#endif
27
28
29
/*
 * Locate the end of the (putative) line that begins at "data" inside a
 * buffer whose end (exclusive) is "dataend".
 *
 * "*eol" receives a pointer to the first end-of-line character: the CR of
 * a CR-LF pair, otherwise the LF itself, or "dataend" if the buffer holds
 * no LF at all.
 *
 * The return value points just past the line terminator (LF, CR-LF or
 * LF-CR), or equals "dataend" when no LF was found - the line is then
 * probably continued in the next TCP segment.
 */
const unsigned char *
find_line_end(const unsigned char *data, const unsigned char *dataend, const unsigned char **eol)
{
    const unsigned char *lf = (const unsigned char *)memchr(data, '\n', dataend - data);

    /* No LF anywhere: the whole remainder is one unterminated line. */
    if (lf == NULL) {
        *eol = dataend;
        return dataend;
    }

    /* LF is the very first character: the EOL starts with the LF. */
    if (lf == data) {
        *eol = lf;
        return lf + 1;
    }

    /* Ordinary CR-LF: the EOL starts with the CR. */
    if (lf[-1] == '\r') {
        *eol = lf - 1;
        return lf + 1;
    }

    /*
     * Bare LF.  Lines ending in LF-CR have reportedly been seen in HTTP
     * traffic, so a CR directly after the LF is swallowed as part of the
     * terminator.
     */
    *eol = lf;
    if (lf < (dataend - 1) && lf[1] == '\r')
        return lf + 2;

    return lf + 1;
}
93
94
/*
 * Measure the token that starts at "linep" and find where the following
 * token begins.
 *
 * A token ends at the first blank, CR or LF, or at "lineend" (exclusive
 * end of the buffer).  Any blanks after the token are skipped and
 * "*next_token" is set to the first character after them.
 *
 * Returns the token length in bytes; 0 means there is no token.
 */
int
get_token_len(const unsigned char *linep, const unsigned char *lineend,
        const unsigned char **next_token)
{
    const unsigned char *cursor = linep;

    /* Advance to the first delimiter or the end of the buffer. */
    for (; cursor < lineend; cursor++) {
        if (*cursor == ' ' || *cursor == '\r' || *cursor == '\n')
            break;
    }
    const int length = (int)(cursor - linep);

    /* Step over the run of blanks separating this token from the next. */
    while (cursor < lineend && *cursor == ' ')
        cursor++;

    *next_token = cursor;
    return length;
}
125
126
/* Report whether "c" is one of the byte separators '-', ':' or '.'. */
static bool
is_byte_sep(uint8_t c)
{
    switch (c) {
        case '-':
        case ':':
        case '.':
            return true;
        default:
            return false;
    }
}
131
132
/* Turn a string of hex digits with optional separators (defined by
133
 * is_byte_sep() into a byte array.
134
 *
135
 * XXX - This function is perhaps too generous in what it accepts.
136
 * It allows the separator to change from one character to another,
137
 * or to and from no separator if force_separators is false.
138
 */
139
bool
140
hex_str_to_bytes(const char *hex_str, GByteArray *bytes, bool force_separators)
141
515
{
142
515
    uint8_t       val;
143
515
    const char     *p, *q, *r, *s, *punct;
144
515
    char        four_digits_first_half[3];
145
515
    char        four_digits_second_half[3];
146
515
    char        two_digits[3];
147
515
    char        one_digit[2];
148
149
515
    if (! hex_str || ! bytes) {
150
0
        return false;
151
0
    }
152
515
    g_byte_array_set_size(bytes, 0);
153
515
    p = hex_str;
154
3.51k
    while (*p) {
155
3.00k
        q = p+1;
156
3.00k
        r = p+2;
157
3.00k
        s = p+3;
158
159
3.00k
        if (*q && *r
160
3.00k
                && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q) &&
161
3.00k
                g_ascii_isxdigit(*r)) {
162
163
            /*
164
             * Three hex bytes in a row, followed by a non hex byte
165
             * (possibly the end of the string). We don't accept an
166
             * odd number of hex digits except for single digits
167
             * by themselves or after a separator.
168
             */
169
0
            if (!g_ascii_isxdigit(*s)) {
170
0
                return false;
171
0
            }
172
0
            four_digits_first_half[0] = *p;
173
0
            four_digits_first_half[1] = *q;
174
0
            four_digits_first_half[2] = '\0';
175
0
            four_digits_second_half[0] = *r;
176
0
            four_digits_second_half[1] = *s;
177
0
            four_digits_second_half[2] = '\0';
178
179
            /*
180
             * Four or more hex digits in a row.
181
             */
182
0
            val = (uint8_t) strtoul(four_digits_first_half, NULL, 16);
183
0
            g_byte_array_append(bytes, &val, 1);
184
0
            val = (uint8_t) strtoul(four_digits_second_half, NULL, 16);
185
0
            g_byte_array_append(bytes, &val, 1);
186
187
0
            punct = s + 1;
188
0
            if (*punct) {
189
                /*
190
                 * Make sure the character after
191
                 * the fourth hex digit is a byte
192
                 * separator, i.e. that we don't have
193
                 * more than four hex digits, or a
194
                 * bogus character.
195
                 */
196
0
                if (is_byte_sep(*punct)) {
197
0
                    p = punct + 1;
198
0
                    continue;
199
0
                }
200
0
                else if (force_separators) {
201
0
                    return false;
202
0
                }
203
0
            }
204
0
            p = punct;
205
0
            continue;
206
0
        }
207
3.00k
        else if (*q && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q)) {
208
3.00k
            two_digits[0] = *p;
209
3.00k
            two_digits[1] = *q;
210
3.00k
            two_digits[2] = '\0';
211
212
            /*
213
             * Two hex digits in a row.
214
             */
215
3.00k
            val = (uint8_t) strtoul(two_digits, NULL, 16);
216
3.00k
            g_byte_array_append(bytes, &val, 1);
217
3.00k
            punct = q + 1;
218
3.00k
            if (*punct) {
219
                /*
220
                 * Make sure the character after
221
                 * the second hex digit is a byte
222
                 * separator, i.e. that we don't have
223
                 * more than two hex digits, or a
224
                 * bogus character.
225
                 */
226
2.50k
                if (is_byte_sep(*punct)) {
227
2.50k
                    p = punct + 1;
228
2.50k
                    continue;
229
2.50k
                }
230
0
                else if (force_separators) {
231
0
                    return false;
232
0
                }
233
2.50k
            }
234
500
            p = punct;
235
500
            continue;
236
3.00k
        }
237
0
        else if (*q && g_ascii_isxdigit(*p) && is_byte_sep(*q)) {
238
0
            one_digit[0] = *p;
239
0
            one_digit[1] = '\0';
240
241
            /*
242
             * Only one hex digit (not at the end of the string)
243
             */
244
0
            val = (uint8_t) strtoul(one_digit, NULL, 16);
245
0
            g_byte_array_append(bytes, &val, 1);
246
0
            p = q + 1;
247
0
            continue;
248
0
        }
249
0
        else if (!*q && g_ascii_isxdigit(*p)) {
250
0
            one_digit[0] = *p;
251
0
            one_digit[1] = '\0';
252
253
            /*
254
             * Only one hex digit (at the end of the string)
255
             */
256
0
            val = (uint8_t) strtoul(one_digit, NULL, 16);
257
0
            g_byte_array_append(bytes, &val, 1);
258
0
            p = q;
259
0
            continue;
260
0
        }
261
0
        else {
262
0
            return false;
263
0
        }
264
3.00k
    }
265
515
    return true;
266
515
}
267
268
static inline char
269
get_valid_byte_sep(char c, const unsigned encoding)
270
0
{
271
0
    char retval = -1; /* -1 means failure */
272
273
0
    switch (c) {
274
0
        case ':':
275
0
            if (encoding & ENC_SEP_COLON)
276
0
                retval = c;
277
0
            break;
278
0
        case '-':
279
0
            if (encoding & ENC_SEP_DASH)
280
0
                retval = c;
281
0
            break;
282
0
        case '.':
283
0
            if (encoding & ENC_SEP_DOT)
284
0
                retval = c;
285
0
            break;
286
0
        case ' ':
287
0
            if (encoding & ENC_SEP_SPACE)
288
0
                retval = c;
289
0
            break;
290
0
        case '\0':
291
            /* we were given the end of the string, so it's fine */
292
0
            retval = 0;
293
0
            break;
294
0
        default:
295
0
            if (g_ascii_isxdigit(c) && (encoding & ENC_SEP_NONE))
296
0
                retval = 0;
297
            /* anything else means we've got a failure */
298
0
            break;
299
0
    }
300
301
0
    return retval;
302
0
}
303
304
/* Turn a string of hex digits with optional separators (defined by is_byte_sep())
305
 * into a byte array. Unlike hex_str_to_bytes(), this will read as many hex-char
306
 * pairs as possible and not error if it hits a non-hex-char; instead it just ends
307
 * there. (i.e., like strtol()/atoi()/etc.) Unless fail_if_partial is true.
308
 *
309
 * The **endptr, if not NULL, is set to the char after the last hex character.
310
 */
311
bool
312
hex_str_to_bytes_encoding(const char *hex_str, GByteArray *bytes, const char **endptr,
313
                          const unsigned encoding, const bool fail_if_partial)
314
0
{
315
0
    int8_t c, d;
316
0
    uint8_t val;
317
0
    const char *end = hex_str;
318
0
    bool retval = false;
319
0
    char sep = -1;
320
321
    /* a map from ASCII hex chars to their value */
322
0
    static const int8_t str_to_nibble[256] = {
323
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
324
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
325
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
326
0
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
327
0
        -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
328
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
329
0
        -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
330
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
331
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
332
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
333
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
334
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
335
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
336
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
337
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
338
0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
339
0
    };
340
341
    /* we must see two hex chars at the beginning, or fail */
342
0
    if (bytes && *end && g_ascii_isxdigit(*end) && g_ascii_isxdigit(*(end+1))) {
343
0
        retval = true;
344
345
        /* set the separator character we'll allow; if this returns a -1, it means something's
346
         * invalid after the hex, but we'll let the while-loop grab the first hex-pair anyway
347
         */
348
0
        sep = get_valid_byte_sep(*(end+2), encoding);
349
350
0
        while (*end) {
351
0
            c = str_to_nibble[(unsigned char)*end];
352
0
            if (c < 0) {
353
0
                if (fail_if_partial) retval = false;
354
0
                break;
355
0
            }
356
357
0
            d = str_to_nibble[(unsigned char)*(end+1)];
358
0
            if (d < 0) {
359
0
                if (fail_if_partial) retval = false;
360
0
                break;
361
0
            }
362
0
            val = ((uint8_t)c * 16) + d;
363
0
            g_byte_array_append(bytes, &val, 1);
364
0
            end += 2;
365
366
            /* check for separator and peek at next char to make sure we should keep going */
367
0
            if (sep > 0 && *end == sep && str_to_nibble[(unsigned char)*(end+1)] > -1) {
368
                /* yes, it's the right sep and followed by more hex, so skip the sep */
369
0
                ++end;
370
0
            } else if (sep != 0 && *end) {
371
                /* we either need a separator, but we don't see one; or the get_valid_byte_sep()
372
                   earlier didn't find a valid one to begin with */
373
0
                if (fail_if_partial) retval = false;
374
0
                break;
375
0
            }
376
            /* otherwise, either no separator allowed, or *end is null, or *end is an invalid
377
             * sep, or *end is a valid sep but after it is not a hex char - in all those
378
             * cases, just loop back up and let it fail later naturally.
379
             */
380
0
        }
381
0
    }
382
383
0
    if (!retval) {
384
0
        if (bytes) g_byte_array_set_size(bytes, 0);
385
0
        end = hex_str;
386
0
    }
387
388
0
    if (endptr) *endptr = end;
389
390
0
    return retval;
391
0
}
392
393
/*
394
 * Turn an RFC 3986 percent-encoded array of characters, not
395
 * necessarily null-terminated, into a byte array.
396
 * XXX - We don't check for reserved characters.
397
 * XXX - g_uri_unescape_bytes is superior, but limited to
398
 * glib >= 2.66
399
 */
400
#define HEX_DIGIT_BUF_LEN 3
401
bool
402
uri_to_bytes(const char *uri_str, GByteArray *bytes, size_t len)
403
0
{
404
0
    uint8_t       val;
405
0
    const char   *p;
406
0
    const char   *uri_end = uri_str + len;
407
0
    char          hex_digit[HEX_DIGIT_BUF_LEN];
408
409
0
    g_byte_array_set_size(bytes, 0);
410
0
    if (! uri_str) {
411
0
        return false;
412
0
    }
413
414
0
    p = uri_str;
415
416
0
    while (p < uri_end) {
417
0
        if (!g_ascii_isprint(*p))
418
0
            return false;
419
0
        if (*p == '%') {
420
0
            p++;
421
0
            if (*p == '\0') return false;
422
0
            hex_digit[0] = *p;
423
0
            p++;
424
0
            if (*p == '\0') return false;
425
0
            hex_digit[1] = *p;
426
0
            hex_digit[2] = '\0';
427
0
            if (! g_ascii_isxdigit(hex_digit[0]) || ! g_ascii_isxdigit(hex_digit[1]))
428
0
                return false;
429
0
            val = (uint8_t) strtoul(hex_digit, NULL, 16);
430
0
            g_byte_array_append(bytes, &val, 1);
431
0
        } else {
432
0
            g_byte_array_append(bytes, (const uint8_t *) p, 1);
433
0
        }
434
0
        p++;
435
436
0
    }
437
0
    return true;
438
0
}
439
440
/*
441
 * Turn an RFC 3986 percent-encoded string into a byte array.
442
 * XXX - We don't check for reserved characters.
443
 * XXX - Just use g_uri_unescape_string instead?
444
 */
445
bool
446
uri_str_to_bytes(const char *uri_str, GByteArray *bytes)
447
0
{
448
0
    return uri_to_bytes(uri_str, bytes, strlen(uri_str));
449
0
}
450
451
/**
452
 * Create a copy of a GByteArray
453
 *
454
 * @param ba The byte array to be copied.
455
 * @return If ba exists, a freshly allocated copy.  NULL otherwise.
456
 *
457
 */
458
GByteArray *
459
byte_array_dup(const GByteArray *ba)
460
0
{
461
0
    GByteArray *new_ba;
462
463
0
    if (!ba)
464
0
        return NULL;
465
466
0
    new_ba = g_byte_array_new();
467
0
    g_byte_array_append(new_ba, ba->data, ba->len);
468
0
    return new_ba;
469
0
}
470
471
0
#define SUBID_BUF_LEN 5
472
bool
473
oid_str_to_bytes(const char *oid_str, GByteArray *bytes)
474
0
{
475
0
    return rel_oid_str_to_bytes(oid_str, bytes, true);
476
0
}
477
bool
478
rel_oid_str_to_bytes(const char *oid_str, GByteArray *bytes, bool is_absolute)
479
0
{
480
0
    uint32_t subid0, subid, sicnt, i;
481
0
    const char *p, *dot;
482
0
    uint8_t buf[SUBID_BUF_LEN];
483
484
0
    g_byte_array_set_size(bytes, 0);
485
486
    /* check syntax */
487
0
    p = oid_str;
488
0
    dot = NULL;
489
0
    while (*p) {
490
0
        if (!g_ascii_isdigit(*p) && (*p != '.')) return false;
491
0
        if (*p == '.') {
492
0
            if (p == oid_str && is_absolute) return false;
493
0
            if (!*(p+1)) return false;
494
0
            if ((p-1) == dot) return false;
495
0
            dot = p;
496
0
        }
497
0
        p++;
498
0
    }
499
0
    if (!dot) return false;
500
501
0
    p = oid_str;
502
0
    sicnt = is_absolute ? 0 : 2;
503
0
    if (!is_absolute) p++;
504
0
    subid0 = 0;    /* squelch GCC complaints */
505
0
    while (*p) {
506
0
        subid = 0;
507
0
        while (g_ascii_isdigit(*p)) {
508
0
            subid *= 10;
509
0
            subid += *p - '0';
510
0
            p++;
511
0
        }
512
0
        if (sicnt == 0) {
513
0
            subid0 = subid;
514
0
            if (subid0 > 2) return false;
515
0
        } else if (sicnt == 1) {
516
0
            if ((subid0 < 2) && (subid > 39)) return false;
517
0
            subid += 40 * subid0;
518
0
        }
519
0
        if (sicnt) {
520
0
            i = SUBID_BUF_LEN;
521
0
            do {
522
0
                i--;
523
0
                buf[i] = 0x80 | (subid % 0x80);
524
0
                subid >>= 7;
525
0
            } while (subid && i);
526
0
            buf[SUBID_BUF_LEN-1] &= 0x7F;
527
0
            g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
528
0
        }
529
0
        sicnt++;
530
0
        if (*p) p++;
531
0
    }
532
533
0
    return true;
534
0
}
535
536
/**
537
 * Compare the contents of two GByteArrays
538
 *
539
 * @param ba1 A byte array
540
 * @param ba2 A byte array
541
 * @return If both arrays are non-NULL and their lengths are equal and
542
 *         their contents are equal, returns true.  Otherwise, returns
543
 *         false.
544
 *
545
 * XXX - Should this be in strutil.c?
546
 */
547
bool
548
byte_array_equal(GByteArray *ba1, GByteArray *ba2)
549
0
{
550
0
    if (!ba1 || !ba2)
551
0
        return false;
552
553
0
    if (ba1->len != ba2->len)
554
0
        return false;
555
556
0
    if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
557
0
        return false;
558
559
0
    return true;
560
0
}
561
562
563
/* Return a XML escaped representation of the unescaped string.
564
 * The returned string must be freed when no longer in use. */
565
char *
566
xml_escape(const char *unescaped)
567
0
{
568
0
    GString *buffer = g_string_sized_new(128);
569
0
    const char *p;
570
0
    char c;
571
572
0
    p = unescaped;
573
0
    while ( (c = *p++) ) {
574
0
        switch (c) {
575
0
            case '<':
576
0
                g_string_append(buffer, "&lt;");
577
0
                break;
578
0
            case '>':
579
0
                g_string_append(buffer, "&gt;");
580
0
                break;
581
0
            case '&':
582
0
                g_string_append(buffer, "&amp;");
583
0
                break;
584
0
            case '\'':
585
0
                g_string_append(buffer, "&#x27;");
586
0
                break;
587
0
            case '"':
588
0
                g_string_append(buffer, "&quot;");
589
0
                break;
590
0
            case '\t':
591
0
            case '\n':
592
0
            case '\r':
593
0
                g_string_append_c(buffer, c);
594
0
                break;
595
0
            default:
596
                /* XML 1.0 doesn't allow ASCII control characters, except
597
                 * for the three whitespace ones above (which do *not*
598
                 * include '\v' and '\f', so not the same group as isspace),
599
                 * even as character references.
600
                 * There's no official way to escape them, so we'll do this. */
601
0
                if (g_ascii_iscntrl(c)) {
602
0
                    g_string_append_printf(buffer, "\\x%x", c);
603
0
                } else {
604
0
                    g_string_append_c(buffer, c);
605
0
                }
606
0
                break;
607
0
        }
608
0
    }
609
    /* Return the string value contained within the GString
610
     * after getting rid of the GString structure.
611
     * This is the way to do this, see the GLib reference. */
612
0
    return g_string_free(buffer, FALSE);
613
0
}
614
615
/*
 * Convert a textual hex search string into a newly allocated array of
 * raw bytes.
 *
 * White space and the characters ':', '.' and '-' are skipped wherever a
 * byte could start; every byte must be written as two adjacent hex
 * digits.
 *
 * Returns NULL, leaving "*nbytes" untouched, if the string contains
 * anything else or no bytes at all.  Otherwise returns the g_malloc()ed
 * array and stores its length in "*nbytes".
 */
uint8_t *
convert_string_to_hex(const char *string, size_t *nbytes)
{
    size_t count = 0;
    const char *src;

    /* Pass 1: validate the string and count the bytes it describes. */
    for (src = string; *src != '\0'; ) {
        char ch = *src++;

        if (g_ascii_isspace(ch) || ch == ':' || ch == '.' || ch == '-')
            continue;    /* filler between bytes */
        if (!g_ascii_isxdigit(ch))
            return NULL;    /* not a valid hex digit */

        /* Only whole bytes can be matched, never nibbles, so the next
         * character must be a hex digit as well. */
        ch = *src++;
        if (!g_ascii_isxdigit(ch))
            return NULL;

        count++;    /* 2 hex digits = 1 byte */
    }

    /* Nothing but filler (or an empty string). */
    if (count == 0)
        return NULL;

    /* Pass 2: the string is known to be valid, so just convert it. */
    uint8_t *result = (uint8_t *)g_malloc(count);
    uint8_t *dst = result;

    for (src = string; *src != '\0'; ) {
        const char hi = *src++;

        if (g_ascii_isspace(hi) || hi == ':' || hi == '.' || hi == '-')
            continue;

        const char lo = *src++;
        *dst++ = (uint8_t)((ws_xton(hi) << 4) | ws_xton(lo));
    }

    *nbytes = count;
    return result;
}
690
691
/*
 * Return a newly allocated version of "string" for use as a search key:
 * converted to upper case with g_utf8_strup() when the search is
 * case-insensitive, a plain g_strdup() copy otherwise.
 */
char *
convert_string_case(const char *string, bool case_insensitive)
{
    return case_insensitive ? g_utf8_strup(string, -1) : g_strdup(string);
}
705
706
0
#define GN_CHAR_ALPHABET_SIZE 128
707
708
static const gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
709
710
    /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
711
712
    '?', '?', '?', '?', '?', '?', '?', '?',
713
    '?', '?', '?', '?', '?', '?', '?', '?',
714
    '?', '?', '?', '?', '?', '?', '?', '?',
715
    '?', '?', '?', '?', '?', '?', '?', '?',
716
    ' ', '!', '\"','#', '$', '%', '&', '\'',
717
    '(', ')', '*', '+', ',', '-', '.', '/',
718
    '0', '1', '2', '3', '4', '5', '6', '7',
719
    '8', '9', ':', ';', '<', '=', '>', '?',
720
    '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
721
    'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
722
    'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
723
    'X',  'Y',  'Z',  '[',  '\\',  ']',  '^',  '_',
724
    '`', 'a',  'b',  'c',  'd',  'e',  'f',  'g',
725
    'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
726
    'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
727
    'x',  'y',  'z',  '{',  '|',  '}',  '~',  '?'
728
};
729
730
static gunichar
731
char_def_ia5_alphabet_decode(unsigned char value)
732
0
{
733
0
    if (value < GN_CHAR_ALPHABET_SIZE) {
734
0
        return IA5_default_alphabet[value];
735
0
    }
736
0
    else {
737
0
        return '?';
738
0
    }
739
0
}
740
741
void
742
IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
743
0
{
744
0
    int i, j;
745
0
    gunichar buf;
746
747
0
    for (i = 0, j = 0; j < len;  j++) {
748
0
        buf = char_def_ia5_alphabet_decode(src[j]);
749
0
        i += g_unichar_to_utf8(buf,(char*)&(dest[i]));
750
0
    }
751
0
    dest[i]=0;
752
0
}
753
754
/* chars allowed: lower case letters, digits, '-', "_", and ".". */
755
static
756
const uint8_t module_valid_chars_lower_case[256] = {
757
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0F */
758
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1F */
759
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2F '-', '.'      */
760
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3F '0'-'9'       */
761
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4F */
762
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50-0x5F '_' */
763
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F 'a'-'o'       */
764
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7F 'p'-'z'       */
765
    /* upper 128 empty-initialized to 0 */
766
};
767
768
/* chars allowed: alphanumerics, '-', "_", and ".". */
769
static
770
const uint8_t module_valid_chars[256] = {
771
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0F */
772
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1F */
773
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2F '-', '.'      */
774
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3F '0'-'9'       */
775
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F 'A'-'O'       */
776
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5F 'P'-'Z', '_' */
777
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F 'a'-'o'       */
778
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7F 'p'-'z'       */
779
    /* upper 128 empty-initialized to 0 */
780
};
781
782
unsigned char
783
module_check_valid_name(const char *name, bool lower_only)
784
3.78M
{
785
3.78M
    const char *p = name;
786
3.78M
    unsigned char c = '.', lastc;
787
3.78M
    const uint8_t *chars;
788
789
    /* First character cannot be '-'. */
790
3.78M
    if (name[0] == '-')
791
0
        return '-';
792
793
3.78M
    if (lower_only)
794
6.52k
        chars = module_valid_chars_lower_case;
795
3.77M
    else
796
3.77M
        chars = module_valid_chars;
797
798
105M
    do {
799
105M
        lastc = c;
800
105M
        c = *(p++);
801
        /* Leading '.' or substring ".." are disallowed. */
802
105M
        if (c == '.' && lastc == '.') {
803
0
            break;
804
0
        }
805
105M
    } while (chars[c]);
806
807
    /* Trailing '.' is disallowed. */
808
3.78M
    if (lastc == '.') {
809
0
        return '.';
810
0
    }
811
3.78M
    return c;
812
3.78M
}
813
814
static const char _hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
815
                              '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
816
817
/*
818
 * Copy byte by byte without UTF-8 truncation (assume valid UTF-8 input).
819
 * Return byte size written, or that would have been
820
 * written with enough space.
821
 */
822
size_t
823
ws_label_strcpy(char *label_str, size_t buf_size, size_t pos,
824
                const uint8_t *str, int flags)
825
30.4M
{
826
30.4M
    if (pos >= buf_size)
827
3.99k
        return pos;
828
829
30.4M
    uint8_t r = 0;
830
30.4M
    ssize_t chlen;
831
30.4M
    ssize_t idx, src_len;
832
30.4M
    ssize_t free_len;
833
834
30.4M
    label_str[pos] = '\0';
835
836
30.4M
    ws_return_val_if(str == NULL, pos);
837
30.4M
    idx = 0;
838
30.4M
    src_len = strlen((const char*)str);
839
30.4M
    free_len = buf_size - pos - 1;
840
841
655M
    while (idx < src_len) {
842
624M
        chlen = ws_utf8_char_len(str[idx]);
843
624M
        if (chlen <= 0) {
844
            /* We were passed invalid UTF-8. This is an error. Complain and do... something. */
845
0
            ws_log_utf8((char*)str, -1, NULL);
846
            /*
847
             * XXX If we are going to return here instead of trying to recover maybe the log level should
848
             * be higher than DEBUG.
849
             */
850
0
            return pos;
851
0
        }
852
853
        /* ASCII */
854
624M
        if (chlen == 1) {
855
624M
            if (flags & FORMAT_LABEL_REPLACE_SPACE && g_ascii_isspace(str[idx])) {
856
68.1k
                if (free_len >= 1) {
857
68.1k
                    label_str[pos] = ' ';
858
68.1k
                    label_str[pos+1] = '\0';
859
68.1k
                }
860
68.1k
                pos++;
861
68.1k
                idx++;
862
68.1k
                free_len--;
863
68.1k
                continue;
864
68.1k
            }
865
866
624M
            r = 0;
867
624M
            switch (str[idx]) {
868
1.28k
                case '\a': r = 'a'; break;
869
1.23k
                case '\b': r = 'b'; break;
870
3.50k
                case '\f': r = 'f'; break;
871
3.76k
                case '\n': r = 'n'; break;
872
320
                case '\r': r = 'r'; break;
873
1.01k
                case '\t': r = 't'; break;
874
379
                case '\v': r = 'v'; break;
875
624M
            }
876
624M
            if (r != 0) {
877
11.4k
                if (free_len >= 2) {
878
10.2k
                    label_str[pos] = '\\';
879
10.2k
                    label_str[pos+1] = r;
880
10.2k
                    label_str[pos+2] = '\0';
881
10.2k
                }
882
11.4k
                pos += 2;
883
11.4k
                idx += 1;
884
11.4k
                free_len -= 2;
885
11.4k
                continue;
886
11.4k
            }
887
888
624M
            if (g_ascii_isprint(str[idx])) {
889
624M
                if (free_len >= 1) {
890
624M
                    label_str[pos] = str[idx];
891
624M
                    label_str[pos+1] = '\0';
892
624M
                }
893
624M
                pos++;
894
624M
                idx++;
895
624M
                free_len--;
896
624M
                continue;
897
624M
            }
898
899
30.4k
            if (free_len >= 4) {
900
23.7k
                label_str[pos+0] = '\\';
901
23.7k
                label_str[pos+1] = 'x';
902
903
23.7k
                uint8_t ch = str[idx];
904
23.7k
                label_str[pos+2] = _hex[ch >> 4];
905
23.7k
                label_str[pos+3] = _hex[ch & 0x0F];
906
23.7k
                label_str[pos+4] = '\0';
907
23.7k
            }
908
30.4k
            pos += 4;
909
30.4k
            idx += chlen;
910
30.4k
            free_len -= 4;
911
30.4k
            continue;
912
624M
        }
913
914
        /* UTF-8 multibyte */
915
109k
        if (chlen == 2 && str[idx] == 0xC2 &&
916
2.40k
                                str[idx+1] >= 0x80 && str[idx+1] <= 0x9F) {
917
            /*
918
             * Escape the C1 control codes. C0 (covered above) and C1 are
919
             * inband signalling and transparent to Unicode.
920
             * Anything else probably has text semantics should not be removed.
921
             */
922
            /*
923
             * Special case: The second UTF-8 byte is the same as the Unicode
924
             * code point for range U+0080 - U+009F.
925
             */
926
496
            if (free_len >= 6) {
927
397
                label_str[pos+0] = '\\';
928
397
                label_str[pos+1] = 'u';
929
397
                label_str[pos+2] = '0';
930
397
                label_str[pos+3] = '0';
931
932
397
                uint8_t ch = str[idx+1];
933
397
                label_str[pos+4] = _hex[ch >> 4];
934
397
                label_str[pos+5] = _hex[ch & 0x0F];
935
397
                label_str[pos+6] = '\0';
936
397
            }
937
496
            pos += 6;
938
496
            idx += chlen;
939
496
            free_len -= 6;
940
496
            continue;
941
496
        }
942
943
        /* Just copy */
944
109k
        if (free_len >= chlen) {
945
352k
            for (ssize_t j = 0; j < chlen; j++) {
946
262k
                label_str[pos+j] = str[idx+j];
947
262k
            }
948
90.3k
            label_str[pos+chlen] = '\0';
949
90.3k
        }
950
109k
        pos += chlen;
951
109k
        idx += chlen;
952
109k
        free_len -= chlen;
953
109k
    }
954
955
30.4M
    return pos;
956
30.4M
}
957
958
/*
 * Append "str" to the NUL-terminated string already in "label_str" by
 * calling ws_label_strcpy() at the current end of that string.  Returns
 * whatever ws_label_strcpy() returns.
 */
size_t
ws_label_strcat(char *label_str, size_t bufsize, const uint8_t *str, int flags)
{
    const size_t used = strlen(label_str);

    return ws_label_strcpy(label_str, bufsize, used, str, flags);
}
963
964
/*
965
 * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
966
 *
967
 * Local variables:
968
 * c-basic-offset: 4
969
 * tab-width: 8
970
 * indent-tabs-mode: nil
971
 * End:
972
 *
973
 * vi: set shiftwidth=4 tabstop=8 expandtab:
974
 * :indentSize=4:tabSize=8:noTabs=true:
975
 */