Coverage Report

Created: 2026-03-30 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wireshark/wsutil/regex.c
Line
Count
Source
1
/*
2
 * Wireshark - Network traffic analyzer
3
 * By Gerald Combs <gerald@wireshark.org>
4
 * Copyright 1998 Gerald Combs
5
 *
6
 * SPDX-License-Identifier: GPL-2.0-or-later
7
 */
8
9
#include "config.h"
10
11
#include "regex.h"
12
13
#include <wsutil/str_util.h>
14
#include <pcre2.h>
15
16
17
struct _ws_regex {
18
    pcre2_code *code;
19
    char *pattern;
20
};
21
22
0
#define ERROR_MAXLEN_IN_CODE_UNITS   128
23
24
static char *
25
get_error_msg(int errorcode)
26
0
{
27
0
    uint8_t *buffer;
28
29
    /*
30
     * We have to provide a buffer and we don't know how long the
31
     * error message is or even the maximum size. From pcre2api(3):
32
     *     "None of the messages are very long; a
33
     *     buffer size of 120 code units is ample."
34
     */
35
    /* Code unit = one byte */
36
0
    buffer = g_malloc(ERROR_MAXLEN_IN_CODE_UNITS);
37
    /* Message is returned with a trailing zero. */
38
0
    pcre2_get_error_message(errorcode, buffer, ERROR_MAXLEN_IN_CODE_UNITS);
39
    /* One more at the end for good luck. */
40
0
    buffer[ERROR_MAXLEN_IN_CODE_UNITS-1] = '\0';
41
0
    return (char*)buffer;
42
0
}
43
44
45
static pcre2_code *
46
compile_pcre2(const char *patt, ssize_t size, char **errmsg, unsigned flags)
47
0
{
48
0
    pcre2_code *code;
49
0
    int errorcode;
50
0
    PCRE2_SIZE length;
51
0
    PCRE2_SIZE erroroffset;
52
0
    uint32_t options = 0;
53
54
0
    if (size < 0)
55
0
        length = PCRE2_ZERO_TERMINATED;
56
0
    else
57
0
        length = (PCRE2_SIZE)size;
58
59
0
    if (flags & WS_REGEX_NEVER_UTF)
60
0
        options |= PCRE2_NEVER_UTF;
61
0
    if (flags & WS_REGEX_CASELESS)
62
0
        options |= PCRE2_CASELESS;
63
0
    if (flags & WS_REGEX_ANCHORED)
64
0
        options |= PCRE2_ANCHORED;
65
66
    /* By default UTF-8 is off. */
67
0
    code = pcre2_compile_8((PCRE2_SPTR)patt,
68
0
                length,
69
0
                options,
70
0
                &errorcode,
71
0
                &erroroffset,
72
0
                NULL);
73
74
0
    if (code == NULL) {
75
0
        *errmsg = get_error_msg(errorcode);
76
0
        return NULL;
77
0
    }
78
79
0
    return code;
80
0
}
81
82
83
ws_regex_t *
84
ws_regex_compile_ex(const char *patt, ssize_t size, char **errmsg, unsigned flags)
85
0
{
86
0
    ws_return_val_if(!patt, NULL);
87
88
0
    pcre2_code *code = compile_pcre2(patt, size, errmsg, flags);
89
0
    if (code == NULL)
90
0
        return NULL;
91
92
0
    ws_regex_t *re = g_new(ws_regex_t, 1);
93
0
    re->code = code;
94
0
    re->pattern = ws_escape_string_len(NULL, patt, size, false);
95
0
    return re;
96
0
}
97
98
99
/*
 * Convenience wrapper around ws_regex_compile_ex() for a NUL-terminated
 * pattern compiled without any flags.
 */
ws_regex_t *
ws_regex_compile(const char *patt, char **errmsg)
{
    /* A negative size tells the compiler that patt is NUL-terminated. */
    const ssize_t nul_terminated = -1;
    const unsigned no_flags = 0;

    return ws_regex_compile_ex(patt, nul_terminated, errmsg, no_flags);
}
104
105
106
static bool
107
match_pcre2(pcre2_code *code, const char *subject, ssize_t subj_length,
108
                size_t subj_offset, pcre2_match_data *match_data)
109
0
{
110
0
    PCRE2_SIZE length;
111
0
    int rc;
112
113
0
    if (subj_length < 0)
114
0
        length = PCRE2_ZERO_TERMINATED;
115
0
    else
116
0
        length = (PCRE2_SIZE)subj_length;
117
118
0
    rc = pcre2_match(code,
119
0
                    (const uint8_t*)subject,
120
0
                    length,
121
0
                    (PCRE2_SIZE)subj_offset,
122
0
                    0,          /* default options */
123
0
                    match_data,
124
0
                    NULL);
125
126
0
    if (rc < 0) {
127
        /* No match */
128
0
        if (rc != PCRE2_ERROR_NOMATCH) {
129
            /* Error. Should not happen with UTF-8 disabled. Some huge
130
             * subject strings could hit some internal limit. */
131
0
            char *msg = get_error_msg(rc);
132
0
            ws_debug("Unexpected pcre2_match() error: %s.", msg);
133
0
            g_free(msg);
134
0
        }
135
0
        return false;
136
0
    }
137
138
    /* Matched */
139
0
    return true;
140
0
}
141
142
143
bool
144
ws_regex_matches(const ws_regex_t *re, const char *subj)
145
0
{
146
0
    return ws_regex_matches_length(re, subj, -1);
147
0
}
148
149
150
bool
151
ws_regex_matches_length(const ws_regex_t *re,
152
                        const char *subj, ssize_t subj_length)
153
0
{
154
0
    bool matched;
155
0
    pcre2_match_data *match_data;
156
157
0
    ws_return_val_if(!re, false);
158
0
    ws_return_val_if(!subj, false);
159
160
    /* We don't use the matched substring but pcre2_match requires
161
     * at least one pair of offsets. */
162
0
    match_data = pcre2_match_data_create(1, NULL);
163
0
    matched = match_pcre2(re->code, subj, subj_length, 0, match_data);
164
0
    pcre2_match_data_free(match_data);
165
0
    return matched;
166
0
}
167
168
169
bool
170
ws_regex_matches_pos(const ws_regex_t *re,
171
                        const char *subj, ssize_t subj_length,
172
                        size_t subj_offset, size_t pos_vect[2])
173
0
{
174
0
    bool matched;
175
0
    pcre2_match_data *match_data;
176
177
0
    ws_return_val_if(!re, false);
178
0
    ws_return_val_if(!subj, false);
179
180
0
    match_data = pcre2_match_data_create(1, NULL);
181
0
    matched = match_pcre2(re->code, subj, subj_length, subj_offset, match_data);
182
0
    if (matched && pos_vect) {
183
0
        PCRE2_SIZE *ovect = pcre2_get_ovector_pointer(match_data);
184
0
        pos_vect[0] = ovect[0];
185
0
        pos_vect[1] = ovect[1];
186
0
    }
187
0
    pcre2_match_data_free(match_data);
188
0
    return matched;
189
0
}
190
191
192
void
193
ws_regex_free(ws_regex_t *re)
194
0
{
195
0
    pcre2_code_free(re->code);
196
0
    g_free(re->pattern);
197
0
    g_free(re);
198
0
}
199
200
201
const char *
202
ws_regex_pattern(const ws_regex_t *re)
203
0
{
204
0
    return re->pattern;
205
0
}