/src/clamav/libclamav/matcher-pcre.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Support for matcher using PCRE |
3 | | * |
4 | | * Copyright (C) 2013-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. |
5 | | * Copyright (C) 2007-2013 Sourcefire, Inc. |
6 | | * |
7 | | * Authors: Kevin Lin |
8 | | * |
9 | | * This program is free software; you can redistribute it and/or modify |
10 | | * it under the terms of the GNU General Public License version 2 as |
11 | | * published by the Free Software Foundation. |
12 | | * |
13 | | * This program is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | * GNU General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU General Public License |
19 | | * along with this program; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
21 | | * MA 02110-1301, USA. |
22 | | */ |
23 | | |
24 | | #if HAVE_CONFIG_H |
25 | | #include "clamav-config.h" |
26 | | #endif |
27 | | |
28 | | #include "clamav.h" |
29 | | #include "dconf.h" |
30 | | #include "events.h" |
31 | | #include "others.h" |
32 | | #include "matcher.h" |
33 | | #include "matcher-ac.h" |
34 | | #include "matcher-pcre.h" |
35 | | #include "mpool.h" |
36 | | #include "readdb.h" |
37 | | #include "regex_pcre.h" |
38 | | #include "str.h" |
39 | | |
40 | | #if HAVE_PCRE |
41 | | #if USING_PCRE2 |
42 | | #define PCRE2_CODE_UNIT_WIDTH 8 |
43 | | #include <pcre2.h> |
44 | | #else |
45 | | #include <pcre.h> |
46 | | #endif |
47 | | |
48 | | /* DEBUGGING */ |
49 | | //#define MATCHER_PCRE_DEBUG |
50 | | #ifdef MATCHER_PCRE_DEBUG |
51 | | #define pm_dbgmsg(...) cli_dbgmsg(__VA_ARGS__) |
52 | | #else |
53 | | #define pm_dbgmsg(...) |
54 | | #endif |
55 | | #undef MATCHER_PCRE_DEBUG |
56 | | |
57 | | /* PERFORMANCE MACROS AND FUNCTIONS */ |
/*
 * MAX_TRACKED_PCRE:     number of PCRE subsignatures the performance tracker
 *                       reserves room for (may be overridden at build time).
 * PCRE_EVENTS_PER_SIG:  events registered per subsignature (time + matches).
 * MAX_PCRE_SIGEVENT_ID: total number of event slots. The expansion is fully
 *                       parenthesized so that it evaluates as a single value
 *                       inside larger expressions (division, subtraction, ...).
 */
#ifndef MAX_TRACKED_PCRE
#define MAX_TRACKED_PCRE 64
#endif
#define PCRE_EVENTS_PER_SIG 2
#define MAX_PCRE_SIGEVENT_ID ((MAX_TRACKED_PCRE) * (PCRE_EVENTS_PER_SIG))
63 | | |
64 | | cli_events_t *p_sigevents = NULL; |
65 | | unsigned int p_sigid = 0; |
66 | | |
67 | | static void pcre_perf_events_init(struct cli_pcre_meta *pm, const char *virname) |
68 | 0 | { |
69 | 0 | int ret; |
70 | 0 | size_t namelen; |
71 | |
|
72 | 0 | if (!p_sigevents) { |
73 | 0 | p_sigevents = cli_events_new(MAX_PCRE_SIGEVENT_ID); |
74 | 0 | if (!p_sigevents) { |
75 | 0 | cli_errmsg("pcre_perf: no memory for events table\n"); |
76 | 0 | return; |
77 | 0 | } |
78 | 0 | } |
79 | | |
80 | 0 | if (p_sigid > MAX_PCRE_SIGEVENT_ID - PCRE_EVENTS_PER_SIG - 1) { |
81 | 0 | cli_errmsg("pcre_perf: events table full. Increase MAX_TRACKED_PCRE\n"); |
82 | 0 | return; |
83 | 0 | } |
84 | | |
85 | 0 | if (!virname) { |
86 | 0 | virname = "(null)"; |
87 | 0 | namelen = 7; |
88 | 0 | } else { |
89 | 0 | namelen = strlen(virname) + strlen(pm->pdata.expression) + 3; |
90 | 0 | } |
91 | | |
92 | | /* set the name */ |
93 | 0 | pm->statname = (char *)cli_calloc(1, namelen); |
94 | 0 | if (!pm->statname) { |
95 | 0 | return; |
96 | 0 | } |
97 | 0 | snprintf(pm->statname, namelen, "%s/%s/", virname, pm->pdata.expression); |
98 | |
|
99 | 0 | pm_dbgmsg("pcre_perf: adding sig ids starting %u for %s\n", p_sigid, pm->statname); |
100 | | |
101 | | /* register time event */ |
102 | 0 | pm->sigtime_id = p_sigid; |
103 | 0 | ret = cli_event_define(p_sigevents, p_sigid++, pm->statname, ev_time, multiple_sum); |
104 | 0 | if (ret) { |
105 | 0 | cli_errmsg("pcre_perf: cli_event_define() error for time event id %d\n", pm->sigtime_id); |
106 | 0 | pm->sigtime_id = MAX_PCRE_SIGEVENT_ID + 1; |
107 | 0 | return; |
108 | 0 | } |
109 | | |
110 | | /* register match count */ |
111 | 0 | pm->sigmatch_id = p_sigid; |
112 | 0 | ret = cli_event_define(p_sigevents, p_sigid++, pm->statname, ev_int, multiple_sum); |
113 | 0 | if (ret) { |
114 | 0 | cli_errmsg("pcre_perf: cli_event_define() error for matches event id %d\n", pm->sigmatch_id); |
115 | 0 | pm->sigmatch_id = MAX_PCRE_SIGEVENT_ID + 1; |
116 | 0 | return; |
117 | 0 | } |
118 | 0 | } |
119 | | |
/* One row of the PCRE performance report. */
struct sigperf_elem {
    const char *name;          /* statistics name of the subsignature */
    uint64_t usecs;            /* value read from the time event */
    unsigned long run_count;   /* sample count of the time event */
    unsigned long match_count; /* sample count of the match event */
};

/* Average time per run; 0 when the element was never run (avoids a division by zero). */
static uint64_t sigelem_avg_usecs(const struct sigperf_elem *el)
{
    return el->run_count ? el->usecs / el->run_count : 0;
}

/**
 * qsort-style comparator ordering elements by DESCENDING average time per run.
 *
 * The averages are compared explicitly instead of being subtracted: the
 * difference of two uint64_t values does not fit an int and would yield a
 * wrong sign (or a bogus "equal") for large differences.
 *
 * @return <0 if a has the larger average, >0 if b has, 0 if they are equal
 */
static int sigelem_comp(const void *a, const void *b)
{
    const uint64_t avg_a = sigelem_avg_usecs((const struct sigperf_elem *)a);
    const uint64_t avg_b = sigelem_avg_usecs((const struct sigperf_elem *)b);

    if (avg_a > avg_b)
        return -1;
    if (avg_a < avg_b)
        return 1;
    return 0;
}
133 | | |
134 | | void cli_pcre_perf_print() |
135 | 0 | { |
136 | 0 | struct sigperf_elem stats[MAX_TRACKED_PCRE], *elem = stats; |
137 | 0 | int i, elems = 0, max_name_len = 0, name_len; |
138 | |
|
139 | 0 | if (!p_sigid || !p_sigevents) { |
140 | 0 | cli_warnmsg("cli_pcre_perf_print: statistics requested but no PCREs were loaded!\n"); |
141 | 0 | return; |
142 | 0 | } |
143 | | |
144 | 0 | memset(stats, 0, sizeof(stats)); |
145 | 0 | for (i = 0; i < MAX_TRACKED_PCRE; i++) { |
146 | 0 | union ev_val val; |
147 | 0 | uint32_t count; |
148 | 0 | const char *name = cli_event_get_name(p_sigevents, i * PCRE_EVENTS_PER_SIG); |
149 | 0 | cli_event_get(p_sigevents, i * PCRE_EVENTS_PER_SIG, &val, &count); |
150 | 0 | if (!count) { |
151 | 0 | if (name) |
152 | 0 | cli_dbgmsg("No event triggered for %s\n", name); |
153 | 0 | continue; |
154 | 0 | } |
155 | 0 | if (name) |
156 | 0 | name_len = (int)strlen(name); |
157 | 0 | else |
158 | 0 | name_len = 0; |
159 | 0 | if (name_len > max_name_len) |
160 | 0 | max_name_len = name_len; |
161 | 0 | elem->name = name ? name : "\"noname\""; |
162 | 0 | elem->usecs = val.v_int; |
163 | 0 | elem->run_count = count; |
164 | 0 | cli_event_get(p_sigevents, i * PCRE_EVENTS_PER_SIG + 1, &val, &count); |
165 | 0 | elem->match_count = count; |
166 | 0 | elem++; |
167 | 0 | elems++; |
168 | 0 | } |
169 | 0 | if (max_name_len < (int)strlen("PCRE Expression")) |
170 | 0 | max_name_len = (int)strlen("PCRE Expression"); |
171 | |
|
172 | 0 | cli_qsort(stats, elems, sizeof(struct sigperf_elem), sigelem_comp); |
173 | |
|
174 | 0 | elem = stats; |
175 | | /* name runs matches microsecs avg */ |
176 | 0 | cli_infomsg(NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "PCRE Expression", |
177 | 0 | 8, "#runs", 8, "#matches", 12, "usecs total", 9, "usecs avg"); |
178 | 0 | cli_infomsg(NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "===============", |
179 | 0 | 8, "=====", 8, "========", 12, "===========", 9, "========="); |
180 | 0 | while (elem->run_count) { |
181 | 0 | cli_infomsg(NULL, "%-*s %*lu %*lu %*llu %*.2f\n", max_name_len, elem->name, |
182 | 0 | 8, elem->run_count, 8, elem->match_count, |
183 | 0 | 12, (long long unsigned)elem->usecs, 9, (double)elem->usecs / elem->run_count); |
184 | 0 | elem++; |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | void cli_pcre_perf_events_destroy() |
189 | 0 | { |
190 | 0 | cli_events_free(p_sigevents); |
191 | 0 | p_sigid = 0; |
192 | 0 | } |
193 | | |
194 | | /* PCRE MATCHER FUNCTIONS */ |
195 | | cl_error_t cli_pcre_init() |
196 | 47.2k | { |
197 | 47.2k | return cli_pcre_init_internal(); |
198 | 47.2k | } |
199 | | |
200 | | cl_error_t cli_pcre_addpatt(struct cli_matcher *root, const char *virname, const char *trigger, const char *pattern, const char *cflags, const char *offset, const uint32_t *lsigid, unsigned int options) |
201 | 3.47k | { |
202 | 3.47k | struct cli_pcre_meta **newmetatable = NULL, *pm = NULL; |
203 | 3.47k | uint32_t pcre_count; |
204 | 3.47k | const char *opt; |
205 | 3.47k | int ret = CL_SUCCESS, rssigs; |
206 | | |
207 | 3.47k | if (!root || !trigger || !pattern || !offset) { |
208 | 0 | cli_errmsg("cli_pcre_addpatt: NULL root or NULL trigger or NULL pattern or NULL offset\n"); |
209 | 0 | return CL_ENULLARG; |
210 | 0 | } |
211 | | |
212 | | /* TODO: trigger and regex checking (backreference limitations?) (control pattern limitations?) */ |
213 | | /* cli_ac_chklsig will fail an empty trigger; empty patterns can cause an infinite loop */ |
214 | 3.47k | if (*trigger == '\0' || *pattern == '\0') { |
215 | 32 | cli_errmsg("cli_pcre_addpatt: trigger or pattern cannot be an empty string\n"); |
216 | 32 | return CL_EMALFDB; |
217 | 32 | } |
218 | 3.44k | if (cflags && *cflags == '\0') { |
219 | 0 | cflags = NULL; |
220 | 0 | } |
221 | | |
222 | 3.44k | if (lsigid) { |
223 | 3.44k | pm_dbgmsg("cli_pcre_addpatt: Adding /%s/%s%s triggered on (%s) as subsig %d for lsigid %d\n", |
224 | 3.44k | pattern, cflags ? " with flags " : "", cflags ? cflags : "", trigger, lsigid[1], lsigid[0]); |
225 | 3.44k | } else { |
226 | 0 | pm_dbgmsg("cli_pcre_addpatt: Adding /%s/%s%s triggered on (%s) [no lsigid]\n", |
227 | 0 | pattern, cflags ? " with flags " : "", cflags ? cflags : "", trigger); |
228 | 0 | } |
229 | | |
230 | 3.44k | #ifdef PCRE_BYPASS |
231 | | /* check for trigger bypass */ |
232 | 3.44k | if (strcmp(trigger, PCRE_BYPASS)) { |
233 | 1.52k | #endif |
234 | | /* validate the lsig trigger */ |
235 | 1.52k | rssigs = cli_ac_chklsig(trigger, trigger + strlen(trigger), NULL, NULL, NULL, 1); |
236 | 1.52k | if (rssigs == -1) { |
237 | 13 | cli_errmsg("cli_pcre_addpatt: regex subsig /%s/ is missing a valid logical trigger\n", pattern); |
238 | 13 | return CL_EMALFDB; |
239 | 13 | } |
240 | | |
241 | 1.51k | if (lsigid) { |
242 | 1.51k | if ((uint32_t)rssigs > lsigid[1]) { |
243 | 199 | cli_errmsg("cli_pcre_addpatt: regex subsig %d logical trigger refers to subsequent subsig %d\n", lsigid[1], rssigs); |
244 | 199 | return CL_EMALFDB; |
245 | 199 | } |
246 | 1.31k | if ((uint32_t)rssigs == lsigid[1]) { |
247 | 6 | cli_errmsg("cli_pcre_addpatt: regex subsig %d logical trigger is self-referential\n", lsigid[1]); |
248 | 6 | return CL_EMALFDB; |
249 | 6 | } |
250 | 1.31k | } else { |
251 | 0 | cli_dbgmsg("cli_pcre_addpatt: regex subsig is missing lsigid data\n"); |
252 | 0 | } |
253 | 1.51k | #ifdef PCRE_BYPASS |
254 | 1.51k | } |
255 | 3.22k | #endif |
256 | | |
257 | | /* allocating entries */ |
258 | 3.22k | pm = (struct cli_pcre_meta *)MPOOL_CALLOC(root->mempool, 1, sizeof(*pm)); |
259 | 3.22k | if (!pm) { |
260 | 0 | cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for new pcre meta\n"); |
261 | 0 | return CL_EMEM; |
262 | 0 | } |
263 | | |
264 | 3.22k | pm->trigger = CLI_MPOOL_STRDUP(root->mempool, trigger); |
265 | 3.22k | if (!pm->trigger) { |
266 | 0 | cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for trigger string\n"); |
267 | 0 | cli_pcre_freemeta(root, pm); |
268 | 0 | MPOOL_FREE(root->mempool, pm); |
269 | 0 | return CL_EMEM; |
270 | 0 | } |
271 | | |
272 | 3.22k | if (lsigid) { |
273 | 3.22k | pm->lsigid[0] = 1; |
274 | 3.22k | pm->lsigid[1] = lsigid[0]; |
275 | 3.22k | pm->lsigid[2] = lsigid[1]; |
276 | 3.22k | } else { |
277 | | /* sigtool */ |
278 | 0 | pm->lsigid[0] = 0; |
279 | 0 | } |
280 | | |
281 | 3.22k | pm->pdata.expression = strdup(pattern); |
282 | 3.22k | if (!pm->pdata.expression) { |
283 | 0 | cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for expression\n"); |
284 | 0 | cli_pcre_freemeta(root, pm); |
285 | 0 | MPOOL_FREE(root->mempool, pm); |
286 | 0 | return CL_EMEM; |
287 | 0 | } |
288 | | |
289 | | /* offset parsing and usage, similar to cli_ac_addsig */ |
290 | | /* relative and type-specific offsets handled during scan */ |
291 | 3.22k | ret = cli_caloff(offset, NULL, root->type, pm->offdata, &(pm->offset_min), &(pm->offset_max)); |
292 | 3.22k | if (ret != CL_SUCCESS) { |
293 | 10 | cli_errmsg("cli_pcre_addpatt: cannot calculate offset data: %s for pattern: %s\n", offset, pattern); |
294 | 10 | cli_pcre_freemeta(root, pm); |
295 | 10 | MPOOL_FREE(root->mempool, pm); |
296 | 10 | return ret; |
297 | 10 | } |
298 | 3.21k | if (pm->offdata[0] != CLI_OFF_ANY) { |
299 | 2.33k | if (pm->offdata[0] == CLI_OFF_ABSOLUTE) |
300 | 2.13k | root->pcre_absoff_num++; |
301 | 201 | else |
302 | 201 | root->pcre_reloff_num++; |
303 | 2.33k | } |
304 | | |
305 | | /* parse and add options, also totally not from snort */ |
306 | 3.21k | if (cflags) { |
307 | 2.21k | opt = cflags; |
308 | | |
309 | | /* cli_pcre_addoptions handles pcre specific options */ |
310 | 5.88k | while (cli_pcre_addoptions(&(pm->pdata), &opt, 0) != CL_SUCCESS) { |
311 | | /* it will return here to handle any matcher specific options */ |
312 | 3.72k | switch (*opt) { |
313 | 1.30k | case 'g': |
314 | 1.30k | pm->flags |= CLI_PCRE_GLOBAL; |
315 | 1.30k | break; |
316 | 327 | case 'r': |
317 | 327 | pm->flags |= CLI_PCRE_ROLLING; |
318 | 327 | break; |
319 | 2.03k | case 'e': |
320 | 2.03k | pm->flags |= CLI_PCRE_ENCOMPASS; |
321 | 2.03k | break; |
322 | 49 | default: |
323 | 49 | cli_errmsg("cli_pcre_addpatt: unknown/extra pcre option encountered %c\n", *opt); |
324 | 49 | cli_pcre_freemeta(root, pm); |
325 | 49 | MPOOL_FREE(root->mempool, pm); |
326 | 49 | return CL_EMALFDB; |
327 | 3.72k | } |
328 | 3.67k | opt++; |
329 | 3.67k | } |
330 | | |
331 | 2.16k | if (pm->flags) { |
332 | 2.05k | pm_dbgmsg("Matcher: %s%s%s\n", |
333 | 2.05k | pm->flags & CLI_PCRE_GLOBAL ? "CLAMAV_GLOBAL " : "", |
334 | 2.05k | pm->flags & CLI_PCRE_ROLLING ? "CLAMAV_ROLLING " : "", |
335 | 2.05k | pm->flags & CLI_PCRE_ENCOMPASS ? "CLAMAV_ENCOMPASS " : ""); |
336 | 2.05k | } else { |
337 | 110 | pm_dbgmsg("Matcher: NONE\n"); |
338 | 110 | } |
339 | | |
340 | 2.16k | if (pm->pdata.options) { |
341 | 608 | #if USING_PCRE2 |
342 | 608 | pm_dbgmsg("Compiler: %s%s%s%s%s%s%s\n", |
343 | 608 | pm->pdata.options & PCRE2_CASELESS ? "PCRE2_CASELESS " : "", |
344 | 608 | pm->pdata.options & PCRE2_DOTALL ? "PCRE2_DOTALL " : "", |
345 | 608 | pm->pdata.options & PCRE2_MULTILINE ? "PCRE2_MULTILINE " : "", |
346 | 608 | pm->pdata.options & PCRE2_EXTENDED ? "PCRE2_EXTENDED " : "", |
347 | | |
348 | 608 | pm->pdata.options & PCRE2_ANCHORED ? "PCRE2_ANCHORED " : "", |
349 | 608 | pm->pdata.options & PCRE2_DOLLAR_ENDONLY ? "PCRE2_DOLLAR_ENDONLY " : "", |
350 | 608 | pm->pdata.options & PCRE2_UNGREEDY ? "PCRE2_UNGREEDY " : ""); |
351 | | #else |
352 | | pm_dbgmsg("Compiler: %s%s%s%s%s%s%s\n", |
353 | | pm->pdata.options & PCRE_CASELESS ? "PCRE_CASELESS " : "", |
354 | | pm->pdata.options & PCRE_DOTALL ? "PCRE_DOTALL " : "", |
355 | | pm->pdata.options & PCRE_MULTILINE ? "PCRE_MULTILINE " : "", |
356 | | pm->pdata.options & PCRE_EXTENDED ? "PCRE_EXTENDED " : "", |
357 | | |
358 | | pm->pdata.options & PCRE_ANCHORED ? "PCRE_ANCHORED " : "", |
359 | | pm->pdata.options & PCRE_DOLLAR_ENDONLY ? "PCRE_DOLLAR_ENDONLY " : "", |
360 | | pm->pdata.options & PCRE_UNGREEDY ? "PCRE_UNGREEDY " : ""); |
361 | | #endif |
362 | 1.55k | } else { |
363 | 1.55k | pm_dbgmsg("Compiler: NONE\n"); |
364 | 1.55k | } |
365 | 2.16k | } |
366 | | |
367 | | /* add metadata to the performance tracker */ |
368 | 3.16k | if (options & CL_DB_PCRE_STATS) |
369 | 0 | pcre_perf_events_init(pm, virname); |
370 | | |
371 | | /* add pcre data to root after reallocation */ |
372 | 3.16k | pcre_count = root->pcre_metas + 1; |
373 | 3.16k | newmetatable = (struct cli_pcre_meta **)MPOOL_REALLOC(root->mempool, root->pcre_metatable, |
374 | 3.16k | pcre_count * sizeof(struct cli_pcre_meta *)); |
375 | 3.16k | if (!newmetatable) { |
376 | 0 | cli_errmsg("cli_pcre_addpatt: Unable to allocate memory for new pcre meta table\n"); |
377 | 0 | cli_pcre_freemeta(root, pm); |
378 | 0 | MPOOL_FREE(root->mempool, pm); |
379 | 0 | return CL_EMEM; |
380 | 0 | } |
381 | | |
382 | 3.16k | newmetatable[pcre_count - 1] = pm; |
383 | 3.16k | root->pcre_metatable = newmetatable; |
384 | | |
385 | 3.16k | root->pcre_metas = pcre_count; |
386 | | |
387 | 3.16k | return CL_SUCCESS; |
388 | 3.16k | } |
389 | | |
390 | | cl_error_t cli_pcre_build(struct cli_matcher *root, long long unsigned match_limit, long long unsigned recmatch_limit, const struct cli_dconf *dconf) |
391 | 309k | { |
392 | 309k | unsigned int i; |
393 | 309k | cl_error_t ret; |
394 | 309k | struct cli_pcre_meta *pm = NULL; |
395 | 309k | int disable_all = 0; |
396 | | |
397 | 309k | if (dconf && !(dconf->pcre & PCRE_CONF_SUPPORT)) |
398 | 375 | disable_all = 1; |
399 | | |
400 | 310k | for (i = 0; i < root->pcre_metas; ++i) { |
401 | 1.10k | pm = root->pcre_metatable[i]; |
402 | 1.10k | if (!pm) { |
403 | 0 | cli_errmsg("cli_pcre_build: metadata for pcre %d is missing\n", i); |
404 | 0 | return CL_ENULLARG; |
405 | 0 | } |
406 | | |
407 | | /* for safety, disable all pcre */ |
408 | 1.10k | if (disable_all) { |
409 | 0 | pm->flags |= CLI_PCRE_DISABLED; |
410 | 0 | continue; |
411 | 0 | } |
412 | | |
413 | 1.10k | if (pm->flags & CLI_PCRE_DISABLED) { |
414 | 0 | cli_dbgmsg("cli_pcre_build: Skip compiling regex: %s (disabled)\n", pm->pdata.expression); |
415 | 0 | continue; |
416 | 0 | } |
417 | | |
418 | | /* disable global */ |
419 | 1.10k | if (dconf && !(dconf->pcre & PCRE_CONF_GLOBAL)) { |
420 | 0 | cli_dbgmsg("cli_pcre_build: disabling global option for regex /%s/\n", pm->pdata.expression); |
421 | 0 | pm->flags &= ~(CLI_PCRE_GLOBAL); |
422 | 0 | } |
423 | | |
424 | | /* options override through metadata manipulation */ |
425 | 1.10k | #if USING_PCRE2 |
426 | | // pm->pdata.options |= PCRE2_NEVER_UTF; /* disables (?UTF*) potential security vuln */ |
427 | | // pm->pdata.options |= PCRE2_UCP; |
428 | | // pm->pdata.options |= PCRE2_AUTO_CALLOUT; /* used with CALLOUT(-BACK) function */ |
429 | | #else |
430 | | // pm->pdata.options |= PCRE_NEVER_UTF; /* implemented in 8.33, disables (?UTF*) potential security vuln */ |
431 | | // pm->pdata.options |= PCRE_UCP;/* implemented in 8.20 */ |
432 | | // pm->pdata.options |= PCRE_AUTO_CALLOUT; /* used with CALLOUT(-BACK) function */ |
433 | | #endif |
434 | | |
435 | 1.10k | if (dconf && (dconf->pcre & PCRE_CONF_OPTIONS)) { |
436 | | /* compile the regex, no options override *wink* */ |
437 | 1.10k | pm_dbgmsg("cli_pcre_build: Compiling regex: /%s/\n", pm->pdata.expression); |
438 | 1.10k | ret = cli_pcre_compile(&(pm->pdata), match_limit, recmatch_limit, 0, 0); |
439 | 1.10k | } else { |
440 | | /* compile the regex, options overridden and disabled */ |
441 | 0 | pm_dbgmsg("cli_pcre_build: Compiling regex: /%s/ (without options)\n", pm->pdata.expression); |
442 | 0 | ret = cli_pcre_compile(&(pm->pdata), match_limit, recmatch_limit, 0, 1); |
443 | 0 | } |
444 | 1.10k | if (ret != CL_SUCCESS) { |
445 | 47 | cli_errmsg("cli_pcre_build: failed to build pcre regex\n"); |
446 | 47 | pm->flags |= CLI_PCRE_DISABLED; /* disable the pcre, currently will terminate execution */ |
447 | 47 | return ret; |
448 | 47 | } |
449 | 1.10k | } |
450 | | |
451 | 309k | return CL_SUCCESS; |
452 | 309k | } |
453 | | |
454 | | /* TODO - handle VI and Macro offset types */ |
455 | | cl_error_t cli_pcre_recaloff(struct cli_matcher *root, struct cli_pcre_off *data, struct cli_target_info *info, cli_ctx *ctx) |
456 | 25.0M | { |
457 | | /* TANGENT: maintain relative offset data in cli_ac_data? */ |
458 | 25.0M | cl_error_t ret; |
459 | 25.0M | unsigned int i; |
460 | 25.0M | struct cli_pcre_meta *pm; |
461 | 25.0M | uint32_t endoff; |
462 | | |
463 | 25.0M | if (!data) { |
464 | 0 | return CL_ENULLARG; |
465 | 0 | } |
466 | | |
467 | 25.0M | if (!root || !root->pcre_metatable || !info || (ctx && ctx->dconf && !(ctx->dconf->pcre & PCRE_CONF_SUPPORT))) { |
468 | 25.0M | data->shift = NULL; |
469 | 25.0M | data->offset = NULL; |
470 | 25.0M | return CL_SUCCESS; |
471 | 25.0M | } |
472 | | |
473 | | /* allocate data structures */ |
474 | 0 | data->shift = (uint32_t *)cli_calloc(root->pcre_metas, sizeof(uint32_t)); |
475 | 0 | if (!data->shift) { |
476 | 0 | cli_errmsg("cli_pcre_initoff: cannot allocate memory for data->shift\n"); |
477 | 0 | return CL_EMEM; |
478 | 0 | } |
479 | 0 | data->offset = (uint32_t *)cli_calloc(root->pcre_metas, sizeof(uint32_t)); |
480 | 0 | if (!data->offset) { |
481 | 0 | cli_errmsg("cli_pcre_initoff: cannot allocate memory for data->offset\n"); |
482 | 0 | free(data->shift); |
483 | 0 | return CL_EMEM; |
484 | 0 | } |
485 | | |
486 | 0 | pm_dbgmsg("CLI_OFF_NONE: %u\n", CLI_OFF_NONE); |
487 | 0 | pm_dbgmsg("CLI_OFF_ANY: %u\n", CLI_OFF_ANY); |
488 | | |
489 | | /* iterate across all pcre metadata and recalc offsets */ |
490 | 0 | for (i = 0; i < root->pcre_metas; ++i) { |
491 | 0 | pm = root->pcre_metatable[i]; |
492 | | |
493 | | /* skip broken pcres, not getting executed anyways */ |
494 | 0 | if (pm->flags & CLI_PCRE_DISABLED) { |
495 | 0 | data->offset[i] = CLI_OFF_NONE; |
496 | 0 | data->shift[i] = 0; |
497 | 0 | continue; |
498 | 0 | } |
499 | | |
500 | 0 | if (pm->offdata[0] == CLI_OFF_ANY) { |
501 | 0 | data->offset[i] = CLI_OFF_ANY; |
502 | 0 | data->shift[i] = 0; |
503 | 0 | } else if (pm->offdata[0] == CLI_OFF_NONE) { |
504 | 0 | data->offset[i] = CLI_OFF_NONE; |
505 | 0 | data->shift[i] = 0; |
506 | 0 | } else if (pm->offdata[0] == CLI_OFF_ABSOLUTE) { |
507 | 0 | data->offset[i] = pm->offdata[1]; |
508 | 0 | data->shift[i] = pm->offdata[2]; |
509 | 0 | } else { |
510 | 0 | ret = cli_caloff(NULL, info, root->type, pm->offdata, &data->offset[i], &endoff); |
511 | 0 | if (ret != CL_SUCCESS) { |
512 | 0 | cli_errmsg("cli_pcre_recaloff: cannot recalculate relative offset for signature\n"); |
513 | 0 | free(data->shift); |
514 | 0 | free(data->offset); |
515 | 0 | return ret; |
516 | 0 | } |
517 | | /* CLI_OFF_NONE gets passed down, CLI_OFF_ANY gets reinterpreted */ |
518 | | /* TODO - CLI_OFF_VERSION is interpreted as CLI_OFF_ANY(?) */ |
519 | 0 | if (data->offset[i] == CLI_OFF_ANY) { |
520 | 0 | data->offset[i] = CLI_OFF_ANY; |
521 | 0 | data->shift[i] = 0; |
522 | 0 | } else { |
523 | 0 | data->shift[i] = endoff - (data->offset[i]); |
524 | 0 | } |
525 | 0 | } |
526 | | |
527 | 0 | pm_dbgmsg("%u: %u %u->%u(+%u)\n", i, pm->offdata[0], data->offset[i], |
528 | 0 | data->offset[i] + data->shift[i], data->shift[i]); |
529 | 0 | } |
530 | | |
531 | 0 | return CL_SUCCESS; |
532 | 0 | } |
533 | | |
534 | | void cli_pcre_freeoff(struct cli_pcre_off *data) |
535 | 25.0M | { |
536 | 25.0M | if (data) { |
537 | 25.0M | free(data->offset); |
538 | 25.0M | data->offset = NULL; |
539 | 25.0M | free(data->shift); |
540 | 25.0M | data->shift = NULL; |
541 | 25.0M | } |
542 | 25.0M | } |
543 | | |
544 | | int cli_pcre_qoff(struct cli_pcre_meta *pm, uint32_t length, uint32_t *adjbuffer, uint32_t *adjshift) |
545 | 0 | { |
546 | 0 | if (!pm) |
547 | 0 | return CL_ENULLARG; |
548 | | |
549 | | /* default to scanning whole buffer but try to use existing offdata */ |
550 | 0 | if (pm->offdata[0] == CLI_OFF_NONE) { |
551 | 0 | return CL_BREAK; |
552 | 0 | } else if (pm->offdata[0] == CLI_OFF_ANY) { |
553 | 0 | *adjbuffer = CLI_OFF_ANY; |
554 | 0 | *adjshift = 0; |
555 | 0 | } else if (pm->offdata[0] == CLI_OFF_ABSOLUTE) { |
556 | 0 | *adjbuffer = pm->offdata[1]; |
557 | 0 | *adjshift = pm->offdata[2]; |
558 | 0 | } else if (pm->offdata[0] == CLI_OFF_EOF_MINUS) { |
559 | 0 | *adjbuffer = length - pm->offdata[1]; |
560 | 0 | *adjshift = pm->offdata[2]; |
561 | 0 | } else { |
562 | | /* all relative offsets */ |
563 | | /* TODO - check if relative offsets apply for normal hex substrs */ |
564 | 0 | *adjbuffer = 0; |
565 | 0 | *adjshift = 0; |
566 | 0 | } |
567 | | |
568 | 0 | return CL_SUCCESS; |
569 | 0 | } |
570 | | |
571 | | cl_error_t cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const char **virname, struct cli_ac_result **res, const struct cli_matcher *root, struct cli_ac_data *mdata, const struct cli_pcre_off *data, cli_ctx *ctx) |
572 | 0 | { |
573 | 0 | cl_error_t ret = CL_SUCCESS; |
574 | |
|
575 | 0 | struct cli_pcre_meta *pm = NULL; |
576 | 0 | struct cli_pcre_data *pd; |
577 | 0 | struct cli_pcre_results p_res; |
578 | 0 | struct cli_ac_result *newres; |
579 | 0 | uint32_t adjbuffer, adjshift, adjlength; |
580 | 0 | unsigned int i, evalcnt = 0; |
581 | 0 | uint64_t evalids = 0; |
582 | 0 | uint32_t global, encompass, rolling; |
583 | 0 | int rc = 0; |
584 | 0 | int options = 0; |
585 | 0 | uint32_t offset = 0; |
586 | |
|
587 | 0 | if ((root->pcre_metas == 0) || (!root->pcre_metatable) || (ctx && ctx->dconf && !(ctx->dconf->pcre & PCRE_CONF_SUPPORT))) |
588 | 0 | return CL_SUCCESS; |
589 | | |
590 | 0 | memset(&p_res, 0, sizeof(p_res)); |
591 | |
|
592 | 0 | for (i = 0; i < root->pcre_metas; ++i) { |
593 | |
|
594 | 0 | pm = root->pcre_metatable[i]; |
595 | 0 | pd = &(pm->pdata); |
596 | | |
597 | | /* skip checking and running disabled pcres */ |
598 | 0 | if (pm->flags & CLI_PCRE_DISABLED) { |
599 | 0 | cli_dbgmsg("cli_pcre_scanbuf: skipping disabled regex /%s/\n", pd->expression); |
600 | 0 | continue; |
601 | 0 | } |
602 | | |
603 | | /* skip checking and running CLI_OFF_NONE pcres */ |
604 | 0 | if (data && data->offset[i] == CLI_OFF_NONE) { |
605 | 0 | pm_dbgmsg("cli_pcre_scanbuf: skipping CLI_OFF_NONE regex /%s/\n", pd->expression); |
606 | 0 | continue; |
607 | 0 | } |
608 | | |
609 | | /* evaluate trigger */ |
610 | 0 | if (pm->lsigid[0]) { |
611 | 0 | pm_dbgmsg("cli_pcre_scanbuf: checking %s; running regex /%s/\n", pm->trigger, pd->expression); |
612 | 0 | #ifdef PCRE_BYPASS |
613 | 0 | if (strcmp(pm->trigger, PCRE_BYPASS)) |
614 | 0 | #endif |
615 | 0 | if (cli_ac_chklsig(pm->trigger, pm->trigger + strlen(pm->trigger), mdata->lsigcnt[pm->lsigid[1]], &evalcnt, &evalids, 0) != 1) |
616 | 0 | continue; |
617 | 0 | } else { |
618 | 0 | cli_dbgmsg("cli_pcre_scanbuf: skipping %s check due to uninitialized lsigid\n", pm->trigger); |
619 | | /* fall-through to unconditional execution - sigtool-only */ |
620 | 0 | } |
621 | | |
622 | 0 | global = (pm->flags & CLI_PCRE_GLOBAL); /* globally search for all matches (within bounds) */ |
623 | 0 | encompass = (pm->flags & CLI_PCRE_ENCOMPASS); /* encompass search to offset->offset+maxshift */ |
624 | 0 | rolling = (pm->flags & CLI_PCRE_ROLLING); /* rolling search (unanchored) */ |
625 | 0 | offset = pd->search_offset; /* this is usually 0 */ |
626 | |
|
627 | 0 | pm_dbgmsg("cli_pcre_scanbuf: triggered %s; running regex /%s/%s%s\n", pm->trigger, pd->expression, |
628 | 0 | global ? " (global)" : "", rolling ? " (rolling)" : ""); |
629 | | |
630 | | /* adjust the buffer sent to cli_pcre_match for offset and maxshift */ |
631 | 0 | if (!data) { |
632 | 0 | if (cli_pcre_qoff(pm, length, &adjbuffer, &adjshift) != CL_SUCCESS) |
633 | 0 | continue; |
634 | 0 | } else { |
635 | 0 | adjbuffer = data->offset[i]; |
636 | 0 | adjshift = data->shift[i]; |
637 | 0 | } |
638 | | |
639 | | /* check for need to anchoring */ |
640 | 0 | if (!rolling && !adjshift && (adjbuffer != CLI_OFF_ANY)) |
641 | 0 | #if USING_PCRE2 |
642 | 0 | options |= PCRE2_ANCHORED; |
643 | | #else |
644 | | options |= PCRE_ANCHORED; |
645 | | #endif |
646 | 0 | else |
647 | 0 | options = 0; |
648 | |
|
649 | 0 | if (adjbuffer == CLI_OFF_ANY) |
650 | 0 | adjbuffer = 0; |
651 | | |
652 | | /* check the offset bounds */ |
653 | 0 | if (adjbuffer < length) { |
654 | | /* handle encompass flag */ |
655 | 0 | if (encompass && adjshift != 0 && adjshift != CLI_OFF_NONE) { |
656 | 0 | if (adjbuffer + adjshift > length) |
657 | 0 | adjlength = length - adjbuffer; |
658 | 0 | else |
659 | 0 | adjlength = adjshift; |
660 | 0 | } else { |
661 | | /* NOTE - if using non-encompass method 2, alter shift universally */ |
662 | | /* TODO - limitations on non-encompassed buffers? */ |
663 | 0 | adjlength = length - adjbuffer; |
664 | 0 | } |
665 | 0 | } else { |
666 | | /* starting offset is outside bounds of file, skip pcre execution silently */ |
667 | 0 | pm_dbgmsg("cli_pcre_scanbuf: starting offset is outside bounds of file %u >= %u\n", adjbuffer, length); |
668 | 0 | continue; |
669 | 0 | } |
670 | | |
671 | 0 | pm_dbgmsg("cli_pcre_scanbuf: passed buffer adjusted to %u +%u(%u)[%u]%s\n", adjbuffer, adjlength, adjbuffer + adjlength, adjshift, encompass ? " (encompass)" : ""); |
672 | | |
673 | | /* if the global flag is set, loop through the scanning */ |
674 | 0 | do { |
675 | 0 | if (cli_checktimelimit(ctx) != CL_SUCCESS) { |
676 | 0 | cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime); |
677 | 0 | ret = CL_ETIMEOUT; |
678 | 0 | break; |
679 | 0 | } |
680 | | |
681 | | /* reset the match results */ |
682 | 0 | if ((ret = cli_pcre_results_reset(&p_res, pd)) != CL_SUCCESS) |
683 | 0 | break; |
684 | | |
685 | | /* performance metrics */ |
686 | 0 | cli_event_time_start(p_sigevents, pm->sigtime_id); |
687 | 0 | rc = cli_pcre_match(pd, buffer + adjbuffer, adjlength, offset, options, &p_res); |
688 | 0 | cli_event_time_stop(p_sigevents, pm->sigtime_id); |
689 | | /* if debug, generate a match report */ |
690 | 0 | if (cli_debug_flag) |
691 | 0 | cli_pcre_report(pd, buffer + adjbuffer, adjlength, rc, &p_res); |
692 | | |
693 | | /* matched, rc shouldn't be >0 unless a full match occurs */ |
694 | 0 | if (rc > 0) { |
695 | 0 | cli_dbgmsg("cli_pcre_scanbuf: located regex match @ %d\n", adjbuffer + p_res.match[0]); |
696 | | |
697 | | /* check if we've gone over offset+shift */ |
698 | 0 | if (!encompass && adjshift) { |
699 | 0 | if (p_res.match[0] > adjshift) { |
700 | | /* ignore matched offset (outside of maxshift) */ |
701 | 0 | cli_dbgmsg("cli_pcre_scanbuf: match found outside of maxshift @%u\n", adjbuffer + p_res.match[0]); |
702 | 0 | break; |
703 | 0 | } |
704 | 0 | } |
705 | | |
706 | | /* track the detection count */ |
707 | 0 | cli_event_count(p_sigevents, pm->sigmatch_id); |
708 | | |
709 | | /* for logical signature evaluation */ |
710 | |
|
711 | 0 | if (pm->lsigid[0]) { |
712 | 0 | pm_dbgmsg("cli_pcre_scanbuf: assigning lsigcnt[%d][%d], located @ %d\n", |
713 | 0 | pm->lsigid[1], pm->lsigid[2], adjbuffer + p_res.match[0]); |
714 | |
|
715 | 0 | ret = lsig_sub_matched(root, mdata, pm->lsigid[1], pm->lsigid[2], adjbuffer + p_res.match[0], 0); |
716 | 0 | if (ret != CL_SUCCESS) { |
717 | 0 | break; |
718 | 0 | } |
719 | 0 | } else { |
720 | | /* for raw match data - sigtool only */ |
721 | 0 | if (res) { |
722 | 0 | newres = (struct cli_ac_result *)cli_calloc(1, sizeof(struct cli_ac_result)); |
723 | 0 | if (!newres) { |
724 | 0 | cli_errmsg("cli_pcre_scanbuff: Can't allocate memory for new result\n"); |
725 | 0 | ret = CL_EMEM; |
726 | 0 | break; |
727 | 0 | } |
728 | 0 | newres->virname = "test"; |
729 | 0 | newres->customdata = NULL; /* get value? */ |
730 | 0 | newres->next = *res; |
731 | 0 | newres->offset = adjbuffer + p_res.match[0]; |
732 | 0 | *res = newres; |
733 | 0 | } else { |
734 | 0 | ret = CL_VIRUS; |
735 | |
|
736 | 0 | if (virname) { |
737 | 0 | *virname = "test"; |
738 | 0 | } |
739 | | |
740 | | // ctx is not provided in the unit tests. |
741 | 0 | if (ctx) { |
742 | 0 | ret = cli_append_virus(ctx, "test"); |
743 | 0 | if (ret != CL_SUCCESS) { |
744 | 0 | break; |
745 | 0 | } |
746 | 0 | } |
747 | 0 | } |
748 | 0 | } |
749 | 0 | } |
750 | | |
751 | | /* move off to the end of the match for next match; offset is relative to adjbuffer |
752 | | * NOTE: misses matches starting within the last match; TODO: start from start of last match? */ |
753 | 0 | offset = p_res.match[1]; |
754 | |
|
755 | 0 | } while ((global && rc > 0) && (offset < adjlength)); |
756 | | |
757 | | /* handle error code */ |
758 | 0 | if (rc < 0 && p_res.err != CL_SUCCESS) { |
759 | 0 | ret = p_res.err; |
760 | 0 | } |
761 | | |
762 | | /* jumps out of main loop from 'global' loop */ |
763 | 0 | if (ret != CL_SUCCESS) { |
764 | 0 | break; |
765 | 0 | } |
766 | 0 | } |
767 | | |
768 | | /* free match results */ |
769 | 0 | cli_pcre_results_free(&p_res); |
770 | |
|
771 | 0 | return ret; |
772 | 0 | } |
773 | | |
774 | | void cli_pcre_freemeta(struct cli_matcher *root, struct cli_pcre_meta *pm) |
775 | 3.22k | { |
776 | 3.22k | if (!pm) |
777 | 0 | return; |
778 | | |
779 | 3.22k | #ifndef USE_MPOOL |
780 | 3.22k | UNUSEDPARAM(root); |
781 | 3.22k | #endif |
782 | | |
783 | 3.22k | if (pm->trigger) { |
784 | 3.22k | MPOOL_FREE(root->mempool, pm->trigger); |
785 | 3.22k | pm->trigger = NULL; |
786 | 3.22k | } |
787 | | |
788 | 3.22k | if (pm->statname) { |
789 | 0 | free(pm->statname); |
790 | 0 | pm->statname = NULL; |
791 | 0 | } |
792 | | |
793 | 3.22k | cli_pcre_free_single(&(pm->pdata)); |
794 | 3.22k | } |
795 | | |
796 | | void cli_pcre_freetable(struct cli_matcher *root) |
797 | 536k | { |
798 | 536k | uint32_t i; |
799 | 536k | struct cli_pcre_meta *pm = NULL; |
800 | | |
801 | 539k | for (i = 0; i < root->pcre_metas; ++i) { |
802 | | /* free pcre meta */ |
803 | 3.16k | pm = root->pcre_metatable[i]; |
804 | 3.16k | cli_pcre_freemeta(root, pm); |
805 | 3.16k | MPOOL_FREE(root->mempool, pm); |
806 | 3.16k | } |
807 | | |
808 | | /* free holding structures and set count to zero */ |
809 | 536k | MPOOL_FREE(root->mempool, root->pcre_metatable); |
810 | 536k | root->pcre_metatable = NULL; |
811 | 536k | root->pcre_metas = 0; |
812 | 536k | } |
813 | | |
814 | | #else |
815 | | /* NO-PCRE FUNCTIONS */ |
/* Built without PCRE: there are no statistics to print, only log an error. */
void cli_pcre_perf_print()
{
    cli_errmsg("cli_pcre_perf_print: Cannot print PCRE performance results without PCRE support\n");
}
821 | | |
/* Built without PCRE: there is no events table to destroy, only log an error. */
void cli_pcre_perf_events_destroy()
{
    cli_errmsg("cli_pcre_perf_events_destroy: Cannot destroy PCRE performance results without PCRE support\n");
}
827 | | |
828 | | cl_error_t cli_pcre_init() |
829 | | { |
830 | | cli_errmsg("cli_pcre_init: Cannot initialize PCRE without PCRE support\n"); |
831 | | return CL_SUCCESS; |
832 | | } |
833 | | |
834 | | cl_error_t cli_pcre_build(struct cli_matcher *root, long long unsigned match_limit, long long unsigned recmatch_limit, const struct cli_dconf *dconf) |
835 | | { |
836 | | UNUSEDPARAM(root); |
837 | | UNUSEDPARAM(match_limit); |
838 | | UNUSEDPARAM(recmatch_limit); |
839 | | UNUSEDPARAM(dconf); |
840 | | |
841 | | cli_errmsg("cli_pcre_build: Cannot build PCRE expression without PCRE support\n"); |
842 | | return CL_SUCCESS; |
843 | | } |
844 | | |
845 | | cl_error_t cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const char **virname, struct cli_ac_result **res, const struct cli_matcher *root, struct cli_ac_data *mdata, const struct cli_pcre_off *data, cli_ctx *ctx) |
846 | | { |
847 | | UNUSEDPARAM(buffer); |
848 | | UNUSEDPARAM(length); |
849 | | UNUSEDPARAM(virname); |
850 | | UNUSEDPARAM(res); |
851 | | UNUSEDPARAM(root); |
852 | | UNUSEDPARAM(mdata); |
853 | | UNUSEDPARAM(data); |
854 | | UNUSEDPARAM(ctx); |
855 | | |
856 | | cli_errmsg("cli_pcre_scanbuf: Cannot scan buffer with PCRE expression without PCRE support\n"); |
857 | | return CL_SUCCESS; |
858 | | } |
859 | | |
860 | | cl_error_t cli_pcre_recaloff(struct cli_matcher *root, struct cli_pcre_off *data, struct cli_target_info *info, cli_ctx *ctx) |
861 | | { |
862 | | UNUSEDPARAM(root); |
863 | | UNUSEDPARAM(info); |
864 | | UNUSEDPARAM(ctx); |
865 | | if (data) { |
866 | | data->offset = NULL; |
867 | | data->shift = NULL; |
868 | | } |
869 | | return CL_SUCCESS; |
870 | | } |
871 | | |
/* Built without PCRE: no offset arrays are ever allocated, so there is nothing to free. */
void cli_pcre_freeoff(struct cli_pcre_off *data)
{
    UNUSEDPARAM(data);
}
877 | | |
878 | | #endif /* HAVE_PCRE */ |