Coverage Report

Created: 2026-06-07 07:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/suricata7/src/util-spm-bm.c
Line
Count
Source
1
/* Copyright (C) 2007-2014 Open Information Security Foundation
2
 *
3
 * You can copy, redistribute or modify this Program under the terms of
4
 * the GNU General Public License version 2 as published by the Free
5
 * Software Foundation.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * version 2 along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15
 * 02110-1301, USA.
16
 */
17
18
/**
19
 * \file
20
 *
21
 * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22
 *
23
 * Boyer Moore simple pattern matcher implementation
24
 *
25
 * Boyer Moore algorithm has a really good performance. It need two arrays
26
 * of context for each pattern that hold applicable shifts on the text
27
 * to search in, based on characters not available in the pattern
28
 * and combinations of characters that start a suffix of the pattern.
29
 * If possible, we should store the context of patterns that we are going
30
 * to search for multiple times, so we don't spend time on rebuilding them.
31
 */
32
33
#include "suricata-common.h"
34
#include "suricata.h"
35
36
#include "util-spm-bm.h"
37
#include "util-spm.h"
38
#include "util-debug.h"
39
#include "util-error.h"
40
#include "util-memcpy.h"
41
#include "util-validate.h"
42
43
static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);
44
static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
45
static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
46
static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, 
47
                                     uint16_t *suff);
48
static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
49
50
/**
51
 * \brief Given a BmCtx structure, recreate the pre/suffixes for
52
 *        nocase
53
 *
54
 * \retval BmCtx pointer to the already created BmCtx (with BoyerMooreCtxInit())
55
 * \param str pointer to the pattern string
56
 * \param size length of the string
57
 */
58
void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
59
111k
{
60
    /* Store the content as lower case to make searching faster */
61
111k
    memcpy_tolower(needle, needle, needle_len);
62
63
    /* Prepare bad chars with nocase chars */
64
111k
    PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
65
66
    /* Prepare good Suffixes with nocase chars */
67
111k
    PreBmGsNocase(needle, needle_len, bm_ctx->bmGs);
68
111k
}
69
70
/**
71
 * \brief Setup a Boyer Moore context.
72
 *
73
 * \param str pointer to the pattern string
74
 * \param size length of the string
75
 * \retval BmCtx pointer to the newly created Context for the pattern
76
 * \initonly BoyerMoore contexts should be created at init
77
 */
78
BmCtx *BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
79
741k
{
80
741k
    BmCtx *new = SCMalloc(sizeof(BmCtx) + sizeof(uint16_t) * (needle_len + 1));
81
741k
    if (unlikely(new == NULL)) {
82
0
        FatalError("Fatal error encountered in BoyerMooreCtxInit. Exiting...");
83
0
    }
84
85
    /* Prepare bad chars */
86
741k
    PreBmBc(needle, needle_len, new->bmBc);
87
88
    /* Prepare good Suffixes */
89
741k
    if (PreBmGs(needle, needle_len, new->bmGs) == -1) {
90
0
        FatalError("Fatal error encountered in BoyerMoreCtxInit. Exiting...");
91
0
    }
92
93
94
741k
    return new;
95
741k
}
96
97
/**
98
 * \brief Setup a Boyer Moore context for nocase search
99
 *
100
 * \param str pointer to the pattern string
101
 * \param size length of the string
102
 * \retval BmCtx pointer to the newly created Context for the pattern
103
 * \initonly BoyerMoore contexts should be created at init
104
 */
105
BmCtx *BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
106
111k
{
107
111k
    BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needle_len);
108
109
111k
    BoyerMooreCtxToNocase(bm_ctx, needle, needle_len);
110
111
111k
    return bm_ctx;
112
111k
}
113
114
/**
115
 * \brief Free the memory allocated to Boyer Moore context.
116
 *
117
 * \param bmCtx pointer to the Context for the pattern
118
 */
119
void BoyerMooreCtxDeInit(BmCtx *bmctx)
120
737k
{
121
737k
    SCEnter();
122
737k
    if (bmctx == NULL)
123
0
        SCReturn;
124
125
737k
    SCFree(bmctx);
126
127
737k
    SCReturn;
128
737k
}
129
/**
130
 * \brief Array setup function for bad characters that split the pattern
131
 *        Remember that the result array should be the length of ALPHABET_SIZE
132
 *
133
 * \param str pointer to the pattern string
134
 * \param size length of the string
135
 * \param result pointer to an empty array that will hold the badchars
136
 */
137
static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
138
741k
{
139
741k
    uint16_t i;
140
141
190M
    for (i = 0; i < 256; ++i) {
142
189M
        bmBc[i] = m;
143
189M
    }
144
9.17M
    for (i = 0; i < m - 1; ++i) {
145
8.43M
        bmBc[(unsigned char)x[i]] = m - i - 1;
146
8.43M
    }
147
741k
}
148
149
/**
150
 * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
151
 *
152
 * \param x pointer to the pattern string
153
 * \param m length of the string
154
 * \param suff pointer to an empty array that will hold the prefixes (shifts)
155
 */
156
static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff)
157
741k
{
158
741k
    int32_t f = 0, g, i;
159
741k
    suff[m - 1] = m;
160
741k
    g = m - 1;
161
9.17M
    for (i = m - 2; i >= 0; --i) {
162
8.43M
        if (i > g && suff[i + m - 1 - f] < i - g)
163
59.4k
            suff[i] = suff[i + m - 1 - f];
164
8.37M
        else {
165
8.37M
            if (i < g)
166
7.77M
                g = i;
167
8.37M
            f = i;
168
9.05M
            while (g >= 0 && x[g] == x[g + m - 1 - f])
169
677k
                --g;
170
8.37M
            DEBUG_VALIDATE_BUG_ON(f - g < 0 || f - g > UINT16_MAX);
171
8.37M
            suff[i] = (uint16_t)(f - g);
172
8.37M
        }
173
8.43M
    }
174
741k
}
175
176
/**
177
 * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
178
 *
179
 * \param x pointer to the pattern string
180
 * \param m length of the string
181
 * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
182
 * \retval 0 ok, -1 failed
183
 */
184
static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
185
741k
{
186
741k
    int32_t i, j;
187
741k
    uint16_t suff[m + 1];
188
189
741k
    BoyerMooreSuffixes(x, m, suff);
190
191
9.91M
    for (i = 0; i < m; ++i)
192
9.17M
        bmGs[i] = m;
193
194
741k
    j = 0;
195
196
10.6M
    for (i = m - 1; i >= -1; --i)
197
9.91M
        if (i == -1 || suff[i] == i + 1)
198
10.7M
            for (; j < m - 1 - i; ++j)
199
9.17M
                if (bmGs[j] == m)
200
9.17M
                    bmGs[j] = (uint16_t)(m - 1 - i);
201
202
9.17M
    for (i = 0; i <= m - 2; ++i)
203
8.43M
        bmGs[m - 1 - suff[i]] = (uint16_t)(m - 1 - i);
204
741k
    return 0;
205
741k
}
206
207
/**
208
 * \brief Array setup function for bad characters that split the pattern
209
 *        Remember that the result array should be the length of ALPHABET_SIZE
210
 *
211
 * \param str pointer to the pattern string
212
 * \param size length of the string
213
 * \param result pointer to an empty array that will hold the badchars
214
 */
215
static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
216
111k
{
217
111k
    uint16_t i;
218
219
28.5M
    for (i = 0; i < 256; ++i) {
220
28.4M
        bmBc[i] = m;
221
28.4M
    }
222
2.09M
    for (i = 0; i < m - 1; ++i) {
223
1.98M
        bmBc[u8_tolower(x[i])] = m - 1 - i;
224
1.98M
        bmBc[u8_toupper(x[i])] = m - 1 - i;
225
1.98M
    }
226
111k
}
227
228
static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, 
229
                                     uint16_t *suff)
230
111k
{
231
111k
    int32_t f = 0, g, i;
232
233
111k
    suff[m - 1] = m;
234
111k
    g = m - 1;
235
2.09M
    for (i = m - 2; i >= 0; --i) {
236
1.98M
        if (i > g && suff[i + m - 1 - f] < i - g) {
237
10.1k
            suff[i] = suff[i + m - 1 - f];
238
1.97M
        } else {
239
1.97M
            if (i < g) {
240
1.79M
                g = i;
241
1.79M
            }
242
1.97M
            f = i;
243
2.15M
            while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
244
186k
                --g;
245
186k
            }
246
1.97M
            DEBUG_VALIDATE_BUG_ON(f - g < 0 || f - g > UINT16_MAX);
247
1.97M
            suff[i] = (uint16_t)(f - g);
248
1.97M
        }
249
1.98M
    }
250
111k
}
251
252
/**
253
 * \brief Array setup function for building prefixes (shift for valid prefixes)
254
 *        for boyermoore context case less
255
 *
256
 * \param x pointer to the pattern string
257
 * \param m length of the string
258
 * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
259
 */
260
static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
261
111k
{
262
111k
    uint16_t i, j;
263
111k
    uint16_t suff[m + 1];
264
265
111k
    BoyerMooreSuffixesNocase(x, m, suff);
266
267
2.20M
    for (i = 0; i < m; ++i) {
268
2.09M
        bmGs[i] = m;
269
2.09M
    }
270
111k
    j = 0;
271
2.20M
    for (i = m; i > 0; --i) {
272
2.09M
        if (suff[i - 1] == i) {
273
221k
            for (; j < m - i; ++j) {
274
94.3k
                if (bmGs[j] == m) {
275
94.3k
                    bmGs[j] = m - i;
276
94.3k
                }
277
94.3k
            }
278
127k
        }
279
2.09M
    }
280
2.09M
    for (i = 0; i <= m - 2; ++i) {
281
1.98M
        bmGs[m - 1 - suff[i]] = m - 1 - i;
282
1.98M
    }
283
111k
}
284
285
/**
286
 * \brief Boyer Moore search algorithm
287
 *        Is better as the pattern length increases and for big buffers to search in.
288
 *        The algorithm needs a context of two arrays already prepared
289
 *        by prep_bad_chars() and prep_good_suffix()
290
 *
291
 * \param y pointer to the buffer to search in
292
 * \param n length limit of the buffer
293
 * \param x pointer to the pattern we ar searching for
294
 * \param m length limit of the needle
295
 * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
296
 * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
297
 *
298
 * \retval ptr to start of the match; NULL if no match
299
 */
300
uint8_t *BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
301
226k
{
302
226k
    uint16_t *bmGs = bm_ctx->bmGs;
303
226k
    uint16_t *bmBc = bm_ctx->bmBc;
304
305
226k
    int i, j, m1, m2;
306
226k
    int32_t int_n;
307
#if 0
308
    printf("\nBad:\n");
309
    for (i=0;i<ALPHABET_SIZE;i++)
310
        printf("%c,%d ", i, bmBc[i]);
311
312
    printf("\ngood:\n");
313
    for (i=0;i<m;i++)
314
        printf("%c, %d ", x[i],bmBc[i]);
315
    printf("\n");
316
#endif
317
    // force casting to int32_t (if possible)
318
226k
    int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
319
226k
    j = 0;
320
71.0M
    while (j <= int_n - m ) {
321
71.3M
        for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
322
323
70.9M
        if (i < 0) {
324
98.2k
            return (uint8_t *)(y + j);
325
            //j += bmGs[0];
326
70.8M
        } else {
327
//          printf("%c", y[i+j]);
328
70.8M
            j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)? m1: m2;
329
//          printf("%d, %d\n", m1, m2);
330
70.8M
        }
331
70.9M
    }
332
128k
    return NULL;
333
226k
}
334
335
336
/**
337
 * \brief Boyer Moore search algorithm
338
 *        Is better as the pattern length increases and for big buffers to search in.
339
 *        The algorithm needs a context of two arrays already prepared
340
 *        by prep_bad_chars() and prep_good_suffix()
341
 *
342
 * \param y pointer to the buffer to search in
343
 * \param n length limit of the buffer
344
 * \param x pointer to the pattern we ar searching for
345
 * \param m length limit of the needle
346
 * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
347
 * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
348
 *
349
 * \retval ptr to start of the match; NULL if no match
350
 */
351
uint8_t *BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
352
42.9k
{
353
42.9k
    uint16_t *bmGs = bm_ctx->bmGs;
354
42.9k
    uint16_t *bmBc = bm_ctx->bmBc;
355
42.9k
    int i, j, m1, m2;
356
42.9k
    int32_t int_n;
357
#if 0
358
    printf("\nBad:\n");
359
    for (i=0;i<ALPHABET_SIZE;i++)
360
        printf("%c,%d ", i, bmBc[i]);
361
362
    printf("\ngood:\n");
363
    for (i=0;i<m;i++)
364
        printf("%c, %d ", x[i],bmBc[i]);
365
    printf("\n");
366
#endif
367
    // force casting to int32_t (if possible)
368
42.9k
    int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
369
42.9k
    j = 0;
370
1.72M
    while (j <= int_n - m ) {
371
         /* x is stored in lowercase. */
372
1.94M
        for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
373
374
1.71M
        if (i < 0) {
375
37.6k
            return (uint8_t *)(y + j);
376
1.67M
        } else {
377
1.67M
            j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)?
378
1.67M
                m1: m2;
379
1.67M
        }
380
1.71M
    }
381
5.26k
    return NULL;
382
42.9k
}
383
384
typedef struct SpmBmCtx_ {
385
    BmCtx *bm_ctx;
386
    uint8_t *needle;
387
    uint16_t needle_len;
388
    int nocase;
389
} SpmBmCtx;
390
391
static SpmCtx *BMInitCtx(const uint8_t *needle, uint16_t needle_len, int nocase,
392
                         SpmGlobalThreadCtx *global_thread_ctx)
393
741k
{
394
741k
    SpmCtx *ctx = SCMalloc(sizeof(SpmCtx));
395
741k
    if (ctx == NULL) {
396
0
        SCLogDebug("Unable to alloc SpmCtx.");
397
0
        return NULL;
398
0
    }
399
741k
    memset(ctx, 0, sizeof(*ctx));
400
741k
    ctx->matcher = SPM_BM;
401
402
741k
    SpmBmCtx *sctx = SCMalloc(sizeof(SpmBmCtx));
403
741k
    if (sctx == NULL) {
404
0
        SCLogDebug("Unable to alloc SpmBmCtx.");
405
0
        SCFree(ctx);
406
0
        return NULL;
407
0
    }
408
741k
    memset(sctx, 0, sizeof(*sctx));
409
410
741k
    sctx->needle = SCMalloc(needle_len);
411
741k
    if (sctx->needle == NULL) {
412
0
        SCLogDebug("Unable to alloc string.");
413
0
        SCFree(sctx);
414
0
        SCFree(ctx);
415
0
        return NULL;
416
0
    }
417
741k
    memcpy(sctx->needle, needle, needle_len);
418
741k
    sctx->needle_len = needle_len;
419
420
741k
    if (nocase) {
421
111k
        sctx->bm_ctx = BoyerMooreNocaseCtxInit(sctx->needle, sctx->needle_len);
422
111k
        sctx->nocase = 1;
423
630k
    } else {
424
630k
        sctx->bm_ctx = BoyerMooreCtxInit(sctx->needle, sctx->needle_len);
425
630k
        sctx->nocase = 0;
426
630k
    }
427
428
741k
    ctx->ctx = sctx;
429
741k
    return ctx;
430
741k
}
431
432
static void BMDestroyCtx(SpmCtx *ctx)
433
737k
{
434
737k
    if (ctx == NULL) {
435
0
        return;
436
0
    }
437
438
737k
    SpmBmCtx *sctx = ctx->ctx;
439
737k
    if (sctx != NULL) {
440
737k
        BoyerMooreCtxDeInit(sctx->bm_ctx);
441
737k
        if (sctx->needle != NULL) {
442
737k
            SCFree(sctx->needle);
443
737k
        }
444
737k
        SCFree(sctx);
445
737k
    }
446
447
737k
    SCFree(ctx);
448
737k
}
449
450
static uint8_t *BMScan(const SpmCtx *ctx, SpmThreadCtx *thread_ctx,
451
                       const uint8_t *haystack, uint32_t haystack_len)
452
269k
{
453
269k
    const SpmBmCtx *sctx = ctx->ctx;
454
455
269k
    if (sctx->nocase) {
456
42.9k
        return BoyerMooreNocase(sctx->needle, sctx->needle_len, haystack,
457
42.9k
                                haystack_len, sctx->bm_ctx);
458
226k
    } else {
459
226k
        return BoyerMoore(sctx->needle, sctx->needle_len, haystack,
460
226k
                          haystack_len, sctx->bm_ctx);
461
226k
    }
462
269k
}
463
464
static SpmGlobalThreadCtx *BMInitGlobalThreadCtx(void)
465
64.2k
{
466
64.2k
    SpmGlobalThreadCtx *global_thread_ctx = SCMalloc(sizeof(SpmGlobalThreadCtx));
467
64.2k
    if (global_thread_ctx == NULL) {
468
0
        SCLogDebug("Unable to alloc SpmThreadCtx.");
469
0
        return NULL;
470
0
    }
471
64.2k
    memset(global_thread_ctx, 0, sizeof(*global_thread_ctx));
472
64.2k
    global_thread_ctx->matcher = SPM_BM;
473
64.2k
    return global_thread_ctx;
474
64.2k
}
475
476
static void BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx *global_thread_ctx)
477
64.2k
{
478
64.2k
    if (global_thread_ctx == NULL) {
479
0
        return;
480
0
    }
481
64.2k
    SCFree(global_thread_ctx);
482
64.2k
}
483
484
static void BMDestroyThreadCtx(SpmThreadCtx *thread_ctx)
485
64.2k
{
486
64.2k
    if (thread_ctx == NULL) {
487
0
        return;
488
0
    }
489
64.2k
    SCFree(thread_ctx);
490
64.2k
}
491
492
64.2k
static SpmThreadCtx *BMMakeThreadCtx(const SpmGlobalThreadCtx *global_thread_ctx) {
493
64.2k
    SpmThreadCtx *thread_ctx = SCMalloc(sizeof(SpmThreadCtx));
494
64.2k
    if (thread_ctx == NULL) {
495
0
        SCLogDebug("Unable to alloc SpmThreadCtx.");
496
0
        return NULL;
497
0
    }
498
64.2k
    memset(thread_ctx, 0, sizeof(*thread_ctx));
499
64.2k
    thread_ctx->matcher = SPM_BM;
500
64.2k
    return thread_ctx;
501
64.2k
}
502
503
void SpmBMRegister(void)
504
77
{
505
77
    spm_table[SPM_BM].name = "bm";
506
77
    spm_table[SPM_BM].InitGlobalThreadCtx = BMInitGlobalThreadCtx;
507
77
    spm_table[SPM_BM].DestroyGlobalThreadCtx = BMDestroyGlobalThreadCtx;
508
77
    spm_table[SPM_BM].MakeThreadCtx = BMMakeThreadCtx;
509
77
    spm_table[SPM_BM].DestroyThreadCtx = BMDestroyThreadCtx;
510
77
    spm_table[SPM_BM].InitCtx = BMInitCtx;
511
77
    spm_table[SPM_BM].DestroyCtx = BMDestroyCtx;
512
77
    spm_table[SPM_BM].Scan = BMScan;
513
77
}