Coverage Report

Created: 2026-06-30 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/suricata7/src/runmode-dpdk.c
Line
Count
Source
1
/* Copyright (C) 2021 Open Information Security Foundation
2
 *
3
 * You can copy, redistribute or modify this Program under the terms of
4
 * the GNU General Public License version 2 as published by the Free
5
 * Software Foundation.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * version 2 along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15
 * 02110-1301, USA.
16
 */
17
18
/**
19
 * \ingroup dpdk
20
 *
21
 * @{
22
 */
23
24
/**
25
 * \file
26
 *
27
 * \author Lukas Sismis <lukas.sismis@gmail.com>
28
 *
29
 * DPDK runmode
30
 *
31
 */
32
33
#include "suricata-common.h"
34
#include "runmodes.h"
35
#include "runmode-dpdk.h"
36
#include "decode.h"
37
#include "source-dpdk.h"
38
#include "util-runmodes.h"
39
#include "util-byte.h"
40
#include "util-cpu.h"
41
#include "util-debug.h"
42
#include "util-device.h"
43
#include "util-dpdk.h"
44
#include "util-dpdk-i40e.h"
45
#include "util-dpdk-ice.h"
46
#include "util-dpdk-ixgbe.h"
47
#include "util-dpdk-bonding.h"
48
#include "util-time.h"
49
#include "util-conf.h"
50
#include "suricata.h"
51
#include "util-affinity.h"
52
53
#ifdef HAVE_DPDK
54
55
#define RSS_HKEY_LEN 40
56
// General purpose RSS key for symmetric bidirectional flow distribution.
// The key is the two-byte pattern 0x6D, 0x5A repeated; the array holds 52 bytes,
// while RSS_HKEY_LEN defined above is 40.
57
uint8_t rss_hkey[] = {
58
    0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A,
59
    0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A,
60
    0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A,                         // 40 bytes up to here
61
    0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, // 52 bytes up to here
62
};
63
64
// Rounds x up to the nearest multiple of y (x is returned unchanged when it already is one)
65
#define ROUNDUP(x, y) ((((x) + ((y)-1)) / (y)) * (y))
66
67
/* Maximum DPDK EAL parameters count. */
68
#define EAL_ARGS 48
69
70
/* Growable-by-append argument vector used to build the EAL command line. */
struct Arguments {
71
    uint16_t capacity; // number of pointer slots allocated in argv (set by ArgumentsInit)
72
    char **argv; // array of argument strings, each entry is freed by ArgumentsCleanup
73
    uint16_t argc; // number of slots of argv currently in use
74
};
75
76
static char *AllocArgument(size_t arg_len);
77
static char *AllocAndSetArgument(const char *arg);
78
static char *AllocAndSetOption(const char *arg);
79
80
static void ArgumentsInit(struct Arguments *args, unsigned capacity);
81
static void ArgumentsCleanup(struct Arguments *args);
82
static void ArgumentsAdd(struct Arguments *args, char *value);
83
static void ArgumentsAddOptionAndArgument(struct Arguments *args, const char *opt, const char *arg);
84
static void InitEal(void);
85
86
static void ConfigSetIface(DPDKIfaceConfig *iconf, const char *entry_str);
87
static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str);
88
static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues);
89
static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues);
90
static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, intmax_t entry_int);
91
static int ConfigSetMempoolCacheSize(DPDKIfaceConfig *iconf, const char *entry_str);
92
static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int);
93
static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int);
94
static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int);
95
static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool);
96
static bool ConfigSetMulticast(DPDKIfaceConfig *iconf, int entry_bool);
97
static int ConfigSetChecksumChecks(DPDKIfaceConfig *iconf, int entry_bool);
98
static int ConfigSetChecksumOffload(DPDKIfaceConfig *iconf, int entry_bool);
99
static int ConfigSetCopyIface(DPDKIfaceConfig *iconf, const char *entry_str);
100
static int ConfigSetCopyMode(DPDKIfaceConfig *iconf, const char *entry_str);
101
static int ConfigSetCopyIfaceSettings(DPDKIfaceConfig *iconf, const char *iface, const char *mode);
102
static void ConfigInit(DPDKIfaceConfig **iconf);
103
static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface);
104
static DPDKIfaceConfig *ConfigParse(const char *iface);
105
106
static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,
107
        const struct rte_eth_dev_info *dev_info, struct rte_eth_conf *port_conf);
108
static int DeviceConfigureQueues(DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info,
109
        const struct rte_eth_conf *port_conf);
110
static int DeviceValidateOutIfaceConfig(DPDKIfaceConfig *iconf);
111
static int DeviceConfigureIPS(DPDKIfaceConfig *iconf);
112
static int DeviceConfigure(DPDKIfaceConfig *iconf);
113
static void *ParseDpdkConfigAndConfigureDevice(const char *iface);
114
static void DPDKDerefConfig(void *conf);
115
116
#define DPDK_CONFIG_DEFAULT_THREADS                     "auto"
117
#define DPDK_CONFIG_DEFAULT_INTERRUPT_MODE              false
118
#define DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE                65535
119
#define DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE          "auto"
120
#define DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS              1024
121
#define DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS              1024
122
#define DPDK_CONFIG_DEFAULT_RSS_HASH_FUNCTIONS          RTE_ETH_RSS_IP
123
#define DPDK_CONFIG_DEFAULT_MTU                         1500
124
#define DPDK_CONFIG_DEFAULT_PROMISCUOUS_MODE            1
125
#define DPDK_CONFIG_DEFAULT_MULTICAST_MODE              1
126
#define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION         1
127
#define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD 1
128
#define DPDK_CONFIG_DEFAULT_COPY_MODE                   "none"
129
#define DPDK_CONFIG_DEFAULT_COPY_INTERFACE              "none"
130
131
/* Names of the per-interface configuration keys; ConfigLoad looks each of them up
 * in the "dpdk.interfaces" section of the configuration. */
DPDKIfaceConfigAttributes dpdk_yaml = {
132
    .threads = "threads",
133
    .irq_mode = "interrupt-mode",
134
    .promisc = "promisc",
135
    .multicast = "multicast",
136
    .checksum_checks = "checksum-checks",
137
    .checksum_checks_offload = "checksum-checks-offload",
138
    .mtu = "mtu",
139
    .rss_hf = "rss-hash-functions",
140
    .mempool_size = "mempool-size",
141
    .mempool_cache_size = "mempool-cache-size",
142
    .rx_descriptors = "rx-descriptors",
143
    .tx_descriptors = "tx-descriptors",
144
    .copy_mode = "copy-mode",
145
    .copy_iface = "copy-iface",
146
};
147
148
/**
 * Finds the greatest divisor of a number that does not exceed a given bound.
 *
 * @param num number whose divisor is searched for
 * @param max_num inclusive upper bound of the divisor
 * @return the greatest divisor of num within <2, max_num>, or 1 when no such
 *         divisor exists
 */
static int GreatestDivisorUpTo(uint32_t num, uint32_t max_num)
{
    // a divisor of a non-zero number is never greater than the number itself,
    // so there is no point in scanning candidates above it
    if (num != 0 && max_num > num) {
        max_num = num;
    }
    // the result is returned as int - keep every candidate representable
    if (max_num > (uint32_t)INT32_MAX) {
        max_num = (uint32_t)INT32_MAX;
    }

    // unsigned counter: a signed one turned negative for bounds above INT_MAX
    // and the search was silently skipped
    for (uint32_t i = max_num; i >= 2; i--) {
        if (num % i == 0) {
            return (int)i;
        }
    }
    return 1;
}
157
158
/**
 * Allocates a zeroed buffer able to hold a string of the given length.
 *
 * @param arg_len length of the string without the terminating null character
 * @return address of the allocated buffer; an allocation failure is reported
 *         through FatalError
 */
static char *AllocArgument(size_t arg_len)
{
    SCEnter();
    // one extra byte for the terminating null character
    const size_t buf_len = arg_len + 1;
    char *buf = (char *)SCCalloc(buf_len, sizeof(char));
    if (buf == NULL) {
        FatalError("Could not allocate memory for an argument");
    }

    SCReturnPtr(buf, "char *");
}
170
171
/**
 * Creates a heap-allocated copy of the given string.
 *
 * @param arg string to duplicate; passing NULL is reported through FatalError
 * @return address of the newly allocated copy (allocation is done by
 *         AllocArgument, which reports failures through FatalError)
 */
static char *AllocAndSetArgument(const char *arg)
{
    SCEnter();
    if (arg == NULL) {
        FatalError("Passed argument is NULL in DPDK config initialization");
    }

    const size_t len = strlen(arg);
    char *copy = AllocArgument(len);
    // the buffer is len + 1 bytes long, so the whole string plus terminator fits
    strlcpy(copy, arg, len + 1);
    SCReturnPtr(copy, "char *");
}
189
190
/**
 * Creates a heap-allocated command line option out of an option name.
 * Names longer than one character are prefixed with "--", all other names
 * with "-".
 *
 * @param arg option name without dashes; passing NULL is reported through
 *            FatalError
 * @return address of the newly allocated, dash-prefixed option string
 */
static char *AllocAndSetOption(const char *arg)
{
    SCEnter();
    if (arg == NULL) {
        FatalError("Passed option is NULL in DPDK config initialization");
    }

    const size_t name_len = strlen(arg);
    const char *prefix = (name_len > 1) ? "--" : "-";
    const size_t prefix_len = strlen(prefix);
    const size_t total_len = prefix_len + name_len;

    char *option = AllocArgument(total_len);
    strlcpy(option, prefix, prefix_len + 1);
    strlcat(option, arg, total_len + 1);
    SCReturnPtr(option, "char *");
}
207
208
static void ArgumentsInit(struct Arguments *args, unsigned capacity)
209
{
210
    SCEnter();
211
    args->argv = SCCalloc(capacity, sizeof(*args->argv)); // alloc array of pointers
212
    if (args->argv == NULL)
213
        FatalError("Could not allocate memory for Arguments structure");
214
215
    args->capacity = capacity;
216
    args->argc = 0;
217
    SCReturn;
218
}
219
220
static void ArgumentsCleanup(struct Arguments *args)
221
{
222
    SCEnter();
223
    for (int i = 0; i < args->argc; i++) {
224
        if (args->argv[i] != NULL) {
225
            SCFree(args->argv[i]);
226
            args->argv[i] = NULL;
227
        }
228
    }
229
230
    SCFree(args->argv);
231
    args->argv = NULL;
232
    args->argc = 0;
233
    args->capacity = 0;
234
}
235
236
static void ArgumentsAdd(struct Arguments *args, char *value)
237
{
238
    SCEnter();
239
    if (args->argc + 1 > args->capacity)
240
        FatalError("No capacity for more arguments (Max: %" PRIu32 ")", EAL_ARGS);
241
242
    args->argv[args->argc++] = value;
243
    SCReturn;
244
}
245
246
static void ArgumentsAddOptionAndArgument(struct Arguments *args, const char *opt, const char *arg)
247
{
248
    SCEnter();
249
    char *option;
250
    char *argument;
251
252
    option = AllocAndSetOption(opt);
253
    ArgumentsAdd(args, option);
254
255
    // Empty argument could mean option only (e.g. --no-huge)
256
    if (arg == NULL || arg[0] == '\0')
257
        SCReturn;
258
259
    argument = AllocAndSetArgument(arg);
260
    ArgumentsAdd(args, argument);
261
    SCReturn;
262
}
263
264
static void InitEal(void)
265
{
266
    SCEnter();
267
    int retval;
268
    ConfNode *param;
269
    const ConfNode *eal_params = ConfGetNode("dpdk.eal-params");
270
    struct Arguments args;
271
    char **eal_argv;
272
273
    if (eal_params == NULL) {
274
        FatalError("DPDK EAL parameters not found in the config");
275
    }
276
277
    ArgumentsInit(&args, EAL_ARGS);
278
    ArgumentsAdd(&args, AllocAndSetArgument("suricata"));
279
280
    TAILQ_FOREACH (param, &eal_params->head, next) {
281
        if (ConfNodeIsSequence(param)) {
282
            const char *key = param->name;
283
            ConfNode *val;
284
            TAILQ_FOREACH (val, &param->head, next) {
285
                ArgumentsAddOptionAndArgument(&args, key, (const char *)val->val);
286
            }
287
            continue;
288
        }
289
        ArgumentsAddOptionAndArgument(&args, param->name, param->val);
290
    }
291
292
    // creating a shallow copy for cleanup because rte_eal_init changes array contents
293
    eal_argv = SCCalloc(args.argc, sizeof(*args.argv));
294
    if (eal_argv == NULL) {
295
        FatalError("Failed to allocate memory for the array of DPDK EAL arguments");
296
    }
297
    memcpy(eal_argv, args.argv, args.argc * sizeof(*args.argv));
298
299
    rte_log_set_global_level(RTE_LOG_WARNING);
300
    retval = rte_eal_init(args.argc, eal_argv);
301
302
    ArgumentsCleanup(&args);
303
    SCFree(eal_argv);
304
305
    if (retval < 0) { // retval bound to the result of rte_eal_init
306
        FatalError("DPDK EAL initialization error: %s", rte_strerror(-retval));
307
    }
308
}
309
310
static void DPDKDerefConfig(void *conf)
311
{
312
    SCEnter();
313
    DPDKIfaceConfig *iconf = (DPDKIfaceConfig *)conf;
314
315
    if (SC_ATOMIC_SUB(iconf->ref, 1) == 1) {
316
        if (iconf->pkt_mempool != NULL) {
317
            rte_mempool_free(iconf->pkt_mempool);
318
        }
319
320
        SCFree(iconf);
321
    }
322
    SCReturn;
323
}
324
325
/**
 * Allocates an interface configuration and fills in its initial state:
 * reference counter raised to one, no mempool, no flags, DPDKDerefConfig as
 * the dereference callback.
 *
 * @param iconf output, receives the address of the new configuration; an
 *              allocation failure is reported through FatalError
 */
static void ConfigInit(DPDKIfaceConfig **iconf)
{
    SCEnter();
    DPDKIfaceConfig *cfg = SCCalloc(1, sizeof(DPDKIfaceConfig));
    if (cfg == NULL) {
        FatalError("Could not allocate memory for DPDKIfaceConfig");
    }

    // the creator holds the first reference
    SC_ATOMIC_INIT(cfg->ref);
    (void)SC_ATOMIC_ADD(cfg->ref, 1);
    cfg->DerefFunc = DPDKDerefConfig;

    cfg->pkt_mempool = NULL;
    cfg->out_port_id = -1; // make sure no port is set
    cfg->flags = 0;

    *iconf = cfg;
    SCReturn;
}
343
344
/**
 * Resolves the interface name to a DPDK port id and stores both in the
 * configuration.
 *
 * @param iconf configuration to fill in (port_id and iface)
 * @param entry_str interface name; a NULL/empty name or a name DPDK cannot
 *                  resolve is reported through FatalError
 */
static void ConfigSetIface(DPDKIfaceConfig *iconf, const char *entry_str)
{
    SCEnter();
    const bool name_missing = (entry_str == NULL || entry_str[0] == '\0');
    if (name_missing) {
        FatalError("Interface name in DPDK config is NULL or empty");
    }

    const int rc = rte_eth_dev_get_port_by_name(entry_str, &iconf->port_id);
    if (rc < 0) {
        FatalError("Interface \"%s\": %s", entry_str, rte_strerror(-rc));
    }

    strlcpy(iconf->iface, entry_str, sizeof(iconf->iface));
    SCReturn;
}
359
360
/**
 * Determines the number of worker threads of the interface.
 * Requires thread affinity to be configured together with the
 * "worker-cpu-set" and "management-cpu-set" lists. In the "single" runmode
 * one thread is used. The value "auto" splits the worker CPUs evenly among
 * the live devices, handing out the remainder one CPU per interface across
 * successive calls; any other value is parsed as a decimal number.
 *
 * @param iconf configuration to fill in (threads)
 * @param entry_str configured value, "auto" or a number
 * @return 0 on success, -EINVAL on a missing or malformed setting, -ERANGE
 *         when the resulting thread count is not positive
 */
static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str)
{
    SCEnter();
    // CPUs left over by the "auto" division; -1 means not computed yet.
    // The value is shared by all calls of this function.
    static int32_t remaining_auto_cpus = -1;

    if (!threading_set_cpu_affinity) {
        SCLogError("DPDK runmode requires configured thread affinity");
        SCReturnInt(-EINVAL);
    }

    ThreadsAffinityType *worker_set = GetAffinityTypeFromName("worker-cpu-set");
    if (worker_set == NULL) {
        SCLogError("Specify worker-cpu-set list in the threading section");
        SCReturnInt(-EINVAL);
    }
    ThreadsAffinityType *mgmt_set = GetAffinityTypeFromName("management-cpu-set");
    if (mgmt_set == NULL) {
        SCLogError("Specify management-cpu-set list in the threading section");
        SCReturnInt(-EINVAL);
    }

    uint32_t worker_cpus = UtilAffinityGetAffinedCPUNum(worker_set);
    if (worker_cpus == UtilCpuGetNumProcessorsOnline()) {
        // every online CPU is in the worker set - take the management ones out
        SCLogWarning(
                "\"all\" specified in worker CPU cores affinity, excluding management threads");
        UtilAffinityCpusExclude(worker_set, mgmt_set);
        worker_cpus = UtilAffinityGetAffinedCPUNum(worker_set);
    }

    if (worker_cpus == 0) {
        SCLogError("No worker CPU cores with configured affinity were configured");
        SCReturnInt(-EINVAL);
    }
    if (UtilAffinityCpusOverlap(worker_set, mgmt_set) != 0) {
        SCLogWarning("Worker threads should not overlap with management threads in the CPU core "
                     "affinity configuration");
    }

    const char *runmode = RunmodeGetActive();
    if (runmode != NULL && strcmp("single", runmode) == 0) {
        iconf->threads = 1;
        SCReturnInt(0);
    }

    if (entry_str == NULL) {
        SCLogError("Number of threads for interface \"%s\" not specified", iconf->iface);
        SCReturnInt(-EINVAL);
    }

    if (strcmp(entry_str, "auto") != 0) {
        // explicit thread count
        if (StringParseInt32(&iconf->threads, 10, 0, entry_str) < 0) {
            SCLogError("Threads entry for interface %s contain non-numerical characters - \"%s\"",
                    iconf->iface, entry_str);
            SCReturnInt(-EINVAL);
        }

        if (iconf->threads > 0) {
            SCReturnInt(0);
        }
        SCLogError("%s: positive number of threads required", iconf->iface);
        SCReturnInt(-ERANGE);
    }

    // automatic assignment: an even share of the worker CPUs per live device
    iconf->threads = (uint16_t)worker_cpus / LiveGetDeviceCount();
    if (iconf->threads == 0) {
        SCLogError("Not enough worker CPU cores with affinity were configured");
        SCReturnInt(-ERANGE);
    }

    // the remainder of the division is computed once, on the first "auto" interface
    if (remaining_auto_cpus == -1) {
        remaining_auto_cpus = (int32_t)worker_cpus % LiveGetDeviceCount();
    }
    if (remaining_auto_cpus > 0) {
        iconf->threads++;
        remaining_auto_cpus--;
    }
    SCLogConfig("%s: auto-assigned %u threads", iconf->iface, iconf->threads);
    SCReturnInt(0);
}
440
441
/**
 * Records whether the interface should run in interrupt mode.
 *
 * @param iconf configuration to update (DPDK_IRQ_MODE flag)
 * @param enable true sets the flag, false leaves the flags untouched
 * @return always true
 */
static bool ConfigSetInterruptMode(DPDKIfaceConfig *iconf, bool enable)
{
    SCEnter();
    if (!enable) {
        SCReturnBool(true);
    }

    iconf->flags |= DPDK_IRQ_MODE;
    SCReturnBool(true);
}
449
450
/**
 * Stores the number of RX queues of the interface.
 *
 * @param iconf configuration to update (nb_rx_queues)
 * @param nb_queues requested number of RX queues
 * @return 0 on success, -ERANGE when the stored count is lower than one
 */
static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues)
{
    SCEnter();
    iconf->nb_rx_queues = nb_queues;
    if (iconf->nb_rx_queues >= 1) {
        SCReturnInt(0);
    }

    SCLogError("%s: positive number of RX queues is required", iconf->iface);
    SCReturnInt(-ERANGE);
}
461
462
/**
 * Stores the number of TX queues of the interface.
 *
 * @param iconf configuration to update (nb_tx_queues)
 * @param nb_queues requested number of TX queues
 * @return 0 on success, -ERANGE when the stored count is lower than one
 */
static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues)
{
    SCEnter();
    iconf->nb_tx_queues = nb_queues;
    if (iconf->nb_tx_queues >= 1) {
        SCReturnInt(0);
    }

    SCLogError("%s: positive number of TX queues is required", iconf->iface);
    SCReturnInt(-ERANGE);
}
473
474
/**
 * Validates and stores the size of the packet memory pool.
 *
 * @param iconf configuration to update (mempool_size)
 * @param entry_int requested size, accepted range is <1, UINT32_MAX>
 * @return 0 on success, -ERANGE when the value is out of range
 */
static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, intmax_t entry_int)
{
    SCEnter();
    if (entry_int <= 0) {
        SCLogError("%s: positive memory pool size is required", iconf->iface);
        SCReturnInt(-ERANGE);
    }
    if (entry_int > UINT32_MAX) {
        SCLogError("%s: memory pool size cannot exceed %" PRIu32, iconf->iface, UINT32_MAX);
        SCReturnInt(-ERANGE);
    }

    iconf->mempool_size = entry_int;
    SCReturnInt(0);
}
488
489
/**
 * Sets the per-core cache size of the packet memory pool.
 * With a NULL, empty or "auto" value the size is derived from the mempool
 * size, which therefore has to be set beforehand; any other value is parsed
 * as a decimal number.
 *
 * @param iconf configuration to update (mempool_cache_size)
 * @param entry_str configured value, "auto" or a number
 * @return 0 on success, -EINVAL on a malformed value or an unset mempool
 *         size, -ERANGE when the number is zero or above
 *         RTE_MEMPOOL_CACHE_MAX_SIZE
 */
static int ConfigSetMempoolCacheSize(DPDKIfaceConfig *iconf, const char *entry_str)
{
    SCEnter();
    const bool use_auto =
            (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "auto") == 0);

    if (!use_auto) {
        if (StringParseUint32(&iconf->mempool_cache_size, 10, 0, entry_str) < 0) {
            SCLogError("%s: mempool cache size entry contain non-numerical characters - \"%s\"",
                    iconf->iface, entry_str);
            SCReturnInt(-EINVAL);
        }

        if (iconf->mempool_cache_size <= 0 ||
                iconf->mempool_cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
            SCLogError("%s: mempool cache size requires a positive number smaller than %" PRIu32,
                    iconf->iface, RTE_MEMPOOL_CACHE_MAX_SIZE);
            SCReturnInt(-ERANGE);
        }

        SCReturnInt(0);
    }

    // Derive the cache size from the mempool size. It is advised to keep the
    // cache size lower or equal to both RTE_MEMPOOL_CACHE_MAX_SIZE (by default
    // 512) and "mempool-size / 1.5", while "mempool-size modulo cache_size == 0".
    if (iconf->mempool_size == 0) {
        SCLogError("%s: cannot calculate mempool cache size of a mempool with size %d",
                iconf->iface, iconf->mempool_size);
        SCReturnInt(-EINVAL);
    }

    uint32_t cache_limit = MIN(RTE_MEMPOOL_CACHE_MAX_SIZE, iconf->mempool_size / 1.5);
    iconf->mempool_cache_size = GreatestDivisorUpTo(iconf->mempool_size, cache_limit);
    SCReturnInt(0);
}
522
523
/**
 * Validates and stores the number of RX descriptors.
 *
 * @param iconf configuration to update (nb_rx_desc)
 * @param entry_int requested count, accepted range is <1, UINT16_MAX>
 * @return 0 on success, -ERANGE when the value is out of range
 */
static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int)
{
    SCEnter();
    if (entry_int <= 0) {
        SCLogError("%s: positive number of RX descriptors is required", iconf->iface);
        SCReturnInt(-ERANGE);
    }
    if (entry_int > UINT16_MAX) {
        SCLogError("%s: number of RX descriptors cannot exceed %" PRIu16, iconf->iface, UINT16_MAX);
        SCReturnInt(-ERANGE);
    }

    iconf->nb_rx_desc = entry_int;
    SCReturnInt(0);
}
537
538
/**
 * Validates and stores the number of TX descriptors.
 *
 * @param iconf configuration to update (nb_tx_desc)
 * @param entry_int requested count, accepted range is <1, UINT16_MAX>
 * @return 0 on success, -ERANGE when the value is out of range
 */
static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int)
{
    SCEnter();
    if (entry_int <= 0) {
        SCLogError("%s: positive number of TX descriptors is required", iconf->iface);
        SCReturnInt(-ERANGE);
    }
    if (entry_int > UINT16_MAX) {
        SCLogError("%s: number of TX descriptors cannot exceed %" PRIu16, iconf->iface, UINT16_MAX);
        SCReturnInt(-ERANGE);
    }

    iconf->nb_tx_desc = entry_int;
    SCReturnInt(0);
}
552
553
/**
 * Sets the RSS hash functions of the interface.
 *
 * @param iconf configuration to update (rss_hf)
 * @param entry_str NULL, empty or "auto" selects
 *                  DPDK_CONFIG_DEFAULT_RSS_HASH_FUNCTIONS, anything else is
 *                  parsed as a number (the base is left to the parser)
 * @return 0 on success, -EINVAL when the value cannot be parsed
 */
static int ConfigSetRSSHashFunctions(DPDKIfaceConfig *iconf, const char *entry_str)
{
    SCEnter();
    const bool use_default =
            (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "auto") == 0);

    if (use_default) {
        iconf->rss_hf = DPDK_CONFIG_DEFAULT_RSS_HASH_FUNCTIONS;
    } else if (StringParseUint64(&iconf->rss_hf, 0, 0, entry_str) < 0) {
        SCLogError("%s: RSS hash functions entry contain non-numerical characters - \"%s\"",
                iconf->iface, entry_str);
        SCReturnInt(-EINVAL);
    }

    SCReturnInt(0);
}
569
570
/**
 * Validates and stores the MTU of the interface.
 *
 * @param iconf configuration to update (mtu)
 * @param entry_int requested MTU, accepted range is
 *                  <RTE_ETHER_MIN_MTU, RTE_ETHER_MAX_JUMBO_FRAME_LEN>
 * @return 0 on success, -ERANGE when the value is out of range
 */
static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int)
{
    SCEnter();
    const bool too_small = entry_int < RTE_ETHER_MIN_MTU;
    const bool too_large = entry_int > RTE_ETHER_MAX_JUMBO_FRAME_LEN;

    if (too_small || too_large) {
        SCLogError("%s: MTU size can only be between %" PRIu32 " and %" PRIu32, iconf->iface,
                RTE_ETHER_MIN_MTU, RTE_ETHER_MAX_JUMBO_FRAME_LEN);
        SCReturnInt(-ERANGE);
    }

    iconf->mtu = entry_int;
    SCReturnInt(0);
}
582
583
/**
 * Records whether the interface should run in promiscuous mode.
 *
 * @param iconf configuration to update (DPDK_PROMISC flag)
 * @param entry_bool non-zero sets the flag, zero leaves the flags untouched
 * @return always true
 */
static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool)
{
    SCEnter();
    if (!entry_bool) {
        SCReturnBool(true);
    }

    iconf->flags |= DPDK_PROMISC;
    SCReturnBool(true);
}
591
592
/**
 * Records whether the interface should have multicast enabled.
 *
 * @param iconf configuration to update (DPDK_MULTICAST flag)
 * @param entry_bool non-zero sets the flag, zero leaves the flags untouched
 * @return always true
 */
static bool ConfigSetMulticast(DPDKIfaceConfig *iconf, int entry_bool)
{
    SCEnter();
    if (!entry_bool) {
        SCReturnBool(true);
    }

    iconf->flags |= DPDK_MULTICAST; // enable
    SCReturnBool(true);
}
600
601
/**
 * Enables checksum validation for the interface when requested.
 *
 * @param iconf configuration to update (checksum_mode)
 * @param entry_bool non-zero selects CHECKSUM_VALIDATION_ENABLE, zero leaves
 *                   checksum_mode untouched
 * @return always 0
 */
static int ConfigSetChecksumChecks(DPDKIfaceConfig *iconf, int entry_bool)
{
    SCEnter();
    if (!entry_bool) {
        SCReturnInt(0);
    }

    iconf->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
    SCReturnInt(0);
}
609
610
/**
 * Records whether RX checksum validation should be offloaded.
 *
 * @param iconf configuration to update (DPDK_RX_CHECKSUM_OFFLOAD flag)
 * @param entry_bool non-zero sets the flag, zero leaves the flags untouched
 * @return always 0
 */
static int ConfigSetChecksumOffload(DPDKIfaceConfig *iconf, int entry_bool)
{
    SCEnter();
    if (!entry_bool) {
        SCReturnInt(0);
    }

    iconf->flags |= DPDK_RX_CHECKSUM_OFFLOAD;
    SCReturnInt(0);
}
618
619
/**
 * Sets the interface the traffic is copied to.
 *
 * @param iconf configuration to update (out_iface and out_port_id)
 * @param entry_str name of the copy interface; NULL, empty or "none" means no
 *                  copy interface. The pointer itself is stored, the string is
 *                  not duplicated.
 * @return 0 on success, the negative value returned by
 *         rte_eth_dev_get_port_by_name when the name cannot be resolved
 */
static int ConfigSetCopyIface(DPDKIfaceConfig *iconf, const char *entry_str)
{
    SCEnter();
    const bool no_copy_iface =
            (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "none") == 0);
    if (no_copy_iface) {
        iconf->out_iface = NULL;
        SCReturnInt(0);
    }

    const int rc = rte_eth_dev_get_port_by_name(entry_str, &iconf->out_port_id);
    if (rc < 0) {
        SCLogError("%s: name of the copy interface (%s) is invalid (err %s)", iconf->iface,
                entry_str, rte_strerror(-rc));
        SCReturnInt(rc);
    }

    iconf->out_iface = entry_str;
    SCReturnInt(0);
}
639
640
/**
 * Sets the copy mode of the interface.
 * A missing value or a value other than "none", "tap" or "ips" is replaced by
 * DPDK_CONFIG_DEFAULT_COPY_MODE with a warning.
 *
 * @param iconf configuration to update (copy_mode)
 * @param entry_str configured copy mode, may be NULL
 * @return always 0
 */
static int ConfigSetCopyMode(DPDKIfaceConfig *iconf, const char *entry_str)
{
    SCEnter();
    if (entry_str == NULL) {
        SCLogWarning("%s: no copy mode specified, changing to %s ", iconf->iface,
                DPDK_CONFIG_DEFAULT_COPY_MODE);
        entry_str = DPDK_CONFIG_DEFAULT_COPY_MODE;
    }

    if (strcmp(entry_str, "none") != 0 && strcmp(entry_str, "tap") != 0 &&
            strcmp(entry_str, "ips") != 0) {
        // argument order follows the format string: interface name first, then
        // the rejected value (the two used to be passed the other way round)
        SCLogWarning("%s: copy mode \"%s\" is not one of the possible values (none|tap|ips). "
                     "Changing to %s",
                iconf->iface, entry_str, DPDK_CONFIG_DEFAULT_COPY_MODE);
        entry_str = DPDK_CONFIG_DEFAULT_COPY_MODE;
    }

    if (strcmp(entry_str, "none") == 0) {
        iconf->copy_mode = DPDK_COPY_MODE_NONE;
    } else if (strcmp(entry_str, "tap") == 0) {
        iconf->copy_mode = DPDK_COPY_MODE_TAP;
    } else if (strcmp(entry_str, "ips") == 0) {
        iconf->copy_mode = DPDK_COPY_MODE_IPS;
    }

    SCReturnInt(0);
}
667
668
/**
 * Applies the copy interface and the copy mode and checks that the two are
 * consistent: with copy mode "none" the copy interface is cleared, with any
 * other mode a copy interface is mandatory.
 *
 * @param iconf configuration to update
 * @param iface name of the copy interface
 * @param mode name of the copy mode
 * @return 0 on success, a negative value from ConfigSetCopyIface or
 *         ConfigSetCopyMode, or -EINVAL when a copy mode is enabled without a
 *         copy interface
 */
static int ConfigSetCopyIfaceSettings(DPDKIfaceConfig *iconf, const char *iface, const char *mode)
{
    SCEnter();
    int rc = ConfigSetCopyIface(iconf, iface);
    if (rc < 0) {
        SCReturnInt(rc);
    }

    rc = ConfigSetCopyMode(iconf, mode);
    if (rc < 0) {
        SCReturnInt(rc);
    }

    if (iconf->copy_mode == DPDK_COPY_MODE_NONE) {
        // nothing is copied, drop whatever copy interface was configured
        iconf->out_iface = NULL;
        SCReturnInt(0);
    }

    const bool iface_missing = (iconf->out_iface == NULL || iconf->out_iface[0] == '\0');
    if (iface_missing) {
        SCLogError("%s: copy mode enabled but interface not set", iconf->iface);
        SCReturnInt(-EINVAL);
    }

    SCReturnInt(0);
}
694
695
/**
 * Fills the interface configuration from the "dpdk.interfaces" section of
 * the configuration file.
 * Every setting is looked up for the interface first and in the default
 * node second; when the lookup does not return 1 the built-in
 * DPDK_CONFIG_DEFAULT_* value is used. The number of RX and TX queues is
 * derived from the number of threads. A missing interface section is reported
 * through FatalError.
 *
 * @param iconf configuration to fill in
 * @param iface name of the interface
 * @return 0 on success, a negative value of the first setter that failed
 */
static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface)
{
    SCEnter();
    int retval;
    ConfNode *if_root;
    ConfNode *if_default;
    const char *entry_str = NULL;
    intmax_t entry_int = 0;
    int entry_bool = 0;
    const char *copy_iface_str = NULL;
    const char *copy_mode_str = NULL;

    ConfigSetIface(iconf, iface);

    if (ConfSetRootAndDefaultNodes("dpdk.interfaces", iconf->iface, &if_root, &if_default) < 0) {
        FatalError("failed to find DPDK configuration for the interface %s", iconf->iface);
    }

    // threads
    if (ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.threads, &entry_str) != 1) {
        entry_str = DPDK_CONFIG_DEFAULT_THREADS;
    }
    retval = ConfigSetThreads(iconf, entry_str);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    // interrupt mode
    const bool irq_enable = (ConfGetChildValueBoolWithDefault(if_root, if_default,
                                     dpdk_yaml.irq_mode, &entry_bool) == 1)
                                    ? (entry_bool != 0)
                                    : DPDK_CONFIG_DEFAULT_INTERRUPT_MODE;
    if (!ConfigSetInterruptMode(iconf, irq_enable)) {
        SCReturnInt(-EINVAL);
    }

    // currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported
    retval = ConfigSetRxQueues(iconf, (uint16_t)iconf->threads);
    if (retval < 0) {
        SCReturnInt(retval);
    }
    retval = ConfigSetTxQueues(iconf, (uint16_t)iconf->threads);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    // mempool size
    if (ConfGetChildValueIntWithDefault(if_root, if_default, dpdk_yaml.mempool_size, &entry_int) !=
            1) {
        entry_int = DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE;
    }
    retval = ConfigSetMempoolSize(iconf, entry_int);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    // mempool cache size (relies on the mempool size set above)
    if (ConfGetChildValueWithDefault(
                if_root, if_default, dpdk_yaml.mempool_cache_size, &entry_str) != 1) {
        entry_str = DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE;
    }
    retval = ConfigSetMempoolCacheSize(iconf, entry_str);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    // RX descriptors
    if (ConfGetChildValueIntWithDefault(
                if_root, if_default, dpdk_yaml.rx_descriptors, &entry_int) != 1) {
        entry_int = DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS;
    }
    retval = ConfigSetRxDescriptors(iconf, entry_int);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    // TX descriptors
    if (ConfGetChildValueIntWithDefault(
                if_root, if_default, dpdk_yaml.tx_descriptors, &entry_int) != 1) {
        entry_int = DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS;
    }
    retval = ConfigSetTxDescriptors(iconf, entry_int);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    // MTU
    if (ConfGetChildValueIntWithDefault(if_root, if_default, dpdk_yaml.mtu, &entry_int) != 1) {
        entry_int = DPDK_CONFIG_DEFAULT_MTU;
    }
    retval = ConfigSetMtu(iconf, entry_int);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    // RSS hash functions, NULL lets the setter pick its default
    if (ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.rss_hf, &entry_str) != 1) {
        entry_str = NULL;
    }
    retval = ConfigSetRSSHashFunctions(iconf, entry_str);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    // promiscuous mode
    if (ConfGetChildValueBoolWithDefault(if_root, if_default, dpdk_yaml.promisc, &entry_bool) !=
            1) {
        entry_bool = DPDK_CONFIG_DEFAULT_PROMISCUOUS_MODE;
    }
    if (!ConfigSetPromiscuousMode(iconf, entry_bool)) {
        SCReturnInt(-EINVAL);
    }

    // multicast
    if (ConfGetChildValueBoolWithDefault(if_root, if_default, dpdk_yaml.multicast, &entry_bool) !=
            1) {
        entry_bool = DPDK_CONFIG_DEFAULT_MULTICAST_MODE;
    }
    if (!ConfigSetMulticast(iconf, entry_bool)) {
        SCReturnInt(-EINVAL);
    }

    // checksum validation
    if (ConfGetChildValueBoolWithDefault(
                if_root, if_default, dpdk_yaml.checksum_checks, &entry_bool) != 1) {
        entry_bool = DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION;
    }
    retval = ConfigSetChecksumChecks(iconf, entry_bool);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    // checksum validation offload
    if (ConfGetChildValueBoolWithDefault(
                if_root, if_default, dpdk_yaml.checksum_checks_offload, &entry_bool) != 1) {
        entry_bool = DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD;
    }
    retval = ConfigSetChecksumOffload(iconf, entry_bool);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    // copy mode and copy interface are validated together
    if (ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.copy_mode, &copy_mode_str) !=
            1) {
        copy_mode_str = DPDK_CONFIG_DEFAULT_COPY_MODE;
    }
    if (ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.copy_iface, &copy_iface_str) !=
            1) {
        copy_iface_str = DPDK_CONFIG_DEFAULT_COPY_INTERFACE;
    }
    retval = ConfigSetCopyIfaceSettings(iconf, copy_iface_str, copy_mode_str);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    SCReturnInt(0);
}
827
828
static int32_t ConfigValidateThreads(uint16_t iface_threads)
829
{
830
    static uint32_t total_cpus = 0;
831
    total_cpus += iface_threads;
832
    ThreadsAffinityType *wtaf = GetAffinityTypeFromName("worker-cpu-set");
833
    if (wtaf == NULL) {
834
        SCLogError("Specify worker-cpu-set list in the threading section");
835
        return -1;
836
    }
837
    if (total_cpus > UtilAffinityGetAffinedCPUNum(wtaf)) {
838
        SCLogError("Interfaces requested more cores than configured in the threading section "
839
                   "(requested %d configured %d",
840
                total_cpus, UtilAffinityGetAffinedCPUNum(wtaf));
841
        return -1;
842
    }
843
844
    return 0;
845
}
846
847
static DPDKIfaceConfig *ConfigParse(const char *iface)
848
{
849
    SCEnter();
850
    int retval;
851
    DPDKIfaceConfig *iconf = NULL;
852
    if (iface == NULL)
853
        FatalError("DPDK interface is NULL");
854
855
    ConfigInit(&iconf);
856
    retval = ConfigLoad(iconf, iface);
857
    if (retval < 0 || ConfigValidateThreads(iconf->threads) != 0) {
858
        iconf->DerefFunc(iconf);
859
        SCReturnPtr(NULL, "void *");
860
    }
861
862
    SCReturnPtr(iconf, "DPDKIfaceConfig *");
863
}
864
865
static void DeviceSetPMDSpecificRSS(struct rte_eth_rss_conf *rss_conf, const char *driver_name)
866
{
867
    // RSS is configured in a specific way for a driver i40e and DPDK version <= 19.xx
868
    if (strcmp(driver_name, "net_i40e") == 0)
869
        i40eDeviceSetRSSConf(rss_conf);
870
    if (strcmp(driver_name, "net_ice") == 0)
871
        iceDeviceSetRSSConf(rss_conf);
872
    if (strcmp(driver_name, "net_ixgbe") == 0)
873
        ixgbeDeviceSetRSSHashFunction(&rss_conf->rss_hf);
874
    if (strcmp(driver_name, "net_e1000_igb") == 0)
875
        rss_conf->rss_hf = (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_IPV6_EX);
876
}
877
878
/**
 * \brief Finds the least significant set bit of a 64-bit value.
 *
 * \param bits value to scan
 * \return zero-based position of the lowest set bit, -1 if no bit is set
 */
static int GetFirstSetBitPosition(uint64_t bits)
{
    int position = 0;
    while (bits != 0) {
        if (bits & 1)
            return position;
        bits >>= 1;
        position++;
    }
    return -1;
}
887
888
static void DumpRSSFlags(const uint64_t requested, const uint64_t actual)
889
{
890
    SCLogConfig("REQUESTED (groups):");
891
892
    SCLogConfig(
893
            "RTE_ETH_RSS_IP %sset", ((requested & RTE_ETH_RSS_IP) == RTE_ETH_RSS_IP) ? "" : "NOT ");
894
    SCLogConfig("RTE_ETH_RSS_TCP %sset",
895
            ((requested & RTE_ETH_RSS_TCP) == RTE_ETH_RSS_TCP) ? "" : "NOT ");
896
    SCLogConfig("RTE_ETH_RSS_UDP %sset",
897
            ((requested & RTE_ETH_RSS_UDP) == RTE_ETH_RSS_UDP) ? "" : "NOT ");
898
    SCLogConfig("RTE_ETH_RSS_SCTP %sset",
899
            ((requested & RTE_ETH_RSS_SCTP) == RTE_ETH_RSS_SCTP) ? "" : "NOT ");
900
    SCLogConfig("RTE_ETH_RSS_TUNNEL %sset",
901
            ((requested & RTE_ETH_RSS_TUNNEL) == RTE_ETH_RSS_TUNNEL) ? "" : "NOT ");
902
903
    SCLogConfig("REQUESTED (individual):");
904
    SCLogConfig("RTE_ETH_RSS_IPV4 (Bit position: %d) %sset",
905
            GetFirstSetBitPosition(RTE_ETH_RSS_IPV4), (requested & RTE_ETH_RSS_IPV4) ? "" : "NOT ");
906
    SCLogConfig("RTE_ETH_RSS_FRAG_IPV4 (Bit position: %d) %sset",
907
            GetFirstSetBitPosition(RTE_ETH_RSS_FRAG_IPV4),
908
            (requested & RTE_ETH_RSS_FRAG_IPV4) ? "" : "NOT ");
909
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_TCP (Bit position: %d) %sset",
910
            GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV4_TCP),
911
            (requested & RTE_ETH_RSS_NONFRAG_IPV4_TCP) ? "" : "NOT ");
912
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_UDP (Bit position: %d) %sset",
913
            GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV4_UDP),
914
            (requested & RTE_ETH_RSS_NONFRAG_IPV4_UDP) ? "" : "NOT ");
915
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_SCTP (Bit position: %d) %sset",
916
            GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV4_SCTP),
917
            (requested & RTE_ETH_RSS_NONFRAG_IPV4_SCTP) ? "" : "NOT ");
918
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_OTHER (Bit position: %d) %sset",
919
            GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV4_OTHER),
920
            (requested & RTE_ETH_RSS_NONFRAG_IPV4_OTHER) ? "" : "NOT ");
921
    SCLogConfig("RTE_ETH_RSS_IPV6 (Bit position: %d) %sset",
922
            GetFirstSetBitPosition(RTE_ETH_RSS_IPV6), (requested & RTE_ETH_RSS_IPV6) ? "" : "NOT ");
923
    SCLogConfig("RTE_ETH_RSS_FRAG_IPV6 (Bit position: %d) %sset",
924
            GetFirstSetBitPosition(RTE_ETH_RSS_FRAG_IPV6),
925
            (requested & RTE_ETH_RSS_FRAG_IPV6) ? "" : "NOT ");
926
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_TCP (Bit position: %d) %sset",
927
            GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV6_TCP),
928
            (requested & RTE_ETH_RSS_NONFRAG_IPV6_TCP) ? "" : "NOT ");
929
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_UDP (Bit position: %d) %sset",
930
            GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV6_UDP),
931
            (requested & RTE_ETH_RSS_NONFRAG_IPV6_UDP) ? "" : "NOT ");
932
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_SCTP (Bit position: %d) %sset",
933
            GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV6_SCTP),
934
            (requested & RTE_ETH_RSS_NONFRAG_IPV6_SCTP) ? "" : "NOT ");
935
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_OTHER (Bit position: %d) %sset",
936
            GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV6_OTHER),
937
            (requested & RTE_ETH_RSS_NONFRAG_IPV6_OTHER) ? "" : "NOT ");
938
939
    SCLogConfig("RTE_ETH_RSS_L2_PAYLOAD (Bit position: %d) %sset",
940
            GetFirstSetBitPosition(RTE_ETH_RSS_L2_PAYLOAD),
941
            (requested & RTE_ETH_RSS_L2_PAYLOAD) ? "" : "NOT ");
942
    SCLogConfig("RTE_ETH_RSS_IPV6_EX (Bit position: %d) %sset",
943
            GetFirstSetBitPosition(RTE_ETH_RSS_IPV6_EX),
944
            (requested & RTE_ETH_RSS_IPV6_EX) ? "" : "NOT ");
945
    SCLogConfig("RTE_ETH_RSS_IPV6_TCP_EX (Bit position: %d) %sset",
946
            GetFirstSetBitPosition(RTE_ETH_RSS_IPV6_TCP_EX),
947
            (requested & RTE_ETH_RSS_IPV6_TCP_EX) ? "" : "NOT ");
948
    SCLogConfig("RTE_ETH_RSS_IPV6_UDP_EX (Bit position: %d) %sset",
949
            GetFirstSetBitPosition(RTE_ETH_RSS_IPV6_UDP_EX),
950
            (requested & RTE_ETH_RSS_IPV6_UDP_EX) ? "" : "NOT ");
951
952
    SCLogConfig("RTE_ETH_RSS_PORT (Bit position: %d) %sset",
953
            GetFirstSetBitPosition(RTE_ETH_RSS_PORT), (requested & RTE_ETH_RSS_PORT) ? "" : "NOT ");
954
    SCLogConfig("RTE_ETH_RSS_VXLAN (Bit position: %d) %sset",
955
            GetFirstSetBitPosition(RTE_ETH_RSS_VXLAN),
956
            (requested & RTE_ETH_RSS_VXLAN) ? "" : "NOT ");
957
    SCLogConfig("RTE_ETH_RSS_NVGRE (Bit position: %d) %sset",
958
            GetFirstSetBitPosition(RTE_ETH_RSS_NVGRE),
959
            (requested & RTE_ETH_RSS_NVGRE) ? "" : "NOT ");
960
    SCLogConfig("RTE_ETH_RSS_GTPU (Bit position: %d) %sset",
961
            GetFirstSetBitPosition(RTE_ETH_RSS_GTPU), (requested & RTE_ETH_RSS_GTPU) ? "" : "NOT ");
962
963
    SCLogConfig("RTE_ETH_RSS_L3_SRC_ONLY (Bit position: %d) %sset",
964
            GetFirstSetBitPosition(RTE_ETH_RSS_L3_SRC_ONLY),
965
            (requested & RTE_ETH_RSS_L3_SRC_ONLY) ? "" : "NOT ");
966
    SCLogConfig("RTE_ETH_RSS_L3_DST_ONLY (Bit position: %d) %sset",
967
            GetFirstSetBitPosition(RTE_ETH_RSS_L3_DST_ONLY),
968
            (requested & RTE_ETH_RSS_L3_DST_ONLY) ? "" : "NOT ");
969
    SCLogConfig("RTE_ETH_RSS_L4_SRC_ONLY (Bit position: %d) %sset",
970
            GetFirstSetBitPosition(RTE_ETH_RSS_L4_SRC_ONLY),
971
            (requested & RTE_ETH_RSS_L4_SRC_ONLY) ? "" : "NOT ");
972
    SCLogConfig("RTE_ETH_RSS_L4_DST_ONLY (Bit position: %d) %sset",
973
            GetFirstSetBitPosition(RTE_ETH_RSS_L4_DST_ONLY),
974
            (requested & RTE_ETH_RSS_L4_DST_ONLY) ? "" : "NOT ");
975
    SCLogConfig("ACTUAL (group):");
976
    SCLogConfig(
977
            "RTE_ETH_RSS_IP %sset", ((actual & RTE_ETH_RSS_IP) == RTE_ETH_RSS_IP) ? "" : "NOT ");
978
    SCLogConfig(
979
            "RTE_ETH_RSS_TCP %sset", ((actual & RTE_ETH_RSS_TCP) == RTE_ETH_RSS_TCP) ? "" : "NOT ");
980
    SCLogConfig(
981
            "RTE_ETH_RSS_UDP %sset", ((actual & RTE_ETH_RSS_UDP) == RTE_ETH_RSS_UDP) ? "" : "NOT ");
982
    SCLogConfig("RTE_ETH_RSS_SCTP %sset",
983
            ((actual & RTE_ETH_RSS_SCTP) == RTE_ETH_RSS_SCTP) ? "" : "NOT ");
984
    SCLogConfig("RTE_ETH_RSS_TUNNEL %sset",
985
            ((actual & RTE_ETH_RSS_TUNNEL) == RTE_ETH_RSS_TUNNEL) ? "" : "NOT ");
986
987
    SCLogConfig("ACTUAL (individual flags):");
988
    SCLogConfig("RTE_ETH_RSS_IPV4 %sset", (actual & RTE_ETH_RSS_IPV4) ? "" : "NOT ");
989
    SCLogConfig("RTE_ETH_RSS_FRAG_IPV4 %sset", (actual & RTE_ETH_RSS_FRAG_IPV4) ? "" : "NOT ");
990
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_TCP %sset",
991
            (actual & RTE_ETH_RSS_NONFRAG_IPV4_TCP) ? "" : "NOT ");
992
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_UDP %sset",
993
            (actual & RTE_ETH_RSS_NONFRAG_IPV4_UDP) ? "" : "NOT ");
994
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_SCTP %sset",
995
            (actual & RTE_ETH_RSS_NONFRAG_IPV4_SCTP) ? "" : "NOT ");
996
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_OTHER %sset",
997
            (actual & RTE_ETH_RSS_NONFRAG_IPV4_OTHER) ? "" : "NOT ");
998
    SCLogConfig("RTE_ETH_RSS_IPV6 %sset", (actual & RTE_ETH_RSS_IPV6) ? "" : "NOT ");
999
    SCLogConfig("RTE_ETH_RSS_FRAG_IPV6 %sset", (actual & RTE_ETH_RSS_FRAG_IPV6) ? "" : "NOT ");
1000
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_TCP %sset",
1001
            (actual & RTE_ETH_RSS_NONFRAG_IPV6_TCP) ? "" : "NOT ");
1002
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_UDP %sset",
1003
            (actual & RTE_ETH_RSS_NONFRAG_IPV6_UDP) ? "" : "NOT ");
1004
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_SCTP %sset",
1005
            (actual & RTE_ETH_RSS_NONFRAG_IPV6_SCTP) ? "" : "NOT ");
1006
    SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_OTHER %sset",
1007
            (actual & RTE_ETH_RSS_NONFRAG_IPV6_OTHER) ? "" : "NOT ");
1008
1009
    SCLogConfig("RTE_ETH_RSS_L2_PAYLOAD %sset", (actual & RTE_ETH_RSS_L2_PAYLOAD) ? "" : "NOT ");
1010
    SCLogConfig("RTE_ETH_RSS_IPV6_EX %sset", (actual & RTE_ETH_RSS_IPV6_EX) ? "" : "NOT ");
1011
    SCLogConfig("RTE_ETH_RSS_IPV6_TCP_EX %sset", (actual & RTE_ETH_RSS_IPV6_TCP_EX) ? "" : "NOT ");
1012
    SCLogConfig("RTE_ETH_RSS_IPV6_UDP_EX %sset", (actual & RTE_ETH_RSS_IPV6_UDP_EX) ? "" : "NOT ");
1013
1014
    SCLogConfig("RTE_ETH_RSS_PORT %sset", (actual & RTE_ETH_RSS_PORT) ? "" : "NOT ");
1015
    SCLogConfig("RTE_ETH_RSS_VXLAN %sset", (actual & RTE_ETH_RSS_VXLAN) ? "" : "NOT ");
1016
    SCLogConfig("RTE_ETH_RSS_NVGRE %sset", (actual & RTE_ETH_RSS_NVGRE) ? "" : "NOT ");
1017
    SCLogConfig("RTE_ETH_RSS_GTPU %sset", (actual & RTE_ETH_RSS_GTPU) ? "" : "NOT ");
1018
1019
    SCLogConfig("RTE_ETH_RSS_L3_SRC_ONLY %sset", (actual & RTE_ETH_RSS_L3_SRC_ONLY) ? "" : "NOT ");
1020
    SCLogConfig("RTE_ETH_RSS_L3_DST_ONLY %sset", (actual & RTE_ETH_RSS_L3_DST_ONLY) ? "" : "NOT ");
1021
    SCLogConfig("RTE_ETH_RSS_L4_SRC_ONLY %sset", (actual & RTE_ETH_RSS_L4_SRC_ONLY) ? "" : "NOT ");
1022
    SCLogConfig("RTE_ETH_RSS_L4_DST_ONLY %sset", (actual & RTE_ETH_RSS_L4_DST_ONLY) ? "" : "NOT ");
1023
}
1024
1025
static void DumpRXOffloadCapabilities(const uint64_t rx_offld_capa)
1026
{
1027
    SCLogConfig("RTE_ETH_RX_OFFLOAD_VLAN_STRIP - %savailable",
1028
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_VLAN_STRIP ? "" : "NOT ");
1029
    SCLogConfig("RTE_ETH_RX_OFFLOAD_IPV4_CKSUM - %savailable",
1030
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM ? "" : "NOT ");
1031
    SCLogConfig("RTE_ETH_RX_OFFLOAD_UDP_CKSUM - %savailable",
1032
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_UDP_CKSUM ? "" : "NOT ");
1033
    SCLogConfig("RTE_ETH_RX_OFFLOAD_TCP_CKSUM - %savailable",
1034
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_TCP_CKSUM ? "" : "NOT ");
1035
    SCLogConfig("RTE_ETH_RX_OFFLOAD_TCP_LRO - %savailable",
1036
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO ? "" : "NOT ");
1037
    SCLogConfig("RTE_ETH_RX_OFFLOAD_QINQ_STRIP - %savailable",
1038
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_QINQ_STRIP ? "" : "NOT ");
1039
    SCLogConfig("RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM - %savailable",
1040
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM ? "" : "NOT ");
1041
    SCLogConfig("RTE_ETH_RX_OFFLOAD_MACSEC_STRIP - %savailable",
1042
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_MACSEC_STRIP ? "" : "NOT ");
1043
#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
1044
    SCLogConfig("RTE_ETH_RX_OFFLOAD_HEADER_SPLIT - %savailable",
1045
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_HEADER_SPLIT ? "" : "NOT ");
1046
#endif
1047
    SCLogConfig("RTE_ETH_RX_OFFLOAD_VLAN_FILTER - %savailable",
1048
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_VLAN_FILTER ? "" : "NOT ");
1049
    SCLogConfig("RTE_ETH_RX_OFFLOAD_VLAN_EXTEND - %savailable",
1050
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_VLAN_EXTEND ? "" : "NOT ");
1051
    SCLogConfig("RTE_ETH_RX_OFFLOAD_SCATTER - %savailable",
1052
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_SCATTER ? "" : "NOT ");
1053
    SCLogConfig("RTE_ETH_RX_OFFLOAD_TIMESTAMP - %savailable",
1054
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_TIMESTAMP ? "" : "NOT ");
1055
    SCLogConfig("RTE_ETH_RX_OFFLOAD_SECURITY - %savailable",
1056
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_SECURITY ? "" : "NOT ");
1057
    SCLogConfig("RTE_ETH_RX_OFFLOAD_KEEP_CRC - %savailable",
1058
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_KEEP_CRC ? "" : "NOT ");
1059
    SCLogConfig("RTE_ETH_RX_OFFLOAD_SCTP_CKSUM - %savailable",
1060
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_SCTP_CKSUM ? "" : "NOT ");
1061
    SCLogConfig("RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM - %savailable",
1062
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM ? "" : "NOT ");
1063
    SCLogConfig("RTE_ETH_RX_OFFLOAD_RSS_HASH - %savailable",
1064
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_RSS_HASH ? "" : "NOT ");
1065
#if RTE_VERSION >= RTE_VERSION_NUM(20, 11, 0, 0)
1066
    SCLogConfig("RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT - %savailable",
1067
            rx_offld_capa & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT ? "" : "NOT ");
1068
#endif
1069
}
1070
1071
static int DeviceValidateMTU(const DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info)
1072
{
1073
    if (iconf->mtu > dev_info->max_mtu || iconf->mtu < dev_info->min_mtu) {
1074
        SCLogError("%s: MTU out of bounds. "
1075
                   "Min MTU: %" PRIu16 " Max MTU: %" PRIu16,
1076
                iconf->iface, dev_info->min_mtu, dev_info->max_mtu);
1077
        SCReturnInt(-ERANGE);
1078
    }
1079
1080
#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
1081
    // check if jumbo frames are set and are available
1082
    if (iconf->mtu > RTE_ETHER_MAX_LEN &&
1083
            !(dev_info->rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)) {
1084
        SCLogError("%s: jumbo frames not supported, set MTU to 1500", iconf->iface);
1085
        SCReturnInt(-EINVAL);
1086
    }
1087
#endif
1088
1089
    SCReturnInt(0);
1090
}
1091
1092
static void DeviceSetMTU(struct rte_eth_conf *port_conf, uint16_t mtu)
1093
{
1094
#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
1095
    port_conf->rxmode.mtu = mtu;
1096
#else
1097
    port_conf->rxmode.max_rx_pkt_len = mtu;
1098
    if (mtu > RTE_ETHER_MAX_LEN) {
1099
        port_conf->rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
1100
    }
1101
#endif
1102
}
1103
1104
/**
1105
 * \param port_id - queried port
1106
 * \param socket_id - socket ID of the queried port
1107
 * \return non-negative number on success, negative on failure (errno)
1108
 */
1109
static int32_t DeviceSetSocketID(uint16_t port_id, int32_t *socket_id)
1110
{
1111
    rte_errno = 0;
1112
    int retval = rte_eth_dev_socket_id(port_id);
1113
    *socket_id = retval;
1114
1115
#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0) // DPDK API changed since 22.11
1116
    retval = -rte_errno;
1117
#else
1118
    if (retval == SOCKET_ID_ANY)
1119
        retval = 0; // DPDK couldn't determine socket ID of a port
1120
#endif
1121
1122
    return retval;
1123
}
1124
1125
static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,
1126
        const struct rte_eth_dev_info *dev_info, struct rte_eth_conf *port_conf)
1127
{
1128
    DumpRXOffloadCapabilities(dev_info->rx_offload_capa);
1129
    *port_conf = (struct rte_eth_conf){
1130
            .rxmode = {
1131
                    .mq_mode = RTE_ETH_MQ_RX_NONE,
1132
                    .offloads = 0, // turn every offload off to prevent any packet modification
1133
            },
1134
            .txmode = {
1135
                    .mq_mode = RTE_ETH_MQ_TX_NONE,
1136
                    .offloads = 0,
1137
            },
1138
    };
1139
1140
    SCLogConfig("%s: interrupt mode is %s", iconf->iface,
1141
            iconf->flags & DPDK_IRQ_MODE ? "enabled" : "disabled");
1142
    if (iconf->flags & DPDK_IRQ_MODE)
1143
        port_conf->intr_conf.rxq = 1;
1144
1145
    // configure RX offloads
1146
    if (dev_info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_RSS_HASH) {
1147
        if (iconf->nb_rx_queues > 1) {
1148
            SCLogConfig("%s: RSS enabled for %d queues", iconf->iface, iconf->nb_rx_queues);
1149
            port_conf->rx_adv_conf.rss_conf = (struct rte_eth_rss_conf){
1150
                .rss_key = rss_hkey,
1151
                .rss_key_len = RSS_HKEY_LEN,
1152
                .rss_hf = iconf->rss_hf,
1153
            };
1154
1155
            const char *dev_driver = dev_info->driver_name;
1156
            if (strcmp(dev_info->driver_name, "net_bonding") == 0) {
1157
                dev_driver = BondingDeviceDriverGet(iconf->port_id);
1158
            }
1159
1160
            DeviceSetPMDSpecificRSS(&port_conf->rx_adv_conf.rss_conf, dev_driver);
1161
1162
            uint64_t rss_hf_tmp =
1163
                    port_conf->rx_adv_conf.rss_conf.rss_hf & dev_info->flow_type_rss_offloads;
1164
            if (port_conf->rx_adv_conf.rss_conf.rss_hf != rss_hf_tmp) {
1165
                DumpRSSFlags(port_conf->rx_adv_conf.rss_conf.rss_hf, rss_hf_tmp);
1166
1167
                SCLogWarning("%s: modified RSS hash function based on hardware support: "
1168
                             "requested:%#" PRIx64 ", configured:%#" PRIx64,
1169
                        iconf->iface, port_conf->rx_adv_conf.rss_conf.rss_hf, rss_hf_tmp);
1170
                port_conf->rx_adv_conf.rss_conf.rss_hf = rss_hf_tmp;
1171
            }
1172
            port_conf->rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
1173
        } else {
1174
            SCLogConfig("%s: RSS not enabled", iconf->iface);
1175
            port_conf->rx_adv_conf.rss_conf.rss_key = NULL;
1176
            port_conf->rx_adv_conf.rss_conf.rss_hf = 0;
1177
        }
1178
    } else {
1179
        SCLogConfig("%s: RSS not supported", iconf->iface);
1180
    }
1181
1182
    if (iconf->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1183
        SCLogConfig("%s: checksum validation disabled", iconf->iface);
1184
    } else if ((dev_info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_CHECKSUM) ==
1185
               RTE_ETH_RX_OFFLOAD_CHECKSUM) { // multibit comparison to make sure all bits are set
1186
        if (iconf->checksum_mode == CHECKSUM_VALIDATION_ENABLE &&
1187
                iconf->flags & DPDK_RX_CHECKSUM_OFFLOAD) {
1188
            SCLogConfig("%s: IP, TCP and UDP checksum validation offloaded", iconf->iface);
1189
            port_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_CHECKSUM;
1190
        } else if (iconf->checksum_mode == CHECKSUM_VALIDATION_ENABLE &&
1191
                   !(iconf->flags & DPDK_RX_CHECKSUM_OFFLOAD)) {
1192
            SCLogConfig("%s: checksum validation enabled (but can be offloaded)", iconf->iface);
1193
        }
1194
    }
1195
1196
    DeviceSetMTU(port_conf, iconf->mtu);
1197
1198
    if (dev_info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
1199
        port_conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
1200
    }
1201
}
1202
1203
/**
 * \brief Creates the packet mempool of the interface and sets up all of its
 *        RX and TX queues.
 *
 * On a queue setup failure the mempool is released and the pointer stored in
 * the interface configuration is cleared, so that no later cleanup can release
 * the same mempool again.
 *
 * \param iconf interface configuration, receives the created mempool
 * \param dev_info device information providing the default queue settings
 * \param port_conf port configuration providing the queue offloads
 * \return 0 on success, negative value (errno) on failure
 */
static int DeviceConfigureQueues(DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info,
        const struct rte_eth_conf *port_conf)
{
    SCEnter();
    int retval;
    struct rte_eth_rxconf rxq_conf;
    struct rte_eth_txconf txq_conf;

    char mempool_name[64];
    snprintf(mempool_name, sizeof(mempool_name), "mempool_%.20s", iconf->iface);

    // The sizes are computed in 32 bits first - the mbuf data room passed to
    // rte_pktmbuf_pool_create() is 16 bits wide and large MTUs would silently wrap.
    // +4 for VLAN header
    const uint32_t mtu_size = (uint32_t)iconf->mtu + RTE_ETHER_CRC_LEN + RTE_ETHER_HDR_LEN + 4;
    const uint32_t mbuf_size_wide = ROUNDUP(mtu_size, 1024) + RTE_PKTMBUF_HEADROOM;
    if (mbuf_size_wide > UINT16_MAX) {
        SCLogError("%s: MTU %u requires mbuf size %" PRIu32 " which exceeds the maximum of %u",
                iconf->iface, iconf->mtu, mbuf_size_wide, UINT16_MAX);
        SCReturnInt(-ERANGE);
    }
    const uint16_t mbuf_size = (uint16_t)mbuf_size_wide;

    SCLogConfig("%s: creating packet mbuf pool %s of size %d, cache size %d, mbuf size %d",
            iconf->iface, mempool_name, iconf->mempool_size, iconf->mempool_cache_size, mbuf_size);

    iconf->pkt_mempool = rte_pktmbuf_pool_create(mempool_name, iconf->mempool_size,
            iconf->mempool_cache_size, 0, mbuf_size, (int)iconf->socket_id);
    if (iconf->pkt_mempool == NULL) {
        retval = -rte_errno;
        SCLogError("%s: rte_pktmbuf_pool_create failed with code %d (mempool: %s) - %s",
                iconf->iface, rte_errno, mempool_name, rte_strerror(rte_errno));
        SCReturnInt(retval);
    }

    for (uint16_t queue_id = 0; queue_id < iconf->nb_rx_queues; queue_id++) {
        // start from the device defaults and reset the thresholds
        rxq_conf = dev_info->default_rxconf;
        rxq_conf.offloads = port_conf->rxmode.offloads;
        rxq_conf.rx_thresh.hthresh = 0;
        rxq_conf.rx_thresh.pthresh = 0;
        rxq_conf.rx_thresh.wthresh = 0;
        rxq_conf.rx_free_thresh = 0;
        rxq_conf.rx_drop_en = 0;
        SCLogConfig("%s: rx queue setup: queue:%d port:%d rx_desc:%d tx_desc:%d rx: hthresh: %d "
                    "pthresh %d wthresh %d free_thresh %d drop_en %d offloads %" PRIu64,
                iconf->iface, queue_id, iconf->port_id, iconf->nb_rx_desc, iconf->nb_tx_desc,
                rxq_conf.rx_thresh.hthresh, rxq_conf.rx_thresh.pthresh, rxq_conf.rx_thresh.wthresh,
                rxq_conf.rx_free_thresh, rxq_conf.rx_drop_en, rxq_conf.offloads);

        retval = rte_eth_rx_queue_setup(iconf->port_id, queue_id, iconf->nb_rx_desc,
                iconf->socket_id, &rxq_conf, iconf->pkt_mempool);
        if (retval < 0) {
            rte_mempool_free(iconf->pkt_mempool);
            iconf->pkt_mempool = NULL; // do not leave a dangling pointer in the config
            SCLogError(
                    "%s: rte_eth_rx_queue_setup failed with code %d for device queue %u of port %u",
                    iconf->iface, retval, queue_id, iconf->port_id);
            SCReturnInt(retval);
        }
    }

    for (uint16_t queue_id = 0; queue_id < iconf->nb_tx_queues; queue_id++) {
        txq_conf = dev_info->default_txconf;
        txq_conf.offloads = port_conf->txmode.offloads;
        SCLogConfig("%s: tx queue setup: queue:%d port:%d", iconf->iface, queue_id, iconf->port_id);
        retval = rte_eth_tx_queue_setup(
                iconf->port_id, queue_id, iconf->nb_tx_desc, iconf->socket_id, &txq_conf);
        if (retval < 0) {
            rte_mempool_free(iconf->pkt_mempool);
            iconf->pkt_mempool = NULL; // do not leave a dangling pointer in the config
            SCLogError(
                    "%s: rte_eth_tx_queue_setup failed with code %d for device queue %u of port %u",
                    iconf->iface, retval, queue_id, iconf->port_id);
            SCReturnInt(retval);
        }
    }

    SCReturnInt(0);
}
1272
1273
/**
 * \brief Loads the configuration of the copy interface and checks that it is
 *        compatible with the configuration of this interface.
 *
 * The queue counts, MTU and copy mode have to match and the copy interface
 * has to point back to this interface. The temporary configuration of the
 * copy interface is released before returning.
 *
 * \param iconf configuration of the interface whose copy interface is checked
 * \return 0 when the configurations are compatible, -EINVAL otherwise
 */
static int DeviceValidateOutIfaceConfig(DPDKIfaceConfig *iconf)
{
    SCEnter();
    DPDKIfaceConfig *out_iconf = NULL;
    ConfigInit(&out_iconf);
    if (out_iconf == NULL) {
        FatalError("Copy interface of the interface \"%s\" is NULL", iconf->iface);
    }

    int status = 0;
    if (ConfigLoad(out_iconf, iconf->out_iface) < 0) {
        SCLogError("%s: fail to load config of interface", iconf->out_iface);
        status = -EINVAL;
    } else if (iconf->nb_rx_queues != out_iconf->nb_tx_queues) {
        // the other direction is validated when the copy interface is configured
        SCLogError("%s: configured %d RX queues but copy interface %s has %d TX queues"
                   " - number of queues must be equal",
                iconf->iface, iconf->nb_rx_queues, out_iconf->iface, out_iconf->nb_tx_queues);
        status = -EINVAL;
    } else if (iconf->mtu != out_iconf->mtu) {
        SCLogError("%s: configured MTU of %d but copy interface %s has MTU set to %d"
                   " - MTU must be equal",
                iconf->iface, iconf->mtu, out_iconf->iface, out_iconf->mtu);
        status = -EINVAL;
    } else if (iconf->copy_mode != out_iconf->copy_mode) {
        SCLogError("%s: copy modes of interfaces %s and %s are not equal", iconf->iface,
                iconf->iface, out_iconf->iface);
        status = -EINVAL;
    } else if (strcmp(iconf->iface, out_iconf->out_iface) != 0) {
        // check if the other iface has the current iface set as a copy iface
        SCLogError("%s: copy interface of %s is not set to %s", iconf->iface, out_iconf->iface,
                iconf->iface);
        status = -EINVAL;
    }

    // the copy interface configuration was needed only for the comparison
    out_iconf->DerefFunc(out_iconf);
    SCReturnInt(status);
}
1319
1320
/**
 * \brief Prepares the interface for copying packets to its copy interface
 *        (IPS/TAP mode).
 *
 * Nothing is done when no copy interface is configured. Otherwise the port ID
 * of the copy interface is resolved, its NUMA placement is compared with the
 * placement of this interface (a mismatch only produces a warning) and its
 * configuration is validated against this interface.
 *
 * \param iconf interface configuration, receives the port ID of the copy interface
 * \return 0 on success, non-zero error code on failure
 */
static int DeviceConfigureIPS(DPDKIfaceConfig *iconf)
{
    SCEnter();
    int retval;

    if (iconf->out_iface == NULL) {
        SCReturnInt(0);
    }

    retval = rte_eth_dev_get_port_by_name(iconf->out_iface, &iconf->out_port_id);
    if (retval != 0) {
        SCLogError("%s: failed to obtain out iface %s port id (err=%d)", iconf->iface,
                iconf->out_iface, retval);
        SCReturnInt(retval);
    }

    // Query the socket of the copy (out) port - querying the port of this interface
    // would compare the socket ID with itself and never detect a NUMA mismatch.
    int32_t out_port_socket_id;
    retval = DeviceSetSocketID(iconf->out_port_id, &out_port_socket_id);
    if (retval < 0) {
        SCLogError("%s: invalid socket id (err=%d)", iconf->out_iface, retval);
        SCReturnInt(retval);
    }

    if (iconf->socket_id != out_port_socket_id) {
        SCLogWarning(
                "%s: out iface %s is not on the same NUMA node", iconf->iface, iconf->out_iface);
    }

    retval = DeviceValidateOutIfaceConfig(iconf);
    if (retval != 0) {
        // Error will be written out by the validation function
        SCReturnInt(retval);
    }

    if (iconf->copy_mode == DPDK_COPY_MODE_IPS) {
        SCLogInfo("%s: DPDK IPS mode activated: %s->%s", iconf->iface, iconf->iface,
                iconf->out_iface);
    } else if (iconf->copy_mode == DPDK_COPY_MODE_TAP) {
        SCLogInfo("%s: DPDK TAP mode activated: %s->%s", iconf->iface, iconf->iface,
                iconf->out_iface);
    }

    SCReturnInt(0);
}
1360
1361
/**
1362
 * Function verifies changes in e.g. device info after configuration has
1363
 * happened. Sometimes (e.g. DPDK Bond PMD with Intel NICs i40e/ixgbe) change
1364
 * device info only after the device configuration.
1365
 * @param iconf
1366
 * @param dev_info
1367
 * @return 0 on success, -EAGAIN when reconfiguration is needed, <0 on failure
1368
 */
1369
static int32_t DeviceVerifyPostConfigure(
1370
        const DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info)
1371
{
1372
    struct rte_eth_dev_info post_conf_dev_info = { 0 };
1373
    int32_t ret = rte_eth_dev_info_get(iconf->port_id, &post_conf_dev_info);
1374
    if (ret < 0) {
1375
        SCLogError("%s: getting device info failed (err: %s)", iconf->iface, rte_strerror(-ret));
1376
        SCReturnInt(ret);
1377
    }
1378
1379
    if (dev_info->flow_type_rss_offloads != post_conf_dev_info.flow_type_rss_offloads ||
1380
            dev_info->rx_offload_capa != post_conf_dev_info.rx_offload_capa ||
1381
            dev_info->tx_offload_capa != post_conf_dev_info.tx_offload_capa ||
1382
            dev_info->max_rx_queues != post_conf_dev_info.max_rx_queues ||
1383
            dev_info->max_tx_queues != post_conf_dev_info.max_tx_queues ||
1384
            dev_info->max_mtu != post_conf_dev_info.max_mtu) {
1385
        SCLogWarning("%s: device information severely changed after configuration, reconfiguring",
1386
                iconf->iface);
1387
        return -EAGAIN;
1388
    }
1389
1390
    if (strcmp(dev_info->driver_name, "net_bonding") == 0) {
1391
        ret = BondingAllDevicesSameDriver(iconf->port_id);
1392
        if (ret < 0) {
1393
            SCLogError("%s: bond port uses port with different DPDK drivers", iconf->iface);
1394
            SCReturnInt(ret);
1395
        }
1396
    }
1397
1398
    return 0;
1399
}
1400
1401
/**
 * \brief Configures the DPDK port of the interface.
 *
 * The port is looked up by the interface name, validated against the device
 * limits (queues, MTU), configured, and then the descriptors, allmulticast,
 * promiscuous mode, MTU, queues and the copy interface are set up.
 *
 * Every failure is reported with a negative value so that callers testing for
 * "retval < 0" detect it.
 *
 * \param iconf interface configuration; port ID, socket ID, descriptor counts,
 *              checksum mode and possibly MTU are updated by this function
 * \return 0 on success, -EAGAIN when the device needs to be configured again,
 *         other negative value (errno) on failure
 */
static int DeviceConfigure(DPDKIfaceConfig *iconf)
{
    SCEnter();
    int32_t retval = rte_eth_dev_get_port_by_name(iconf->iface, &(iconf->port_id));
    if (retval < 0) {
        SCLogError("%s: getting port id failed (err: %s)", iconf->iface, rte_strerror(-retval));
        SCReturnInt(retval);
    }

    if (!rte_eth_dev_is_valid_port(iconf->port_id)) {
        SCLogError("%s: specified port %d is invalid", iconf->iface, iconf->port_id);
        // retval holds the successful result of the lookup above, report a real error
        SCReturnInt(-ENODEV);
    }

    retval = DeviceSetSocketID(iconf->port_id, &iconf->socket_id);
    if (retval < 0) {
        SCLogError("%s: invalid socket id (err: %s)", iconf->iface, rte_strerror(-retval));
        SCReturnInt(retval);
    }

    struct rte_eth_dev_info dev_info = { 0 };
    retval = rte_eth_dev_info_get(iconf->port_id, &dev_info);
    if (retval < 0) {
        SCLogError("%s: getting device info failed (err: %s)", iconf->iface, rte_strerror(-retval));
        SCReturnInt(retval);
    }

    if (iconf->nb_rx_queues > dev_info.max_rx_queues) {
        SCLogError("%s: configured RX queues %u is higher than device maximum (%" PRIu16 ")",
                iconf->iface, iconf->nb_rx_queues, dev_info.max_rx_queues);
        SCReturnInt(-ERANGE);
    }

    if (iconf->nb_tx_queues > dev_info.max_tx_queues) {
        SCLogError("%s: configured TX queues %u is higher than device maximum (%" PRIu16 ")",
                iconf->iface, iconf->nb_tx_queues, dev_info.max_tx_queues);
        SCReturnInt(-ERANGE);
    }

    retval = DeviceValidateMTU(iconf, &dev_info);
    if (retval < 0)
        SCReturnInt(retval);

    struct rte_eth_conf port_conf = { 0 };
    DeviceInitPortConf(iconf, &dev_info, &port_conf);

    retval = rte_eth_dev_configure(
            iconf->port_id, iconf->nb_rx_queues, iconf->nb_tx_queues, &port_conf);
    if (retval < 0) {
        SCLogError("%s: failed to configure the device (port %u, err %s)", iconf->iface,
                iconf->port_id, rte_strerror(-retval));
        SCReturnInt(retval);
    }

    retval = DeviceVerifyPostConfigure(iconf, &dev_info);
    if (retval < 0)
        SCReturnInt(retval);

    // The checksum mode is switched only after the verification passed. On -EAGAIN this
    // function runs again and DeviceInitPortConf() enables the checksum offload only when
    // the mode is still CHECKSUM_VALIDATION_ENABLE.
    if (port_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM) {
        // Suricata does not need recalc checksums now
        iconf->checksum_mode = CHECKSUM_VALIDATION_OFFLOAD;
    }

    uint16_t tmp_nb_rx_desc = iconf->nb_rx_desc;
    uint16_t tmp_nb_tx_desc = iconf->nb_tx_desc;
    retval = rte_eth_dev_adjust_nb_rx_tx_desc(
            iconf->port_id, &iconf->nb_rx_desc, &iconf->nb_tx_desc);
    if (retval != 0) {
        SCLogError("%s: failed to adjust device queue descriptors (port %u, err %d)", iconf->iface,
                iconf->port_id, retval);
        SCReturnInt(retval);
    } else if (tmp_nb_rx_desc != iconf->nb_rx_desc || tmp_nb_tx_desc != iconf->nb_tx_desc) {
        SCLogWarning("%s: device queue descriptors adjusted (RX: from %u to %u, TX: from %u to %u)",
                iconf->iface, tmp_nb_rx_desc, iconf->nb_rx_desc, tmp_nb_tx_desc, iconf->nb_tx_desc);
    }

    retval = iconf->flags & DPDK_MULTICAST ? rte_eth_allmulticast_enable(iconf->port_id)
                                           : rte_eth_allmulticast_disable(iconf->port_id);
    if (retval == -ENOTSUP) {
        retval = rte_eth_allmulticast_get(iconf->port_id);
        // when multicast is enabled but set to disable or vice versa
        if ((retval == 1 && !(iconf->flags & DPDK_MULTICAST)) ||
                (retval == 0 && (iconf->flags & DPDK_MULTICAST))) {
            // NOTE(review): unlike the promiscuous mismatch below, this mismatch is only
            // logged and the configuration continues - confirm that this is intended
            SCLogError("%s: Allmulticast setting of port (%" PRIu16
                       ") can not be configured. Set it to %s",
                    iconf->iface, iconf->port_id, retval == 1 ? "true" : "false");
        } else if (retval < 0) {
            SCLogError("%s: failed to get multicast mode (port %u, err %d)", iconf->iface,
                    iconf->port_id, retval);
            SCReturnInt(retval);
        }
    } else if (retval < 0) {
        SCLogError("%s: error when changing multicast setting (port %u err %d)", iconf->iface,
                iconf->port_id, retval);
        SCReturnInt(retval);
    }

    retval = iconf->flags & DPDK_PROMISC ? rte_eth_promiscuous_enable(iconf->port_id)
                                         : rte_eth_promiscuous_disable(iconf->port_id);
    if (retval == -ENOTSUP) {
        retval = rte_eth_promiscuous_get(iconf->port_id);
        if ((retval == 1 && !(iconf->flags & DPDK_PROMISC)) ||
                (retval == 0 && (iconf->flags & DPDK_PROMISC))) {
            SCLogError("%s: promiscuous setting of port (%" PRIu16
                       ") can not be configured. Set it to %s",
                    iconf->iface, iconf->port_id, retval == 1 ? "true" : "false");
            // retval is 0 or 1 here, the failure has to be reported as a negative value
            SCReturnInt(-ENOTSUP);
        } else if (retval < 0) {
            SCLogError("%s: failed to get promiscuous mode (port %u, err=%d)", iconf->iface,
                    iconf->port_id, retval);
            SCReturnInt(retval);
        }
    } else if (retval < 0) {
        SCLogError("%s: error when changing promiscuous setting (port %u, err %d)", iconf->iface,
                iconf->port_id, retval);
        SCReturnInt(retval);
    }

    // set maximum transmission unit
    SCLogConfig("%s: setting MTU to %d", iconf->iface, iconf->mtu);
    retval = rte_eth_dev_set_mtu(iconf->port_id, iconf->mtu);
    if (retval == -ENOTSUP) {
        SCLogWarning("%s: changing MTU on port %u is not supported, ignoring the setting",
                iconf->iface, iconf->port_id);
        // if it is not possible to set the MTU, retrieve it
        retval = rte_eth_dev_get_mtu(iconf->port_id, &iconf->mtu);
        if (retval < 0) {
            SCLogError("%s: failed to retrieve MTU (port %u, err %d)", iconf->iface, iconf->port_id,
                    retval);
            SCReturnInt(retval);
        }
    } else if (retval < 0) {
        SCLogError("%s: failed to set MTU to %u (port %u, err %d)", iconf->iface, iconf->mtu,
                iconf->port_id, retval);
        SCReturnInt(retval);
    }

    retval = DeviceConfigureQueues(iconf, &dev_info, &port_conf);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    retval = DeviceConfigureIPS(iconf);
    if (retval < 0) {
        SCReturnInt(retval);
    }

    SCReturnInt(0);
}
1549
1550
static void *ParseDpdkConfigAndConfigureDevice(const char *iface)
1551
{
1552
    int retval;
1553
    DPDKIfaceConfig *iconf = ConfigParse(iface);
1554
    if (iconf == NULL) {
1555
        FatalError("DPDK configuration could not be parsed");
1556
    }
1557
1558
    retval = DeviceConfigure(iconf);
1559
    if (retval == -EAGAIN) {
1560
        // for e.g. bonding PMD it needs to be reconfigured
1561
        retval = DeviceConfigure(iconf);
1562
    }
1563
1564
    if (retval < 0) { // handles both configure attempts
1565
        iconf->DerefFunc(iconf);
1566
        if (rte_eal_cleanup() != 0)
1567
            FatalError("EAL cleanup failed: %s", strerror(-retval));
1568
1569
        if (retval == -ENOMEM) {
1570
            FatalError("%s: memory allocation failed - consider"
1571
                       "%s freeing up some memory.",
1572
                    iface,
1573
                    rte_eal_has_hugepages() != 0 ? " increasing the number of hugepages or" : "");
1574
        } else {
1575
            FatalError("%s: failed to configure", iface);
1576
        }
1577
    }
1578
1579
    SC_ATOMIC_RESET(iconf->ref);
1580
    (void)SC_ATOMIC_ADD(iconf->ref, iconf->threads);
1581
    // This counter is increased by worker threads that individually pick queue IDs.
1582
    SC_ATOMIC_RESET(iconf->queue_id);
1583
    SC_ATOMIC_RESET(iconf->inconsitent_numa_cnt);
1584
1585
    // initialize LiveDev DPDK values
1586
    LiveDevice *ldev_instance = LiveGetDevice(iface);
1587
    if (ldev_instance == NULL) {
1588
        FatalError("Device %s is not registered as a live device", iface);
1589
    }
1590
    ldev_instance->dpdk_vars.pkt_mp = iconf->pkt_mempool;
1591
    return iconf;
1592
}
1593
1594
/**
1595
 * \brief extract information from config file
1596
 *
1597
 * The returned structure will be freed by the thread init function.
1598
 * It is thus necessary to either copy the structure before giving it
1599
 * to thread or to reparse the file for each thread (and thus have
1600
 * a new structure).
1601
 *
1602
 * After configuration is loaded, DPDK also configures the device according to the settings.
1603
 *
1604
 * \return a DPDKIfaceConfig corresponding to the interface name
1605
 */
1606
1607
static int DPDKConfigGetThreadsCount(void *conf)
1608
{
1609
    if (conf == NULL)
1610
        FatalError("Configuration file is NULL");
1611
1612
    DPDKIfaceConfig *dpdk_conf = (DPDKIfaceConfig *)conf;
1613
    return dpdk_conf->threads;
1614
}
1615
1616
#endif /* HAVE_DPDK */
1617
1618
static int DPDKRunModeIsIPS(void)
1619
0
{
1620
    /* Find initial node */
1621
0
    const char dpdk_node_query[] = "dpdk.interfaces";
1622
0
    ConfNode *dpdk_node = ConfGetNode(dpdk_node_query);
1623
0
    if (dpdk_node == NULL) {
1624
0
        FatalError("Unable to get %s configuration node", dpdk_node_query);
1625
0
    }
1626
1627
0
    const char default_iface[] = "default";
1628
0
    ConfNode *if_default = ConfNodeLookupKeyValue(dpdk_node, "interface", default_iface);
1629
0
    int nlive = LiveGetDeviceCount();
1630
0
    bool has_ips = false;
1631
0
    bool has_ids = false;
1632
0
    for (int ldev = 0; ldev < nlive; ldev++) {
1633
0
        const char *live_dev = LiveGetDeviceName(ldev);
1634
0
        if (live_dev == NULL)
1635
0
            FatalError("Unable to get device id %d from LiveDevice list", ldev);
1636
1637
0
        ConfNode *if_root = ConfFindDeviceConfig(dpdk_node, live_dev);
1638
0
        if (if_root == NULL) {
1639
0
            if (if_default == NULL)
1640
0
                FatalError("Unable to get %s or %s  interface", live_dev, default_iface);
1641
1642
0
            if_root = if_default;
1643
0
        }
1644
1645
0
        const char *copymodestr = NULL;
1646
0
        const char *copyifacestr = NULL;
1647
0
        if (ConfGetChildValueWithDefault(if_root, if_default, "copy-mode", &copymodestr) == 1 &&
1648
0
                ConfGetChildValue(if_root, "copy-iface", &copyifacestr) == 1) {
1649
0
            if (strcmp(copymodestr, "ips") == 0) {
1650
0
                has_ips = true;
1651
0
            } else {
1652
0
                has_ids = true;
1653
0
            }
1654
0
        } else {
1655
0
            has_ids = true;
1656
0
        }
1657
1658
0
        if (has_ids && has_ips) {
1659
0
            FatalError("Copy-mode of interface %s mixes with the previously set copy-modes "
1660
0
                       "(only IDS/TAP and IPS copy-mode combinations are allowed in DPDK",
1661
0
                    live_dev);
1662
0
        }
1663
0
    }
1664
1665
0
    return has_ips;
1666
0
}
1667
1668
/**
 * \brief Switch the engine to IPS mode when DPDKRunModeIsIPS() reports IPS.
 *
 * Does nothing when DPDKRunModeIsIPS() returns 0.
 */
static void DPDKRunModeEnableIPS(void)
{
    if (!DPDKRunModeIsIPS()) {
        return;
    }

    SCLogInfo("Setting IPS mode");
    EngineModeSetIPS();
}
1675
1676
/**
 * \brief Name of the default DPDK runmode.
 *
 * \return the string "workers" (static storage, must not be freed)
 */
const char *RunModeDpdkGetDefaultMode(void)
{
    static const char default_mode[] = "workers";

    return default_mode;
}
1680
1681
void RunModeDpdkRegister(void)
1682
82
{
1683
82
    RunModeRegisterNewRunMode(RUNMODE_DPDK, "workers",
1684
82
            "Workers DPDK mode, each thread does all"
1685
82
            " tasks from acquisition to logging",
1686
82
            RunModeIdsDpdkWorkers, DPDKRunModeEnableIPS);
1687
82
}
1688
1689
/**
 * \brief Workers version of the DPDK processing.
 *
 * Start N threads with each thread doing all the work.
 *
 * With HAVE_DPDK defined, the time mode is set to live, the EAL is initialized and
 * the live capture workers are set up with the "ReceiveDPDK" and "DecodeDPDK"
 * modules; a non-zero result of that setup is reported through FatalError().
 * Without HAVE_DPDK the function only returns.
 *
 * \return 0
 */
int RunModeIdsDpdkWorkers(void)
{
    SCEnter();
#ifdef HAVE_DPDK
    TimeModeSetLive();
    InitEal();

    const int setup_ret = RunModeSetLiveCaptureWorkers(ParseDpdkConfigAndConfigureDevice,
            DPDKConfigGetThreadsCount, "ReceiveDPDK", "DecodeDPDK", thread_name_workers, NULL);
    if (setup_ret != 0) {
        FatalError("Unable to start runmode");
    }

    SCLogDebug("RunModeIdsDpdkWorkers initialised");
#endif /* HAVE_DPDK */
    SCReturnInt(0);
}
1715
1716
/**
1717
 * @}
1718
 */