Coverage Report

Created: 2026-02-14 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/suricata7/src/source-netmap.c
Line
Count
Source
1
/* Copyright (C) 2011-2022 Open Information Security Foundation
2
 *
3
 * You can copy, redistribute or modify this Program under the terms of
4
 * the GNU General Public License version 2 as published by the Free
5
 * Software Foundation.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * version 2 along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15
 * 02110-1301, USA.
16
 */
17
18
/**
19
*  \defgroup netmap Netmap running mode
20
*
21
*  @{
22
*/
23
24
/**
25
 * \file
26
 *
27
 * \author Aleksey Katargin <gureedo@gmail.com>
28
 * \author Victor Julien <victor@inliniac.net>
29
 * \author Bill Meeks <billmeeks8@gmail.com>
30
 *
31
 * Netmap socket acquisition support
32
 *
33
 * Many thanks to Luigi Rizzo for guidance and support.
34
 *
35
 */
36
37
#include "suricata.h"
38
#include "suricata-common.h"
39
#include "tm-threads.h"
40
#include "packet.h"
41
#include "util-bpf.h"
42
#include "util-privs.h"
43
#include "util-validate.h"
44
#include "util-datalink.h"
45
46
#include "source-netmap.h"
47
48
#ifdef HAVE_NETMAP
49
50
#define NETMAP_WITH_LIBS
51
#ifdef DEBUG
52
#define DEBUG_NETMAP_USER
53
#endif
54
55
#include <net/netmap_user.h>
56
#include <libnetmap.h>
57
58
#endif /* HAVE_NETMAP */
59
60
#include "util-ioctl.h"
61
62
#ifndef HAVE_NETMAP
63
64
/**
65
* \brief this function prints an error message and exits.
66
*/
67
static TmEcode NoNetmapSupportExit(ThreadVars *tv, const void *initdata, void **data)
68
0
{
69
0
    FatalError("Error creating thread %s: Netmap is not enabled. "
70
0
               "Make sure to pass --enable-netmap to configure when building.",
71
0
            tv->name);
72
0
}
73
74
void TmModuleReceiveNetmapRegister (void)
75
71
{
76
71
    tmm_modules[TMM_RECEIVENETMAP].name = "ReceiveNetmap";
77
71
    tmm_modules[TMM_RECEIVENETMAP].ThreadInit = NoNetmapSupportExit;
78
71
    tmm_modules[TMM_RECEIVENETMAP].flags = TM_FLAG_RECEIVE_TM;
79
71
}
80
81
/**
82
* \brief Registration Function for DecodeNetmap.
83
*/
84
void TmModuleDecodeNetmapRegister (void)
85
71
{
86
71
    tmm_modules[TMM_DECODENETMAP].name = "DecodeNetmap";
87
71
    tmm_modules[TMM_DECODENETMAP].ThreadInit = NoNetmapSupportExit;
88
71
    tmm_modules[TMM_DECODENETMAP].flags = TM_FLAG_DECODE_TM;
89
71
}
90
91
#else /* We have NETMAP support */
92
93
#include "action-globals.h"
94
95
/* timeout value handed to poll() by the receive loop */
#define POLL_TIMEOUT 100

/* POLL_EVENTS: poll() revents bits the receive loop handles as a problem
 * with the netmap descriptor instead of as readable data */
#ifdef __linux__
#define POLL_EVENTS (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL)

/* fall back to IFF_PROMISC where IFF_PPROMISC is not defined */
#ifndef IFF_PPROMISC
#define IFF_PPROMISC IFF_PROMISC
#endif

#else
#define POLL_EVENTS (POLLHUP|POLLERR|POLLNVAL)
#endif

/* bits stored in NetmapThreadVars::flags */
enum {
    NETMAP_FLAG_ZERO_COPY = 1,
    NETMAP_FLAG_EXCL_RING_ACCESS = 2,
};
109
110
/**
111
 * \brief Netmap device instance. Each ring for each device gets its own
112
 *        device.
113
 */
114
typedef struct NetmapDevice_
115
{
116
    struct nmport_d *nmd;
117
    unsigned int ref;
118
    SC_ATOMIC_DECLARE(unsigned int, threads_run);
119
    TAILQ_ENTRY(NetmapDevice_) next;
120
    // actual ifname can only be 16, but we store a bit more,
121
    // like the options string and a 'netmap:' prefix.
122
    char ifname[32];
123
    int ring;
124
    int direction; // 0 rx, 1 tx
125
126
    // autofp: Used to lock a destination ring while we are sending data.
127
    SCMutex netmap_dev_lock;
128
} NetmapDevice;
129
130
/**
131
 * \brief Module thread local variables.
132
 */
133
typedef struct NetmapThreadVars_
134
{
135
    /* receive interface */
136
    NetmapDevice *ifsrc;
137
    /* dst interface for IPS mode */
138
    NetmapDevice *ifdst;
139
140
    int flags;
141
    struct bpf_program bpf_prog;
142
143
    /* suricata internals */
144
    TmSlot *slot;
145
    ThreadVars *tv;
146
    LiveDevice *livedev;
147
148
    /* copy from config */
149
    int copy_mode;
150
    ChecksumValidationMode checksum_mode;
151
152
    /* counters */
153
    uint64_t pkts;
154
    uint64_t bytes;
155
    uint64_t drops;
156
    uint16_t capture_kernel_packets;
157
    uint16_t capture_kernel_drops;
158
} NetmapThreadVars;
159
160
typedef TAILQ_HEAD(NetmapDeviceList_, NetmapDevice_) NetmapDeviceList;
161
162
static NetmapDeviceList netmap_devlist = TAILQ_HEAD_INITIALIZER(netmap_devlist);
163
static SCMutex netmap_devlist_lock = SCMUTEX_INITIALIZER;
164
165
/** \brief get RSS RX-queue count
166
 *  \retval rx_rings RSS RX queue count or 0 on error
167
 */
168
int NetmapGetRSSCount(const char *ifname)
169
{
170
    struct nmreq_port_info_get req;
171
    struct nmreq_header hdr;
172
    int rx_rings = 0;
173
174
    /* we need the base interface name to query queues */
175
    char base_name[IFNAMSIZ];
176
    strlcpy(base_name, ifname, sizeof(base_name));
177
    if (strlen(base_name) > 0 &&
178
            (base_name[strlen(base_name) - 1] == '^' || base_name[strlen(base_name) - 1] == '*')) {
179
        base_name[strlen(base_name) - 1] = '\0';
180
    }
181
182
    SCMutexLock(&netmap_devlist_lock);
183
184
    /* open netmap device */
185
    int fd = open("/dev/netmap", O_RDWR);
186
    if (fd == -1) {
187
        SCLogError("%s: open netmap device failed: %s", ifname, strerror(errno));
188
        goto error_open;
189
    }
190
191
    /* query netmap interface info for ring count */
192
    memset(&req, 0, sizeof(req));
193
    memset(&hdr, 0, sizeof(hdr));
194
    hdr.nr_version = NETMAP_API;
195
    hdr.nr_reqtype = NETMAP_REQ_PORT_INFO_GET;
196
    hdr.nr_body = (uintptr_t)&req;
197
    strlcpy(hdr.nr_name, base_name, sizeof(hdr.nr_name));
198
199
    if (ioctl(fd, NIOCCTRL, &hdr) != 0) {
200
        SCLogError(
201
                "Query of netmap HW rings count on %s failed; error: %s", ifname, strerror(errno));
202
        goto error_fd;
203
    };
204
205
    /* return RX rings count if it equals TX rings count */
206
    if (req.nr_rx_rings == req.nr_tx_rings) {
207
        rx_rings = req.nr_rx_rings;
208
    }
209
210
error_fd:
211
    close(fd);
212
error_open:
213
    SCMutexUnlock(&netmap_devlist_lock);
214
    return rx_rings;
215
}
216
217
/**
 * \brief Release everything owned by a netmap device instance: the netmap
 *        port, the per-device mutex and the instance memory itself.
 *
 * The instance is not unlinked from netmap_devlist here.
 *
 * \param pdev device instance to destroy; must not be used afterwards
 */
static void NetmapDestroyDevice(NetmapDevice *pdev)
{
    struct nmport_d *port = pdev->nmd;

    nmport_close(port);
    SCMutexDestroy(&pdev->netmap_dev_lock);
    SCFree(pdev);
}
223
224
/**
225
 * \brief Close or dereference netmap device instance.
226
 * \param dev Netmap device instance.
227
 * \return Zero on success.
228
 */
229
static int NetmapClose(NetmapDevice *dev)
230
{
231
    NetmapDevice *pdev, *tmp;
232
233
    SCMutexLock(&netmap_devlist_lock);
234
235
    TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) {
236
        if (pdev == dev) {
237
            pdev->ref--;
238
            if (!pdev->ref) {
239
                NetmapDestroyDevice(pdev);
240
            }
241
            SCMutexUnlock(&netmap_devlist_lock);
242
            return 0;
243
        }
244
    }
245
246
    SCMutexUnlock(&netmap_devlist_lock);
247
    return -1;
248
}
249
250
/**
251
 * \brief Close all open netmap device instances.
252
 */
253
static void NetmapCloseAll(void)
254
{
255
    NetmapDevice *pdev, *tmp;
256
257
    SCMutexLock(&netmap_devlist_lock);
258
259
    TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) {
260
        NetmapDestroyDevice(pdev);
261
    }
262
263
    SCMutexUnlock(&netmap_devlist_lock);
264
}
265
266
/**
267
 * \brief Open interface in netmap mode.
268
 * \param ifname Interface name.
269
 * \param promisc Enable promiscuous mode.
270
 * \param dev Pointer to requested netmap device instance.
271
 * \param verbose Verbose error logging.
272
 * \param read Indicates direction: RX or TX
273
 * \param zerocopy 1 if zerocopy access requested
274
 * \param soft Use Host stack (software) interface
275
 * \return Zero on success.
276
 */
277
static int NetmapOpen(NetmapIfaceSettings *ns, NetmapDevice **pdevice, int verbose, int read,
278
        bool zerocopy, bool soft)
279
{
280
    SCEnter();
281
    SCLogDebug("ifname %s", ns->iface);
282
283
    char base_name[IFNAMSIZ];
284
    strlcpy(base_name, ns->iface, sizeof(base_name));
285
    if (strlen(base_name) > 0 &&
286
            (base_name[strlen(base_name)-1] == '^' ||
287
             base_name[strlen(base_name)-1] == '*'))
288
    {
289
        base_name[strlen(base_name)-1] = '\0';
290
    }
291
292
    if (ns->real) {
293
        /* check interface is up */
294
        int if_flags = GetIfaceFlags(base_name);
295
        if (if_flags == -1) {
296
            if (verbose) {
297
                SCLogError("%s: cannot access network interface: %s", base_name, ns->iface);
298
            }
299
            goto error;
300
        }
301
302
        /* bring iface up if it is down */
303
        if ((if_flags & IFF_UP) == 0) {
304
            SCLogError("%s: interface is down", base_name);
305
            goto error;
306
        }
307
        /* if needed, try to set iface in promisc mode */
308
        if (ns->promisc && (if_flags & (IFF_PROMISC|IFF_PPROMISC)) == 0) {
309
            if_flags |= IFF_PPROMISC;
310
            SetIfaceFlags(base_name, if_flags); // TODO reset at exit
311
            // TODO move to parse config?
312
        }
313
    }
314
    NetmapDevice *pdev = NULL, *spdev = NULL;
315
    pdev = SCCalloc(1, sizeof(*pdev));
316
    if (unlikely(pdev == NULL)) {
317
        SCLogError("%s: memory allocation failed", base_name);
318
        goto error;
319
    }
320
    SC_ATOMIC_INIT(pdev->threads_run);
321
322
    SCMutexLock(&netmap_devlist_lock);
323
324
    const int direction = (read != 1);
325
    int ring = 0;
326
    /* Search for interface in our already opened list. */
327
    /* We will find it when opening multiple rings on   */
328
    /* the device when it exposes multiple RSS queues.  */
329
    TAILQ_FOREACH(spdev, &netmap_devlist, next) {
330
        SCLogDebug("spdev %s", spdev->ifname);
331
        if (direction == spdev->direction && strcmp(ns->iface, spdev->ifname) == 0) {
332
            ring = spdev->ring + 1;
333
        }
334
    }
335
    SCLogDebug("netmap/%s: using ring %d", ns->iface, ring);
336
337
    const char *opt_R = "R";
338
    const char *opt_T = "T";
339
    const char *opt_x = "x"; // not for IPS
340
    const char *opt_z = "z"; // zero copy, not for IPS
341
342
    /* assemble options string */
343
    char optstr[16];
344
    if (ns->ips)
345
        opt_x = "";
346
    // z seems to not play well with multiple opens of a real dev on linux
347
    opt_z = "";
348
349
    /*
350
     * How netmap endpoint names are selected:
351
     *
352
     * The following logic within the "retry" loop builds endpoint names.
353
     *
354
     * IPS Mode:
355
     * There are two endpoints: one hardware NIC and either a hardware NIC or host stack "NIC".
356
     *
357
     * IDS Mode:
358
     * One endpoint -- usually a hardware NIC.
359
     *
360
     * IPS mode -- with one endpoint a host stack "NIC":
361
     * When using multiple rings/threads, then the open of the initial Ring 0 MUST
362
     * instruct netmap to open multiple Host Stack rings (as the default is to open only a single
363
     * pair). This is also critical for the HW NIC endpoint. This is done by adding
364
     * “@conf:host-rings=x” suffix option (where “x” is the number of host rings desired)
365
     * to BOTH endpoint nmport_open_desc() calls for ring 0 (hardware and host stack).
366
     * For subsequent additional ring open calls, omit the suffix option specifying host ring count.
367
     *
368
     * IPS mode -- both endpoints are hardware NICs:
369
     * Do NOT pass any suffix option (even for Ring 0). You do not need to tell netmap how many
370
     * rings, because it already knows the correct value from the NIC driver itself. Specifying a
371
     * desired ring count when both ends are Hardware NICs confuses netmap, and it seems to default
372
     * to using only a single hardware ring. In this scenario, specify only the specific ring number
373
     * being opened.
374
     */
375
376
    // loop to retry opening if unsupported options are used
377
retry:
378
    snprintf(optstr, sizeof(optstr), "%s%s%s", opt_z, opt_x, direction == 0 ? opt_R : opt_T);
379
380
    char devname[128];
381
    if (strncmp(ns->iface, "netmap:", 7) == 0) {
382
        snprintf(devname, sizeof(devname), "%s}%d%s%s",
383
                ns->iface, ring, strlen(optstr) ? "/" : "", optstr);
384
    } else if (strlen(ns->iface) > 5 && strncmp(ns->iface, "vale", 4) == 0 && isdigit(ns->iface[4])) {
385
        snprintf(devname, sizeof(devname), "%s", ns->iface);
386
    } else if (ring == 0 && ns->threads == 1) {
387
        /* just a single thread and ring, so don't use ring param */
388
        snprintf(devname, sizeof(devname), "netmap:%s%s%s",
389
                ns->iface, strlen(optstr) ? "/" : "", optstr);
390
        SCLogDebug("device with %s-ring enabled (devname): %s", soft ? "SW" : "HW", devname);
391
    } else {
392
        /* Going to be using multiple threads and rings */
393
        if (ns->sw_ring) {
394
            /* Opening a host stack interface */
395
            if (ring == 0) {
396
                /* Ring 0, so tell netmap how many host rings we want created */
397
                snprintf(devname, sizeof(devname), "netmap:%s%d%s%s@conf:host-rings=%d", ns->iface,
398
                        ring, strlen(optstr) ? "/" : "", optstr, ns->threads);
399
            } else {
400
                /* Software (host) ring, but not initial open of ring 0 */
401
                snprintf(devname, sizeof(devname), "netmap:%s%d%s%s", ns->iface, ring,
402
                        strlen(optstr) ? "/" : "", optstr);
403
            }
404
            SCLogDebug("device with SW-ring enabled (devname): %s", devname);
405
        } else if (ring == 0 && soft) {
406
            /* Ring 0 of HW endpoint, and other endpoint is SW stack,
407
             * so request SW host stack rings to match HW rings count.
408
             */
409
            snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s@conf:host-rings=%d", ns->iface,
410
                    ring, strlen(optstr) ? "/" : "", optstr, ns->threads);
411
            SCLogDebug("device with HW-ring enabled (devname): %s", devname);
412
        } else {
413
            /* Hardware ring other than ring 0, or both endpoints are HW
414
             * and there is no host stack (SW) endpoint */
415
            snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s", ns->iface, ring,
416
                    strlen(optstr) ? "/" : "", optstr);
417
            SCLogDebug("device with HW-ring enabled (devname): %s", devname);
418
        }
419
    }
420
421
    strlcpy(pdev->ifname, ns->iface, sizeof(pdev->ifname));
422
423
    /* have the netmap API parse device name and prepare the port descriptor for us */
424
    pdev->nmd = nmport_prepare(devname);
425
426
    if (pdev->nmd != NULL) {
427
        /* For RX devices, set the nr_mode flag we need on the netmap port TX rings prior to opening
428
         */
429
        if (read) {
430
            pdev->nmd->reg.nr_flags |= NR_NO_TX_POLL;
431
        }
432
433
        /* Now attempt to actually open the netmap port descriptor */
434
        if (nmport_open_desc(pdev->nmd) < 0) {
435
            /* the open failed, so clean-up the descriptor and fall through to error handler */
436
            nmport_close(pdev->nmd);
437
            pdev->nmd = NULL;
438
        }
439
    }
440
441
    if (pdev->nmd == NULL) {
442
        if (errno == EINVAL) {
443
            if (opt_z[0] == 'z') {
444
                SCLogNotice(
445
                        "%s: dev '%s' got EINVAL: going to retry without 'z'", base_name, devname);
446
                opt_z = "";
447
                goto retry;
448
            } else if (opt_x[0] == 'x') {
449
                SCLogNotice(
450
                        "%s: dev '%s' got EINVAL: going to retry without 'x'", base_name, devname);
451
                opt_x = "";
452
                goto retry;
453
            }
454
        }
455
456
        SCMutexUnlock(&netmap_devlist_lock);
457
        NetmapCloseAll();
458
        FatalError("opening devname %s failed: %s", devname, strerror(errno));
459
    }
460
461
    /* Work around bug in libnetmap library where "cur_{r,t}x_ring" values not initialized */
462
    SCLogDebug("%s -- cur rings: [%d, %d] first rings: [%d, %d]", devname, pdev->nmd->cur_rx_ring,
463
            pdev->nmd->cur_tx_ring, pdev->nmd->first_rx_ring, pdev->nmd->first_tx_ring);
464
    pdev->nmd->cur_rx_ring = pdev->nmd->first_rx_ring;
465
    pdev->nmd->cur_tx_ring = pdev->nmd->first_tx_ring;
466
467
    SCLogInfo("%s: %s opened [fd: %d]", devname, ns->iface, pdev->nmd->fd);
468
469
    pdev->direction = direction;
470
    pdev->ring = ring;
471
    SCMutexInit(&pdev->netmap_dev_lock, NULL);
472
    TAILQ_INSERT_TAIL(&netmap_devlist, pdev, next);
473
474
    SCMutexUnlock(&netmap_devlist_lock);
475
    *pdevice = pdev;
476
477
    return 0;
478
error:
479
    return -1;
480
}
481
482
/**
483
 * \brief PcapDumpCounters
484
 * \param ntv
485
 */
486
static inline void NetmapDumpCounters(NetmapThreadVars *ntv)
487
{
488
    StatsAddUI64(ntv->tv, ntv->capture_kernel_packets, ntv->pkts);
489
    StatsAddUI64(ntv->tv, ntv->capture_kernel_drops, ntv->drops);
490
    (void) SC_ATOMIC_ADD(ntv->livedev->drop, ntv->drops);
491
    (void) SC_ATOMIC_ADD(ntv->livedev->pkts, ntv->pkts);
492
    ntv->drops = 0;
493
    ntv->pkts = 0;
494
}
495
496
/**
497
 * \brief Init function for ReceiveNetmap.
498
 * \param tv pointer to ThreadVars
499
 * \param initdata pointer to the interface passed from the user
500
 * \param data pointer gets populated with NetmapThreadVars
501
 */
502
static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, void **data)
503
{
504
    SCEnter();
505
506
    NetmapIfaceConfig *aconf = (NetmapIfaceConfig *)initdata;
507
    if (initdata == NULL) {
508
        SCLogError("initdata == NULL");
509
        SCReturnInt(TM_ECODE_FAILED);
510
    }
511
512
    NetmapThreadVars *ntv = SCCalloc(1, sizeof(*ntv));
513
    if (unlikely(ntv == NULL)) {
514
        SCLogError("Memory allocation failed");
515
        goto error;
516
    }
517
518
    ntv->livedev = LiveGetDevice(aconf->iface_name);
519
    if (ntv->livedev == NULL) {
520
        SCLogError("Unable to find Live device");
521
        goto error_ntv;
522
    }
523
524
    ntv->tv = tv;
525
    ntv->checksum_mode = aconf->in.checksum_mode;
526
    ntv->copy_mode = aconf->in.copy_mode;
527
528
    /* enable zero-copy mode for workers runmode */
529
    char const *active_runmode = RunmodeGetActive();
530
    if (strcmp("workers", active_runmode) == 0) {
531
        ntv->flags |= NETMAP_FLAG_ZERO_COPY;
532
        SCLogDebug("Enabling zero copy mode for %s", aconf->in.iface);
533
    } else if (strcmp("autofp", active_runmode) == 0) {
534
        ntv->flags |= NETMAP_FLAG_EXCL_RING_ACCESS;
535
    }
536
537
    /* Need to insure open of ring 0 conveys requested ring count for open */
538
    bool soft = aconf->in.sw_ring || aconf->out.sw_ring;
539
    if (NetmapOpen(&aconf->in, &ntv->ifsrc, 1, 1, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0,
540
                soft) != 0) {
541
        goto error_ntv;
542
    }
543
544
    if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) {
545
        if (NetmapOpen(&aconf->out, &ntv->ifdst, 1, 0, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0,
546
                    soft) != 0) {
547
            goto error_src;
548
        }
549
    }
550
551
    /* basic counters */
552
    ntv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
553
            ntv->tv);
554
    ntv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
555
            ntv->tv);
556
557
    if (aconf->in.bpf_filter) {
558
        SCLogConfig("%s: using BPF '%s'", ntv->ifsrc->ifname, aconf->in.bpf_filter);
559
        char errbuf[PCAP_ERRBUF_SIZE];
560
        if (SCBPFCompile(default_packet_size,  /* snaplen_arg */
561
                    LINKTYPE_ETHERNET,    /* linktype_arg */
562
                    &ntv->bpf_prog,       /* program */
563
                    aconf->in.bpf_filter, /* const char *buf */
564
                    1,                    /* optimize */
565
                    PCAP_NETMASK_UNKNOWN,  /* mask */
566
                    errbuf,
567
                    sizeof(errbuf)) == -1)
568
        {
569
            SCLogError("%s: failed to compile BPF \"%s\": %s", ntv->ifsrc->ifname,
570
                    aconf->in.bpf_filter, errbuf);
571
            goto error_dst;
572
        }
573
    }
574
575
    SCLogDebug("thread: %s polling on fd: %d", tv->name, ntv->ifsrc->nmd->fd);
576
577
    DatalinkSetGlobalType(LINKTYPE_ETHERNET);
578
579
    *data = (void *)ntv;
580
    aconf->DerefFunc(aconf);
581
    SCReturnInt(TM_ECODE_OK);
582
583
error_dst:
584
    if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) {
585
        NetmapClose(ntv->ifdst);
586
    }
587
588
error_src:
589
    NetmapClose(ntv->ifsrc);
590
591
error_ntv:
592
    SCFree(ntv);
593
594
error:
595
    aconf->DerefFunc(aconf);
596
    SCReturnInt(TM_ECODE_FAILED);
597
}
598
599
/**
600
 * \brief Output packet to destination interface or drop.
601
 * \param ntv Thread local variables.
602
 * \param p Source packet.
603
 */
604
static TmEcode NetmapWritePacket(NetmapThreadVars *ntv, Packet *p)
605
{
606
    if (ntv->copy_mode == NETMAP_COPY_MODE_IPS) {
607
        if (PacketCheckAction(p, ACTION_DROP)) {
608
            return TM_ECODE_OK;
609
        }
610
    }
611
    DEBUG_VALIDATE_BUG_ON(ntv->ifdst == NULL);
612
613
    /* Lock the destination netmap ring while writing to it */
614
    if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) {
615
        SCMutexLock(&ntv->ifdst->netmap_dev_lock);
616
    }
617
618
    int write_tries = 0;
619
try_write:
620
    /* attempt to write the packet into the netmap ring buffer(s) */
621
    if (nmport_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) == 0) {
622
623
        /* writing the packet failed, but ask kernel to sync TX rings
624
         * for us as the ring buffers may simply be full */
625
        (void)ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0);
626
627
        /* Try write up to 2 more times before giving up */
628
        if (write_tries < 3) {
629
            write_tries++;
630
            goto try_write;
631
        }
632
633
        if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) {
634
            SCMutexUnlock(&ntv->ifdst->netmap_dev_lock);
635
        }
636
        SCLogDebug("failed to send %s -> %s", ntv->ifsrc->ifname, ntv->ifdst->ifname);
637
        ntv->drops++;
638
        return TM_ECODE_FAILED;
639
    }
640
641
    SCLogDebug("sent successfully: %s(%d)->%s(%d) (%u)", ntv->ifsrc->ifname, ntv->ifsrc->ring,
642
            ntv->ifdst->ifname, ntv->ifdst->ring, GET_PKT_LEN(p));
643
644
    /* Instruct netmap to push the data on the TX ring on the destination port */
645
    (void)ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0);
646
    if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) {
647
        SCMutexUnlock(&ntv->ifdst->netmap_dev_lock);
648
    }
649
    return TM_ECODE_OK;
650
}
651
652
/**
653
 * \brief Packet release routine.
654
 * \param p Packet.
655
 */
656
static void NetmapReleasePacket(Packet *p)
657
{
658
    NetmapThreadVars *ntv = (NetmapThreadVars *)p->netmap_v.ntv;
659
660
    if ((ntv->copy_mode != NETMAP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
661
        NetmapWritePacket(ntv, p);
662
    }
663
664
    PacketFreeOrRelease(p);
665
}
666
667
/**
 * \brief Turn one packet read from a netmap ring into a Packet and pass it
 *        to the next slot of the pipeline.
 *
 * The packet is first run through the BPF filter, if one is loaded, and
 * silently skipped when the filter rejects it or when no Packet can be
 * obtained from the pool.
 *
 * Only ph->caplen bytes are used as packet data: for a packet spread over
 * several ring slots, that is the part the reader found to be contiguous
 * in memory starting at ph->buf, while ph->len may extend beyond it. The
 * byte counter still accounts for the full ph->len.
 *
 * \param ntv Thread local variables.
 * \param ph netmap packet header describing buffer, lengths and timestamp.
 */
static void NetmapProcessPacket(NetmapThreadVars *ntv, const struct nm_pkthdr *ph)
{
    if (ntv->bpf_prog.bf_len) {
        /* the filter may only look at the bytes actually available */
        struct pcap_pkthdr pkthdr = { .ts = { 0, 0 }, .caplen = ph->caplen, .len = ph->len };
        if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, ph->buf) == 0) {
            return;
        }
    }

    Packet *p = PacketPoolGetPacket();
    if (unlikely(p == NULL)) {
        return;
    }

    PKT_SET_SRC(p, PKT_SRC_WIRE);
    p->livedev = ntv->livedev;
    p->datalink = LINKTYPE_ETHERNET;
    p->ts = SCTIME_FROM_TIMEVAL(&ph->ts);
    ntv->pkts++;
    ntv->bytes += ph->len;

    if (ntv->flags & NETMAP_FLAG_ZERO_COPY) {
        /* zero copy: the Packet points straight into the netmap buffer */
        if (PacketSetData(p, (uint8_t *)ph->buf, ph->caplen) == -1) {
            TmqhOutputPacketpool(ntv->tv, p);
            return;
        }
    } else {
        if (PacketCopyData(p, (uint8_t *)ph->buf, ph->caplen) == -1) {
            TmqhOutputPacketpool(ntv->tv, p);
            return;
        }
    }

    p->ReleasePacket = NetmapReleasePacket;
    p->netmap_v.ntv = ntv;

    SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
            GET_PKT_LEN(p), p, GET_PKT_DATA(p));

    (void)TmThreadsSlotProcessPkt(ntv->tv, ntv->slot, p);
}
708
709
/**
710
 * \brief Copy netmap rings data into Packet structures.
711
 * \param *d nmport_d (or nm_desc) netmap if structure.
712
 * \param cnt int count of packets to read (-1 = all).
713
 * \param *ntv NetmapThreadVars.
714
 */
715
static TmEcode NetmapReadPackets(struct nmport_d *d, int cnt, NetmapThreadVars *ntv)
716
{
717
    struct nm_pkthdr hdr;
718
    int last_ring = d->last_rx_ring - d->first_rx_ring + 1;
719
    int cur_ring, got = 0, cur_rx_ring = d->cur_rx_ring;
720
721
    memset(&hdr, 0, sizeof(hdr));
722
    hdr.flags = NM_MORE_PKTS;
723
724
    if (cnt == 0)
725
        cnt = -1;
726
727
    for (cur_ring = 0; cur_ring < last_ring && cnt != got; cur_ring++, cur_rx_ring++) {
728
        struct netmap_ring *ring;
729
730
        if (cur_rx_ring > d->last_rx_ring)
731
            cur_rx_ring = d->first_rx_ring;
732
733
        ring = NETMAP_RXRING(d->nifp, cur_rx_ring);
734
735
        /* cycle through the non-empty ring slots to fetch their data */
736
        for (; !nm_ring_empty(ring) && cnt != got; got++) {
737
            u_int idx, i;
738
            u_char *oldbuf;
739
            struct netmap_slot *slot;
740
741
            if (hdr.buf) { /* from previous round */
742
                NetmapProcessPacket(ntv, &hdr);
743
            }
744
745
            i = ring->cur;
746
            slot = &ring->slot[i];
747
            idx = slot->buf_idx;
748
            d->cur_rx_ring = cur_rx_ring;
749
            hdr.slot = slot;
750
            oldbuf = hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
751
            hdr.len = hdr.caplen = slot->len;
752
753
            /* loop through the ring slots to get packet data */
754
            while (slot->flags & NS_MOREFRAG) {
755
                /* packet can be fragmented across multiple slots, */
756
                /* so loop until we find the slot with the flag    */
757
                /* cleared, signalling the end of the packet data. */
758
                u_char *nbuf;
759
                u_int oldlen = slot->len;
760
                i = nm_ring_next(ring, i);
761
                slot = &ring->slot[i];
762
                hdr.len += slot->len;
763
                nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx);
764
765
                if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size &&
766
                        oldlen == ring->nr_buf_size) {
767
                    hdr.caplen += slot->len;
768
                    oldbuf = nbuf;
769
                } else {
770
                    oldbuf = NULL;
771
                }
772
            }
773
774
            hdr.ts = ring->ts;
775
            ring->head = ring->cur = nm_ring_next(ring, i);
776
        }
777
    }
778
779
    if (hdr.buf) { /* from previous round */
780
        hdr.flags = 0;
781
        NetmapProcessPacket(ntv, &hdr);
782
    }
783
    return got;
784
}
785
786
/**
787
 *  \brief Main netmap reading loop function
788
 */
789
static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot)
790
{
791
    SCEnter();
792
793
    TmSlot *s = (TmSlot *)slot;
794
    NetmapThreadVars *ntv = (NetmapThreadVars *)data;
795
    struct pollfd fds;
796
797
    ntv->slot = s->slot_next;
798
    fds.fd = ntv->ifsrc->nmd->fd;
799
    fds.events = POLLIN;
800
801
    SCLogDebug("thread %s polling on %d", tv->name, fds.fd);
802
803
    // Indicate that the thread is actually running its application level code (i.e., it can poll
804
    // packets)
805
    TmThreadsSetFlag(tv, THV_RUNNING);
806
807
    for(;;) {
808
        if (unlikely(suricata_ctl_flags != 0)) {
809
            break;
810
        }
811
812
        /* make sure we have at least one packet in the packet pool,
813
         * to prevent us from alloc'ing packets at line rate */
814
        PacketPoolWait();
815
816
        int r = poll(&fds, 1, POLL_TIMEOUT);
817
        if (r < 0) {
818
            /* error */
819
            if (errno != EINTR)
820
                SCLogError("%s: error polling netmap: %s", ntv->ifsrc->ifname, strerror(errno));
821
            continue;
822
823
        } else if (r == 0) {
824
            /* no events, timeout */
825
            /* sync counters */
826
            NetmapDumpCounters(ntv);
827
            StatsSyncCountersIfSignalled(tv);
828
829
            /* poll timed out, lets handle the timeout */
830
            TmThreadsCaptureHandleTimeout(tv, NULL);
831
            continue;
832
        }
833
834
        if (unlikely(fds.revents & POLL_EVENTS)) {
835
            if (fds.revents & POLLERR) {
836
                SCLogError("%s: error reading netmap data via polling: %s", ntv->ifsrc->ifname,
837
                        strerror(errno));
838
            } else if (fds.revents & POLLNVAL) {
839
                SCLogError("%s: invalid polling request", ntv->ifsrc->ifname);
840
            }
841
            continue;
842
        }
843
844
        if (likely(fds.revents & POLLIN)) {
845
            /* have data on RX ring, so copy to Packet for processing */
846
            NetmapReadPackets(ntv->ifsrc->nmd, -1, ntv);
847
        }
848
849
        NetmapDumpCounters(ntv);
850
        StatsSyncCountersIfSignalled(tv);
851
    }
852
853
    NetmapDumpCounters(ntv);
854
    StatsSyncCountersIfSignalled(tv);
855
    SCReturnInt(TM_ECODE_OK);
856
}
857
858
/**
859
 * \brief This function prints stats to the screen at exit.
860
 * \param tv pointer to ThreadVars
861
 * \param data pointer that gets cast into NetmapThreadVars for ntv
862
 */
863
static void ReceiveNetmapThreadExitStats(ThreadVars *tv, void *data)
864
{
865
    SCEnter();
866
    NetmapThreadVars *ntv = (NetmapThreadVars *)data;
867
868
    NetmapDumpCounters(ntv);
869
    SCLogPerf("%s: (%s) packets %" PRIu64 ", dropped %" PRIu64 ", bytes %" PRIu64 "",
870
            ntv->ifsrc->ifname, tv->name,
871
            StatsGetLocalCounterValue(tv, ntv->capture_kernel_packets),
872
            StatsGetLocalCounterValue(tv, ntv->capture_kernel_drops), ntv->bytes);
873
}
874
875
/**
876
 * \brief
877
 * \param tv
878
 * \param data Pointer to NetmapThreadVars.
879
 */
880
static TmEcode ReceiveNetmapThreadDeinit(ThreadVars *tv, void *data)
881
{
882
    SCEnter();
883
884
    NetmapThreadVars *ntv = (NetmapThreadVars *)data;
885
886
    if (ntv->ifsrc) {
887
        NetmapClose(ntv->ifsrc);
888
        ntv->ifsrc = NULL;
889
    }
890
    if (ntv->ifdst) {
891
        NetmapClose(ntv->ifdst);
892
        ntv->ifdst = NULL;
893
    }
894
    if (ntv->bpf_prog.bf_insns) {
895
        SCBPFFree(&ntv->bpf_prog);
896
    }
897
898
    SCFree(ntv);
899
900
    SCReturnInt(TM_ECODE_OK);
901
}
902
903
/**
904
 * \brief Prepare netmap decode thread.
905
 * \param tv Thread local variables.
906
 * \param initdata Thread config.
907
 * \param data Pointer to DecodeThreadVars placed here.
908
 */
909
static TmEcode DecodeNetmapThreadInit(ThreadVars *tv, const void *initdata, void **data)
910
{
911
    SCEnter();
912
913
    DecodeThreadVars *dtv = DecodeThreadVarsAlloc(tv);
914
    if (dtv == NULL)
915
        SCReturnInt(TM_ECODE_FAILED);
916
917
    DecodeRegisterPerfCounters(dtv, tv);
918
919
    *data = (void *)dtv;
920
921
    SCReturnInt(TM_ECODE_OK);
922
}
923
924
/**
925
 * \brief This function passes off to link type decoders.
926
 *
927
 * \param t pointer to ThreadVars
928
 * \param p pointer to the current packet
929
 * \param data pointer that gets cast into NetmapThreadVars for ntv
930
 */
931
static TmEcode DecodeNetmap(ThreadVars *tv, Packet *p, void *data)
932
{
933
    SCEnter();
934
935
    DecodeThreadVars *dtv = (DecodeThreadVars *)data;
936
937
    BUG_ON(PKT_IS_PSEUDOPKT(p));
938
939
    /* update counters */
940
    DecodeUpdatePacketCounters(tv, dtv, p);
941
942
    DecodeEthernet(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
943
944
    PacketDecodeFinalize(tv, dtv, p);
945
946
    SCReturnInt(TM_ECODE_OK);
947
}
948
949
/**
950
 * \brief
951
 * \param tv
952
 * \param data Pointer to DecodeThreadVars.
953
 */
954
static TmEcode DecodeNetmapThreadDeinit(ThreadVars *tv, void *data)
955
{
956
    SCEnter();
957
958
    if (data != NULL)
959
        DecodeThreadVarsFree(tv, data);
960
961
    SCReturnInt(TM_ECODE_OK);
962
}
963
964
/**
965
 * \brief Registration Function for ReceiveNetmap.
966
 */
967
void TmModuleReceiveNetmapRegister(void)
968
{
969
    tmm_modules[TMM_RECEIVENETMAP].name = "ReceiveNetmap";
970
    tmm_modules[TMM_RECEIVENETMAP].ThreadInit = ReceiveNetmapThreadInit;
971
    tmm_modules[TMM_RECEIVENETMAP].PktAcqLoop = ReceiveNetmapLoop;
972
    tmm_modules[TMM_RECEIVENETMAP].ThreadExitPrintStats = ReceiveNetmapThreadExitStats;
973
    tmm_modules[TMM_RECEIVENETMAP].ThreadDeinit = ReceiveNetmapThreadDeinit;
974
    tmm_modules[TMM_RECEIVENETMAP].cap_flags = SC_CAP_NET_RAW;
975
    tmm_modules[TMM_RECEIVENETMAP].flags = TM_FLAG_RECEIVE_TM;
976
}
977
978
/**
979
 * \brief Registration Function for DecodeNetmap.
980
 */
981
void TmModuleDecodeNetmapRegister(void)
982
{
983
    tmm_modules[TMM_DECODENETMAP].name = "DecodeNetmap";
984
    tmm_modules[TMM_DECODENETMAP].ThreadInit = DecodeNetmapThreadInit;
985
    tmm_modules[TMM_DECODENETMAP].Func = DecodeNetmap;
986
    tmm_modules[TMM_DECODENETMAP].ThreadDeinit = DecodeNetmapThreadDeinit;
987
    tmm_modules[TMM_DECODENETMAP].cap_flags = 0;
988
    tmm_modules[TMM_DECODENETMAP].flags = TM_FLAG_DECODE_TM;
989
}
990
991
#endif /* HAVE_NETMAP */
992
993
/**
994
* @}
995
*/