Coverage Report

Created: 2026-03-31 07:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/suricata7/src/source-af-packet.c
Line
Count
Source
1
/* Copyright (C) 2011-2021 Open Information Security Foundation
2
 *
3
 * You can copy, redistribute or modify this Program under the terms of
4
 * the GNU General Public License version 2 as published by the Free
5
 * Software Foundation.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * version 2 along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15
 * 02110-1301, USA.
16
 */
17
18
/**
19
 *  \defgroup afppacket AF_PACKET running mode
20
 *
21
 *  @{
22
 */
23
24
/**
25
 * \file
26
 *
27
 * \author Eric Leblond <eric@regit.org>
28
 *
29
 * AF_PACKET socket acquisition support
30
 *
31
 */
32
33
#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34
#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
35
#include "suricata-common.h"
36
#include "suricata.h"
37
#include "packet.h"
38
#include "decode.h"
39
#include "packet-queue.h"
40
#include "threads.h"
41
#include "threadvars.h"
42
#include "tm-queuehandlers.h"
43
#include "tm-modules.h"
44
#include "tm-threads.h"
45
#include "tm-threads-common.h"
46
#include "conf.h"
47
#include "util-cpu.h"
48
#include "util-datalink.h"
49
#include "util-debug.h"
50
#include "util-device.h"
51
#include "util-ebpf.h"
52
#include "util-error.h"
53
#include "util-privs.h"
54
#include "util-optimize.h"
55
#include "util-checksum.h"
56
#include "util-ioctl.h"
57
#include "util-host-info.h"
58
#include "tmqh-packetpool.h"
59
#include "source-af-packet.h"
60
#include "runmodes.h"
61
#include "flow-storage.h"
62
#include "util-validate.h"
63
#include "action-globals.h"
64
65
#ifdef HAVE_AF_PACKET
66
67
#if HAVE_SYS_IOCTL_H
68
#include <sys/ioctl.h>
69
#endif
70
71
#if HAVE_LINUX_SOCKIOS_H
72
#include <linux/sockios.h>
73
#endif
74
75
#ifdef HAVE_PACKET_EBPF
76
#include <bpf/libbpf.h>
77
#include <bpf/bpf.h>
78
#endif
79
80
struct bpf_program {
81
    unsigned int bf_len;
82
    struct bpf_insn *bf_insns;
83
};
84
85
#ifdef HAVE_PCAP_H
86
#include <pcap.h>
87
#endif
88
89
#ifdef HAVE_PCAP_PCAP_H
90
#include <pcap/pcap.h>
91
#endif
92
93
#include "util-bpf.h"
94
95
#if HAVE_LINUX_IF_ETHER_H
96
#include <linux/if_ether.h>
97
#endif
98
99
#if HAVE_LINUX_IF_PACKET_H
100
#include <linux/if_packet.h>
101
#endif
102
103
#if HAVE_LINUX_IF_ARP_H
104
#include <linux/if_arp.h>
105
#endif
106
107
#if HAVE_LINUX_FILTER_H
108
#include <linux/filter.h>
109
#endif
110
111
#if HAVE_SYS_MMAN_H
112
#include <sys/mman.h>
113
#endif
114
115
#ifdef HAVE_HW_TIMESTAMPING
116
#include <linux/net_tstamp.h>
117
#endif
118
119
#endif /* HAVE_AF_PACKET */
120
121
extern uint16_t max_pending_packets;
122
123
#ifndef HAVE_AF_PACKET
124
125
TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
126
127
void TmModuleReceiveAFPRegister (void)
128
{
129
    tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
130
    tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
131
    tmm_modules[TMM_RECEIVEAFP].Func = NULL;
132
    tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
133
    tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
134
    tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
135
    tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
136
}
137
138
/**
139
 * \brief Registration Function for DecodeAFP.
140
 */
141
void TmModuleDecodeAFPRegister (void)
142
{
143
    tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
144
    tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
145
    tmm_modules[TMM_DECODEAFP].Func = NULL;
146
    tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
147
    tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
148
    tmm_modules[TMM_DECODEAFP].cap_flags = 0;
149
    tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
150
}
151
152
/**
153
 * \brief this function prints an error message and exits.
154
 */
155
TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
156
{
157
    SCLogError("Error creating thread %s: you do not have "
158
               "support for AF_PACKET enabled, on Linux host please recompile "
159
               "with --enable-af-packet",
160
            tv->name);
161
    exit(EXIT_FAILURE);
162
}
163
164
#else /* We have AF_PACKET support */
165
166
0
#define AFP_IFACE_NAME_LENGTH 48
167
168
0
#define AFP_STATE_DOWN 0
169
0
#define AFP_STATE_UP 1
170
171
0
#define AFP_RECONNECT_TIMEOUT 500000
172
0
#define AFP_DOWN_COUNTER_INTERVAL 40
173
174
0
#define POLL_TIMEOUT 100
175
176
/* kernel flags defined for RX ring tp_status */
177
#ifndef TP_STATUS_KERNEL
178
#define TP_STATUS_KERNEL 0
179
#endif
180
#ifndef TP_STATUS_USER
181
#define TP_STATUS_USER BIT_U32(0)
182
#endif
183
#ifndef TP_STATUS_COPY
184
#define TP_STATUS_COPY BIT_U32(1)
185
#endif
186
#ifndef TP_STATUS_LOSING
187
#define TP_STATUS_LOSING BIT_U32(2)
188
#endif
189
#ifndef TP_STATUS_CSUMNOTREADY
190
#define TP_STATUS_CSUMNOTREADY BIT_U32(3)
191
#endif
192
#ifndef TP_STATUS_VLAN_VALID
193
#define TP_STATUS_VLAN_VALID BIT_U32(4)
194
#endif
195
#ifndef TP_STATUS_BLK_TMO
196
#define TP_STATUS_BLK_TMO BIT_U32(5)
197
#endif
198
#ifndef TP_STATUS_VLAN_TPID_VALID
199
#define TP_STATUS_VLAN_TPID_VALID BIT_U32(6)
200
#endif
201
#ifndef TP_STATUS_CSUM_VALID
202
#define TP_STATUS_CSUM_VALID BIT_U32(7)
203
#endif
204
205
#ifndef TP_STATUS_TS_SOFTWARE
206
#define TP_STATUS_TS_SOFTWARE BIT_U32(29)
207
#endif
208
#ifndef TP_STATUS_TS_SYS_HARDWARE
209
#define TP_STATUS_TS_SYS_HARDWARE BIT_U32(30) /* kernel comment says: "deprecated, never set" */
210
#endif
211
#ifndef TP_STATUS_TS_RAW_HARDWARE
212
#define TP_STATUS_TS_RAW_HARDWARE BIT_U32(31)
213
#endif
214
215
#ifndef TP_STATUS_USER_BUSY
216
/* HACK special setting in the tp_status field for frames we are
217
 * still working on. This can happen in autofp mode where the
218
 * capture thread goes around the ring and finds a frame that still
219
 * hasn't been released by a worker thread.
220
 *
221
 * We use bits 29, 30, 31. 29 and 31 are software and hardware
222
 * timestamps. 30 should not be set by the kernel at all. Combined
223
 * they should never be set on the rx-ring together.
224
 *
225
 * The excessive casting is for handling the fact that the kernel
226
 * defines almost all of these as int flags, not unsigned ints. */
227
#define TP_STATUS_USER_BUSY                                                                        \
228
0
    (uint32_t)((uint32_t)TP_STATUS_TS_SOFTWARE | (uint32_t)TP_STATUS_TS_SYS_HARDWARE |             \
229
0
               (uint32_t)TP_STATUS_TS_RAW_HARDWARE)
230
#endif
231
#define FRAME_BUSY(tp_status)                                                                      \
232
    (((uint32_t)(tp_status) & (uint32_t)TP_STATUS_USER_BUSY) == (uint32_t)TP_STATUS_USER_BUSY)
233
234
/** Result codes used by the ring reading functions. */
enum {
    AFP_READ_OK = 0,
    AFP_READ_FAILURE = 1,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE = 2,
    AFP_KERNEL_DROP = 3,
};
241
242
/** Error classes: fatal versus recoverable. */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR = 2,
};
246
247
/**
 * Typed views on a frame header pointer: the same address can be
 * accessed as a TPACKET_V2 header, a TPACKET_V3 header (when built
 * with HAVE_TPACKET_V3) or as an untyped pointer.
 */
union thdr {
    struct tpacket2_hdr *h2; /**< TPACKET_V2 frame header view */
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3; /**< TPACKET_V3 frame header view */
#endif
    void *raw; /**< untyped view, used for NULL checks and byte offsets */
};
254
255
#ifdef HAVE_PACKET_EBPF
256
static int AFPBypassCallback(Packet *p);
257
static int AFPXDPBypassCallback(Packet *p);
258
#endif
259
260
#define MAX_MAPS 32
261
/**
262
 * \brief Structure to hold thread specific variables.
263
 */
264
typedef struct AFPThreadVars_
265
{
266
    union AFPRing {
267
        union thdr **v2;
268
        struct iovec *v3;
269
    } ring;
270
271
    /* counters */
272
    uint64_t pkts;
273
274
    ThreadVars *tv;
275
    TmSlot *slot;
276
    LiveDevice *livedev;
277
    /* data link type for the thread */
278
    uint32_t datalink;
279
280
#ifdef HAVE_PACKET_EBPF
281
    /* File descriptor of the IPv4 flow bypass table maps */
282
    int v4_map_fd;
283
    /* File descriptor of the IPv6 flow bypass table maps */
284
    int v6_map_fd;
285
#endif
286
287
    unsigned int frame_offset;
288
289
    ChecksumValidationMode checksum_mode;
290
291
    /* references to packet and drop counters */
292
    uint16_t capture_kernel_packets;
293
    uint16_t capture_kernel_drops;
294
    uint16_t capture_errors;
295
    uint16_t afpacket_spin;
296
    uint16_t capture_afp_poll;
297
    uint16_t capture_afp_poll_signal;
298
    uint16_t capture_afp_poll_timeout;
299
    uint16_t capture_afp_poll_data;
300
    uint16_t capture_afp_poll_err;
301
    uint16_t capture_afp_send_err;
302
303
    uint64_t send_errors_logged; /**< snapshot of send errors logged. */
304
305
    /* handle state */
306
    uint8_t afp_state;
307
    uint8_t copy_mode;
308
    unsigned int flags;
309
310
    /* IPS peer */
311
    AFPPeer *mpeer;
312
313
    /*
314
     *  Init related members
315
     */
316
317
    /* thread specific socket */
318
    int socket;
319
320
    int ring_size;
321
    int v2_block_size;
322
    int block_size;
323
    int block_timeout;
324
    /* socket buffer size */
325
    int buffer_size;
326
    /* Filter */
327
    const char *bpf_filter;
328
329
    int promisc;
330
331
    /* bitmask of ignored ssl_pkttypes */
332
    uint32_t pkttype_filter_mask;
333
334
    int down_count;
335
336
    uint16_t cluster_id;
337
    int cluster_type;
338
339
    int threads;
340
341
    union AFPTpacketReq {
342
        struct tpacket_req v2;
343
#ifdef HAVE_TPACKET_V3
344
        struct tpacket_req3 v3;
345
#endif
346
    } req;
347
348
    char iface[AFP_IFACE_NAME_LENGTH];
349
    /* IPS output iface */
350
    char out_iface[AFP_IFACE_NAME_LENGTH];
351
352
    /* mmap'ed ring buffer */
353
    unsigned int ring_buflen;
354
    uint8_t *ring_buf;
355
356
    int snaplen; /**< snaplen in use for passing on to bpf */
357
#ifdef HAVE_PACKET_EBPF
358
    uint8_t xdp_mode;
359
    int ebpf_lb_fd;
360
    int ebpf_filter_fd;
361
    struct ebpf_timeout_config ebpf_t_config;
362
#endif
363
364
} AFPThreadVars;
365
366
static TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
367
static void ReceiveAFPThreadExitStats(ThreadVars *, void *);
368
static TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
369
static TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
370
371
static TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
372
static TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
373
static TmEcode DecodeAFP(ThreadVars *, Packet *, void *);
374
375
static TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
376
static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
377
static int AFPGetDevFlags(int fd, const char *ifname);
378
static int AFPDerefSocket(AFPPeer* peer);
379
static int AFPRefSocket(AFPPeer* peer);
380
381
382
/**
383
 * \brief Registration Function for RecieveAFP.
384
 * \todo Unit tests are needed for this module.
385
 */
386
void TmModuleReceiveAFPRegister (void)
387
71
{
388
71
    tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
389
71
    tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
390
71
    tmm_modules[TMM_RECEIVEAFP].Func = NULL;
391
71
    tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
392
71
    tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
393
71
    tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
394
71
    tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
395
71
    tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
396
71
    tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
397
398
71
}
399
400
/**
401
 *  \defgroup afppeers AFP peers list
402
 *
403
 * AF_PACKET has an IPS mode where interfaces are peered: packets from
404
 * one interface are sent to the peered interface and the other way around. The
405
 * ::AFPPeer list maintains the list of peers. Each ::AFPPeer stores the
406
 * information needed to be able to send packets on the interface.
407
 * An element of the list must not be destroyed during the run of Suricata as it
408
 * is used by ::Packet and other threads.
409
 *
410
 *  @{
411
 */
412
413
typedef struct AFPPeersList_ {
414
    TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
415
    int cnt;
416
    int peered;
417
    int turn; /**< Next value for initialisation order */
418
    SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
419
} AFPPeersList;
420
421
/**
422
 * \brief Update the peer.
423
 *
424
 * Update the AFPPeer of a thread ie set new state, socket number
425
 * or iface index.
426
 *
427
 */
428
static void AFPPeerUpdate(AFPThreadVars *ptv)
429
0
{
430
0
    if (ptv->mpeer == NULL) {
431
0
        return;
432
0
    }
433
0
    (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
434
0
    (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
435
0
    (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
436
0
}
437
438
/**
439
 * \brief Clean and free ressource used by an ::AFPPeer
440
 */
441
static void AFPPeerClean(AFPPeer *peer)
442
0
{
443
0
    if (peer->flags & AFP_SOCK_PROTECT)
444
0
        SCMutexDestroy(&peer->sock_protect);
445
0
    SCFree(peer);
446
0
}
447
448
AFPPeersList peerslist;
449
450
451
/**
452
 * \brief Init the global list of ::AFPPeer
453
 */
454
TmEcode AFPPeersListInit(void)
455
0
{
456
0
    SCEnter();
457
0
    TAILQ_INIT(&peerslist.peers);
458
0
    peerslist.peered = 0;
459
0
    peerslist.cnt = 0;
460
0
    peerslist.turn = 0;
461
0
    SC_ATOMIC_INIT(peerslist.reached);
462
0
    (void) SC_ATOMIC_SET(peerslist.reached, 0);
463
0
    SCReturnInt(TM_ECODE_OK);
464
0
}
465
466
/**
467
 * \brief Check that all ::AFPPeer got a peer
468
 *
469
 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
470
 */
471
TmEcode AFPPeersListCheck(void)
472
0
{
473
0
#define AFP_PEERS_MAX_TRY 4
474
0
#define AFP_PEERS_WAIT 20000
475
0
    int try = 0;
476
0
    SCEnter();
477
0
    while (try < AFP_PEERS_MAX_TRY) {
478
0
        if (peerslist.cnt != peerslist.peered) {
479
0
            usleep(AFP_PEERS_WAIT);
480
0
        } else {
481
0
            SCReturnInt(TM_ECODE_OK);
482
0
        }
483
0
        try++;
484
0
    }
485
0
    SCLogError("thread number not equal");
486
0
    SCReturnInt(TM_ECODE_FAILED);
487
0
}
488
489
/**
490
 * \brief Declare a new AFP thread to AFP peers list.
491
 */
492
static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
493
0
{
494
0
    SCEnter();
495
0
    AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
496
0
    AFPPeer *pitem;
497
498
0
    if (unlikely(peer == NULL)) {
499
0
        SCReturnInt(TM_ECODE_FAILED);
500
0
    }
501
0
    memset(peer, 0, sizeof(AFPPeer));
502
0
    SC_ATOMIC_INIT(peer->socket);
503
0
    SC_ATOMIC_INIT(peer->sock_usage);
504
0
    SC_ATOMIC_INIT(peer->if_idx);
505
0
    SC_ATOMIC_INIT(peer->state);
506
0
    peer->flags = ptv->flags;
507
0
    peer->turn = peerslist.turn++;
508
509
0
    if (peer->flags & AFP_SOCK_PROTECT) {
510
0
        SCMutexInit(&peer->sock_protect, NULL);
511
0
    }
512
513
0
    (void)SC_ATOMIC_SET(peer->sock_usage, 0);
514
0
    (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
515
0
    strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
516
0
    ptv->mpeer = peer;
517
    /* add element to iface list */
518
0
    TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
519
520
0
    if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
521
0
        peerslist.cnt++;
522
523
        /* Iter to find a peer */
524
0
        TAILQ_FOREACH(pitem, &peerslist.peers, next) {
525
0
            if (pitem->peer)
526
0
                continue;
527
0
            if (strcmp(pitem->iface, ptv->out_iface))
528
0
                continue;
529
0
            peer->peer = pitem;
530
0
            pitem->peer = peer;
531
532
0
            LiveDevice *iface = ptv->livedev;
533
0
            DEBUG_VALIDATE_BUG_ON(iface == NULL);
534
0
            DEBUG_VALIDATE_BUG_ON(strcmp(iface->dev, ptv->iface) != 0);
535
0
            LiveDevice *out_iface = LiveGetDevice(ptv->out_iface);
536
0
            if (out_iface == NULL)
537
0
                FatalError("AF_PACKET device %s not found. Aborting..", ptv->out_iface);
538
0
            if (iface->mtu != out_iface->mtu) {
539
0
                SCLogWarning("MTU on %s (%d) and %s (%d) are not equal, transmission of packets "
540
0
                             "bigger than %d will fail.",
541
0
                        iface->dev, iface->mtu, out_iface->dev, out_iface->mtu,
542
0
                        MIN(out_iface->mtu, iface->mtu));
543
0
            }
544
0
            peerslist.peered += 2;
545
0
            break;
546
0
        }
547
0
    }
548
549
0
    AFPPeerUpdate(ptv);
550
551
0
    SCReturnInt(TM_ECODE_OK);
552
0
}
553
554
/**
 * \brief Tell whether a peer still has to wait for its start turn.
 *
 * \param peer peer whose turn is checked
 *
 * \retval 0 if the global turn counter is zero or if the peer's turn
 *           equals the current value of the reached counter
 * \retval 1 otherwise
 */
static int AFPPeersListWaitTurn(AFPPeer *peer)
{
    /* If turn is zero, we already have started threads once */
    if (peerslist.turn != 0) {
        const int must_wait = (peer->turn != SC_ATOMIC_GET(peerslist.reached));
        return must_wait ? 1 : 0;
    }
    return 0;
}
564
565
static void AFPPeersListReachedInc(void)
566
0
{
567
0
    if (peerslist.turn == 0)
568
0
        return;
569
570
0
    if ((SC_ATOMIC_ADD(peerslist.reached, 1) + 1) == peerslist.turn) {
571
0
        (void)SC_ATOMIC_SET(peerslist.reached, 0);
572
        /* Set turn to 0 to skip synchronization when ReceiveAFPLoop is
573
         * restarted.
574
         */
575
0
        peerslist.turn = 0;
576
0
    }
577
0
}
578
579
static int AFPPeersListStarted(void)
580
0
{
581
0
    return !peerslist.turn;
582
0
}
583
584
/**
585
 * \brief Clean the global peers list.
586
 */
587
void AFPPeersListClean(void)
588
0
{
589
0
    AFPPeer *pitem;
590
591
0
    while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
592
0
        TAILQ_REMOVE(&peerslist.peers, pitem, next);
593
0
        AFPPeerClean(pitem);
594
0
    }
595
0
}
596
597
/**
598
 * @}
599
 */
600
601
/**
602
 * \brief Registration Function for DecodeAFP.
603
 * \todo Unit tests are needed for this module.
604
 */
605
void TmModuleDecodeAFPRegister (void)
606
71
{
607
71
    tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
608
71
    tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
609
71
    tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
610
71
    tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
611
71
    tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
612
71
    tmm_modules[TMM_DECODEAFP].cap_flags = 0;
613
71
    tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
614
71
}
615
616
617
static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
618
619
/**
 * \brief Fold the kernel statistics of the thread socket into the
 *        Suricata counters.
 *
 * Only does something when PACKET_STATISTICS is defined. The packet
 * and drop numbers returned by getsockopt() are added to the thread
 * counters and to the live device counters. Send errors recorded on
 * the thread's peer since the last snapshot are added to the send
 * error counter and the snapshot is advanced. Nothing is updated when
 * getsockopt() fails.
 *
 * \param ptv thread variables
 */
static inline void AFPDumpCounters(AFPThreadVars *ptv)
{
#ifdef PACKET_STATISTICS
    struct tpacket_stats kstats;
    socklen_t kstats_len = sizeof(struct tpacket_stats);

    const int rc = getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS, &kstats, &kstats_len);
    if (rc <= -1) {
        return;
    }

    SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
            ptv->tv->name,
            kstats.tp_packets, kstats.tp_drops);

    /* per thread counters */
    StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
    StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);

    /* per device counters */
    (void)SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t)kstats.tp_drops);
    (void)SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t)kstats.tp_packets);

    /* only report the send errors that appeared since the last call */
    const uint64_t send_errors = SC_ATOMIC_GET(ptv->mpeer->send_errors);
    if (send_errors > ptv->send_errors_logged) {
        const uint64_t delta = send_errors - ptv->send_errors_logged;
        StatsAddUI64(ptv->tv, ptv->capture_afp_send_err, delta);
        ptv->send_errors_logged = send_errors;
    }
#endif
}
642
643
/**
644
 * \brief AF packet write function.
645
 *
646
 * This function has to be called before the memory
647
 * related to Packet in ring buffer is released.
648
 *
649
 * \param pointer to Packet
650
 * \param version of capture: TPACKET_V2 or TPACKET_V3
651
 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
652
 *
653
 */
654
static void AFPWritePacket(Packet *p, int version)
655
0
{
656
0
    struct sockaddr_ll socket_address;
657
0
    int socket;
658
659
0
    if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
660
0
        if (PacketCheckAction(p, ACTION_DROP)) {
661
0
            return;
662
0
        }
663
0
    }
664
665
0
    if (p->ethh == NULL) {
666
0
        SCLogWarning("packet should have an ethernet header");
667
0
        return;
668
0
    }
669
670
    /* Index of the network device */
671
0
    socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
672
    /* Address length*/
673
0
    socket_address.sll_halen = ETH_ALEN;
674
    /* Destination MAC */
675
0
    memcpy(socket_address.sll_addr, p->ethh, 6);
676
677
    /* Send packet, locking the socket if necessary */
678
0
    if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
679
0
        SCMutexLock(&p->afp_v.peer->sock_protect);
680
0
    socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
681
682
0
    if (sendto(socket, GET_PKT_DATA(p), GET_PKT_LEN(p), 0, (struct sockaddr *)&socket_address,
683
0
                sizeof(struct sockaddr_ll)) < 0) {
684
0
        if (SC_ATOMIC_ADD(p->afp_v.peer->send_errors, 1) == 0) {
685
0
            SCLogWarning("%s: sending packet failed on socket %d: %s", p->afp_v.peer->iface, socket,
686
0
                    strerror(errno));
687
0
        }
688
0
    }
689
0
    if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
690
0
        SCMutexUnlock(&p->afp_v.peer->sock_protect);
691
0
}
692
693
/**
 * \brief Give the ring frame backing a packet back to the kernel.
 *
 * When the packet is in a copy mode it is first written out with
 * AFPWritePacket(). The frame status is then set to TP_STATUS_KERNEL,
 * the socket reference of the packet's own peer is dropped and the
 * AF_PACKET part of the packet is cleaned up.
 *
 * \param p packet backed by a TPACKET_V2 ring frame; must not be a
 *          pseudo packet and must have a release pointer set
 */
static void AFPReleaseDataFromRing(Packet *p)
{
    DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));

    /* Need to be in copy mode and need to detect early release
       where Ethernet header could not be set (and pseudo packet) */
    const int forward = (p->afp_v.copy_mode != AFP_COPY_MODE_NONE);
    if (forward) {
        AFPWritePacket(p, TPACKET_V2);
    }

    BUG_ON(p->afp_v.relptr == NULL);

    /* the release pointer is the frame header: hand it back */
    union thdr frame = { .raw = p->afp_v.relptr };
    frame.h2->tp_status = TP_STATUS_KERNEL;

    (void)AFPDerefSocket(p->afp_v.mpeer);

    AFPV_CLEANUP(&p->afp_v);
}
713
714
#ifdef HAVE_TPACKET_V3
715
/**
 * \brief Release callback for packets read from a TPACKET_V3 ring.
 *
 * Writes the packet out with AFPWritePacket() when it is in a copy
 * mode, then frees or releases the packet.
 *
 * \param p packet to release; must not be a pseudo packet
 */
static void AFPReleasePacketV3(Packet *p)
{
    DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));

    /* Need to be in copy mode and need to detect early release
       where Ethernet header could not be set (and pseudo packet) */
    const int forward = (p->afp_v.copy_mode != AFP_COPY_MODE_NONE);
    if (forward) {
        AFPWritePacket(p, TPACKET_V3);
    }

    PacketFreeOrRelease(p);
}
726
#endif
727
728
/**
 * \brief Release callback for packets read from a TPACKET_V2 ring.
 *
 * \param p packet to release
 */
static void AFPReleasePacket(Packet *p)
{
    /* the ring frame has to go back first, then the packet itself */
    AFPReleaseDataFromRing(p);

    PacketFreeOrRelease(p);
}
733
734
/** \internal
735
 *  \brief recoverable error - release packet and
736
 *         return AFP_SURI_FAILURE
737
 */
738
static inline int AFPSuriFailure(AFPThreadVars *ptv, union thdr h)
739
0
{
740
0
    h.h2->tp_status = TP_STATUS_KERNEL;
741
0
    if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
742
0
        ptv->frame_offset = 0;
743
0
    }
744
0
    SCReturnInt(AFP_SURI_FAILURE);
745
0
}
746
747
/**
 * \brief Install the flow bypass callback and map descriptors on a
 *        packet.
 *
 * Only does something when built with HAVE_PACKET_EBPF and when the
 * thread has AFP_BYPASS or AFP_XDPBYPASS set. When both flags are set
 * the XDP callback is the one that ends up installed.
 *
 * \param ptv thread variables providing flags and map descriptors
 * \param p   packet to set up
 */
static inline void AFPReadApplyBypass(const AFPThreadVars *ptv, Packet *p)
{
#ifdef HAVE_PACKET_EBPF
    const bool ebpf_bypass = (ptv->flags & AFP_BYPASS) != 0;
    const bool xdp_bypass = (ptv->flags & AFP_XDPBYPASS) != 0;

    if (!ebpf_bypass && !xdp_bypass) {
        return;
    }

    /* XDP takes precedence when both flags are set */
    p->BypassPacketsFlow = xdp_bypass ? AFPXDPBypassCallback : AFPBypassCallback;
    p->afp_v.v4_map_fd = ptv->v4_map_fd;
    p->afp_v.v6_map_fd = ptv->v6_map_fd;
    p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
#endif
}
764
765
/** \internal
766
 *  \brief setup packet for AFPReadFromRing
767
 */
768
static void AFPReadFromRingSetupPacket(
769
        AFPThreadVars *ptv, union thdr h, const unsigned int tp_status, Packet *p)
770
0
{
771
0
    PKT_SET_SRC(p, PKT_SRC_WIRE);
772
773
    /* flag the packet as TP_STATUS_USER_BUSY, which is ignore by the kernel, but
774
     * acts as an indicator that we've reached a frame that is not yet released by
775
     * us in autofp mode. It will be cleared when the frame gets released to the kernel. */
776
0
    h.h2->tp_status |= TP_STATUS_USER_BUSY;
777
0
    p->livedev = ptv->livedev;
778
0
    p->datalink = ptv->datalink;
779
0
    ptv->pkts++;
780
781
0
    AFPReadApplyBypass(ptv, p);
782
783
0
    if (h.h2->tp_len > h.h2->tp_snaplen) {
784
0
        SCLogDebug("Packet length (%d) > snaplen (%d), truncating", h.h2->tp_len, h.h2->tp_snaplen);
785
0
        ENGINE_SET_INVALID_EVENT(p, AFP_TRUNC_PKT);
786
0
    }
787
788
    /* get vlan id from header */
789
0
    if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
790
0
            (tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
791
0
        p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
792
0
        p->vlan_idx = 1;
793
0
        p->afp_v.vlan_tci = h.h2->tp_vlan_tci;
794
0
    }
795
796
0
    (void)PacketSetData(p, (unsigned char *)h.raw + h.h2->tp_mac, h.h2->tp_snaplen);
797
798
0
    p->ReleasePacket = AFPReleasePacket;
799
0
    p->afp_v.relptr = h.raw;
800
0
    if (ptv->flags & AFP_NEED_PEER) {
801
0
        p->afp_v.mpeer = ptv->mpeer;
802
0
        AFPRefSocket(ptv->mpeer);
803
0
    } else {
804
0
        p->afp_v.mpeer = NULL;
805
0
    }
806
0
    p->afp_v.copy_mode = ptv->copy_mode;
807
0
    p->afp_v.peer = (p->afp_v.copy_mode == AFP_COPY_MODE_NONE) ? NULL : ptv->mpeer->peer;
808
809
    /* Timestamp */
810
0
    p->ts = (SCTime_t){ .secs = h.h2->tp_sec, .usecs = h.h2->tp_nsec / 1000 };
811
0
    SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)", GET_PKT_LEN(p), p, GET_PKT_DATA(p));
812
813
    /* We only check for checksum disable */
814
0
    if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
815
0
        p->flags |= PKT_IGNORE_CHECKSUM;
816
0
    } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
817
0
        if (ChecksumAutoModeCheck(ptv->pkts, SC_ATOMIC_GET(ptv->livedev->pkts),
818
0
                    SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
819
0
            ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
820
0
            p->flags |= PKT_IGNORE_CHECKSUM;
821
0
        }
822
0
    } else {
823
0
        if (tp_status & TP_STATUS_CSUMNOTREADY) {
824
0
            p->flags |= PKT_IGNORE_CHECKSUM;
825
0
        }
826
0
    }
827
0
}
828
829
/**
 * \brief Busy wait until the frame at the current ring offset is no
 *        longer owned by the kernel.
 *
 * The loop ends when the frame status differs from TP_STATUS_KERNEL,
 * when suricata_ctl_flags becomes non zero, or when more than one
 * second has been spent waiting. The number of spins is added to the
 * afpacket_spin counter.
 *
 * \param ptv thread variables
 *
 * \retval AFP_READ_FAILURE if the ring has no frame pointer at the
 *                          current offset
 * \retval AFP_READ_OK      otherwise, including when the wait timed
 *                          out or was interrupted
 */
static inline int AFPReadFromRingWaitForPacket(AFPThreadVars *ptv)
{
    union thdr h;
    struct timeval start_time;
    gettimeofday(&start_time, NULL);
    uint64_t busy_loop_iter = 0;

    /* busy wait loop until we have packets available */
    while (1) {
        if (unlikely(suricata_ctl_flags != 0)) {
            break;
        }
        h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
        if (unlikely(h.raw == NULL)) {
            return AFP_READ_FAILURE;
        }
        const unsigned int tp_status = h.h2->tp_status;
        if (tp_status == TP_STATUS_KERNEL) {
            busy_loop_iter++;

            /* Sample the clock on every spin: the elapsed time has to be
             * measured against the current time, not a zeroed timeval,
             * for the one second bound below to mean anything. */
            struct timeval cur_time;
            gettimeofday(&cur_time, NULL);
            /* A clock stepping backwards yields a negative value that
             * converts to a huge unsigned one, which also ends the wait. */
            uint64_t milliseconds =
                    ((cur_time.tv_sec - start_time.tv_sec) * 1000) +
                    (((1000000 + cur_time.tv_usec - start_time.tv_usec) / 1000) - 1000);
            if (milliseconds > 1000) {
                break;
            }
            continue;
        }
        break;
    }
    if (busy_loop_iter) {
        StatsAddUI64(ptv->tv, ptv->afpacket_spin, busy_loop_iter);
    }
    return AFP_READ_OK;
}
866
867
/**
868
 * \brief AF packet frame ignore logic
869
 *
870
 * Given a sockaddr_ll of a frame, use the pkttype_filter_mask to decide if the
871
 * frame should be ignored. Protect from undefined behavior if there's ever
872
 * a sll_pkttype that would shift by too much. At this point, only outgoing
873
 * packets (4) are ignored. The highest value in if_linux.h is PACKET_KERNEL (7),
874
 * this extra check is being overly cautious.
875
 *
876
 * \retval true if the frame should be ignored
877
 */
878
static inline bool AFPShouldIgnoreFrame(AFPThreadVars *ptv, const struct sockaddr_ll *sll)
879
0
{
880
0
    if (unlikely(sll->sll_pkttype > 31))
881
0
        return false;
882
883
0
    return (ptv->pkttype_filter_mask & BIT_U32(sll->sll_pkttype)) != 0;
884
0
}
885
886
/**
887
 * \brief AF packet read function for ring
888
 *
889
 * This function fills
890
 * From here the packets are picked up by the DecodeAFP thread.
891
 *
892
 * \param user pointer to AFPThreadVars
893
 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
894
 */
895
static int AFPReadFromRing(AFPThreadVars *ptv)
896
0
{
897
0
    union thdr h;
898
0
    bool emergency_flush = false;
899
0
    const unsigned int start_pos = ptv->frame_offset;
900
901
    /* poll() told us there are frames, so lets wait for at least
902
     * one frame to become available. */
903
0
    if (AFPReadFromRingWaitForPacket(ptv) != AFP_READ_OK)
904
0
        return AFP_READ_FAILURE;
905
906
    /* process the frames in the ring */
907
0
    while (1) {
908
0
        if (unlikely(suricata_ctl_flags != 0)) {
909
0
            break;
910
0
        }
911
0
        h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
912
0
        if (unlikely(h.raw == NULL)) {
913
0
            return AFP_READ_FAILURE;
914
0
        }
915
0
        const unsigned int tp_status = h.h2->tp_status;
916
        /* if we find a kernel frame we are done */
917
0
        if (unlikely(tp_status == TP_STATUS_KERNEL)) {
918
0
            break;
919
0
        }
920
        /* if in autofp mode the frame is still busy, return to poll */
921
0
        if (unlikely(FRAME_BUSY(tp_status))) {
922
0
            break;
923
0
        }
924
0
        emergency_flush |= ((tp_status & TP_STATUS_LOSING) != 0);
925
926
0
        if ((ptv->flags & AFP_EMERGENCY_MODE) && emergency_flush) {
927
0
            h.h2->tp_status = TP_STATUS_KERNEL;
928
0
            goto next_frame;
929
0
        }
930
931
0
        const struct sockaddr_ll *sll =
932
0
                (const struct sockaddr_ll *)((uint8_t *)h.h2 +
933
0
                                             TPACKET_ALIGN(sizeof(struct tpacket2_hdr)));
934
0
        if (unlikely(AFPShouldIgnoreFrame(ptv, sll)))
935
0
            goto next_frame;
936
937
0
        Packet *p = PacketGetFromQueueOrAlloc();
938
0
        if (p == NULL) {
939
0
            return AFPSuriFailure(ptv, h);
940
0
        }
941
0
        AFPReadFromRingSetupPacket(ptv, h, tp_status, p);
942
943
0
        if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
944
0
            return AFPSuriFailure(ptv, h);
945
0
        }
946
0
next_frame:
947
0
        if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
948
0
            ptv->frame_offset = 0;
949
            /* Get out of loop to be sure we will reach maintenance tasks */
950
0
            if (ptv->frame_offset == start_pos)
951
0
                break;
952
0
        }
953
0
    }
954
0
    if (emergency_flush) {
955
0
        AFPDumpCounters(ptv);
956
0
    }
957
0
    SCReturnInt(AFP_READ_OK);
958
0
}
959
960
#ifdef HAVE_TPACKET_V3
961
static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
962
0
{
963
0
    pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
964
0
}
965
966
/**
 * \brief Build a Packet from one TPACKET_V3 frame and pass it to the next slot
 *
 * The packet data is set with PacketSetData() to start at ppd + tp_mac and
 * to span tp_snaplen bytes.
 *
 * \param ptv AF_PACKET thread variables
 * \param pbd block descriptor holding the frame (not used by this function)
 * \param ppd header of the frame to process
 *
 * \retval AFP_READ_OK the packet was accepted by TmThreadsSlotProcessPkt()
 * \retval AFP_SURI_FAILURE no Packet could be obtained, or processing failed
 */
static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
{
    Packet *p = PacketGetFromQueueOrAlloc();
    if (p == NULL) {
        SCReturnInt(AFP_SURI_FAILURE);
    }
    PKT_SET_SRC(p, PKT_SRC_WIRE);

    AFPReadApplyBypass(ptv, p);

    ptv->pkts++;
    p->livedev = ptv->livedev;
    p->datalink = ptv->datalink;

    /* VLAN info is taken from the frame header only when the thread asks for it */
    if (ptv->flags & AFP_VLAN_IN_HEADER) {
        if ((ppd->tp_status & TP_STATUS_VLAN_VALID) || ppd->hv1.tp_vlan_tci) {
            p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
            p->vlan_idx = 1;
            p->afp_v.vlan_tci = (uint16_t)ppd->hv1.tp_vlan_tci;
        }
    }

    /* captured length smaller than the length on the wire: flag the packet */
    if (ppd->tp_len > ppd->tp_snaplen) {
        SCLogDebug("Packet length (%d) > snaplen (%d), truncating", ppd->tp_len, ppd->tp_snaplen);
        ENGINE_SET_INVALID_EVENT(p, AFP_TRUNC_PKT);
    }

    (void)PacketSetData(p, (unsigned char *)ppd + ppd->tp_mac, ppd->tp_snaplen);

    p->ReleasePacket = AFPReleasePacketV3;
    p->afp_v.relptr = NULL;
    p->afp_v.mpeer = NULL;
    p->afp_v.copy_mode = ptv->copy_mode;
    if (p->afp_v.copy_mode == AFP_COPY_MODE_NONE) {
        p->afp_v.peer = NULL;
    } else {
        p->afp_v.peer = ptv->mpeer->peer;
    }

    /* Timestamp: the frame carries nanoseconds, the packet stores microseconds */
    p->ts = (SCTime_t){ .secs = ppd->tp_sec, .usecs = ppd->tp_nsec / 1000 };
    SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
            GET_PKT_LEN(p), p, GET_PKT_DATA(p));

    /* Decide whether checksum validation must be skipped for this packet */
    switch (ptv->checksum_mode) {
        case CHECKSUM_VALIDATION_DISABLE:
            p->flags |= PKT_IGNORE_CHECKSUM;
            break;
        case CHECKSUM_VALIDATION_AUTO:
            if (ChecksumAutoModeCheck(ptv->pkts,
                        SC_ATOMIC_GET(ptv->livedev->pkts),
                        SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
                /* auto mode decided to stop validating from now on */
                ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
                p->flags |= PKT_IGNORE_CHECKSUM;
            }
            break;
        default:
            if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
                p->flags |= PKT_IGNORE_CHECKSUM;
            }
            break;
    }

    if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
        SCReturnInt(AFP_SURI_FAILURE);
    }

    SCReturnInt(AFP_READ_OK);
}
1027
1028
/**
 * \brief Process every frame of one TPACKET_V3 block
 *
 * Frames are visited by following tp_next_offset, starting at
 * offset_to_first_pkt, for num_pkts iterations. Frames rejected by
 * AFPShouldIgnoreFrame() are skipped.
 *
 * \param ptv AF_PACKET thread variables
 * \param pbd block to walk
 *
 * \retval AFP_READ_OK all frames were visited
 * \retval other first return value of AFPParsePacketV3() that is neither
 *         AFP_READ_OK nor AFP_SURI_FAILURE
 */
static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
{
    const int num_pkts = pbd->hdr.bh1.num_pkts;
    uint8_t *ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;

    /* the cursor moves to the next frame at the end of every iteration,
     * whether the frame was processed or ignored */
    for (int i = 0; i < num_pkts;
            ++i, ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset) {
        const struct sockaddr_ll *sll =
                (const struct sockaddr_ll *)(ppd + TPACKET_ALIGN(sizeof(struct tpacket3_hdr)));
        if (unlikely(AFPShouldIgnoreFrame(ptv, sll))) {
            continue;
        }

        const int ret = AFPParsePacketV3(ptv, pbd, (struct tpacket3_hdr *)ppd);
        /* AFP_SURI_FAILURE is an internal error: just continue and
         * treat the next packet. Anything else that is not OK aborts
         * the walk. */
        if (ret != AFP_READ_OK && ret != AFP_SURI_FAILURE) {
            SCReturnInt(ret);
        }
    }

    SCReturnInt(AFP_READ_OK);
}
1058
#endif /* HAVE_TPACKET_V3 */
1059
1060
/**
1061
 * \brief AF packet read function for ring
1062
 *
1063
 * This function fills
1064
 * From here the packets are picked up by the DecodeAFP thread.
1065
 *
1066
 * \param user pointer to AFPThreadVars
1067
 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1068
 */
1069
static int AFPReadFromRingV3(AFPThreadVars *ptv)
1070
0
{
1071
0
#ifdef HAVE_TPACKET_V3
1072
    /* Loop till we have packets available */
1073
0
    while (1) {
1074
0
        if (unlikely(suricata_ctl_flags != 0)) {
1075
0
            SCLogDebug("Exiting AFP V3 read loop");
1076
0
            break;
1077
0
        }
1078
1079
0
        struct tpacket_block_desc *pbd =
1080
0
                (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
1081
1082
        /* block is not ready to be read */
1083
0
        if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1084
0
            SCReturnInt(AFP_READ_OK);
1085
0
        }
1086
1087
0
        int ret = AFPWalkBlock(ptv, pbd);
1088
0
        if (unlikely(ret != AFP_READ_OK)) {
1089
0
            AFPFlushBlock(pbd);
1090
0
            SCReturnInt(ret);
1091
0
        }
1092
1093
0
        AFPFlushBlock(pbd);
1094
0
        ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
1095
        /* return to maintenance task after one loop on the ring */
1096
0
        if (ptv->frame_offset == 0) {
1097
0
            SCReturnInt(AFP_READ_OK);
1098
0
        }
1099
0
    }
1100
0
#endif
1101
0
    SCReturnInt(AFP_READ_OK);
1102
0
}
1103
1104
/**
1105
 * \brief Reference socket
1106
 *
1107
 * \retval O in case of failure, 1 in case of success
1108
 */
1109
static int AFPRefSocket(AFPPeer* peer)
1110
0
{
1111
0
    if (unlikely(peer == NULL))
1112
0
        return 0;
1113
1114
0
    (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1115
0
    return 1;
1116
0
}
1117
1118
1119
/**
1120
 * \brief Dereference socket
1121
 *
1122
 * \retval 1 if socket is still alive, 0 if not
1123
 */
1124
static int AFPDerefSocket(AFPPeer* peer)
1125
0
{
1126
0
    if (peer == NULL)
1127
0
        return 1;
1128
1129
0
    if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 1) {
1130
0
        return 0;
1131
0
    }
1132
0
    return 1;
1133
0
}
1134
1135
/**
 * \brief Release the ring bookkeeping array, the ring mapping and the socket
 *
 * Triggers BUG_ON() if a peer is set and its sock_usage counter is not 0.
 * The mapping and the socket are only touched when ptv->socket is not -1;
 * the socket field is reset to -1 afterwards.
 *
 * \param ptv AF_PACKET thread variables
 */
static void AFPCloseSocket(AFPThreadVars *ptv)
{
    if (ptv->mpeer != NULL)
        BUG_ON(SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0);

    if (ptv->flags & AFP_TPACKET_V3) {
#ifdef HAVE_TPACKET_V3
        if (ptv->ring.v3) {
            SCFree(ptv->ring.v3);
            ptv->ring.v3 = NULL;
        }
#endif
    } else if (ptv->ring.v2) {
        /* only used in reading phase, we can free it */
        SCFree(ptv->ring.v2);
        ptv->ring.v2 = NULL;
    }

    if (ptv->socket == -1) {
        return;
    }

    SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
    munmap(ptv->ring_buf, ptv->ring_buflen);
    close(ptv->socket);
    ptv->socket = -1;
}
1161
1162
/**
 * \brief Record a new interface state and adjust the socket usage counter
 *
 * Stores the state, resets down_count, then:
 *  - AFP_STATE_DOWN: subtracts one from mpeer->sock_usage
 *  - AFP_STATE_UP: calls AFPPeerUpdate() and sets mpeer->sock_usage to 1
 *
 * \param ptv AF_PACKET thread variables
 * \param state new value for ptv->afp_state
 */
static void AFPSwitchState(AFPThreadVars *ptv, uint8_t state)
{
    ptv->afp_state = state;
    ptv->down_count = 0;

    switch (state) {
        case AFP_STATE_DOWN:
            /* cleanup is done on thread cleanup or try reopen
             * as there may still be packets in autofp that
             * are referencing us */
            (void)SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1);
            break;
        case AFP_STATE_UP:
            AFPPeerUpdate(ptv);
            (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
            break;
        default:
            break;
    }
}
1178
1179
/**
 * \brief Discard one ring entry (V3 block or V2 frame) during start-up sync
 *
 * The timestamp of the entry is compared against synctv. For TPACKET_V3 the
 * current block is always counted, flushed and skipped; for TPACKET_V2 the
 * frame is only counted and marked TP_STATUS_KERNEL when its timestamp is
 * not after synctv.
 *
 * \param ptv AF_PACKET thread variables
 * \param synctv reference time
 * \param discarded_pkts counter increased by the number of dropped packets
 *
 * \retval 1 suricata_ctl_flags is set, or the entry is newer than synctv
 * \retval 0 entry discarded, or (V2) the current frame has TP_STATUS_KERNEL
 * \retval -1 (V2) the ring slot pointer is NULL
 */
static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
                                     uint64_t *discarded_pkts)
{
    if (unlikely(suricata_ctl_flags != 0)) {
        return 1;
    }

#ifdef HAVE_TPACKET_V3
    if (ptv->flags & AFP_TPACKET_V3) {
        struct tpacket_block_desc *pbd =
                (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
        *discarded_pkts += pbd->hdr.bh1.num_pkts;

        /* only the first packet of the block is compared to the sync time */
        struct tpacket3_hdr *ppd =
            (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
        const time_t first_sec = (time_t)ppd->tp_sec;
        const suseconds_t first_usec = (suseconds_t) (ppd->tp_nsec / 1000);

        int ret = 0;
        if (first_sec > synctv->tv_sec ||
                (first_sec == synctv->tv_sec && first_usec > (suseconds_t)synctv->tv_usec)) {
            ret = 1;
        }

        AFPFlushBlock(pbd);
        ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
        return ret;
    }
#endif

    /* TPACKET_V2: read packet from ring */
    union thdr h;
    h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
    if (h.raw == NULL) {
        return -1;
    }
    if (h.h2->tp_status == TP_STATUS_KERNEL)
        return 0;

    const time_t frame_sec = (time_t)h.h2->tp_sec;
    const suseconds_t frame_usec = (suseconds_t) (h.h2->tp_nsec / 1000);
    if (frame_sec > synctv->tv_sec ||
            (frame_sec == synctv->tv_sec && frame_usec > synctv->tv_usec)) {
        return 1;
    }

    (*discarded_pkts)++;
    h.h2->tp_status = TP_STATUS_KERNEL;
    if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
        ptv->frame_offset = 0;
    }

    return 0;
}
1230
1231
/** \brief wait for all afpacket threads to fully init
1232
 *
1233
 *  Discard packets before all threads are ready, as the cluster
1234
 *  setup is not complete yet.
1235
 *
1236
 *  if AFPPeersListStarted() returns true init is complete
1237
 *
1238
 *  \retval r 1 = happy, otherwise unhappy
1239
 */
1240
static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
1241
0
{
1242
0
    struct timeval synctv;
1243
0
    struct pollfd fds;
1244
1245
0
    fds.fd = ptv->socket;
1246
0
    fds.events = POLLIN;
1247
1248
    /* Set timeval to end of the world */
1249
0
    synctv.tv_sec = 0xffffffff;
1250
0
    synctv.tv_usec = 0xffffffff;
1251
1252
0
    while (1) {
1253
0
        int r = poll(&fds, 1, POLL_TIMEOUT);
1254
0
        if (r > 0 &&
1255
0
                (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1256
0
            SCLogWarning("%s: poll failed %02x", ptv->iface,
1257
0
                    fds.revents & (POLLHUP | POLLRDHUP | POLLERR | POLLNVAL));
1258
0
            return 0;
1259
0
        } else if (r > 0) {
1260
0
            if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1261
0
                gettimeofday(&synctv, NULL);
1262
0
            }
1263
0
            r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
1264
0
            SCLogDebug("Discarding on %s", ptv->tv->name);
1265
0
            switch (r) {
1266
0
                case 1:
1267
0
                    SCLogDebug("Starting to read on %s", ptv->tv->name);
1268
0
                    return 1;
1269
0
                case -1:
1270
0
                    return r;
1271
0
            }
1272
        /* no packets */
1273
0
        } else if (r == 0 && AFPPeersListStarted()) {
1274
0
            SCLogDebug("Starting to read on %s", ptv->tv->name);
1275
0
            return 1;
1276
0
        } else if (r < 0) { /* only exit on error */
1277
0
            SCLogWarning("poll failed with retval %d", r);
1278
0
            return 0;
1279
0
        }
1280
0
    }
1281
0
    return 1;
1282
0
}
1283
1284
/**
1285
 * \brief Try to reopen socket
1286
 *
1287
 * \retval 0 in case of success, negative if error occurs or a condition
1288
 * is not met.
1289
 */
1290
static int AFPTryReopen(AFPThreadVars *ptv)
1291
0
{
1292
0
    ptv->down_count++;
1293
1294
    /* Don't reconnect till we have packet that did not release data */
1295
0
    if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1296
0
        return -1;
1297
0
    }
1298
1299
    /* ref cnt 0, we can close the old socket */
1300
0
    AFPCloseSocket(ptv);
1301
1302
0
    int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
1303
0
    if (afp_activate_r != 0) {
1304
0
        if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1305
0
            SCLogWarning("%s: can't reopen interface", ptv->iface);
1306
0
        }
1307
0
        return afp_activate_r;
1308
0
    }
1309
1310
0
    SCLogInfo("%s: interface is back up", ptv->iface);
1311
0
    return 0;
1312
0
}
1313
1314
/**
1315
 *  \brief Main AF_PACKET reading Loop function
1316
 */
1317
TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1318
0
{
1319
0
    SCEnter();
1320
1321
0
    AFPThreadVars *ptv = (AFPThreadVars *)data;
1322
0
    struct pollfd fds;
1323
0
    int r;
1324
0
    TmSlot *s = (TmSlot *)slot;
1325
0
    time_t last_dump = 0;
1326
0
    time_t current_time;
1327
0
    int (*AFPReadFunc) (AFPThreadVars *);
1328
0
    uint64_t discarded_pkts = 0;
1329
1330
0
    ptv->slot = s->slot_next;
1331
1332
0
    if (ptv->flags & AFP_TPACKET_V3) {
1333
0
        AFPReadFunc = AFPReadFromRingV3;
1334
0
    } else {
1335
0
        AFPReadFunc = AFPReadFromRing;
1336
0
    }
1337
1338
0
    if (ptv->afp_state == AFP_STATE_DOWN) {
1339
        /* Wait for our turn, threads before us must have opened the socket */
1340
0
        while (AFPPeersListWaitTurn(ptv->mpeer)) {
1341
0
            usleep(1000);
1342
0
            if (suricata_ctl_flags != 0) {
1343
0
                break;
1344
0
            }
1345
0
        }
1346
0
        r = AFPCreateSocket(ptv, ptv->iface, 1);
1347
0
        if (r < 0) {
1348
0
            switch (-r) {
1349
0
                case AFP_FATAL_ERROR:
1350
0
                    SCLogError("%s: failed to init socket for interface", ptv->iface);
1351
0
                    SCReturnInt(TM_ECODE_FAILED);
1352
0
                case AFP_RECOVERABLE_ERROR:
1353
0
                    SCLogWarning(
1354
0
                            "%s: failed to init socket for interface, retrying soon", ptv->iface);
1355
0
            }
1356
0
        }
1357
0
        AFPPeersListReachedInc();
1358
0
    }
1359
0
    if (ptv->afp_state == AFP_STATE_UP) {
1360
0
        SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
1361
0
        AFPSynchronizeStart(ptv, &discarded_pkts);
1362
        /* let's reset counter as we will start the capture at the
1363
         * next function call */
1364
0
#ifdef PACKET_STATISTICS
1365
0
         struct tpacket_stats kstats;
1366
0
         socklen_t len = sizeof (struct tpacket_stats);
1367
0
         if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1368
0
                     &kstats, &len) > -1) {
1369
0
             uint64_t pkts = 0;
1370
0
             SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1371
0
                     ", dropped %" PRIu32 "",
1372
0
                     ptv->tv->name,
1373
0
                     kstats.tp_packets, kstats.tp_drops);
1374
0
             pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1375
0
             StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1376
0
             (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1377
0
         }
1378
0
#endif
1379
0
    }
1380
1381
0
    fds.fd = ptv->socket;
1382
0
    fds.events = POLLIN;
1383
1384
    // Indicate that the thread is actually running its application level code (i.e., it can poll
1385
    // packets)
1386
0
    TmThreadsSetFlag(tv, THV_RUNNING);
1387
1388
0
    while (1) {
1389
        /* Start by checking the state of our interface */
1390
0
        if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1391
0
            int dbreak = 0;
1392
1393
0
            do {
1394
0
                usleep(AFP_RECONNECT_TIMEOUT);
1395
0
                if (suricata_ctl_flags != 0) {
1396
0
                    dbreak = 1;
1397
0
                    break;
1398
0
                }
1399
0
                r = AFPTryReopen(ptv);
1400
0
                fds.fd = ptv->socket;
1401
0
            } while (r < 0);
1402
0
            if (dbreak == 1)
1403
0
                break;
1404
0
        }
1405
1406
        /* make sure we have at least one packet in the packet pool, to prevent
1407
         * us from alloc'ing packets at line rate */
1408
0
        PacketPoolWait();
1409
1410
0
        StatsIncr(ptv->tv, ptv->capture_afp_poll);
1411
1412
0
        r = poll(&fds, 1, POLL_TIMEOUT);
1413
1414
0
        if (suricata_ctl_flags != 0) {
1415
0
            break;
1416
0
        }
1417
1418
0
        if (r > 0 &&
1419
0
                (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1420
0
            StatsIncr(ptv->tv, ptv->capture_afp_poll_signal);
1421
0
            if (fds.revents & (POLLHUP | POLLRDHUP)) {
1422
0
                AFPSwitchState(ptv, AFP_STATE_DOWN);
1423
0
                continue;
1424
0
            } else if (fds.revents & POLLERR) {
1425
0
                char c;
1426
                /* Do a recv to get errno */
1427
0
                if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1428
0
                    continue; /* what, no error? */
1429
0
                SCLogWarning("%s: failed to poll interface: %s", ptv->iface, strerror(errno));
1430
0
                AFPSwitchState(ptv, AFP_STATE_DOWN);
1431
0
                continue;
1432
0
            } else if (fds.revents & POLLNVAL) {
1433
0
                SCLogWarning("%s: invalid poll request: %s", ptv->iface, strerror(errno));
1434
0
                AFPSwitchState(ptv, AFP_STATE_DOWN);
1435
0
                continue;
1436
0
            }
1437
0
        } else if (r > 0) {
1438
0
            StatsIncr(ptv->tv, ptv->capture_afp_poll_data);
1439
0
            r = AFPReadFunc(ptv);
1440
0
            switch (r) {
1441
0
                case AFP_READ_OK:
1442
                    /* Trigger one dump of stats every second */
1443
0
                    current_time = time(NULL);
1444
0
                    if (current_time != last_dump) {
1445
0
                        AFPDumpCounters(ptv);
1446
0
                        last_dump = current_time;
1447
0
                    }
1448
0
                    break;
1449
0
                case AFP_READ_FAILURE:
1450
                    /* AFPRead in error: best to reset the socket */
1451
0
                    SCLogWarning("%s: read failure: %s", ptv->iface, strerror(errno));
1452
0
                    AFPSwitchState(ptv, AFP_STATE_DOWN);
1453
0
                    continue;
1454
0
                case AFP_SURI_FAILURE:
1455
0
                    StatsIncr(ptv->tv, ptv->capture_errors);
1456
0
                    break;
1457
0
                case AFP_KERNEL_DROP:
1458
0
                    AFPDumpCounters(ptv);
1459
0
                    break;
1460
0
            }
1461
0
        } else if (unlikely(r == 0)) {
1462
0
            StatsIncr(ptv->tv, ptv->capture_afp_poll_timeout);
1463
            /* Trigger one dump of stats every second */
1464
0
            current_time = time(NULL);
1465
0
            if (current_time != last_dump) {
1466
0
                AFPDumpCounters(ptv);
1467
0
                last_dump = current_time;
1468
0
            }
1469
            /* poll timed out, lets see handle our timeout path */
1470
0
            TmThreadsCaptureHandleTimeout(tv, NULL);
1471
1472
0
        } else if ((r < 0) && (errno != EINTR)) {
1473
0
            StatsIncr(ptv->tv, ptv->capture_afp_poll_err);
1474
0
            SCLogWarning("%s: poll failure: %s", ptv->iface, strerror(errno));
1475
0
            AFPSwitchState(ptv, AFP_STATE_DOWN);
1476
0
            continue;
1477
0
        }
1478
0
        StatsSyncCountersIfSignalled(tv);
1479
0
    }
1480
1481
0
    AFPDumpCounters(ptv);
1482
0
    StatsSyncCountersIfSignalled(tv);
1483
0
    SCReturnInt(TM_ECODE_OK);
1484
0
}
1485
1486
static int AFPGetDevFlags(int fd, const char *ifname)
1487
0
{
1488
0
    struct ifreq ifr;
1489
1490
0
    memset(&ifr, 0, sizeof(ifr));
1491
0
    strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1492
1493
0
    if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1494
0
        SCLogError("%s: failed to get interface flags: %s", ifname, strerror(errno));
1495
0
        return -1;
1496
0
    }
1497
1498
0
    return ifr.ifr_flags;
1499
0
}
1500
1501
1502
static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
1503
0
{
1504
0
    struct ifreq ifr;
1505
1506
0
    memset(&ifr, 0, sizeof(ifr));
1507
0
    strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1508
1509
0
    if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
1510
0
        if (verbose)
1511
0
            SCLogError("%s: failed to find interface: %s", ifname, strerror(errno));
1512
0
        return -1;
1513
0
    }
1514
1515
0
    return ifr.ifr_ifindex;
1516
0
}
1517
1518
static int AFPGetDevLinktype(int fd, const char *ifname)
1519
0
{
1520
0
    struct ifreq ifr;
1521
1522
0
    memset(&ifr, 0, sizeof(ifr));
1523
0
    strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1524
1525
0
    if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1526
0
        SCLogError("%s: failed to find interface type: %s", ifname, strerror(errno));
1527
0
        return -1;
1528
0
    }
1529
1530
0
    switch (ifr.ifr_hwaddr.sa_family) {
1531
0
        case ARPHRD_LOOPBACK:
1532
0
            return LINKTYPE_ETHERNET;
1533
0
        case ARPHRD_PPP:
1534
0
        case ARPHRD_NONE:
1535
0
            return LINKTYPE_RAW;
1536
0
        default:
1537
0
            return ifr.ifr_hwaddr.sa_family;
1538
0
    }
1539
0
}
1540
1541
int AFPGetLinkType(const char *ifname)
1542
0
{
1543
0
    int ltype;
1544
1545
0
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1546
0
    if (fd == -1) {
1547
0
        SCLogError("%s: failed to create AF_PACKET socket: %s", ifname, strerror(errno));
1548
0
        return LINKTYPE_RAW;
1549
0
    }
1550
1551
0
    ltype =  AFPGetDevLinktype(fd, ifname);
1552
0
    close(fd);
1553
1554
0
    DatalinkSetGlobalType(ltype);
1555
1556
0
    return ltype;
1557
0
}
1558
1559
/* Defined further down in this file; declared here so that
 * AFPComputeRingParams() can reuse it instead of duplicating its body. */
static int AFPComputeRingParamsWithBlockSize(AFPThreadVars *ptv, unsigned int block_size);

/**
 * \brief Compute the TPACKET_V2 ring parameters for a block size given as
 *        a page order
 *
 * The block size is getpagesize() << order. All remaining computations
 * (snaplen selection, frame size, frames per block, block and frame
 * counts, logging) are shared with AFPComputeRingParamsWithBlockSize().
 *
 * \param ptv AF_PACKET thread variables, ptv->snaplen and ptv->req.v2 are
 *            filled in
 * \param order the block size is the page size shifted left by this value
 *
 * \retval 1 on success
 * \retval -1 if the frame size is bigger than the block size
 */
static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
{
    return AFPComputeRingParamsWithBlockSize(ptv, (unsigned int)(getpagesize() << order));
}
1619
1620
/**
 * \brief Compute the TPACKET_V2 ring parameters for a given block size
 *
 * Steps:
 *  - choose snaplen: default_packet_size if set; otherwise
 *    DEFAULT_TPACKET_DEFRAG_SNAPLEN when the cluster type has
 *    PACKET_FANOUT_FLAG_DEFRAG, else GetIfaceMaxPacketSize() with a
 *    fallback of 1514
 *  - frame size:
 *    TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) +
 *    sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN)
 *  - frames per block: block size divided by frame size
 *  - block count: ring_size / frames per block + 1, and the frame count is
 *    then recomputed as block count * frames per block
 *
 * \param ptv AF_PACKET thread variables, ptv->snaplen and ptv->req.v2 are
 *            filled in
 * \param block_size block size to use
 *
 * \retval 1 on success
 * \retval -1 if the frame size is bigger than the block size
 */
static int AFPComputeRingParamsWithBlockSize(AFPThreadVars *ptv, unsigned int block_size)
{
    int tp_hdrlen = sizeof(struct tpacket_hdr);
    int snaplen = default_packet_size;

    if (snaplen == 0 && (ptv->cluster_type & PACKET_FANOUT_FLAG_DEFRAG)) {
        SCLogConfig("%s: defrag enabled, setting snaplen to %d", ptv->iface,
                DEFAULT_TPACKET_DEFRAG_SNAPLEN);
        snaplen = DEFAULT_TPACKET_DEFRAG_SNAPLEN;
    } else if (snaplen == 0) {
        snaplen = GetIfaceMaxPacketSize(ptv->livedev);
        if (snaplen <= 0) {
            SCLogWarning("%s: unable to get MTU, setting snaplen default of 1514", ptv->iface);
            snaplen = 1514;
        }
    }
    ptv->snaplen = snaplen;

    ptv->req.v2.tp_block_size = block_size;
    ptv->req.v2.tp_frame_size = TPACKET_ALIGN(
            snaplen +
            TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) -
            ETH_HLEN);

    int per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
    if (per_block == 0) {
        SCLogError("%s: Frame size bigger than block size", ptv->iface);
        return -1;
    }

    ptv->req.v2.tp_frame_nr = ptv->ring_size;
    ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / per_block + 1;
    /* exact division: make the frame count a multiple of the frames per block */
    ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * per_block;

    SCLogPerf("%s: rx ring: block_size=%d block_nr=%d frame_size=%d frame_nr=%d", ptv->iface,
            ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr, ptv->req.v2.tp_frame_size,
            ptv->req.v2.tp_frame_nr);
    return 1;
}
1683
1684
#ifdef HAVE_TPACKET_V3
1685
/**
 * \brief Compute the TPACKET_V3 ring parameters
 *
 * Steps:
 *  - block size is taken from ptv->block_size
 *  - snaplen is default_packet_size if set, otherwise
 *    GetIfaceMaxPacketSize() with a fallback of 1514
 *  - frame size:
 *    TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) +
 *    sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN)
 *  - block count: ring_size / frames per block + 1, frame count is block
 *    count * frames per block
 *  - block retire timeout is ptv->block_timeout and TP_FT_REQ_FILL_RXHASH
 *    is requested
 *
 * \param ptv AF_PACKET thread variables, ptv->snaplen and ptv->req.v3 are
 *            filled in
 *
 * \retval 1 on success
 * \retval -1 if the block size is smaller than the frame size
 */
static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
{
    int tp_hdrlen = sizeof(struct tpacket3_hdr);
    int snaplen = default_packet_size;

    ptv->req.v3.tp_block_size = ptv->block_size;

    if (snaplen == 0) {
        snaplen = GetIfaceMaxPacketSize(ptv->livedev);
        if (snaplen <= 0) {
            SCLogWarning("%s: unable to get MTU, setting snaplen default of 1514", ptv->iface);
            snaplen = 1514;
        }
    }
    ptv->snaplen = snaplen;

    ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
    int frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;

    if (frames_per_block == 0) {
        SCLogError("%s: block size is too small, it should be at least %d", ptv->iface,
                ptv->req.v3.tp_frame_size);
        return -1;
    }
    ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
    /* exact division */
    ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
    ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
    ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;

    /* total ring memory is computed in 64 bits: the product of two
     * unsigned int values can exceed 32 bits on large rings */
    const uint64_t ring_mem = (uint64_t)ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr;
    SCLogPerf("%s: rx ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %" PRIu64
              ")",
            ptv->iface, ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
            ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr, ring_mem);
    return 1;
}
1721
#endif
1722
1723
/**
 * \brief Request a TPACKET_V2 RX ring from the kernel.
 *
 * With a configured ptv->v2_block_size a single attempt is made with that
 * block size. Otherwise block orders are tried from
 * AFP_BLOCK_SIZE_DEFAULT_ORDER down to 0, moving to the next smaller order
 * each time the kernel answers ENOMEM.
 *
 * \param ptv     thread context owning the socket and the ring request
 * \param devname interface name, only used in log messages
 *
 * \retval 0 the kernel accepted the ring request
 * \retval AFP_FATAL_ERROR otherwise
 */
static int AFPSetupRingRequestV2(AFPThreadVars *ptv, char *devname)
{
    if (ptv->v2_block_size) {
        if (AFPComputeRingParamsWithBlockSize(ptv, ptv->v2_block_size) != 1) {
            SCLogError("%s: ring parameters are incorrect. Please file a bug report", devname);
            return AFP_FATAL_ERROR;
        }

        if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING, (void *)&ptv->req,
                    sizeof(ptv->req)) >= 0) {
            return 0;
        }

        if (errno == ENOMEM) {
            SCLogError("%s: memory issue with ring parameters", devname);
        } else {
            SCLogError("%s: failed to setup RX Ring: %s", devname, strerror(errno));
        }
        return AFP_FATAL_ERROR;
    }

    for (int order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
        if (AFPComputeRingParams(ptv, order) != 1) {
            SCLogError("%s: ring parameters are incorrect. Please file a bug report", devname);
            return AFP_FATAL_ERROR;
        }

        if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING, (void *)&ptv->req,
                    sizeof(ptv->req)) >= 0) {
            return 0;
        }

        if (errno != ENOMEM) {
            SCLogError("%s: failed to setup RX Ring: %s", devname, strerror(errno));
            return AFP_FATAL_ERROR;
        }
        /* not enough memory for this order: try again with smaller blocks */
        SCLogWarning("%s: memory issue with ring parameters. Retrying", devname);
    }

    SCLogError("%s: failed to setup RX Ring (order 0 failed)", devname);
    return AFP_FATAL_ERROR;
}

/**
 * \brief Configure the packet socket for ring based capture and map the ring.
 *
 * Selects the TPACKET version from ptv->flags, sets the timestamping and
 * head-room socket options, requests the RX ring, mmap()s it into
 * ptv->ring_buf and fills ptv->ring with one entry per block (TPACKET_V3) or
 * per frame (TPACKET_V2).
 *
 * \param ptv     thread context; ptv->socket must already be open
 * \param devname interface name, only used in log messages
 *
 * \retval 0 on success
 * \retval AFP_FATAL_ERROR on any failure
 */
static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
{
    unsigned int i;

    int tp_version = TPACKET_V2;
#ifdef HAVE_TPACKET_V3
    if (ptv->flags & AFP_TPACKET_V3) {
        tp_version = TPACKET_V3;
    }
#endif

    /* the option buffer carries the wanted version in and is overwritten by
     * the kernel, so a copy is used to keep tp_version intact */
    int hdrlen = tp_version;
    unsigned int hdrlen_size = sizeof(hdrlen);
    if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &hdrlen, &hdrlen_size) < 0) {
        if (errno == ENOPROTOOPT) {
            if (ptv->flags & AFP_TPACKET_V3) {
                SCLogError("%s: kernel too old for TPACKET_V3 (need 3.2+)", devname);
            } else {
                SCLogError("%s: kernel too old (need 2.6.27+)", devname);
            }
        }
        SCLogError("%s: failed to retrieve packet header len", devname);
        return AFP_FATAL_ERROR;
    }

    if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &tp_version, sizeof(tp_version)) <
            0) {
        SCLogError("%s: failed to activate TPACKET_V2/TPACKET_V3 on packet socket: %s", devname,
                strerror(errno));
        return AFP_FATAL_ERROR;
    }

#ifdef HAVE_HW_TIMESTAMPING
    if (ptv->flags & AFP_DISABLE_HWTIMESTAMP) {
        SCLogConfig("%s: hardware timestamping disabled", devname);
    } else {
        int hw_ts = SOF_TIMESTAMPING_RAW_HARDWARE;
        /* failure here is only reported, capture continues without it */
        if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *)&hw_ts,
                    sizeof(hw_ts)) < 0) {
            SCLogWarning("%s: failed to activate hardware timestamping on packet socket: %s",
                    devname, strerror(errno));
        }
    }
#endif

    /* Keep head room for one VLAN header: a single VLAN is extracted from
     * the AFP header, so one header length is sufficient. */
    int headroom = VLAN_HEADER_LEN;
    if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *)&headroom,
                sizeof(headroom)) < 0) {
        SCLogError("%s: failed to activate reserve on packet socket: %s", devname, strerror(errno));
        return AFP_FATAL_ERROR;
    }

    /* Ask the kernel for the RX ring */
#ifdef HAVE_TPACKET_V3
    if (ptv->flags & AFP_TPACKET_V3) {
        if (AFPComputeRingParamsV3(ptv) != 1) {
            return AFP_FATAL_ERROR;
        }
        if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING, (void *)&ptv->req.v3,
                    sizeof(ptv->req.v3)) < 0) {
            SCLogError("%s: failed to allocate RX Ring: %s", devname, strerror(errno));
            return AFP_FATAL_ERROR;
        }
    } else
#endif
    {
        if (AFPSetupRingRequestV2(ptv, devname) != 0) {
            return AFP_FATAL_ERROR;
        }
    }

    /* Map the ring into our address space */
#ifdef HAVE_TPACKET_V3
    if (ptv->flags & AFP_TPACKET_V3) {
        ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
    } else
#endif
    {
        ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
    }

    int map_flags = MAP_SHARED;
    if (ptv->flags & AFP_MMAP_LOCKED) {
        map_flags |= MAP_LOCKED;
    }
    ptv->ring_buf =
            mmap(0, ptv->ring_buflen, PROT_READ | PROT_WRITE, map_flags, ptv->socket, 0);
    if (ptv->ring_buf == MAP_FAILED) {
        SCLogError("%s: failed to mmap: %s", devname, strerror(errno));
        /* Packet mmap does the cleaning when socket is closed */
        return AFP_FATAL_ERROR;
    }

#ifdef HAVE_TPACKET_V3
    if (ptv->flags & AFP_TPACKET_V3) {
        /* one iovec per block */
        ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
        if (!ptv->ring.v3) {
            SCLogError("%s: failed to alloc ring: %s", devname, strerror(errno));
            goto postmmap_err;
        }
        for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
            ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
            ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
        }
    } else
#endif
    {
        /* one header pointer per frame */
        ptv->ring.v2 = SCCalloc(ptv->req.v2.tp_frame_nr, sizeof(union thdr *));
        if (ptv->ring.v2 == NULL) {
            SCLogError("%s: failed to alloc ring: %s", devname, strerror(errno));
            goto postmmap_err;
        }

        /* walk the blocks and record the start of every frame inside them;
         * ptv->frame_offset serves as running slot index while filling */
        ptv->frame_offset = 0;
        for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
            void *frame = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
            const unsigned int frames_per_block =
                    ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
            unsigned int j;
            for (j = 0; j < frames_per_block; ++j) {
                (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = frame;
                frame += ptv->req.v2.tp_frame_size;
                ++ptv->frame_offset;
            }
        }
        /* reading starts at the first frame */
        ptv->frame_offset = 0;
    }

    return 0;

postmmap_err:
    munmap(ptv->ring_buf, ptv->ring_buflen);
    if (ptv->ring.v2)
        SCFree(ptv->ring.v2);
    if (ptv->ring.v3)
        SCFree(ptv->ring.v3);
    return AFP_FATAL_ERROR;
}
1913
1914
/**
 * \brief Check at runtime whether PACKET_FANOUT can be used.
 *
 * Older kernels like those in CentOS6 have HAVE_PACKET_FANOUT defined but
 * fail to work, so a throw-away packet socket is opened and asked to join
 * the fanout group.
 *
 * \param cluster_id fanout group id used for the probe
 *
 * \retval 1 the kernel accepted the fanout request
 * \retval 0 fanout is not compiled in, the probe socket could not be
 *           created, or the request was rejected
 */
int AFPIsFanoutSupported(uint16_t cluster_id)
{
#ifndef HAVE_PACKET_FANOUT
    return 0;
#else
    int probe_fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (probe_fd < 0)
        return 0;

    /* fanout type and flags go in the upper 16 bits, the group id below */
    uint32_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    uint32_t option = (mode << 16) | cluster_id;
    int rc = setsockopt(probe_fd, SOL_PACKET, PACKET_FANOUT, (void *)&option, sizeof(option));
    close(probe_fd);

    if (rc >= 0)
        return 1;

    SCLogError("fanout not supported by kernel: "
               "Kernel too old or cluster-id %d already in use.",
            cluster_id);
    return 0;
#endif
}
1940
1941
#ifdef HAVE_PACKET_EBPF
1942
1943
/**
 * \brief Hand the file descriptor stored in ptv->ebpf_lb_fd to the socket
 *        through PACKET_FANOUT_DATA.
 *
 * \param ptv thread context providing the socket and the descriptor
 *
 * \retval 0 on success
 * \retval -1 if the descriptor is unset (-1) or the kernel refused it
 */
static int SockFanoutSeteBPF(AFPThreadVars *ptv)
{
    int lb_fd = ptv->ebpf_lb_fd;

    if (lb_fd == -1) {
        SCLogError("Fanout file descriptor is invalid");
        return -1;
    }

    if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &lb_fd, sizeof(lb_fd)) != 0) {
        SCLogError("Error setting ebpf");
        return -1;
    }

    SCLogInfo("Activated eBPF on socket");
    return 0;
}
1959
1960
/**
 * \brief Attach the eBPF program referenced by ptv->ebpf_filter_fd to the
 *        capture socket with SO_ATTACH_BPF.
 *
 * \param ptv thread context providing the socket and the descriptor
 *
 * \retval 0 on success
 * \retval -1 if the descriptor is unset (-1) or the attach failed
 */
static int SetEbpfFilter(AFPThreadVars *ptv)
{
    int filter_fd = ptv->ebpf_filter_fd;

    if (filter_fd == -1) {
        SCLogError("Filter file descriptor is invalid");
        return -1;
    }

    if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &filter_fd, sizeof(filter_fd)) !=
            0) {
        SCLogError("Error setting ebpf: %s", strerror(errno));
        return -1;
    }

    SCLogInfo("Activated eBPF filter on socket");
    return 0;
}
1976
#endif
1977
1978
/**
 * \brief Open, configure and bind the AF_PACKET socket of a capture thread.
 *
 * Steps, in order: create the socket, resolve the interface index, check
 * the interface flags, optionally enable promiscuous mode, kernel checksum
 * info and a custom receive buffer size, bind, join the fanout group, set up
 * the ring (AFPSetupRing()) and install the BPF filter. On success the
 * thread state is switched to AFP_STATE_UP.
 *
 * On failure the socket is closed, ptv->socket is reset to -1 and the ring
 * pointer table is released.
 *
 * \param ptv     thread context
 * \param devname interface name to bind to
 * \param verbose non-zero to log recoverable problems (interface missing or
 *                down, bind failure)
 *
 * \retval 0 on success
 * \retval -AFP_RECOVERABLE_ERROR the interface is currently not usable
 * \retval -AFP_FATAL_ERROR any other failure
 */
static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
{
    int r;
    int ret = AFP_FATAL_ERROR;
    struct packet_mreq sock_params;
    struct sockaddr_ll bind_address;
    int if_idx;

    /* open socket */
    ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (ptv->socket == -1) {
        SCLogError("%s: failed to create socket: %s", devname, strerror(errno));
        goto error;
    }

    if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);

    if (if_idx == -1) {
        goto socket_err;
    }

    /* bind socket */
    memset(&bind_address, 0, sizeof(bind_address));
    bind_address.sll_family = AF_PACKET;
    bind_address.sll_protocol = htons(ETH_P_ALL);
    bind_address.sll_ifindex = if_idx;
    /* NOTE(review): if_idx == -1 already left the function above, so this
     * branch looks unreachable; kept to preserve the existing flow */
    if (bind_address.sll_ifindex == -1) {
        if (verbose)
            SCLogWarning("%s: device not found", devname);
        ret = AFP_RECOVERABLE_ERROR;
        goto socket_err;
    }

    int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
    if (if_flags == -1) {
        if (verbose) {
            SCLogWarning("%s: failed to get interface flags", ptv->iface);
        }
        ret = AFP_RECOVERABLE_ERROR;
        goto socket_err;
    } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
        if (verbose) {
            SCLogWarning("%s: interface is down", ptv->iface);
        }
        ret = AFP_RECOVERABLE_ERROR;
        goto socket_err;
    }

    /* ignore outgoing packets on loopback interfaces */
    if (if_flags & IFF_LOOPBACK)
        ptv->pkttype_filter_mask |= BIT_U32(PACKET_OUTGOING);

    if (ptv->promisc != 0) {
        /* Force promiscuous mode */
        memset(&sock_params, 0, sizeof(sock_params));
        sock_params.mr_type = PACKET_MR_PROMISC;
        sock_params.mr_ifindex = bind_address.sll_ifindex;
        r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP, (void *)&sock_params,
                sizeof(sock_params));
        if (r < 0) {
            SCLogError("%s: failed to set promisc mode: %s", devname, strerror(errno));
            goto socket_err;
        }
    }

    if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
        int val = 1;
        /* not fatal: fall back to validating checksums ourselves */
        if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
                    sizeof(val)) == -1 && errno != ENOPROTOOPT) {
            SCLogWarning(
                    "%s: 'kernel' checksum mode not supported, falling back to full mode", devname);
            ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
        }
    }

    /* set socket recv buffer size */
    if (ptv->buffer_size != 0) {
        /*
         * Set the socket buffer size to the specified value.
         */
        SCLogPerf("%s: setting socket buffer to %d", devname, ptv->buffer_size);
        if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
                       &ptv->buffer_size,
                       sizeof(ptv->buffer_size)) == -1) {
            SCLogError("%s: failed to set buffer size to %d: %s", devname, ptv->buffer_size,
                    strerror(errno));
            goto socket_err;
        }
    }

    r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
    if (r < 0) {
        if (verbose) {
            if (errno == ENETDOWN) {
                SCLogWarning("%s: failed to bind socket, iface is down", devname);
            } else {
                SCLogWarning("%s: failed to bind socket: %s", devname, strerror(errno));
            }
        }
        ret = AFP_RECOVERABLE_ERROR;
        goto socket_err;
    }


#ifdef HAVE_PACKET_FANOUT
    /* add bound socket to fanout group */
    if (ptv->threads > 1) {
        uint32_t mode = ptv->cluster_type;
        uint16_t id = ptv->cluster_id;
        uint32_t option = (mode << 16) | (id & 0xffff);
        r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
        if (r < 0) {
            SCLogError("%s: failed to set fanout mode: %s", devname, strerror(errno));
            goto socket_err;
        }
    }
#endif

#ifdef HAVE_PACKET_EBPF
    if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
        r = SockFanoutSeteBPF(ptv);
        if (r < 0) {
            SCLogError("%s: failed to set eBPF: %s", devname, strerror(errno));
            goto socket_err;
        }
    }
#endif

    ret = AFPSetupRing(ptv, devname);
    if (ret != 0)
        goto socket_err;

    SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);

    ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);

    /* Anything other than TM_ECODE_OK is a failure: comparing against
     * TM_ECODE_FAILED only would let other non-OK results pass as success. */
    TmEcode rc = AFPSetBPFFilter(ptv);
    if (rc != TM_ECODE_OK) {
        /* AFPSetupRing() succeeded, so the ring is mapped; closing the
         * socket does not remove the mapping, release it explicitly */
        munmap(ptv->ring_buf, ptv->ring_buflen);
        ret = AFP_FATAL_ERROR;
        goto socket_err;
    }

    /* Init is ok */
    AFPSwitchState(ptv, AFP_STATE_UP);
    return 0;

socket_err:
    close(ptv->socket);
    ptv->socket = -1;
    if (ptv->flags & AFP_TPACKET_V3) {
        if (ptv->ring.v3) {
            SCFree(ptv->ring.v3);
            ptv->ring.v3 = NULL;
        }
    } else {
        if (ptv->ring.v2) {
            SCFree(ptv->ring.v2);
            ptv->ring.v2 = NULL;
        }
    }

error:
    /* callers get the error class as a negative value */
    return -ret;
}
2140
2141
/**
 * \brief Install the packet filter of a capture thread on its socket.
 *
 * When eBPF support is compiled in and ptv->ebpf_filter_fd is set, that
 * program is attached through SetEbpfFilter(). Otherwise, if a filter
 * expression is present in ptv->bpf_filter, it is compiled and attached with
 * SO_ATTACH_FILTER. Without any filter nothing is done.
 *
 * \param ptv thread context providing socket, datalink, snaplen and filter
 *
 * \retval TM_ECODE_OK filter installed, or no filter configured
 * \retval TM_ECODE_FAILED compilation or attachment failed
 */
TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
{
    struct bpf_program filter;
    struct sock_fprog  fcode;
    int rc;

#ifdef HAVE_PACKET_EBPF
    if (ptv->ebpf_filter_fd != -1) {
        /* SetEbpfFilter() reports 0 / -1; translate it instead of passing
         * the raw int off as a TmEcode */
        return (SetEbpfFilter(ptv) == 0) ? TM_ECODE_OK : TM_ECODE_FAILED;
    }
#endif

    if (!ptv->bpf_filter)
        return TM_ECODE_OK;

    SCLogInfo("%s: using BPF '%s'", ptv->iface, ptv->bpf_filter);

    char errbuf[PCAP_ERRBUF_SIZE];
    if (SCBPFCompile(ptv->snaplen, /* snaplen_arg */
                ptv->datalink,     /* linktype_arg */
                &filter,           /* program */
                ptv->bpf_filter,   /* const char *buf */
                1,                 /* optimize */
                0,                 /* mask */
                errbuf, sizeof(errbuf)) == -1) {
        SCLogError("%s: failed to compile BPF \"%s\": %s", ptv->iface, ptv->bpf_filter, errbuf);
        return TM_ECODE_FAILED;
    }

    /* fcode.len is an unsigned short: reject programs that do not fit, and
     * release the compiled program before bailing out */
    if (filter.bf_len > USHRT_MAX) {
        SCLogError("%s: BPF \"%s\" is too long (%u instructions)", ptv->iface, ptv->bpf_filter,
                (unsigned int)filter.bf_len);
        SCBPFFree(&filter);
        return TM_ECODE_FAILED;
    }
    fcode.len = (unsigned short)filter.bf_len;
    fcode.filter = (struct sock_filter*)filter.bf_insns;

    rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
    /* keep the setsockopt() error: freeing the program may overwrite errno */
    const int attach_errno = errno;

    SCBPFFree(&filter);
    if (rc == -1) {
        SCLogError("%s: failed to attach filter: %s", ptv->iface, strerror(attach_errno));
        return TM_ECODE_FAILED;
    }

    return TM_ECODE_OK;
}
2186
2187
#ifdef HAVE_PACKET_EBPF
2188
/**
2189
 * Insert a half flow in the kernel bypass table
2190
 *
2191
 * \param mapfd file descriptor of the protocol bypass table
2192
 * \param key data to use as key in the table
2193
 * \return 0 in case of error, 1 if success
2194
 */
2195
static int AFPInsertHalfFlow(int mapd, void *key, unsigned int nr_cpus)
2196
{
2197
    BPF_DECLARE_PERCPU(struct pair, value, nr_cpus);
2198
    unsigned int i;
2199
2200
    if (mapd == -1) {
2201
        return 0;
2202
    }
2203
2204
    /* We use a per CPU structure so we have to set an array of values as the kernel
2205
     * is not duplicating the data on each CPU by itself. */
2206
    for (i = 0; i < nr_cpus; i++) {
2207
        BPF_PERCPU(value, i).packets = 0;
2208
        BPF_PERCPU(value, i).bytes = 0;
2209
    }
2210
    if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2211
        switch (errno) {
2212
            /* no more place in the hash */
2213
            case E2BIG:
2214
                return 0;
2215
            /* no more place in the hash for some hardware bypass */
2216
            case EAGAIN:
2217
                return 0;
2218
            /* if we already have the key then bypass is a success */
2219
            case EEXIST:
2220
                return 1;
2221
            /* Not supposed to be there so issue a error */
2222
            default:
2223
                SCLogError("Can't update eBPF map: %s (%d)", strerror(errno), errno);
2224
                return 0;
2225
        }
2226
    }
2227
    return 1;
2228
}
2229
2230
/**
 * \brief Record the two bypass keys in the flow storage of the packet's flow.
 *
 * Takes ownership of both keys: they are either stored in a newly allocated
 * EBPFBypassData attached to the flow, or freed before returning.
 *
 * \param p      packet whose flow gets the bypass data
 * \param map_fd file descriptor of the table holding the keys
 * \param key0   key of the first half flow
 * \param key1   key of the second half flow
 * \param family address family passed to the live device counters
 *
 * \retval 1 bypass data stored, or the flow already had bypass data
 * \retval 0 no bypass storage on the flow or allocation failure; both keys
 *           are removed from the table in that case
 */
static int AFPSetFlowStorage(Packet *p, int map_fd, void *key0, void* key1,
                             int family)
{
    EBPFBypassData *eb = NULL;
    FlowBypassInfo *fc = FlowGetStorageById(p->flow, GetFlowBypassInfoID());

    if (!fc) {
        goto undo;
    }

    if (fc->bypass_data != NULL) {
        // bypass already activated
        SCFree(key0);
        SCFree(key1);
        return 1;
    }

    eb = SCCalloc(1, sizeof(EBPFBypassData));
    if (eb == NULL) {
        goto undo;
    }

    eb->key[0] = key0;
    eb->key[1] = key1;
    eb->mapfd = map_fd;
    eb->cpus_count = p->afp_v.nr_cpus;
    fc->BypassUpdate = EBPFBypassUpdate;
    fc->BypassFree = EBPFBypassFree;
    fc->bypass_data = eb;

    LiveDevAddBypassStats(p->livedev, 1, family);
    LiveDevAddBypassSuccess(p->livedev, 1, family);
    return 1;

undo:
    /* nothing will track these entries: take them out of the table again */
    EBPFDeleteKey(map_fd, key0);
    EBPFDeleteKey(map_fd, key1);
    LiveDevAddBypassFail(p->livedev, 1, family);
    SCFree(key0);
    SCFree(key1);
    return 0;
}
2270
2271
/**
2272
 * Bypass function for AF_PACKET capture in eBPF mode
2273
 *
2274
 * This function creates two half flows in the map shared with the kernel
2275
 * to trigger bypass.
2276
 *
2277
 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2278
 * This table contains the list of half flows to bypass. The in-kernel filter
2279
 * will skip/drop the packet if they belong to a flow in one of the flows
2280
 * table.
2281
 *
2282
 * \param p the packet belonging to the flow to bypass
2283
 * \return 0 if unable to bypass, 1 if success
2284
 */
2285
static int AFPBypassCallback(Packet *p)
2286
{
2287
    SCLogDebug("Calling af_packet callback function");
2288
    /* Only bypass TCP and UDP */
2289
    if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2290
        return 0;
2291
    }
2292
2293
    /* If we don't have a flow attached to packet the eBPF map entries
2294
     * will be destroyed at first flow bypass manager pass as we won't
2295
     * find any associated entry */
2296
    if (p->flow == NULL) {
2297
        return 0;
2298
    }
2299
    /* Bypassing tunneled packets is currently not supported
2300
     * because we can't discard the inner packet only due to
2301
     * primitive parsing in eBPF */
2302
    if (IS_TUNNEL_PKT(p)) {
2303
        return 0;
2304
    }
2305
    if (PKT_IS_IPV4(p)) {
2306
        SCLogDebug("add an IPv4");
2307
        if (p->afp_v.v4_map_fd == -1) {
2308
            return 0;
2309
        }
2310
        struct flowv4_keys *keys[2];
2311
        keys[0] = SCCalloc(1, sizeof(struct flowv4_keys));
2312
        if (keys[0] == NULL) {
2313
            return 0;
2314
        }
2315
        keys[0]->src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2316
        keys[0]->dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2317
        keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2318
        keys[0]->port16[1] = GET_TCP_DST_PORT(p);
2319
        keys[0]->vlan0 = p->vlan_id[0];
2320
        keys[0]->vlan1 = p->vlan_id[1];
2321
        keys[0]->vlan2 = p->vlan_id[2];
2322
2323
        if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2324
            keys[0]->ip_proto = 1;
2325
        } else {
2326
            keys[0]->ip_proto = 0;
2327
        }
2328
        if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
2329
                              p->afp_v.nr_cpus) == 0) {
2330
            LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2331
            SCFree(keys[0]);
2332
            return 0;
2333
        }
2334
        keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2335
        if (keys[1] == NULL) {
2336
            EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2337
            LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2338
            SCFree(keys[0]);
2339
            return 0;
2340
        }
2341
        keys[1]->src = htonl(GET_IPV4_DST_ADDR_U32(p));
2342
        keys[1]->dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2343
        keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2344
        keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
2345
        keys[1]->vlan0 = p->vlan_id[0];
2346
        keys[1]->vlan1 = p->vlan_id[1];
2347
        keys[1]->vlan2 = p->vlan_id[2];
2348
2349
        keys[1]->ip_proto = keys[0]->ip_proto;
2350
        if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
2351
                              p->afp_v.nr_cpus) == 0) {
2352
            EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2353
            LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2354
            SCFree(keys[0]);
2355
            SCFree(keys[1]);
2356
            return 0;
2357
        }
2358
        EBPFUpdateFlow(p->flow, p, NULL);
2359
        return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
2360
    }
2361
    /* For IPv6 case we don't handle extended header in eBPF */
2362
    if (PKT_IS_IPV6(p) &&
2363
        ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
2364
        int i;
2365
        if (p->afp_v.v6_map_fd == -1) {
2366
            return 0;
2367
        }
2368
        SCLogDebug("add an IPv6");
2369
        struct flowv6_keys *keys[2];
2370
        keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2371
        if (keys[0] == NULL) {
2372
            LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2373
            return 0;
2374
        }
2375
        for (i = 0; i < 4; i++) {
2376
            keys[0]->src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2377
            keys[0]->dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2378
        }
2379
        keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2380
        keys[0]->port16[1] = GET_TCP_DST_PORT(p);
2381
        keys[0]->vlan0 = p->vlan_id[0];
2382
        keys[0]->vlan1 = p->vlan_id[1];
2383
        keys[0]->vlan2 = p->vlan_id[2];
2384
2385
        if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2386
            keys[0]->ip_proto = 1;
2387
        } else {
2388
            keys[0]->ip_proto = 0;
2389
        }
2390
        if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
2391
                              p->afp_v.nr_cpus) == 0) {
2392
            LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2393
            SCFree(keys[0]);
2394
            return 0;
2395
        }
2396
        keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2397
        if (keys[1] == NULL) {
2398
            EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2399
            LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2400
            SCFree(keys[0]);
2401
            return 0;
2402
        }
2403
        for (i = 0; i < 4; i++) {
2404
            keys[1]->src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2405
            keys[1]->dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2406
        }
2407
        keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2408
        keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
2409
        keys[1]->vlan0 = p->vlan_id[0];
2410
        keys[1]->vlan1 = p->vlan_id[1];
2411
        keys[1]->vlan2 = p->vlan_id[2];
2412
2413
        keys[1]->ip_proto = keys[0]->ip_proto;
2414
        if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
2415
                              p->afp_v.nr_cpus) == 0) {
2416
            EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2417
            LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2418
            SCFree(keys[0]);
2419
            SCFree(keys[1]);
2420
            return 0;
2421
        }
2422
        if (p->flow)
2423
            EBPFUpdateFlow(p->flow, p, NULL);
2424
        return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
2425
    }
2426
    return 0;
2427
}
2428
2429
/**
2430
 * Bypass function for AF_PACKET capture in XDP mode
2431
 *
2432
 * This function creates two half flows in the map shared with the kernel
2433
 * to trigger bypass. This function is similar to AFPBypassCallback() but
2434
 * the bytes order is changed for some data due to the way we get the data
2435
 * in the XDP case.
2436
 *
2437
 * \param p the packet belonging to the flow to bypass
2438
 * \return 0 if unable to bypass, 1 if success
2439
 */
2440
static int AFPXDPBypassCallback(Packet *p)
2441
{
2442
    SCLogDebug("Calling af_packet callback function");
2443
    /* Only bypass TCP and UDP */
2444
    if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2445
        return 0;
2446
    }
2447
2448
    /* If we don't have a flow attached to packet the eBPF map entries
2449
     * will be destroyed at first flow bypass manager pass as we won't
2450
     * find any associated entry */
2451
    if (p->flow == NULL) {
2452
        return 0;
2453
    }
2454
    /* Bypassing tunneled packets is currently not supported
2455
     * because we can't discard the inner packet only due to
2456
     * primitive parsing in eBPF */
2457
    if (IS_TUNNEL_PKT(p)) {
2458
        return 0;
2459
    }
2460
    if (PKT_IS_IPV4(p)) {
2461
        struct flowv4_keys *keys[2];
2462
        keys[0]= SCCalloc(1, sizeof(struct flowv4_keys));
2463
        if (keys[0] == NULL) {
2464
            LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2465
            return 0;
2466
        }
2467
        if (p->afp_v.v4_map_fd == -1) {
2468
            SCFree(keys[0]);
2469
            return 0;
2470
        }
2471
        keys[0]->src = p->src.addr_data32[0];
2472
        keys[0]->dst = p->dst.addr_data32[0];
2473
        /* In the XDP filter we get port from parsing of packet and not from skb
2474
         * (as in eBPF filter) so we need to pass from host to network order */
2475
        keys[0]->port16[0] = htons(p->sp);
2476
        keys[0]->port16[1] = htons(p->dp);
2477
        keys[0]->vlan0 = p->vlan_id[0];
2478
        keys[0]->vlan1 = p->vlan_id[1];
2479
        keys[0]->vlan2 = p->vlan_id[2];
2480
        if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2481
            keys[0]->ip_proto = 1;
2482
        } else {
2483
            keys[0]->ip_proto = 0;
2484
        }
2485
        if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
2486
                              p->afp_v.nr_cpus) == 0) {
2487
            LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2488
            SCFree(keys[0]);
2489
            return 0;
2490
        }
2491
        keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2492
        if (keys[1] == NULL) {
2493
            EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2494
            LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2495
            SCFree(keys[0]);
2496
            return 0;
2497
        }
2498
        keys[1]->src = p->dst.addr_data32[0];
2499
        keys[1]->dst = p->src.addr_data32[0];
2500
        keys[1]->port16[0] = htons(p->dp);
2501
        keys[1]->port16[1] = htons(p->sp);
2502
        keys[1]->vlan0 = p->vlan_id[0];
2503
        keys[1]->vlan1 = p->vlan_id[1];
2504
        keys[1]->vlan2 = p->vlan_id[2];
2505
        keys[1]->ip_proto = keys[0]->ip_proto;
2506
        if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
2507
                              p->afp_v.nr_cpus) == 0) {
2508
            EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2509
            LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2510
            SCFree(keys[0]);
2511
            SCFree(keys[1]);
2512
            return 0;
2513
        }
2514
        return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
2515
    }
2516
    /* For IPv6 case we don't handle extended header in eBPF */
2517
    if (PKT_IS_IPV6(p) &&
2518
        ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
2519
        SCLogDebug("add an IPv6");
2520
        if (p->afp_v.v6_map_fd == -1) {
2521
            return 0;
2522
        }
2523
        int i;
2524
        struct flowv6_keys *keys[2];
2525
        keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2526
        if (keys[0] == NULL) {
2527
            return 0;
2528
        }
2529
2530
        for (i = 0; i < 4; i++) {
2531
            keys[0]->src[i] = GET_IPV6_SRC_ADDR(p)[i];
2532
            keys[0]->dst[i] = GET_IPV6_DST_ADDR(p)[i];
2533
        }
2534
        keys[0]->port16[0] = htons(GET_TCP_SRC_PORT(p));
2535
        keys[0]->port16[1] = htons(GET_TCP_DST_PORT(p));
2536
        keys[0]->vlan0 = p->vlan_id[0];
2537
        keys[0]->vlan1 = p->vlan_id[1];
2538
        keys[0]->vlan2 = p->vlan_id[2];
2539
        if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2540
            keys[0]->ip_proto = 1;
2541
        } else {
2542
            keys[0]->ip_proto = 0;
2543
        }
2544
        if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
2545
                              p->afp_v.nr_cpus) == 0) {
2546
            LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2547
            SCFree(keys[0]);
2548
            return 0;
2549
        }
2550
        keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2551
        if (keys[1] == NULL) {
2552
            EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2553
            LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2554
            SCFree(keys[0]);
2555
            return 0;
2556
        }
2557
        for (i = 0; i < 4; i++) {
2558
            keys[1]->src[i] = GET_IPV6_DST_ADDR(p)[i];
2559
            keys[1]->dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2560
        }
2561
        keys[1]->port16[0] = htons(GET_TCP_DST_PORT(p));
2562
        keys[1]->port16[1] = htons(GET_TCP_SRC_PORT(p));
2563
        keys[1]->vlan0 = p->vlan_id[0];
2564
        keys[1]->vlan1 = p->vlan_id[1];
2565
        keys[1]->vlan2 = p->vlan_id[2];
2566
        keys[1]->ip_proto = keys[0]->ip_proto;
2567
        if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
2568
                              p->afp_v.nr_cpus) == 0) {
2569
            EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2570
            LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2571
            SCFree(keys[0]);
2572
            SCFree(keys[1]);
2573
            return 0;
2574
        }
2575
        return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
2576
    }
2577
    return 0;
2578
}
2579
2580
/* NOTE(review): global flag, initially true; it is not referenced in this
 * part of the file. Presumably tracks whether the IPv4 bypass flow table is
 * usable -- confirm against its users. */
bool g_flowv4_ok = true;
2581
/* NOTE(review): global flag, initially true; it is not referenced in this
 * part of the file. Presumably tracks whether the IPv6 bypass flow table is
 * usable -- confirm against its users. */
bool g_flowv6_ok = true;
2582
2583
#endif /* HAVE_PACKET_EBPF */
2584
2585
/**
2586
 * \brief Init function for ReceiveAFP.
2587
 *
2588
 * \param tv pointer to ThreadVars
2589
 * \param initdata pointer to the interface passed from the user
2590
 * \param data pointer gets populated with AFPThreadVars
2591
 *
2592
 * \todo Create a general AFP setup function.
2593
 */
2594
TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2595
0
{
2596
0
    SCEnter();
2597
0
    AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
2598
2599
0
    if (initdata == NULL) {
2600
0
        SCLogError("initdata == NULL");
2601
0
        SCReturnInt(TM_ECODE_FAILED);
2602
0
    }
2603
2604
0
    AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
2605
0
    if (unlikely(ptv == NULL)) {
2606
0
        afpconfig->DerefFunc(afpconfig);
2607
0
        SCReturnInt(TM_ECODE_FAILED);
2608
0
    }
2609
0
    memset(ptv, 0, sizeof(AFPThreadVars));
2610
2611
0
    ptv->tv = tv;
2612
2613
0
    strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
2614
0
    ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2615
2616
0
    ptv->livedev = LiveGetDevice(ptv->iface);
2617
0
    if (ptv->livedev == NULL) {
2618
0
        SCLogError("Unable to find Live device");
2619
0
        SCFree(ptv);
2620
0
        SCReturnInt(TM_ECODE_FAILED);
2621
0
    }
2622
2623
0
    ptv->buffer_size = afpconfig->buffer_size;
2624
0
    ptv->ring_size = afpconfig->ring_size;
2625
0
    ptv->v2_block_size = afpconfig->v2_block_size;
2626
0
    ptv->block_size = afpconfig->block_size;
2627
0
    ptv->block_timeout = afpconfig->block_timeout;
2628
2629
0
    ptv->promisc = afpconfig->promisc;
2630
0
    ptv->checksum_mode = afpconfig->checksum_mode;
2631
0
    ptv->bpf_filter = NULL;
2632
2633
0
    ptv->threads = 1;
2634
0
#ifdef HAVE_PACKET_FANOUT
2635
0
    ptv->cluster_type = PACKET_FANOUT_LB;
2636
0
    ptv->cluster_id = 1;
2637
    /* We only set cluster info if the number of reader threads is greater than 1 */
2638
0
    if (afpconfig->threads > 1) {
2639
0
        ptv->cluster_id = afpconfig->cluster_id;
2640
0
        ptv->cluster_type = afpconfig->cluster_type;
2641
0
        ptv->threads = afpconfig->threads;
2642
0
    }
2643
0
#endif
2644
0
    ptv->flags = afpconfig->flags;
2645
2646
0
    if (afpconfig->bpf_filter) {
2647
0
        ptv->bpf_filter = afpconfig->bpf_filter;
2648
0
    }
2649
#ifdef HAVE_PACKET_EBPF
2650
    ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2651
    ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
2652
    ptv->xdp_mode = afpconfig->xdp_mode;
2653
    ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
2654
2655
    if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
2656
        ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
2657
        if (ptv->v4_map_fd == -1) {
2658
            if (g_flowv4_ok == false) {
2659
                SCLogError("Can't find eBPF map fd for '%s'", "flow_table_v4");
2660
                g_flowv4_ok = true;
2661
            }
2662
        }
2663
        ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
2664
        if (ptv->v6_map_fd  == -1) {
2665
            if (g_flowv6_ok) {
2666
                SCLogError("Can't find eBPF map fd for '%s'", "flow_table_v6");
2667
                g_flowv6_ok = false;
2668
            }
2669
        }
2670
    }
2671
    ptv->ebpf_t_config = afpconfig->ebpf_t_config;
2672
#endif
2673
2674
0
#ifdef PACKET_STATISTICS
2675
0
    ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2676
0
            ptv->tv);
2677
0
    ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2678
0
            ptv->tv);
2679
0
    ptv->capture_errors = StatsRegisterCounter("capture.errors",
2680
0
            ptv->tv);
2681
2682
0
    ptv->afpacket_spin = StatsRegisterAvgCounter("capture.afpacket.busy_loop_avg", ptv->tv);
2683
2684
0
    ptv->capture_afp_poll = StatsRegisterCounter("capture.afpacket.polls", ptv->tv);
2685
0
    ptv->capture_afp_poll_signal = StatsRegisterCounter("capture.afpacket.poll_signal", ptv->tv);
2686
0
    ptv->capture_afp_poll_timeout = StatsRegisterCounter("capture.afpacket.poll_timeout", ptv->tv);
2687
0
    ptv->capture_afp_poll_data = StatsRegisterCounter("capture.afpacket.poll_data", ptv->tv);
2688
0
    ptv->capture_afp_poll_err = StatsRegisterCounter("capture.afpacket.poll_errors", ptv->tv);
2689
0
    ptv->capture_afp_send_err = StatsRegisterCounter("capture.afpacket.send_errors", ptv->tv);
2690
0
#endif
2691
2692
0
    ptv->copy_mode = afpconfig->copy_mode;
2693
0
    if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2694
0
        strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2695
0
        ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2696
        /* Warn about BPF filter consequence */
2697
0
        if (ptv->bpf_filter) {
2698
0
            SCLogWarning("Enabling a BPF filter in IPS mode result"
2699
0
                         " in dropping all non matching packets.");
2700
0
        }
2701
0
    }
2702
2703
2704
0
    if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2705
0
        SCFree(ptv);
2706
0
        afpconfig->DerefFunc(afpconfig);
2707
0
        SCReturnInt(TM_ECODE_FAILED);
2708
0
    }
2709
2710
0
    *data = (void *)ptv;
2711
2712
0
    afpconfig->DerefFunc(afpconfig);
2713
2714
    /* If kernel is older than 3.0, VLAN is not stripped so we don't
2715
     * get the info from packet extended header but we will use a standard
2716
     * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2717
0
    if (SCKernelVersionIsAtLeast(3, 0)) {
2718
0
        ptv->flags |= AFP_VLAN_IN_HEADER;
2719
0
    }
2720
2721
0
    SCReturnInt(TM_ECODE_OK);
2722
0
}
2723
2724
/**
2725
 * \brief This function prints stats to the screen at exit.
2726
 * \param tv pointer to ThreadVars
2727
 * \param data pointer that gets cast into AFPThreadVars for ptv
2728
 */
2729
void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2730
0
{
2731
0
    SCEnter();
2732
0
    AFPThreadVars *ptv = (AFPThreadVars *)data;
2733
2734
0
#ifdef PACKET_STATISTICS
2735
0
    AFPDumpCounters(ptv);
2736
0
    SCLogPerf("%s: (%s) kernel: Packets %" PRIu64 ", dropped %" PRIu64 "", ptv->iface, tv->name,
2737
0
            StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2738
0
            StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
2739
0
#endif
2740
0
}
2741
2742
/**
2743
 * \brief DeInit function closes af packet socket at exit.
2744
 * \param tv pointer to ThreadVars
2745
 * \param data pointer that gets cast into AFPThreadVars for ptv
2746
 */
2747
TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2748
0
{
2749
0
    AFPThreadVars *ptv = (AFPThreadVars *)data;
2750
2751
0
    AFPSwitchState(ptv, AFP_STATE_DOWN);
2752
2753
#ifdef HAVE_PACKET_XDP
2754
    if ((ptv->ebpf_t_config.flags & EBPF_XDP_CODE) &&
2755
        (!(ptv->ebpf_t_config.flags & EBPF_PINNED_MAPS))) {
2756
        EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2757
    }
2758
#endif
2759
2760
0
    ptv->bpf_filter = NULL;
2761
0
    if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2762
0
        SCFree(ptv->ring.v3);
2763
0
    } else {
2764
0
        if (ptv->ring.v2)
2765
0
            SCFree(ptv->ring.v2);
2766
0
    }
2767
2768
0
    SCFree(ptv);
2769
0
    SCReturnInt(TM_ECODE_OK);
2770
0
}
2771
2772
/** \internal
2773
 *  \brief add a VLAN header into the raw data for inspection, logging
2774
 *         and sending out in IPS mode
2775
 *
2776
 *  The kernel doesn't provide the first VLAN header the raw packet data,
2777
 *  but instead feeds it to us through meta data. For logging and IPS
2778
 *  we need to put it back into the raw data. Luckily there is some head
2779
 *  room in the original data so its enough to move the ethernet header
2780
 *  a bit to make space for the VLAN header.
2781
 */
2782
static void UpdateRawDataForVLANHdr(Packet *p)
2783
0
{
2784
0
    if (p->afp_v.vlan_tci != 0) {
2785
0
        uint8_t *pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
2786
0
        size_t plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
2787
        /* move ethernet addresses */
2788
0
        memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
2789
        /* write vlan info */
2790
0
        *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
2791
0
        *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(p->afp_v.vlan_tci);
2792
2793
        /* update the packet raw data pointer to start at the new offset */
2794
0
        (void)PacketSetData(p, pstart, plen);
2795
        /* update ethernet header pointer to point to the new start of the data */
2796
0
        p->ethh = (void *)pstart;
2797
0
    }
2798
0
}
2799
2800
/**
2801
 * \brief This function passes off to link type decoders.
2802
 *
2803
 * DecodeAFP decodes packets from AF_PACKET and passes
2804
 * them off to the proper link type decoder.
2805
 *
2806
 * \param t pointer to ThreadVars
2807
 * \param p pointer to the current packet
2808
 * \param data pointer that gets cast into AFPThreadVars for ptv
2809
 */
2810
TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data)
2811
0
{
2812
0
    SCEnter();
2813
2814
0
    const bool afp_vlan_hdr = p->vlan_idx != 0;
2815
0
    DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2816
2817
0
    DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
2818
2819
    /* update counters */
2820
0
    DecodeUpdatePacketCounters(tv, dtv, p);
2821
2822
    /* call the decoder */
2823
0
    DecodeLinkLayer(tv, dtv, p->datalink, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
2824
    /* post-decoding put vlan hdr back into the raw data) */
2825
0
    if (afp_vlan_hdr) {
2826
0
        StatsIncr(tv, dtv->counter_vlan);
2827
0
        UpdateRawDataForVLANHdr(p);
2828
0
    }
2829
2830
0
    PacketDecodeFinalize(tv, dtv, p);
2831
2832
0
    SCReturnInt(TM_ECODE_OK);
2833
0
}
2834
2835
/**
 * \brief Init function for the AF_PACKET decode thread module.
 *
 * Allocates the DecodeThreadVars for this thread, registers the decoder
 * performance counters and returns the vars through \p data.
 *
 * \param tv pointer to ThreadVars
 * \param initdata not used by this function
 * \param data pointer gets populated with the DecodeThreadVars
 *
 * \retval TM_ECODE_OK on success
 * \retval TM_ECODE_FAILED if the DecodeThreadVars could not be allocated
 */
TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
{
    SCEnter();

    DecodeThreadVars *decode_tv = DecodeThreadVarsAlloc(tv);
    if (decode_tv == NULL) {
        SCReturnInt(TM_ECODE_FAILED);
    }

    DecodeRegisterPerfCounters(decode_tv, tv);
    *data = (void *)decode_tv;

    SCReturnInt(TM_ECODE_OK);
}
2848
2849
/**
 * \brief DeInit function for the AF_PACKET decode thread module.
 *
 * \param tv pointer to ThreadVars
 * \param data DecodeThreadVars to free; may be NULL, in which case nothing
 *             is freed
 *
 * \retval TM_ECODE_OK always
 */
TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
{
    if (data != NULL) {
        DecodeThreadVarsFree(tv, data);
    }

    SCReturnInt(TM_ECODE_OK);
}
2855
2856
#endif /* HAVE_AF_PACKET */
2857
/* eof */
2858
/**
2859
 * @}
2860
 */