/src/suricata7/src/source-netmap.c
Line | Count | Source |
1 | | /* Copyright (C) 2011-2022 Open Information Security Foundation |
2 | | * |
3 | | * You can copy, redistribute or modify this Program under the terms of |
4 | | * the GNU General Public License version 2 as published by the Free |
5 | | * Software Foundation. |
6 | | * |
7 | | * This program is distributed in the hope that it will be useful, |
8 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | | * GNU General Public License for more details. |
11 | | * |
12 | | * You should have received a copy of the GNU General Public License |
13 | | * version 2 along with this program; if not, write to the Free Software |
14 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
15 | | * 02110-1301, USA. |
16 | | */ |
17 | | |
18 | | /** |
19 | | * \defgroup netmap Netmap running mode |
20 | | * |
21 | | * @{ |
22 | | */ |
23 | | |
24 | | /** |
25 | | * \file |
26 | | * |
27 | | * \author Aleksey Katargin <gureedo@gmail.com> |
28 | | * \author Victor Julien <victor@inliniac.net> |
29 | | * \author Bill Meeks <billmeeks8@gmail.com> |
30 | | * |
31 | | * Netmap socket acquisition support |
32 | | * |
33 | | * Many thanks to Luigi Rizzo for guidance and support. |
34 | | * |
35 | | */ |
36 | | |
37 | | #include "suricata.h" |
38 | | #include "suricata-common.h" |
39 | | #include "tm-threads.h" |
40 | | #include "packet.h" |
41 | | #include "util-bpf.h" |
42 | | #include "util-privs.h" |
43 | | #include "util-validate.h" |
44 | | #include "util-datalink.h" |
45 | | |
46 | | #include "source-netmap.h" |
47 | | |
48 | | #ifdef HAVE_NETMAP |
49 | | |
50 | | #define NETMAP_WITH_LIBS |
51 | | #ifdef DEBUG |
52 | | #define DEBUG_NETMAP_USER |
53 | | #endif |
54 | | |
55 | | #include <net/netmap_user.h> |
56 | | #include <libnetmap.h> |
57 | | |
58 | | #endif /* HAVE_NETMAP */ |
59 | | |
60 | | #include "util-ioctl.h" |
61 | | |
62 | | #ifndef HAVE_NETMAP |
63 | | |
64 | | /** |
65 | | * \brief this function prints an error message and exits. |
66 | | */ |
67 | | static TmEcode NoNetmapSupportExit(ThreadVars *tv, const void *initdata, void **data) |
68 | 0 | { |
69 | 0 | FatalError("Error creating thread %s: Netmap is not enabled. " |
70 | 0 | "Make sure to pass --enable-netmap to configure when building.", |
71 | 0 | tv->name); |
72 | 0 | } |
73 | | |
74 | | void TmModuleReceiveNetmapRegister (void) |
75 | 71 | { |
76 | 71 | tmm_modules[TMM_RECEIVENETMAP].name = "ReceiveNetmap"; |
77 | 71 | tmm_modules[TMM_RECEIVENETMAP].ThreadInit = NoNetmapSupportExit; |
78 | 71 | tmm_modules[TMM_RECEIVENETMAP].flags = TM_FLAG_RECEIVE_TM; |
79 | 71 | } |
80 | | |
81 | | /** |
82 | | * \brief Registration Function for DecodeNetmap. |
83 | | */ |
84 | | void TmModuleDecodeNetmapRegister (void) |
85 | 71 | { |
86 | 71 | tmm_modules[TMM_DECODENETMAP].name = "DecodeNetmap"; |
87 | 71 | tmm_modules[TMM_DECODENETMAP].ThreadInit = NoNetmapSupportExit; |
88 | 71 | tmm_modules[TMM_DECODENETMAP].flags = TM_FLAG_DECODE_TM; |
89 | 71 | } |
90 | | |
91 | | #else /* We have NETMAP support */ |
92 | | |
93 | | #include "action-globals.h" |
94 | | |
95 | | #define POLL_TIMEOUT 100 |
96 | | |
97 | | #if defined(__linux__) |
98 | | #define POLL_EVENTS (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL) |
99 | | |
100 | | #ifndef IFF_PPROMISC |
101 | | #define IFF_PPROMISC IFF_PROMISC |
102 | | #endif |
103 | | |
104 | | #else |
105 | | #define POLL_EVENTS (POLLHUP|POLLERR|POLLNVAL) |
106 | | #endif |
107 | | |
/** Bit flags kept in NetmapThreadVars::flags. */
enum {
    /** set for the "workers" runmode: packets reference ring memory directly */
    NETMAP_FLAG_ZERO_COPY = 1 << 0,
    /** set for the "autofp" runmode: writes to the destination device are serialized */
    NETMAP_FLAG_EXCL_RING_ACCESS = 1 << 1,
};
109 | | |
110 | | /** |
111 | | * \brief Netmap device instance. Each ring for each device gets its own |
112 | | * device. |
113 | | */ |
114 | | typedef struct NetmapDevice_ |
115 | | { |
116 | | struct nmport_d *nmd; |
117 | | unsigned int ref; |
118 | | SC_ATOMIC_DECLARE(unsigned int, threads_run); |
119 | | TAILQ_ENTRY(NetmapDevice_) next; |
120 | | // actual ifname can only be 16, but we store a bit more, |
121 | | // like the options string and a 'netmap:' prefix. |
122 | | char ifname[32]; |
123 | | int ring; |
124 | | int direction; // 0 rx, 1 tx |
125 | | |
126 | | // autofp: Used to lock a destination ring while we are sending data. |
127 | | SCMutex netmap_dev_lock; |
128 | | } NetmapDevice; |
129 | | |
130 | | /** |
131 | | * \brief Module thread local variables. |
132 | | */ |
133 | | typedef struct NetmapThreadVars_ |
134 | | { |
135 | | /* receive interface */ |
136 | | NetmapDevice *ifsrc; |
137 | | /* dst interface for IPS mode */ |
138 | | NetmapDevice *ifdst; |
139 | | |
140 | | int flags; |
141 | | struct bpf_program bpf_prog; |
142 | | |
143 | | /* suricata internals */ |
144 | | TmSlot *slot; |
145 | | ThreadVars *tv; |
146 | | LiveDevice *livedev; |
147 | | |
148 | | /* copy from config */ |
149 | | int copy_mode; |
150 | | ChecksumValidationMode checksum_mode; |
151 | | |
152 | | /* counters */ |
153 | | uint64_t pkts; |
154 | | uint64_t bytes; |
155 | | uint64_t drops; |
156 | | uint16_t capture_kernel_packets; |
157 | | uint16_t capture_kernel_drops; |
158 | | } NetmapThreadVars; |
159 | | |
160 | | typedef TAILQ_HEAD(NetmapDeviceList_, NetmapDevice_) NetmapDeviceList; |
161 | | |
162 | | static NetmapDeviceList netmap_devlist = TAILQ_HEAD_INITIALIZER(netmap_devlist); |
163 | | static SCMutex netmap_devlist_lock = SCMUTEX_INITIALIZER; |
164 | | |
165 | | /** \brief get RSS RX-queue count |
166 | | * \retval rx_rings RSS RX queue count or 0 on error |
167 | | */ |
168 | | int NetmapGetRSSCount(const char *ifname) |
169 | | { |
170 | | struct nmreq_port_info_get req; |
171 | | struct nmreq_header hdr; |
172 | | int rx_rings = 0; |
173 | | |
174 | | /* we need the base interface name to query queues */ |
175 | | char base_name[IFNAMSIZ]; |
176 | | strlcpy(base_name, ifname, sizeof(base_name)); |
177 | | if (strlen(base_name) > 0 && |
178 | | (base_name[strlen(base_name) - 1] == '^' || base_name[strlen(base_name) - 1] == '*')) { |
179 | | base_name[strlen(base_name) - 1] = '\0'; |
180 | | } |
181 | | |
182 | | SCMutexLock(&netmap_devlist_lock); |
183 | | |
184 | | /* open netmap device */ |
185 | | int fd = open("/dev/netmap", O_RDWR); |
186 | | if (fd == -1) { |
187 | | SCLogError("%s: open netmap device failed: %s", ifname, strerror(errno)); |
188 | | goto error_open; |
189 | | } |
190 | | |
191 | | /* query netmap interface info for ring count */ |
192 | | memset(&req, 0, sizeof(req)); |
193 | | memset(&hdr, 0, sizeof(hdr)); |
194 | | hdr.nr_version = NETMAP_API; |
195 | | hdr.nr_reqtype = NETMAP_REQ_PORT_INFO_GET; |
196 | | hdr.nr_body = (uintptr_t)&req; |
197 | | strlcpy(hdr.nr_name, base_name, sizeof(hdr.nr_name)); |
198 | | |
199 | | if (ioctl(fd, NIOCCTRL, &hdr) != 0) { |
200 | | SCLogError( |
201 | | "Query of netmap HW rings count on %s failed; error: %s", ifname, strerror(errno)); |
202 | | goto error_fd; |
203 | | }; |
204 | | |
205 | | /* return RX rings count if it equals TX rings count */ |
206 | | if (req.nr_rx_rings == req.nr_tx_rings) { |
207 | | rx_rings = req.nr_rx_rings; |
208 | | } |
209 | | |
210 | | error_fd: |
211 | | close(fd); |
212 | | error_open: |
213 | | SCMutexUnlock(&netmap_devlist_lock); |
214 | | return rx_rings; |
215 | | } |
216 | | |
217 | | static void NetmapDestroyDevice(NetmapDevice *pdev) |
218 | | { |
219 | | nmport_close(pdev->nmd); |
220 | | SCMutexDestroy(&pdev->netmap_dev_lock); |
221 | | SCFree(pdev); |
222 | | } |
223 | | |
224 | | /** |
225 | | * \brief Close or dereference netmap device instance. |
226 | | * \param dev Netmap device instance. |
227 | | * \return Zero on success. |
228 | | */ |
229 | | static int NetmapClose(NetmapDevice *dev) |
230 | | { |
231 | | NetmapDevice *pdev, *tmp; |
232 | | |
233 | | SCMutexLock(&netmap_devlist_lock); |
234 | | |
235 | | TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) { |
236 | | if (pdev == dev) { |
237 | | pdev->ref--; |
238 | | if (!pdev->ref) { |
239 | | NetmapDestroyDevice(pdev); |
240 | | } |
241 | | SCMutexUnlock(&netmap_devlist_lock); |
242 | | return 0; |
243 | | } |
244 | | } |
245 | | |
246 | | SCMutexUnlock(&netmap_devlist_lock); |
247 | | return -1; |
248 | | } |
249 | | |
250 | | /** |
251 | | * \brief Close all open netmap device instances. |
252 | | */ |
253 | | static void NetmapCloseAll(void) |
254 | | { |
255 | | NetmapDevice *pdev, *tmp; |
256 | | |
257 | | SCMutexLock(&netmap_devlist_lock); |
258 | | |
259 | | TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) { |
260 | | NetmapDestroyDevice(pdev); |
261 | | } |
262 | | |
263 | | SCMutexUnlock(&netmap_devlist_lock); |
264 | | } |
265 | | |
266 | | /** |
267 | | * \brief Open interface in netmap mode. |
268 | | * \param ifname Interface name. |
269 | | * \param promisc Enable promiscuous mode. |
270 | | * \param dev Pointer to requested netmap device instance. |
271 | | * \param verbose Verbose error logging. |
272 | | * \param read Indicates direction: RX or TX |
273 | | * \param zerocopy 1 if zerocopy access requested |
274 | | * \param soft Use Host stack (software) interface |
275 | | * \return Zero on success. |
276 | | */ |
277 | | static int NetmapOpen(NetmapIfaceSettings *ns, NetmapDevice **pdevice, int verbose, int read, |
278 | | bool zerocopy, bool soft) |
279 | | { |
280 | | SCEnter(); |
281 | | SCLogDebug("ifname %s", ns->iface); |
282 | | |
283 | | char base_name[IFNAMSIZ]; |
284 | | strlcpy(base_name, ns->iface, sizeof(base_name)); |
285 | | if (strlen(base_name) > 0 && |
286 | | (base_name[strlen(base_name)-1] == '^' || |
287 | | base_name[strlen(base_name)-1] == '*')) |
288 | | { |
289 | | base_name[strlen(base_name)-1] = '\0'; |
290 | | } |
291 | | |
292 | | if (ns->real) { |
293 | | /* check interface is up */ |
294 | | int if_flags = GetIfaceFlags(base_name); |
295 | | if (if_flags == -1) { |
296 | | if (verbose) { |
297 | | SCLogError("%s: cannot access network interface: %s", base_name, ns->iface); |
298 | | } |
299 | | goto error; |
300 | | } |
301 | | |
302 | | /* bring iface up if it is down */ |
303 | | if ((if_flags & IFF_UP) == 0) { |
304 | | SCLogError("%s: interface is down", base_name); |
305 | | goto error; |
306 | | } |
307 | | /* if needed, try to set iface in promisc mode */ |
308 | | if (ns->promisc && (if_flags & (IFF_PROMISC|IFF_PPROMISC)) == 0) { |
309 | | if_flags |= IFF_PPROMISC; |
310 | | SetIfaceFlags(base_name, if_flags); // TODO reset at exit |
311 | | // TODO move to parse config? |
312 | | } |
313 | | } |
314 | | NetmapDevice *pdev = NULL, *spdev = NULL; |
315 | | pdev = SCCalloc(1, sizeof(*pdev)); |
316 | | if (unlikely(pdev == NULL)) { |
317 | | SCLogError("%s: memory allocation failed", base_name); |
318 | | goto error; |
319 | | } |
320 | | SC_ATOMIC_INIT(pdev->threads_run); |
321 | | |
322 | | SCMutexLock(&netmap_devlist_lock); |
323 | | |
324 | | const int direction = (read != 1); |
325 | | int ring = 0; |
326 | | /* Search for interface in our already opened list. */ |
327 | | /* We will find it when opening multiple rings on */ |
328 | | /* the device when it exposes multiple RSS queues. */ |
329 | | TAILQ_FOREACH(spdev, &netmap_devlist, next) { |
330 | | SCLogDebug("spdev %s", spdev->ifname); |
331 | | if (direction == spdev->direction && strcmp(ns->iface, spdev->ifname) == 0) { |
332 | | ring = spdev->ring + 1; |
333 | | } |
334 | | } |
335 | | SCLogDebug("netmap/%s: using ring %d", ns->iface, ring); |
336 | | |
337 | | const char *opt_R = "R"; |
338 | | const char *opt_T = "T"; |
339 | | const char *opt_x = "x"; // not for IPS |
340 | | const char *opt_z = "z"; // zero copy, not for IPS |
341 | | |
342 | | /* assemble options string */ |
343 | | char optstr[16]; |
344 | | if (ns->ips) |
345 | | opt_x = ""; |
346 | | // z seems to not play well with multiple opens of a real dev on linux |
347 | | opt_z = ""; |
348 | | |
349 | | /* |
350 | | * How netmap endpoint names are selected: |
351 | | * |
352 | | * The following logic within the "retry" loop builds endpoint names. |
353 | | * |
354 | | * IPS Mode: |
355 | | * There are two endpoints: one hardware NIC and either a hardware NIC or host stack "NIC". |
356 | | * |
357 | | * IDS Mode: |
358 | | * One endpoint -- usually a hardware NIC. |
359 | | * |
360 | | * IPS mode -- with one endpoint a host stack "NIC": |
361 | | * When using multiple rings/threads, then the open of the initial Ring 0 MUST |
362 | | * instruct netmap to open multiple Host Stack rings (as the default is to open only a single |
363 | | * pair). This is also critical for the HW NIC endpoint. This is done by adding |
364 | | * “@conf:host-rings=x” suffix option (where “x” is the number of host rings desired) |
365 | | * to BOTH endpoint nmport_open_desc() calls for ring 0 (hardware and host stack). |
366 | | * For subsequent additional ring open calls, omit the suffix option specifying host ring count. |
367 | | * |
368 | | * IPS mode -- both endpoints are hardware NICs: |
369 | | * Do NOT pass any suffix option (even for Ring 0). You do not need to tell netmap how many |
370 | | * rings, because it already knows the correct value from the NIC driver itself. Specifying a |
371 | | * desired ring count when both ends are Hardware NICs confuses netmap, and it seems to default |
372 | | * to using only a single hardware ring. In this scenario, specify only the specific ring number |
373 | | * being opened. |
374 | | */ |
375 | | |
376 | | // loop to retry opening if unsupported options are used |
377 | | retry: |
378 | | snprintf(optstr, sizeof(optstr), "%s%s%s", opt_z, opt_x, direction == 0 ? opt_R : opt_T); |
379 | | |
380 | | char devname[128]; |
381 | | if (strncmp(ns->iface, "netmap:", 7) == 0) { |
382 | | snprintf(devname, sizeof(devname), "%s}%d%s%s", |
383 | | ns->iface, ring, strlen(optstr) ? "/" : "", optstr); |
384 | | } else if (strlen(ns->iface) > 5 && strncmp(ns->iface, "vale", 4) == 0 && isdigit(ns->iface[4])) { |
385 | | snprintf(devname, sizeof(devname), "%s", ns->iface); |
386 | | } else if (ring == 0 && ns->threads == 1) { |
387 | | /* just a single thread and ring, so don't use ring param */ |
388 | | snprintf(devname, sizeof(devname), "netmap:%s%s%s", |
389 | | ns->iface, strlen(optstr) ? "/" : "", optstr); |
390 | | SCLogDebug("device with %s-ring enabled (devname): %s", soft ? "SW" : "HW", devname); |
391 | | } else { |
392 | | /* Going to be using multiple threads and rings */ |
393 | | if (ns->sw_ring) { |
394 | | /* Opening a host stack interface */ |
395 | | if (ring == 0) { |
396 | | /* Ring 0, so tell netmap how many host rings we want created */ |
397 | | snprintf(devname, sizeof(devname), "netmap:%s%d%s%s@conf:host-rings=%d", ns->iface, |
398 | | ring, strlen(optstr) ? "/" : "", optstr, ns->threads); |
399 | | } else { |
400 | | /* Software (host) ring, but not initial open of ring 0 */ |
401 | | snprintf(devname, sizeof(devname), "netmap:%s%d%s%s", ns->iface, ring, |
402 | | strlen(optstr) ? "/" : "", optstr); |
403 | | } |
404 | | SCLogDebug("device with SW-ring enabled (devname): %s", devname); |
405 | | } else if (ring == 0 && soft) { |
406 | | /* Ring 0 of HW endpoint, and other endpoint is SW stack, |
407 | | * so request SW host stack rings to match HW rings count. |
408 | | */ |
409 | | snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s@conf:host-rings=%d", ns->iface, |
410 | | ring, strlen(optstr) ? "/" : "", optstr, ns->threads); |
411 | | SCLogDebug("device with HW-ring enabled (devname): %s", devname); |
412 | | } else { |
413 | | /* Hardware ring other than ring 0, or both endpoints are HW |
414 | | * and there is no host stack (SW) endpoint */ |
415 | | snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s", ns->iface, ring, |
416 | | strlen(optstr) ? "/" : "", optstr); |
417 | | SCLogDebug("device with HW-ring enabled (devname): %s", devname); |
418 | | } |
419 | | } |
420 | | |
421 | | strlcpy(pdev->ifname, ns->iface, sizeof(pdev->ifname)); |
422 | | |
423 | | /* have the netmap API parse device name and prepare the port descriptor for us */ |
424 | | pdev->nmd = nmport_prepare(devname); |
425 | | |
426 | | if (pdev->nmd != NULL) { |
427 | | /* For RX devices, set the nr_mode flag we need on the netmap port TX rings prior to opening |
428 | | */ |
429 | | if (read) { |
430 | | pdev->nmd->reg.nr_flags |= NR_NO_TX_POLL; |
431 | | } |
432 | | |
433 | | /* Now attempt to actually open the netmap port descriptor */ |
434 | | if (nmport_open_desc(pdev->nmd) < 0) { |
435 | | /* the open failed, so clean-up the descriptor and fall through to error handler */ |
436 | | nmport_close(pdev->nmd); |
437 | | pdev->nmd = NULL; |
438 | | } |
439 | | } |
440 | | |
441 | | if (pdev->nmd == NULL) { |
442 | | if (errno == EINVAL) { |
443 | | if (opt_z[0] == 'z') { |
444 | | SCLogNotice( |
445 | | "%s: dev '%s' got EINVAL: going to retry without 'z'", base_name, devname); |
446 | | opt_z = ""; |
447 | | goto retry; |
448 | | } else if (opt_x[0] == 'x') { |
449 | | SCLogNotice( |
450 | | "%s: dev '%s' got EINVAL: going to retry without 'x'", base_name, devname); |
451 | | opt_x = ""; |
452 | | goto retry; |
453 | | } |
454 | | } |
455 | | |
456 | | SCMutexUnlock(&netmap_devlist_lock); |
457 | | NetmapCloseAll(); |
458 | | FatalError("opening devname %s failed: %s", devname, strerror(errno)); |
459 | | } |
460 | | |
461 | | /* Work around bug in libnetmap library where "cur_{r,t}x_ring" values not initialized */ |
462 | | SCLogDebug("%s -- cur rings: [%d, %d] first rings: [%d, %d]", devname, pdev->nmd->cur_rx_ring, |
463 | | pdev->nmd->cur_tx_ring, pdev->nmd->first_rx_ring, pdev->nmd->first_tx_ring); |
464 | | pdev->nmd->cur_rx_ring = pdev->nmd->first_rx_ring; |
465 | | pdev->nmd->cur_tx_ring = pdev->nmd->first_tx_ring; |
466 | | |
467 | | SCLogInfo("%s: %s opened [fd: %d]", devname, ns->iface, pdev->nmd->fd); |
468 | | |
469 | | pdev->direction = direction; |
470 | | pdev->ring = ring; |
471 | | SCMutexInit(&pdev->netmap_dev_lock, NULL); |
472 | | TAILQ_INSERT_TAIL(&netmap_devlist, pdev, next); |
473 | | |
474 | | SCMutexUnlock(&netmap_devlist_lock); |
475 | | *pdevice = pdev; |
476 | | |
477 | | return 0; |
478 | | error: |
479 | | return -1; |
480 | | } |
481 | | |
482 | | /** |
483 | | * \brief PcapDumpCounters |
484 | | * \param ntv |
485 | | */ |
486 | | static inline void NetmapDumpCounters(NetmapThreadVars *ntv) |
487 | | { |
488 | | StatsAddUI64(ntv->tv, ntv->capture_kernel_packets, ntv->pkts); |
489 | | StatsAddUI64(ntv->tv, ntv->capture_kernel_drops, ntv->drops); |
490 | | (void) SC_ATOMIC_ADD(ntv->livedev->drop, ntv->drops); |
491 | | (void) SC_ATOMIC_ADD(ntv->livedev->pkts, ntv->pkts); |
492 | | ntv->drops = 0; |
493 | | ntv->pkts = 0; |
494 | | } |
495 | | |
496 | | /** |
497 | | * \brief Init function for ReceiveNetmap. |
498 | | * \param tv pointer to ThreadVars |
499 | | * \param initdata pointer to the interface passed from the user |
500 | | * \param data pointer gets populated with NetmapThreadVars |
501 | | */ |
502 | | static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, void **data) |
503 | | { |
504 | | SCEnter(); |
505 | | |
506 | | NetmapIfaceConfig *aconf = (NetmapIfaceConfig *)initdata; |
507 | | if (initdata == NULL) { |
508 | | SCLogError("initdata == NULL"); |
509 | | SCReturnInt(TM_ECODE_FAILED); |
510 | | } |
511 | | |
512 | | NetmapThreadVars *ntv = SCCalloc(1, sizeof(*ntv)); |
513 | | if (unlikely(ntv == NULL)) { |
514 | | SCLogError("Memory allocation failed"); |
515 | | goto error; |
516 | | } |
517 | | |
518 | | ntv->livedev = LiveGetDevice(aconf->iface_name); |
519 | | if (ntv->livedev == NULL) { |
520 | | SCLogError("Unable to find Live device"); |
521 | | goto error_ntv; |
522 | | } |
523 | | |
524 | | ntv->tv = tv; |
525 | | ntv->checksum_mode = aconf->in.checksum_mode; |
526 | | ntv->copy_mode = aconf->in.copy_mode; |
527 | | |
528 | | /* enable zero-copy mode for workers runmode */ |
529 | | char const *active_runmode = RunmodeGetActive(); |
530 | | if (strcmp("workers", active_runmode) == 0) { |
531 | | ntv->flags |= NETMAP_FLAG_ZERO_COPY; |
532 | | SCLogDebug("Enabling zero copy mode for %s", aconf->in.iface); |
533 | | } else if (strcmp("autofp", active_runmode) == 0) { |
534 | | ntv->flags |= NETMAP_FLAG_EXCL_RING_ACCESS; |
535 | | } |
536 | | |
537 | | /* Need to insure open of ring 0 conveys requested ring count for open */ |
538 | | bool soft = aconf->in.sw_ring || aconf->out.sw_ring; |
539 | | if (NetmapOpen(&aconf->in, &ntv->ifsrc, 1, 1, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0, |
540 | | soft) != 0) { |
541 | | goto error_ntv; |
542 | | } |
543 | | |
544 | | if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) { |
545 | | if (NetmapOpen(&aconf->out, &ntv->ifdst, 1, 0, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0, |
546 | | soft) != 0) { |
547 | | goto error_src; |
548 | | } |
549 | | } |
550 | | |
551 | | /* basic counters */ |
552 | | ntv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets", |
553 | | ntv->tv); |
554 | | ntv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops", |
555 | | ntv->tv); |
556 | | |
557 | | if (aconf->in.bpf_filter) { |
558 | | SCLogConfig("%s: using BPF '%s'", ntv->ifsrc->ifname, aconf->in.bpf_filter); |
559 | | char errbuf[PCAP_ERRBUF_SIZE]; |
560 | | if (SCBPFCompile(default_packet_size, /* snaplen_arg */ |
561 | | LINKTYPE_ETHERNET, /* linktype_arg */ |
562 | | &ntv->bpf_prog, /* program */ |
563 | | aconf->in.bpf_filter, /* const char *buf */ |
564 | | 1, /* optimize */ |
565 | | PCAP_NETMASK_UNKNOWN, /* mask */ |
566 | | errbuf, |
567 | | sizeof(errbuf)) == -1) |
568 | | { |
569 | | SCLogError("%s: failed to compile BPF \"%s\": %s", ntv->ifsrc->ifname, |
570 | | aconf->in.bpf_filter, errbuf); |
571 | | goto error_dst; |
572 | | } |
573 | | } |
574 | | |
575 | | SCLogDebug("thread: %s polling on fd: %d", tv->name, ntv->ifsrc->nmd->fd); |
576 | | |
577 | | DatalinkSetGlobalType(LINKTYPE_ETHERNET); |
578 | | |
579 | | *data = (void *)ntv; |
580 | | aconf->DerefFunc(aconf); |
581 | | SCReturnInt(TM_ECODE_OK); |
582 | | |
583 | | error_dst: |
584 | | if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) { |
585 | | NetmapClose(ntv->ifdst); |
586 | | } |
587 | | |
588 | | error_src: |
589 | | NetmapClose(ntv->ifsrc); |
590 | | |
591 | | error_ntv: |
592 | | SCFree(ntv); |
593 | | |
594 | | error: |
595 | | aconf->DerefFunc(aconf); |
596 | | SCReturnInt(TM_ECODE_FAILED); |
597 | | } |
598 | | |
599 | | /** |
600 | | * \brief Output packet to destination interface or drop. |
601 | | * \param ntv Thread local variables. |
602 | | * \param p Source packet. |
603 | | */ |
604 | | static TmEcode NetmapWritePacket(NetmapThreadVars *ntv, Packet *p) |
605 | | { |
606 | | if (ntv->copy_mode == NETMAP_COPY_MODE_IPS) { |
607 | | if (PacketCheckAction(p, ACTION_DROP)) { |
608 | | return TM_ECODE_OK; |
609 | | } |
610 | | } |
611 | | DEBUG_VALIDATE_BUG_ON(ntv->ifdst == NULL); |
612 | | |
613 | | /* Lock the destination netmap ring while writing to it */ |
614 | | if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { |
615 | | SCMutexLock(&ntv->ifdst->netmap_dev_lock); |
616 | | } |
617 | | |
618 | | int write_tries = 0; |
619 | | try_write: |
620 | | /* attempt to write the packet into the netmap ring buffer(s) */ |
621 | | if (nmport_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) == 0) { |
622 | | |
623 | | /* writing the packet failed, but ask kernel to sync TX rings |
624 | | * for us as the ring buffers may simply be full */ |
625 | | (void)ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0); |
626 | | |
627 | | /* Try write up to 2 more times before giving up */ |
628 | | if (write_tries < 3) { |
629 | | write_tries++; |
630 | | goto try_write; |
631 | | } |
632 | | |
633 | | if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { |
634 | | SCMutexUnlock(&ntv->ifdst->netmap_dev_lock); |
635 | | } |
636 | | SCLogDebug("failed to send %s -> %s", ntv->ifsrc->ifname, ntv->ifdst->ifname); |
637 | | ntv->drops++; |
638 | | return TM_ECODE_FAILED; |
639 | | } |
640 | | |
641 | | SCLogDebug("sent successfully: %s(%d)->%s(%d) (%u)", ntv->ifsrc->ifname, ntv->ifsrc->ring, |
642 | | ntv->ifdst->ifname, ntv->ifdst->ring, GET_PKT_LEN(p)); |
643 | | |
644 | | /* Instruct netmap to push the data on the TX ring on the destination port */ |
645 | | (void)ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0); |
646 | | if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { |
647 | | SCMutexUnlock(&ntv->ifdst->netmap_dev_lock); |
648 | | } |
649 | | return TM_ECODE_OK; |
650 | | } |
651 | | |
652 | | /** |
653 | | * \brief Packet release routine. |
654 | | * \param p Packet. |
655 | | */ |
656 | | static void NetmapReleasePacket(Packet *p) |
657 | | { |
658 | | NetmapThreadVars *ntv = (NetmapThreadVars *)p->netmap_v.ntv; |
659 | | |
660 | | if ((ntv->copy_mode != NETMAP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) { |
661 | | NetmapWritePacket(ntv, p); |
662 | | } |
663 | | |
664 | | PacketFreeOrRelease(p); |
665 | | } |
666 | | |
667 | | static void NetmapProcessPacket(NetmapThreadVars *ntv, const struct nm_pkthdr *ph) |
668 | | { |
669 | | if (ntv->bpf_prog.bf_len) { |
670 | | struct pcap_pkthdr pkthdr = { {0, 0}, ph->len, ph->len }; |
671 | | if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, ph->buf) == 0) { |
672 | | return; |
673 | | } |
674 | | } |
675 | | |
676 | | Packet *p = PacketPoolGetPacket(); |
677 | | if (unlikely(p == NULL)) { |
678 | | return; |
679 | | } |
680 | | |
681 | | PKT_SET_SRC(p, PKT_SRC_WIRE); |
682 | | p->livedev = ntv->livedev; |
683 | | p->datalink = LINKTYPE_ETHERNET; |
684 | | p->ts = SCTIME_FROM_TIMEVAL(&ph->ts); |
685 | | ntv->pkts++; |
686 | | ntv->bytes += ph->len; |
687 | | |
688 | | if (ntv->flags & NETMAP_FLAG_ZERO_COPY) { |
689 | | if (PacketSetData(p, (uint8_t *)ph->buf, ph->len) == -1) { |
690 | | TmqhOutputPacketpool(ntv->tv, p); |
691 | | return; |
692 | | } |
693 | | } else { |
694 | | if (PacketCopyData(p, (uint8_t *)ph->buf, ph->len) == -1) { |
695 | | TmqhOutputPacketpool(ntv->tv, p); |
696 | | return; |
697 | | } |
698 | | } |
699 | | |
700 | | p->ReleasePacket = NetmapReleasePacket; |
701 | | p->netmap_v.ntv = ntv; |
702 | | |
703 | | SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)", |
704 | | GET_PKT_LEN(p), p, GET_PKT_DATA(p)); |
705 | | |
706 | | (void)TmThreadsSlotProcessPkt(ntv->tv, ntv->slot, p); |
707 | | } |
708 | | |
709 | | /** |
710 | | * \brief Copy netmap rings data into Packet structures. |
711 | | * \param *d nmport_d (or nm_desc) netmap if structure. |
712 | | * \param cnt int count of packets to read (-1 = all). |
713 | | * \param *ntv NetmapThreadVars. |
714 | | */ |
715 | | static TmEcode NetmapReadPackets(struct nmport_d *d, int cnt, NetmapThreadVars *ntv) |
716 | | { |
717 | | struct nm_pkthdr hdr; |
718 | | int last_ring = d->last_rx_ring - d->first_rx_ring + 1; |
719 | | int cur_ring, got = 0, cur_rx_ring = d->cur_rx_ring; |
720 | | |
721 | | memset(&hdr, 0, sizeof(hdr)); |
722 | | hdr.flags = NM_MORE_PKTS; |
723 | | |
724 | | if (cnt == 0) |
725 | | cnt = -1; |
726 | | |
727 | | for (cur_ring = 0; cur_ring < last_ring && cnt != got; cur_ring++, cur_rx_ring++) { |
728 | | struct netmap_ring *ring; |
729 | | |
730 | | if (cur_rx_ring > d->last_rx_ring) |
731 | | cur_rx_ring = d->first_rx_ring; |
732 | | |
733 | | ring = NETMAP_RXRING(d->nifp, cur_rx_ring); |
734 | | |
735 | | /* cycle through the non-empty ring slots to fetch their data */ |
736 | | for (; !nm_ring_empty(ring) && cnt != got; got++) { |
737 | | u_int idx, i; |
738 | | u_char *oldbuf; |
739 | | struct netmap_slot *slot; |
740 | | |
741 | | if (hdr.buf) { /* from previous round */ |
742 | | NetmapProcessPacket(ntv, &hdr); |
743 | | } |
744 | | |
745 | | i = ring->cur; |
746 | | slot = &ring->slot[i]; |
747 | | idx = slot->buf_idx; |
748 | | d->cur_rx_ring = cur_rx_ring; |
749 | | hdr.slot = slot; |
750 | | oldbuf = hdr.buf = (u_char *)NETMAP_BUF(ring, idx); |
751 | | hdr.len = hdr.caplen = slot->len; |
752 | | |
753 | | /* loop through the ring slots to get packet data */ |
754 | | while (slot->flags & NS_MOREFRAG) { |
755 | | /* packet can be fragmented across multiple slots, */ |
756 | | /* so loop until we find the slot with the flag */ |
757 | | /* cleared, signalling the end of the packet data. */ |
758 | | u_char *nbuf; |
759 | | u_int oldlen = slot->len; |
760 | | i = nm_ring_next(ring, i); |
761 | | slot = &ring->slot[i]; |
762 | | hdr.len += slot->len; |
763 | | nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); |
764 | | |
765 | | if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size && |
766 | | oldlen == ring->nr_buf_size) { |
767 | | hdr.caplen += slot->len; |
768 | | oldbuf = nbuf; |
769 | | } else { |
770 | | oldbuf = NULL; |
771 | | } |
772 | | } |
773 | | |
774 | | hdr.ts = ring->ts; |
775 | | ring->head = ring->cur = nm_ring_next(ring, i); |
776 | | } |
777 | | } |
778 | | |
779 | | if (hdr.buf) { /* from previous round */ |
780 | | hdr.flags = 0; |
781 | | NetmapProcessPacket(ntv, &hdr); |
782 | | } |
783 | | return got; |
784 | | } |
785 | | |
786 | | /** |
787 | | * \brief Main netmap reading loop function |
788 | | */ |
789 | | static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot) |
790 | | { |
791 | | SCEnter(); |
792 | | |
793 | | TmSlot *s = (TmSlot *)slot; |
794 | | NetmapThreadVars *ntv = (NetmapThreadVars *)data; |
795 | | struct pollfd fds; |
796 | | |
797 | | ntv->slot = s->slot_next; |
798 | | fds.fd = ntv->ifsrc->nmd->fd; |
799 | | fds.events = POLLIN; |
800 | | |
801 | | SCLogDebug("thread %s polling on %d", tv->name, fds.fd); |
802 | | |
803 | | // Indicate that the thread is actually running its application level code (i.e., it can poll |
804 | | // packets) |
805 | | TmThreadsSetFlag(tv, THV_RUNNING); |
806 | | |
807 | | for(;;) { |
808 | | if (unlikely(suricata_ctl_flags != 0)) { |
809 | | break; |
810 | | } |
811 | | |
812 | | /* make sure we have at least one packet in the packet pool, |
813 | | * to prevent us from alloc'ing packets at line rate */ |
814 | | PacketPoolWait(); |
815 | | |
816 | | int r = poll(&fds, 1, POLL_TIMEOUT); |
817 | | if (r < 0) { |
818 | | /* error */ |
819 | | if (errno != EINTR) |
820 | | SCLogError("%s: error polling netmap: %s", ntv->ifsrc->ifname, strerror(errno)); |
821 | | continue; |
822 | | |
823 | | } else if (r == 0) { |
824 | | /* no events, timeout */ |
825 | | /* sync counters */ |
826 | | NetmapDumpCounters(ntv); |
827 | | StatsSyncCountersIfSignalled(tv); |
828 | | |
829 | | /* poll timed out, lets handle the timeout */ |
830 | | TmThreadsCaptureHandleTimeout(tv, NULL); |
831 | | continue; |
832 | | } |
833 | | |
834 | | if (unlikely(fds.revents & POLL_EVENTS)) { |
835 | | if (fds.revents & POLLERR) { |
836 | | SCLogError("%s: error reading netmap data via polling: %s", ntv->ifsrc->ifname, |
837 | | strerror(errno)); |
838 | | } else if (fds.revents & POLLNVAL) { |
839 | | SCLogError("%s: invalid polling request", ntv->ifsrc->ifname); |
840 | | } |
841 | | continue; |
842 | | } |
843 | | |
844 | | if (likely(fds.revents & POLLIN)) { |
845 | | /* have data on RX ring, so copy to Packet for processing */ |
846 | | NetmapReadPackets(ntv->ifsrc->nmd, -1, ntv); |
847 | | } |
848 | | |
849 | | NetmapDumpCounters(ntv); |
850 | | StatsSyncCountersIfSignalled(tv); |
851 | | } |
852 | | |
853 | | NetmapDumpCounters(ntv); |
854 | | StatsSyncCountersIfSignalled(tv); |
855 | | SCReturnInt(TM_ECODE_OK); |
856 | | } |
857 | | |
858 | | /** |
859 | | * \brief This function prints stats to the screen at exit. |
860 | | * \param tv pointer to ThreadVars |
861 | | * \param data pointer that gets cast into NetmapThreadVars for ntv |
862 | | */ |
863 | | static void ReceiveNetmapThreadExitStats(ThreadVars *tv, void *data) |
864 | | { |
865 | | SCEnter(); |
866 | | NetmapThreadVars *ntv = (NetmapThreadVars *)data; |
867 | | |
868 | | NetmapDumpCounters(ntv); |
869 | | SCLogPerf("%s: (%s) packets %" PRIu64 ", dropped %" PRIu64 ", bytes %" PRIu64 "", |
870 | | ntv->ifsrc->ifname, tv->name, |
871 | | StatsGetLocalCounterValue(tv, ntv->capture_kernel_packets), |
872 | | StatsGetLocalCounterValue(tv, ntv->capture_kernel_drops), ntv->bytes); |
873 | | } |
874 | | |
875 | | /** |
876 | | * \brief |
877 | | * \param tv |
878 | | * \param data Pointer to NetmapThreadVars. |
879 | | */ |
880 | | static TmEcode ReceiveNetmapThreadDeinit(ThreadVars *tv, void *data) |
881 | | { |
882 | | SCEnter(); |
883 | | |
884 | | NetmapThreadVars *ntv = (NetmapThreadVars *)data; |
885 | | |
886 | | if (ntv->ifsrc) { |
887 | | NetmapClose(ntv->ifsrc); |
888 | | ntv->ifsrc = NULL; |
889 | | } |
890 | | if (ntv->ifdst) { |
891 | | NetmapClose(ntv->ifdst); |
892 | | ntv->ifdst = NULL; |
893 | | } |
894 | | if (ntv->bpf_prog.bf_insns) { |
895 | | SCBPFFree(&ntv->bpf_prog); |
896 | | } |
897 | | |
898 | | SCFree(ntv); |
899 | | |
900 | | SCReturnInt(TM_ECODE_OK); |
901 | | } |
902 | | |
903 | | /** |
904 | | * \brief Prepare netmap decode thread. |
905 | | * \param tv Thread local variables. |
906 | | * \param initdata Thread config. |
907 | | * \param data Pointer to DecodeThreadVars placed here. |
908 | | */ |
909 | | static TmEcode DecodeNetmapThreadInit(ThreadVars *tv, const void *initdata, void **data) |
910 | | { |
911 | | SCEnter(); |
912 | | |
913 | | DecodeThreadVars *dtv = DecodeThreadVarsAlloc(tv); |
914 | | if (dtv == NULL) |
915 | | SCReturnInt(TM_ECODE_FAILED); |
916 | | |
917 | | DecodeRegisterPerfCounters(dtv, tv); |
918 | | |
919 | | *data = (void *)dtv; |
920 | | |
921 | | SCReturnInt(TM_ECODE_OK); |
922 | | } |
923 | | |
924 | | /** |
925 | | * \brief This function passes off to link type decoders. |
926 | | * |
927 | | * \param t pointer to ThreadVars |
928 | | * \param p pointer to the current packet |
929 | | * \param data pointer that gets cast into NetmapThreadVars for ntv |
930 | | */ |
931 | | static TmEcode DecodeNetmap(ThreadVars *tv, Packet *p, void *data) |
932 | | { |
933 | | SCEnter(); |
934 | | |
935 | | DecodeThreadVars *dtv = (DecodeThreadVars *)data; |
936 | | |
937 | | BUG_ON(PKT_IS_PSEUDOPKT(p)); |
938 | | |
939 | | /* update counters */ |
940 | | DecodeUpdatePacketCounters(tv, dtv, p); |
941 | | |
942 | | DecodeEthernet(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p)); |
943 | | |
944 | | PacketDecodeFinalize(tv, dtv, p); |
945 | | |
946 | | SCReturnInt(TM_ECODE_OK); |
947 | | } |
948 | | |
949 | | /** |
950 | | * \brief |
951 | | * \param tv |
952 | | * \param data Pointer to DecodeThreadVars. |
953 | | */ |
954 | | static TmEcode DecodeNetmapThreadDeinit(ThreadVars *tv, void *data) |
955 | | { |
956 | | SCEnter(); |
957 | | |
958 | | if (data != NULL) |
959 | | DecodeThreadVarsFree(tv, data); |
960 | | |
961 | | SCReturnInt(TM_ECODE_OK); |
962 | | } |
963 | | |
964 | | /** |
965 | | * \brief Registration Function for ReceiveNetmap. |
966 | | */ |
967 | | void TmModuleReceiveNetmapRegister(void) |
968 | | { |
969 | | tmm_modules[TMM_RECEIVENETMAP].name = "ReceiveNetmap"; |
970 | | tmm_modules[TMM_RECEIVENETMAP].ThreadInit = ReceiveNetmapThreadInit; |
971 | | tmm_modules[TMM_RECEIVENETMAP].PktAcqLoop = ReceiveNetmapLoop; |
972 | | tmm_modules[TMM_RECEIVENETMAP].ThreadExitPrintStats = ReceiveNetmapThreadExitStats; |
973 | | tmm_modules[TMM_RECEIVENETMAP].ThreadDeinit = ReceiveNetmapThreadDeinit; |
974 | | tmm_modules[TMM_RECEIVENETMAP].cap_flags = SC_CAP_NET_RAW; |
975 | | tmm_modules[TMM_RECEIVENETMAP].flags = TM_FLAG_RECEIVE_TM; |
976 | | } |
977 | | |
978 | | /** |
979 | | * \brief Registration Function for DecodeNetmap. |
980 | | */ |
981 | | void TmModuleDecodeNetmapRegister(void) |
982 | | { |
983 | | tmm_modules[TMM_DECODENETMAP].name = "DecodeNetmap"; |
984 | | tmm_modules[TMM_DECODENETMAP].ThreadInit = DecodeNetmapThreadInit; |
985 | | tmm_modules[TMM_DECODENETMAP].Func = DecodeNetmap; |
986 | | tmm_modules[TMM_DECODENETMAP].ThreadDeinit = DecodeNetmapThreadDeinit; |
987 | | tmm_modules[TMM_DECODENETMAP].cap_flags = 0; |
988 | | tmm_modules[TMM_DECODENETMAP].flags = TM_FLAG_DECODE_TM; |
989 | | } |
990 | | |
991 | | #endif /* HAVE_NETMAP */ |
992 | | |
993 | | /** |
994 | | * @} |
995 | | */ |