Coverage Report

Created: 2026-02-26 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libpcap/pcap-linux.c
Line
Count
Source
1
/*
2
 *  pcap-linux.c: Packet capture interface to the Linux kernel
3
 *
4
 *  Copyright (c) 2000 Torsten Landschoff <torsten@debian.org>
5
 *           Sebastian Krahmer  <krahmer@cs.uni-potsdam.de>
6
 *
7
 *  License: BSD
8
 *
9
 *  Redistribution and use in source and binary forms, with or without
10
 *  modification, are permitted provided that the following conditions
11
 *  are met:
12
 *
13
 *  1. Redistributions of source code must retain the above copyright
14
 *     notice, this list of conditions and the following disclaimer.
15
 *  2. Redistributions in binary form must reproduce the above copyright
16
 *     notice, this list of conditions and the following disclaimer in
17
 *     the documentation and/or other materials provided with the
18
 *     distribution.
19
 *  3. The names of the authors may not be used to endorse or promote
20
 *     products derived from this software without specific prior
21
 *     written permission.
22
 *
23
 *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24
 *  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25
 *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26
 *
27
 *  Modifications:     Added PACKET_MMAP support
28
 *                     Paolo Abeni <paolo.abeni@email.it>
29
 *                     Added TPACKET_V3 support
30
 *                     Gabor Tatarka <gabor.tatarka@ericsson.com>
31
 *
32
 *                     based on previous works of:
33
 *                     Simon Patarin <patarin@cs.unibo.it>
34
 *                     Phil Wood <cpw@lanl.gov>
35
 *
36
 * Monitor-mode support for mac80211 includes code taken from the iw
37
 * command; the copyright notice for that code is
38
 *
39
 * Copyright (c) 2007, 2008 Johannes Berg
40
 * Copyright (c) 2007   Andy Lutomirski
41
 * Copyright (c) 2007   Mike Kershaw
42
 * Copyright (c) 2008   Gábor Stefanik
43
 *
44
 * All rights reserved.
45
 *
46
 * Redistribution and use in source and binary forms, with or without
47
 * modification, are permitted provided that the following conditions
48
 * are met:
49
 * 1. Redistributions of source code must retain the above copyright
50
 *    notice, this list of conditions and the following disclaimer.
51
 * 2. Redistributions in binary form must reproduce the above copyright
52
 *    notice, this list of conditions and the following disclaimer in the
53
 *    documentation and/or other materials provided with the distribution.
54
 * 3. The name of the author may not be used to endorse or promote products
55
 *    derived from this software without specific prior written permission.
56
 *
57
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
58
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
59
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
60
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
61
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
62
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
63
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
64
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
65
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67
 * SUCH DAMAGE.
68
 */
69
70
71
#ifndef _GNU_SOURCE
72
#define _GNU_SOURCE
73
#endif
74
75
#include <config.h>
76
77
#include <errno.h>
78
#include <stdio.h>
79
#include <stdlib.h>
80
#include <unistd.h>
81
#include <fcntl.h>
82
#include <string.h>
83
#include <limits.h>
84
#include <endian.h>
85
#include <sys/stat.h>
86
#include <sys/socket.h>
87
#include <sys/ioctl.h>
88
#include <sys/utsname.h>
89
#include <sys/mman.h>
90
#include <linux/if.h>
91
#include <linux/if_packet.h>
92
#include <linux/sockios.h>
93
#include <linux/ethtool.h>
94
#include <netinet/in.h>
95
#include <linux/if_ether.h>
96
#include <linux/netlink.h>
97
98
#include <linux/if_arp.h>
99
#ifndef ARPHRD_IEEE802154
100
  // Linux before 2.6.31
101
  #define ARPHRD_IEEE802154 804
102
#endif
103
#ifndef ARPHRD_IEEE802154_MONITOR
104
  // Linux before 3.5
105
  #define ARPHRD_IEEE802154_MONITOR 805
106
#endif
107
#ifndef ARPHRD_NETLINK
108
  // Linux before 3.11
109
  #define ARPHRD_NETLINK 824
110
#endif
111
#ifndef ARPHRD_6LOWPAN
112
  // Linux before 3.14
113
  #define ARPHRD_6LOWPAN 825
114
#endif
115
#ifndef ARPHRD_VSOCKMON
116
  // Linux before 4.12
117
  #define ARPHRD_VSOCKMON 826
118
#endif
119
#ifndef ARPHRD_LAPD
120
  /*
121
   * ARPHRD_LAPD is unofficial and randomly allocated, if reallocation
122
   * is needed, please report it to <daniele@orlandi.com>
123
   */
124
0
  #define ARPHRD_LAPD 8445
125
#endif
126
127
#include <poll.h>
128
#include <dirent.h>
129
#include <sys/eventfd.h>
130
131
#include "pcap-int.h"
132
#include "pcap-util.h"
133
#include "pcap-snf.h"
134
#include "pcap/sll.h"
135
#include "pcap/vlan.h"
136
#include "pcap/can_socketcan.h"
137
138
#include "diag-control.h"
139
140
/*
141
 * We require TPACKET_V2 support.
142
 */
143
#ifndef TPACKET2_HDRLEN
144
#error "Libpcap will only work if TPACKET_V2 is supported; you must build for a 2.6.27 or later kernel"
145
#endif
146
147
/* check for memory mapped access availability. We assume every needed
148
 * struct is defined if the macro TPACKET_HDRLEN is defined, because it
149
 * uses many ring related structs and macros */
150
#ifdef TPACKET3_HDRLEN
151
# define HAVE_TPACKET3
152
#endif /* TPACKET3_HDRLEN */
153
154
/*
155
 * Not all compilers that are used to compile code to run on Linux have
156
 * these builtins.  For example, older versions of GCC don't, and at
157
 * least some people are doing cross-builds for MIPS with older versions
158
 * of GCC.
159
 */
160
#ifndef HAVE___ATOMIC_LOAD_N
161
#define __atomic_load_n(ptr, memory_model)    (*(ptr))
162
#endif
163
#ifndef HAVE___ATOMIC_STORE_N
164
#define __atomic_store_n(ptr, val, memory_model)  *(ptr) = (val)
165
#endif
166
167
#define packet_mmap_acquire(pkt) \
168
0
  (__atomic_load_n(&pkt->tp_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL)
169
#define packet_mmap_release(pkt) \
170
0
  (__atomic_store_n(&pkt->tp_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE))
171
#define packet_mmap_v3_acquire(pkt) \
172
0
  (__atomic_load_n(&pkt->hdr.bh1.block_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL)
173
#define packet_mmap_v3_release(pkt) \
174
0
  (__atomic_store_n(&pkt->hdr.bh1.block_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE))
175
176
#include <linux/types.h>
177
#include <linux/filter.h>
178
179
#ifdef HAVE_LINUX_NET_TSTAMP_H
180
#include <linux/net_tstamp.h>
181
#endif
182
183
/*
184
 * For checking whether a device is a bonding device.
185
 */
186
#include <linux/if_bonding.h>
187
188
/*
189
 * Got libnl?
190
 */
191
#ifdef HAVE_LIBNL
192
#include <linux/nl80211.h>
193
194
#include <netlink/genl/genl.h>
195
#include <netlink/genl/family.h>
196
#include <netlink/genl/ctrl.h>
197
#include <netlink/msg.h>
198
#include <netlink/attr.h>
199
#endif /* HAVE_LIBNL */
200
201
#ifndef HAVE_SOCKLEN_T
202
typedef int   socklen_t;
203
#endif
204
205
0
#define MAX_LINKHEADER_SIZE 256
206
207
/*
208
 * When capturing on all interfaces we use this as the buffer size.
209
 * Should be bigger than all MTUs that occur in real life.
210
 * 64kB should be enough for now.
211
 */
212
0
#define BIGGER_THAN_ALL_MTUS  (64*1024)
213
214
/*
215
 * Private data for capturing on Linux PF_PACKET sockets.
216
 */
217
struct pcap_linux {
218
  long long sysfs_dropped; /* packets reported dropped by /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors */
219
  struct pcap_stat stat;
220
221
  char  *device;  /* device name */
222
  int filter_in_userland; /* must filter in userland */
223
  u_int blocks_to_filter_in_userland;
224
  int must_do_on_close; /* stuff we must do when we close */
225
  int timeout;  /* timeout for buffering */
226
  int cooked;   /* using SOCK_DGRAM rather than SOCK_RAW */
227
  int ifindex;  /* interface index of device we're bound to */
228
  int lo_ifindex; /* interface index of the loopback device */
229
  int netdown;  /* we got an ENETDOWN and haven't resolved it */
230
  bpf_u_int32 oldmode;  /* mode to restore when turning monitor mode off */
231
  char  *mondevice; /* mac80211 monitor device we created */
232
  u_char  *mmapbuf; /* memory-mapped region pointer */
233
  size_t  mmapbuflen; /* size of region */
234
  int vlan_offset;  /* offset at which to insert vlan tags; if -1, don't insert */
235
  u_int tp_version; /* version of tpacket_hdr for mmaped ring */
236
  u_int tp_hdrlen;  /* hdrlen of tpacket_hdr for mmaped ring */
237
  u_char  *oneshot_buffer; /* buffer for copy of packet */
238
  int poll_timeout; /* timeout to use in poll() */
239
#ifdef HAVE_TPACKET3
240
  unsigned char *current_packet; /* Current packet within the TPACKET_V3 block. Move to next block if NULL. */
241
  int packets_left; /* Unhandled packets left within the block from previous call to pcap_read_linux_mmap_v3 in case of TPACKET_V3. */
242
#endif
243
  int poll_breakloop_fd; /* fd to an eventfd to break from blocking operations */
244
};
245
246
/*
247
 * Stuff to do when we close.
248
 */
249
#define MUST_DELETE_MONIF 0x00000001  /* delete monitor-mode interface */
250
251
/*
252
 * Prototypes for internal functions and methods.
253
 */
254
static int is_wifi(const char *);
255
static int pcap_activate_linux(pcap_t *);
256
static int setup_socket(pcap_t *, int);
257
static int setup_mmapped(pcap_t *);
258
static int pcap_can_set_rfmon_linux(pcap_t *);
259
static int pcap_inject_linux(pcap_t *, const void *, int);
260
static int pcap_stats_linux(pcap_t *, struct pcap_stat *);
261
static int pcap_setfilter_linux(pcap_t *, struct bpf_program *);
262
static int pcap_setdirection_linux(pcap_t *, pcap_direction_t);
263
static int pcap_set_datalink_linux(pcap_t *, int);
264
265
union thdr {
266
  struct tpacket2_hdr   *h2;
267
#ifdef HAVE_TPACKET3
268
  struct tpacket_block_desc *h3;
269
#endif
270
  u_char        *raw;
271
};
272
273
0
#define RING_GET_FRAME_AT(h, offset) (((u_char **)h->buffer)[(offset)])
274
0
#define RING_GET_CURRENT_FRAME(h) RING_GET_FRAME_AT(h, h->offset)
275
276
static void destroy_ring(pcap_t *handle);
277
static int create_ring(pcap_t *handle);
278
static int prepare_tpacket_socket(pcap_t *handle);
279
static int pcap_read_linux_mmap_v2(pcap_t *, int, pcap_handler , u_char *);
280
#ifdef HAVE_TPACKET3
281
static int pcap_read_linux_mmap_v3(pcap_t *, int, pcap_handler , u_char *);
282
#endif
283
static int pcap_setnonblock_linux(pcap_t *p, int nonblock);
284
static int pcap_getnonblock_linux(pcap_t *p);
285
static void pcapint_oneshot_linux(u_char *user, const struct pcap_pkthdr *h,
286
    const u_char *bytes);
287
288
/*
289
 * In pre-3.0 kernels, the tp_vlan_tci field is set to whatever the
290
 * vlan_tci field in the skbuff is.  0 can either mean "not on a VLAN"
291
 * or "on VLAN 0".  There is no flag set in the tp_status field to
292
 * distinguish between them.
293
 *
294
 * In 3.0 and later kernels, if there's a VLAN tag present, the tp_vlan_tci
295
 * field is set to the VLAN tag, and the TP_STATUS_VLAN_VALID flag is set
296
 * in the tp_status field, otherwise the tp_vlan_tci field is set to 0 and
297
 * the TP_STATUS_VLAN_VALID flag isn't set in the tp_status field.
298
 *
299
 * With a pre-3.0 kernel, we cannot distinguish between packets with no
300
 * VLAN tag and packets on VLAN 0, so we will mishandle some packets, and
301
 * there's nothing we can do about that.
302
 *
303
 * So, on those systems, which never set the TP_STATUS_VLAN_VALID flag, we
304
 * continue the behavior of earlier libpcaps, wherein we treated packets
305
 * with a VLAN tag of 0 as being packets without a VLAN tag rather than packets
306
 * on VLAN 0.  We do this by treating packets with a tp_vlan_tci of 0 and
307
 * with the TP_STATUS_VLAN_VALID flag not set in tp_status as not having
308
 * VLAN tags.  This does the right thing on 3.0 and later kernels, and
309
 * continues the old unfixably-imperfect behavior on pre-3.0 kernels.
310
 *
311
 * If TP_STATUS_VLAN_VALID isn't defined, we test it as the 0x10 bit; it
312
 * has that value in 3.0 and later kernels.
313
 */
314
#ifdef TP_STATUS_VLAN_VALID
315
0
  #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & TP_STATUS_VLAN_VALID))
316
#else
317
  /*
318
   * This is being compiled on a system that lacks TP_STATUS_VLAN_VALID,
319
   * so we test with the value it has in the 3.0 and later kernels, so
320
   * we can test it if we're running on a system that has it.  (If we're
321
   * running on a system that doesn't have it, it won't be set in the
322
   * tp_status field, so the tests of it will always fail; that means
323
   * we behave the way we did before we introduced this macro.)
324
   */
325
  #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & 0x10))
326
#endif
327
328
#ifdef TP_STATUS_VLAN_TPID_VALID
329
0
# define VLAN_TPID(hdr, hv) (((hv)->tp_vlan_tpid || ((hdr)->tp_status & TP_STATUS_VLAN_TPID_VALID)) ? (hv)->tp_vlan_tpid : ETH_P_8021Q)
330
#else
331
# define VLAN_TPID(hdr, hv) ETH_P_8021Q
332
#endif
333
334
/*
335
 * Required select timeout if we're polling for an "interface disappeared"
336
 * indication - 1 millisecond.
337
 */
338
static const struct timeval netdown_timeout = {
339
  0, 1000   /* 1000 microseconds = 1 millisecond */
340
};
341
342
/*
343
 * Wrap some ioctl calls
344
 */
345
static int  iface_get_id(int fd, const char *device, char *ebuf);
346
static int  iface_get_mtu(int fd, const char *device, char *ebuf);
347
static int  iface_get_arptype(int fd, const char *device, char *ebuf);
348
static int  iface_bind(int fd, int ifindex, char *ebuf, int protocol);
349
static int  enter_rfmon_mode(pcap_t *handle, int sock_fd,
350
    const char *device);
351
static int  iface_get_ts_types(const char *device, pcap_t *handle,
352
    char *ebuf);
353
static int  iface_get_offload(pcap_t *handle);
354
355
static int  fix_program(pcap_t *handle, struct sock_fprog *fcode);
356
static int  fix_offset(pcap_t *handle, struct bpf_insn *p);
357
static int  set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode);
358
static int  reset_kernel_filter(pcap_t *handle);
359
360
static struct sock_filter total_insn
361
  = BPF_STMT(BPF_RET | BPF_K, 0);
362
static struct sock_fprog  total_fcode
363
  = { 1, &total_insn };
364
365
static int  iface_dsa_get_proto_info(const char *device, pcap_t *handle);
366
367
pcap_t *
368
pcapint_create_interface(const char *device, char *ebuf)
369
0
{
370
0
  pcap_t *handle;
371
372
0
  handle = PCAP_CREATE_COMMON(ebuf, struct pcap_linux);
373
0
  if (handle == NULL)
374
0
    return NULL;
375
376
0
  handle->activate_op = pcap_activate_linux;
377
0
  handle->can_set_rfmon_op = pcap_can_set_rfmon_linux;
378
379
  /*
380
   * See what time stamp types we support.
381
   */
382
0
  if (iface_get_ts_types(device, handle, ebuf) == -1) {
383
0
    pcap_close(handle);
384
0
    return NULL;
385
0
  }
386
387
  /*
388
   * We claim that we support microsecond and nanosecond time
389
   * stamps.
390
   *
391
   * XXX - with adapter-supplied time stamps, can we choose
392
   * microsecond or nanosecond time stamps on arbitrary
393
   * adapters?
394
   */
395
0
  handle->tstamp_precision_list = malloc(2 * sizeof(u_int));
396
0
  if (handle->tstamp_precision_list == NULL) {
397
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
398
0
        errno, "malloc");
399
0
    pcap_close(handle);
400
0
    return NULL;
401
0
  }
402
0
  handle->tstamp_precision_list[0] = PCAP_TSTAMP_PRECISION_MICRO;
403
0
  handle->tstamp_precision_list[1] = PCAP_TSTAMP_PRECISION_NANO;
404
0
  handle->tstamp_precision_count = 2;
405
406
  /*
407
   * Start out with the breakloop handle not open; we don't
408
   * need it until we're activated and ready to capture.
409
   */
410
0
  struct pcap_linux *handlep = handle->priv;
411
0
  handlep->poll_breakloop_fd = -1;
412
413
0
  return handle;
414
0
}
415
416
#ifdef HAVE_LIBNL
417
/*
418
 * If interface {if_name} is a mac80211 driver, the file
419
 * /sys/class/net/{if_name}/phy80211 is a symlink to
420
 * /sys/class/ieee80211/{phydev_name}, for some {phydev_name}.
421
 *
422
 * On Fedora 9, with a 2.6.26.3-29 kernel, my Zydas stick, at
423
 * least, has a "wmaster0" device and a "wlan0" device; the
424
 * latter is the one with the IP address.  Both show up in
425
 * "tcpdump -D" output.  Capturing on the wmaster0 device
426
 * captures with 802.11 headers.
427
 *
428
 * airmon-ng searches through /sys/class/net for devices named
429
 * monN, starting with mon0; as soon as one *doesn't* exist,
430
 * it chooses that as the monitor device name.  If the "iw"
431
 * command exists, it does
432
 *
433
 *    iw dev {if_name} interface add {monif_name} type monitor
434
 *
435
 * where {monif_name} is the monitor device.  It then (sigh) sleeps
436
 * .1 second, and then configures the device up.  Otherwise, if
437
 * /sys/class/ieee80211/{phydev_name}/add_iface is a file, it writes
438
 * {mondev_name}, without a newline, to that file, and again (sigh)
439
 * sleeps .1 second, and then iwconfig's that device into monitor
440
 * mode and configures it up.  Otherwise, you can't do monitor mode.
441
 *
442
 * All these devices are "glued" together by having the
443
 * /sys/class/net/{if_name}/phy80211 links pointing to the same
444
 * place, so, given a wmaster, wlan, or mon device, you can
445
 * find the other devices by looking for devices with
446
 * the same phy80211 link.
447
 *
448
 * To turn monitor mode off, delete the monitor interface,
449
 * either with
450
 *
451
 *    iw dev {monif_name} interface del
452
 *
453
 * or by sending {monif_name}, with no NL, down
454
 * /sys/class/ieee80211/{phydev_name}/remove_iface
455
 *
456
 * Note: if you try to create a monitor device named "monN", and
457
 * there's already a "monN" device, it fails, at least with
458
 * the netlink interface (which is what iw uses), with a return
459
 * value of -ENFILE.  (Return values are negative errnos.)  We
460
 * could probably use that to find an unused device.
461
 *
462
 * Yes, you can have multiple monitor devices for a given
463
 * physical device.
464
 */
465
466
/*
467
 * Is this a mac80211 device?  If so, fill in the physical device path and
468
 * return 1; if not, return 0.  On an error, fill in handle->errbuf and
469
 * return PCAP_ERROR.
470
 */
471
static int
472
get_mac80211_phydev(pcap_t *handle, const char *device, char *phydev_path,
473
    size_t phydev_max_pathlen)
474
{
475
  char *pathstr;
476
  ssize_t bytes_read;
477
478
  /*
479
   * Generate the path string for the symlink to the physical device.
480
   */
481
  if (asprintf(&pathstr, "/sys/class/net/%s/phy80211", device) == -1) {
482
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
483
        "%s: Can't generate path name string for /sys/class/net device",
484
        device);
485
    return PCAP_ERROR;
486
  }
487
  bytes_read = readlink(pathstr, phydev_path, phydev_max_pathlen);
488
  if (bytes_read == -1) {
489
    if (errno == ENOENT) {
490
      /*
491
       * This either means that the directory
492
       * /sys/class/net/{device} exists but doesn't
493
       * have anything named "phy80211" in it,
494
       * in which case it's not a mac80211 device,
495
       * or that the directory doesn't exist,
496
       * in which case the device doesn't exist.
497
       *
498
       * Directly check whether the directory
499
       * exists.
500
       */
501
      struct stat statb;
502
503
      free(pathstr);
504
      if (asprintf(&pathstr, "/sys/class/net/%s", device) == -1) {
505
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
506
            "%s: Can't generate path name string for /sys/class/net device",
507
            device);
508
        return PCAP_ERROR;
509
      }
510
      if (stat(pathstr, &statb) == -1) {
511
        if (errno == ENOENT) {
512
          /*
513
           * No such device.
514
           */
515
          snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
516
              "%s: %s doesn't exist",
517
              device, pathstr);
518
          free(pathstr);
519
          return PCAP_ERROR_NO_SUCH_DEVICE;
520
        }
521
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
522
            "%s: Can't stat %s: %s",
523
            device, pathstr, strerror(errno));
524
        free(pathstr);
525
        return PCAP_ERROR;
526
      }
527
528
      /*
529
       * Path to the directory that would contain
530
       * "phy80211" exists, but "phy80211" doesn't
531
       * exist; that means it's not a mac80211
532
       * device.
533
       */
534
      free(pathstr);
535
      return 0;
536
    }
537
    if (errno == EINVAL) {
538
      /*
539
       * Exists, but it's not a symlink; assume that
540
       * means it's not a mac80211 device.
541
       */
542
      free(pathstr);
543
      return 0;
544
    }
545
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
546
        errno, "%s: Can't readlink %s", device, pathstr);
547
    free(pathstr);
548
    return PCAP_ERROR;
549
  }
550
  free(pathstr);
551
  phydev_path[bytes_read] = '\0';
552
  return 1;
553
}
554
555
struct nl80211_state {
556
  struct nl_sock *nl_sock;
557
  struct nl_cache *nl_cache;
558
  struct genl_family *nl80211;
559
};
560
561
static int
562
nl80211_init(pcap_t *handle, struct nl80211_state *state, const char *device)
563
{
564
  int err;
565
566
  state->nl_sock = nl_socket_alloc();
567
  if (!state->nl_sock) {
568
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
569
        "%s: failed to allocate netlink handle", device);
570
    return PCAP_ERROR;
571
  }
572
573
  if (genl_connect(state->nl_sock)) {
574
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
575
        "%s: failed to connect to generic netlink", device);
576
    goto out_handle_destroy;
577
  }
578
579
  err = genl_ctrl_alloc_cache(state->nl_sock, &state->nl_cache);
580
  if (err < 0) {
581
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
582
        "%s: failed to allocate generic netlink cache: %s",
583
        device, nl_geterror(-err));
584
    goto out_handle_destroy;
585
  }
586
587
  state->nl80211 = genl_ctrl_search_by_name(state->nl_cache, "nl80211");
588
  if (!state->nl80211) {
589
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
590
        "%s: nl80211 not found", device);
591
    goto out_cache_free;
592
  }
593
594
  return 0;
595
596
out_cache_free:
597
  nl_cache_free(state->nl_cache);
598
out_handle_destroy:
599
  nl_socket_free(state->nl_sock);
600
  return PCAP_ERROR;
601
}
602
603
static void
604
nl80211_cleanup(struct nl80211_state *state)
605
{
606
  genl_family_put(state->nl80211);
607
  nl_cache_free(state->nl_cache);
608
  nl_socket_free(state->nl_sock);
609
}
610
611
static int
612
del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
613
    const char *device, const char *mondevice);
614
615
static int
616
if_type_cb(struct nl_msg *msg, void* arg)
617
{
618
  struct nlmsghdr* ret_hdr = nlmsg_hdr(msg);
619
  struct nlattr *tb_msg[NL80211_ATTR_MAX + 1];
620
  int *type = (int*)arg;
621
622
  struct genlmsghdr *gnlh = (struct genlmsghdr*) nlmsg_data(ret_hdr);
623
624
  nla_parse(tb_msg, NL80211_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
625
    genlmsg_attrlen(gnlh, 0), NULL);
626
627
  /*
628
   * We sent a message asking for info about a single index.
629
   * To be really paranoid, we could check if the index matched
630
   * by examining nla_get_u32(tb_msg[NL80211_ATTR_IFINDEX]).
631
   */
632
633
  if (tb_msg[NL80211_ATTR_IFTYPE]) {
634
    *type = nla_get_u32(tb_msg[NL80211_ATTR_IFTYPE]);
635
  }
636
637
  return NL_SKIP;
638
}
639
640
static int
641
get_if_type(pcap_t *handle, int sock_fd, struct nl80211_state *state,
642
    const char *device, int *type)
643
{
644
  int ifindex;
645
  struct nl_msg *msg;
646
  int err;
647
648
  ifindex = iface_get_id(sock_fd, device, handle->errbuf);
649
  if (ifindex == -1)
650
    return PCAP_ERROR;
651
652
  msg = nlmsg_alloc();
653
  if (!msg) {
654
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
655
        "%s: failed to allocate netlink msg", device);
656
    return PCAP_ERROR;
657
  }
658
659
  genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ,
660
        genl_family_get_id(state->nl80211), 0,
661
        0, NL80211_CMD_GET_INTERFACE, 0);
662
  NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex);
663
664
  err = nl_send_auto(state->nl_sock, msg);
665
  nlmsg_free(msg);
666
  if (err < 0) {
667
    if (err == -NLE_FAILURE) {
668
      /*
669
       * Device not available; our caller should just
670
       * keep trying.  (libnl 2.x maps ENFILE to
671
       * NLE_FAILURE; it can also map other errors
672
       * to that, but there's not much we can do
673
       * about that.)
674
       */
675
      return 0;
676
    } else {
677
      /*
678
       * Real failure, not just "that device is not
679
       * available.
680
       */
681
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
682
          "%s: nl_send_auto failed getting interface type: %s",
683
          device, nl_geterror(-err));
684
      return PCAP_ERROR;
685
    }
686
  }
687
688
  struct nl_cb *cb = nl_cb_alloc(NL_CB_DEFAULT);
689
  nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, if_type_cb, (void*)type);
690
  err = nl_recvmsgs(state->nl_sock, cb);
691
  nl_cb_put(cb);
692
693
  if (err < 0) {
694
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
695
        "%s: nl_recvmsgs failed getting interface type: %s",
696
        device, nl_geterror(-err));
697
    return PCAP_ERROR;
698
  }
699
700
  /*
701
  * If this is a mac80211 device not in monitor mode, nl_sock will be
702
  * reused for add_mon_if. So we must wait for the ACK here so that
703
  * add_mon_if does not receive it instead and incorrectly interpret
704
  * the ACK as its NEW_INTERFACE command succeeding, even when it fails.
705
  */
706
  err = nl_wait_for_ack(state->nl_sock);
707
  if (err < 0) {
708
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
709
        "%s: nl_wait_for_ack failed getting interface type: %s",
710
        device, nl_geterror(-err));
711
    return PCAP_ERROR;
712
  }
713
714
  /*
715
   * Success.
716
   */
717
  return 1;
718
719
nla_put_failure:
720
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
721
      "%s: nl_put failed getting interface type",
722
      device);
723
  nlmsg_free(msg);
724
  // Do not call nl_cb_put(): nl_cb_alloc() has not been called.
725
  return PCAP_ERROR;
726
}
727
728
static int
729
add_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
730
    const char *device, const char *mondevice)
731
{
732
  struct pcap_linux *handlep = handle->priv;
733
  int ifindex;
734
  struct nl_msg *msg;
735
  int err;
736
737
  ifindex = iface_get_id(sock_fd, device, handle->errbuf);
738
  if (ifindex == -1)
739
    return PCAP_ERROR;
740
741
  msg = nlmsg_alloc();
742
  if (!msg) {
743
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
744
        "%s: failed to allocate netlink msg", device);
745
    return PCAP_ERROR;
746
  }
747
748
  genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ,
749
        genl_family_get_id(state->nl80211), 0,
750
        0, NL80211_CMD_NEW_INTERFACE, 0);
751
  NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex);
752
DIAG_OFF_NARROWING
753
  NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, mondevice);
754
DIAG_ON_NARROWING
755
  NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, NL80211_IFTYPE_MONITOR);
756
757
  err = nl_send_sync(state->nl_sock, msg); // calls nlmsg_free()
758
  if (err < 0) {
759
    switch (err) {
760
761
    case -NLE_FAILURE:
762
    case -NLE_AGAIN:
763
      /*
764
       * Device not available; our caller should just
765
       * keep trying.  (libnl 2.x maps ENFILE to
766
       * NLE_FAILURE; it can also map other errors
767
       * to that, but there's not much we can do
768
       * about that.)
769
       */
770
      return 0;
771
772
    case -NLE_OPNOTSUPP:
773
      /*
774
       * Device is a mac80211 device but adding it as a
775
       * monitor mode device isn't supported.  Report our
776
       * error.
777
       */
778
      return PCAP_ERROR_RFMON_NOTSUP;
779
780
    default:
781
      /*
782
       * Real failure, not just "that device is not
783
       * available."  Report a generic error, using the
784
       * error message from libnl.
785
       */
786
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
787
          "%s: nl_send_sync failed adding %s interface: %s",
788
          device, mondevice, nl_geterror(-err));
789
      return PCAP_ERROR;
790
    }
791
  }
792
793
  /*
794
   * Success.
795
   */
796
797
  /*
798
   * Try to remember the monitor device.
799
   */
800
  handlep->mondevice = strdup(mondevice);
801
  if (handlep->mondevice == NULL) {
802
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
803
        errno, "strdup");
804
    /*
805
     * Get rid of the monitor device.
806
     */
807
    del_mon_if(handle, sock_fd, state, device, mondevice);
808
    return PCAP_ERROR;
809
  }
810
  return 1;
811
812
nla_put_failure:
813
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
814
      "%s: nl_put failed adding %s interface",
815
      device, mondevice);
816
  nlmsg_free(msg);
817
  return PCAP_ERROR;
818
}
819
820
static int
821
del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
822
    const char *device, const char *mondevice)
823
{
824
  int ifindex;
825
  struct nl_msg *msg;
826
  int err;
827
828
  ifindex = iface_get_id(sock_fd, mondevice, handle->errbuf);
829
  if (ifindex == -1)
830
    return PCAP_ERROR;
831
832
  msg = nlmsg_alloc();
833
  if (!msg) {
834
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
835
        "%s: failed to allocate netlink msg", device);
836
    return PCAP_ERROR;
837
  }
838
839
  genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ,
840
        genl_family_get_id(state->nl80211), 0,
841
        0, NL80211_CMD_DEL_INTERFACE, 0);
842
  NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex);
843
844
  err = nl_send_sync(state->nl_sock, msg); // calls nlmsg_free()
845
  if (err < 0) {
846
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
847
        "%s: nl_send_sync failed deleting %s interface: %s",
848
        device, mondevice, nl_geterror(-err));
849
    return PCAP_ERROR;
850
  }
851
852
  /*
853
   * Success.
854
   */
855
  return 1;
856
857
nla_put_failure:
858
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
859
      "%s: nl_put failed deleting %s interface",
860
      device, mondevice);
861
  nlmsg_free(msg);
862
  return PCAP_ERROR;
863
}
864
#endif /* HAVE_LIBNL */
865
866
static int pcap_protocol(pcap_t *handle)
867
0
{
868
0
  int protocol;
869
870
0
  protocol = handle->opt.protocol;
871
0
  if (protocol == 0)
872
0
    protocol = ETH_P_ALL;
873
874
0
  return htons(protocol);
875
0
}
876
877
static int
878
pcap_can_set_rfmon_linux(pcap_t *handle)
879
0
{
880
#ifdef HAVE_LIBNL
881
  char phydev_path[PATH_MAX+1];
882
  int ret;
883
#endif
884
885
0
  if (strcmp(handle->opt.device, "any") == 0) {
886
    /*
887
     * Monitor mode makes no sense on the "any" device.
888
     */
889
0
    return 0;
890
0
  }
891
892
#ifdef HAVE_LIBNL
893
  /*
894
   * Bleah.  There doesn't seem to be a way to ask a mac80211
895
   * device, through libnl, whether it supports monitor mode;
896
   * we'll just check whether the device appears to be a
897
   * mac80211 device and, if so, assume the device supports
898
   * monitor mode.
899
   */
900
  ret = get_mac80211_phydev(handle, handle->opt.device, phydev_path,
901
      PATH_MAX);
902
  if (ret < 0)
903
    return ret; /* error */
904
  if (ret == 1)
905
    return 1; /* mac80211 device */
906
#endif
907
908
0
  return 0;
909
0
}
910
911
/*
 * Fetch one interface counter from
 * /sys/class/net/{if_name}/statistics/{stat_name}; callers use it for
 * rx_missed_errors and rx_fifo_errors, i.e. packets missed by the
 * interface.
 *
 * Unlike /proc/net/dev, this does not count software drops, but the
 * counter may be unimplemented and simply read as 0.  The author has
 * found no straightforward way to check for support.
 *
 * Returns the parsed counter, or 0 if the file can't be opened or read.
 */
static long long int
linux_get_stat(const char * if_name, const char * stat_name) {
	char scratch[PATH_MAX];
	ssize_t nread;
	int stat_fd;

	snprintf(scratch, sizeof(scratch), "/sys/class/net/%s/statistics/%s", if_name, stat_name);
	stat_fd = open(scratch, O_RDONLY);
	if (stat_fd == -1)
		return 0;

	/*
	 * The path is no longer needed once the file is open, so the
	 * same buffer is reused for the file's contents; one byte is
	 * held back for the terminating '\0'.
	 */
	nread = read(stat_fd, scratch, sizeof(scratch) - 1);
	close(stat_fd);
	if (nread == -1)
		return 0;

	scratch[nread] = '\0';
	return strtoll(scratch, NULL, 10);
}
938
939
/*
 * Return the number of packets the interface reports as dropped:
 * the sum of its rx_missed_errors and rx_fifo_errors counters, as
 * read by linux_get_stat().
 */
static long long int
linux_if_drops(const char * if_name)
{
	long long int total;

	total = linux_get_stat(if_name, "rx_missed_errors");
	total += linux_get_stat(if_name, "rx_fifo_errors");
	return total;
}
946
947
948
/*
949
 * Monitor mode is kind of interesting because we have to reset the
950
 * interface before exiting. The problem can't really be solved without
951
 * some daemon taking care of managing usage counts.  If we put the
952
 * interface into monitor mode, we set a flag indicating that we must
953
 * take it out of that mode when the interface is closed, and, when
954
 * closing the interface, if that flag is set we take it out of monitor
955
 * mode.
956
 */
957
958
static void pcap_cleanup_linux( pcap_t *handle )
959
0
{
960
0
  struct pcap_linux *handlep = handle->priv;
961
#ifdef HAVE_LIBNL
962
  struct nl80211_state nlstate;
963
  int ret;
964
#endif /* HAVE_LIBNL */
965
966
0
  if (handlep->must_do_on_close != 0) {
967
    /*
968
     * There's something we have to do when closing this
969
     * pcap_t.
970
     */
971
#ifdef HAVE_LIBNL
972
    if (handlep->must_do_on_close & MUST_DELETE_MONIF) {
973
      ret = nl80211_init(handle, &nlstate, handlep->device);
974
      if (ret >= 0) {
975
        ret = del_mon_if(handle, handle->fd, &nlstate,
976
            handlep->device, handlep->mondevice);
977
        nl80211_cleanup(&nlstate);
978
      }
979
      if (ret < 0) {
980
        fprintf(stderr,
981
            "Can't delete monitor interface %s (%s).\n"
982
            "Please delete manually.\n",
983
            handlep->mondevice, handle->errbuf);
984
      }
985
    }
986
#endif /* HAVE_LIBNL */
987
988
    /*
989
     * Take this pcap out of the list of pcaps for which we
990
     * have to take the interface out of some mode.
991
     */
992
0
    pcapint_remove_from_pcaps_to_close(handle);
993
0
  }
994
995
0
  if (handle->fd != -1) {
996
    /*
997
     * Destroy the ring buffer (assuming we've set it up),
998
     * and unmap it if it's mapped.
999
     */
1000
0
    destroy_ring(handle);
1001
0
  }
1002
1003
0
  if (handlep->oneshot_buffer != NULL) {
1004
0
    munmap(handlep->oneshot_buffer, handle->snapshot);
1005
0
    handlep->oneshot_buffer = NULL;
1006
0
  }
1007
1008
0
  if (handlep->mondevice != NULL) {
1009
0
    free(handlep->mondevice);
1010
0
    handlep->mondevice = NULL;
1011
0
  }
1012
0
  if (handlep->device != NULL) {
1013
0
    free(handlep->device);
1014
0
    handlep->device = NULL;
1015
0
  }
1016
1017
0
  if (handlep->poll_breakloop_fd != -1) {
1018
0
    close(handlep->poll_breakloop_fd);
1019
0
    handlep->poll_breakloop_fd = -1;
1020
0
  }
1021
0
  pcapint_cleanup_live_common(handle);
1022
0
}
1023
1024
#ifdef HAVE_TPACKET3
1025
/*
 * Some versions of TPACKET_V3 have annoying bugs/misfeatures that we
 * have to work around.  Decide whether the running kernel is one of
 * them, based on the release string from uname().
 *
 * 3.19 is the first release with a fixed TPACKET_V3; anything earlier
 * is treated as not fixed (which may really mean it has no TPACKET_V3
 * at all).
 *
 * Returns 1 for "broken, or can't tell", 0 for "fixed".
 */
static int has_broken_tpacket_v3(void)
{
	struct utsname kernel_info;
	long ver_major, ver_minor;
	int parsed_len;

	/* No version information at all: assume broken. */
	if (uname(&kernel_info) == -1)
		return 1;

	/*
	 * The release must start with "major.minor"; %n records how
	 * many characters that took, without counting as a match.
	 */
	if (sscanf(kernel_info.release, "%ld.%ld%n", &ver_major, &ver_minor,
	    &parsed_len) != 2)
		return 1;

	/* The minor number must be followed by '.' or end the string. */
	switch (kernel_info.release[parsed_len]) {

	case '.':
	case '\0':
		break;

	default:
		return 1;
	}

	/* Anything before 3.19 is too old; 3.19 and later is fixed. */
	return (ver_major < 3 || (ver_major == 3 && ver_minor < 19)) ? 1 : 0;
}
1060
#endif
1061
1062
/*
1063
 * Set the timeout to be used in poll() with memory-mapped packet capture.
1064
 */
1065
static void
1066
set_poll_timeout(struct pcap_linux *handlep)
1067
0
{
1068
0
#ifdef HAVE_TPACKET3
1069
0
  int broken_tpacket_v3 = has_broken_tpacket_v3();
1070
0
#endif
1071
0
  if (handlep->timeout == 0) {
1072
0
#ifdef HAVE_TPACKET3
1073
    /*
1074
     * XXX - due to a set of (mis)features in the TPACKET_V3
1075
     * kernel code prior to the 3.19 kernel, blocking forever
1076
     * with a TPACKET_V3 socket can, if few packets are
1077
     * arriving and passing the socket filter, cause most
1078
     * packets to be dropped.  See libpcap issue #335 for the
1079
     * full painful story.
1080
     *
1081
     * The workaround is to have poll() time out very quickly,
1082
     * so we grab the frames handed to us, and return them to
1083
     * the kernel, ASAP.
1084
     */
1085
0
    if (handlep->tp_version == TPACKET_V3 && broken_tpacket_v3)
1086
0
      handlep->poll_timeout = 1; /* don't block for very long */
1087
0
    else
1088
0
#endif
1089
0
      handlep->poll_timeout = -1; /* block forever */
1090
0
  } else if (handlep->timeout > 0) {
1091
0
#ifdef HAVE_TPACKET3
1092
    /*
1093
     * For TPACKET_V3, the timeout is handled by the kernel,
1094
     * so block forever; that way, we don't get extra timeouts.
1095
     * Don't do that if we have a broken TPACKET_V3, though.
1096
     */
1097
0
    if (handlep->tp_version == TPACKET_V3 && !broken_tpacket_v3)
1098
0
      handlep->poll_timeout = -1; /* block forever, let TPACKET_V3 wake us up */
1099
0
    else
1100
0
#endif
1101
0
      handlep->poll_timeout = handlep->timeout; /* block for that amount of time */
1102
0
  } else {
1103
    /*
1104
     * Non-blocking mode; we call poll() to pick up error
1105
     * indications, but we don't want it to wait for
1106
     * anything.
1107
     */
1108
0
    handlep->poll_timeout = 0;
1109
0
  }
1110
0
}
1111
1112
/*
 * breakloop operation for Linux captures: do the common breakloop
 * processing and then, if the handle has a breakloop descriptor,
 * write a 64-bit 1 to it.
 */
static void pcap_breakloop_linux(pcap_t *handle)
{
	struct pcap_linux *handlep;
	uint64_t wakeup = 1;

	pcapint_breakloop_common(handle);

	handlep = handle->priv;
	if (handlep->poll_breakloop_fd == -1)
		return;		/* no descriptor to write to */

	/*
	 * XXX - pcap_breakloop() has no return value, so a failed
	 * write can't be reported; the result is deliberately ignored.
	 */
DIAG_OFF_WARN_UNUSED_RESULT
	(void)write(handlep->poll_breakloop_fd, &wakeup, sizeof(wakeup));
DIAG_ON_WARN_UNUSED_RESULT
}
1129
1130
/*
1131
 * Set the offset at which to insert VLAN tags.
1132
 * That should be the offset of the type field.
1133
 */
1134
static void
1135
set_vlan_offset(pcap_t *handle)
1136
0
{
1137
0
  struct pcap_linux *handlep = handle->priv;
1138
1139
0
  switch (handle->linktype) {
1140
1141
0
  case DLT_EN10MB:
1142
    /*
1143
     * The type field is after the destination and source
1144
     * MAC address.
1145
     */
1146
0
    handlep->vlan_offset = 2 * ETH_ALEN;
1147
0
    break;
1148
1149
0
  case DLT_LINUX_SLL:
1150
    /*
1151
     * The type field is in the last 2 bytes of the
1152
     * DLT_LINUX_SLL header.
1153
     */
1154
0
    handlep->vlan_offset = SLL_HDR_LEN - 2;
1155
0
    break;
1156
1157
0
  default:
1158
0
    handlep->vlan_offset = -1; /* unknown */
1159
0
    break;
1160
0
  }
1161
0
}
1162
1163
static int
1164
pcap_activate_linux(pcap_t *handle)
1165
0
{
1166
0
  struct pcap_linux *handlep = handle->priv;
1167
0
  const char  *device;
1168
0
  int   is_any_device;
1169
0
  struct ifreq  ifr;
1170
0
  int   status;
1171
0
  int   ret;
1172
1173
0
  device = handle->opt.device;
1174
1175
  /*
1176
   * Start out assuming no warnings.
1177
   */
1178
0
  status = 0;
1179
1180
  /*
1181
   * Make sure the name we were handed will fit into the ioctls we
1182
   * might perform on the device; if not, return a "No such device"
1183
   * indication, as the Linux kernel shouldn't support creating
1184
   * a device whose name won't fit into those ioctls.
1185
   *
1186
   * "Will fit" means "will fit, complete with a null terminator",
1187
   * so if the length, which does *not* include the null terminator,
1188
   * is greater than *or equal to* the size of the field into which
1189
   * we'll be copying it, that won't fit.
1190
   */
1191
0
  if (strlen(device) >= sizeof(ifr.ifr_name)) {
1192
    /*
1193
     * There's nothing more to say, so clear the error
1194
     * message.
1195
     */
1196
0
    handle->errbuf[0] = '\0';
1197
0
    status = PCAP_ERROR_NO_SUCH_DEVICE;
1198
0
    goto fail;
1199
0
  }
1200
1201
  /*
1202
   * Turn a negative snapshot value (invalid), a snapshot value of
1203
   * 0 (unspecified), or a value bigger than the normal maximum
1204
   * value, into the maximum allowed value.
1205
   *
1206
   * If some application really *needs* a bigger snapshot
1207
   * length, we should just increase MAXIMUM_SNAPLEN.
1208
   */
1209
0
  if (handle->snapshot <= 0 || handle->snapshot > MAXIMUM_SNAPLEN)
1210
0
    handle->snapshot = MAXIMUM_SNAPLEN;
1211
1212
0
  handlep->device = strdup(device);
1213
0
  if (handlep->device == NULL) {
1214
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
1215
0
        errno, "strdup");
1216
0
    status = PCAP_ERROR;
1217
0
    goto fail;
1218
0
  }
1219
1220
  /*
1221
   * The "any" device is a special device which causes us not
1222
   * to bind to a particular device and thus to look at all
1223
   * devices.
1224
   */
1225
0
  is_any_device = (strcmp(device, "any") == 0);
1226
0
  if (is_any_device) {
1227
0
    if (handle->opt.promisc) {
1228
0
      handle->opt.promisc = 0;
1229
      /* Just a warning. */
1230
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
1231
0
          "Promiscuous mode not supported on the \"any\" device");
1232
0
      status = PCAP_WARNING_PROMISC_NOTSUP;
1233
0
    }
1234
0
  }
1235
1236
  /* copy timeout value */
1237
0
  handlep->timeout = handle->opt.timeout;
1238
1239
  /*
1240
   * If we're in promiscuous mode, then we probably want
1241
   * to see when the interface drops packets too, so get an
1242
   * initial count from
1243
   * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors
1244
   */
1245
0
  if (handle->opt.promisc)
1246
0
    handlep->sysfs_dropped = linux_if_drops(handlep->device);
1247
1248
  /*
1249
   * If the "any" device is specified, try to open a SOCK_DGRAM.
1250
   * Otherwise, open a SOCK_RAW.
1251
   */
1252
0
  ret = setup_socket(handle, is_any_device);
1253
0
  if (ret < 0) {
1254
    /*
1255
     * Fatal error; the return value is the error code,
1256
     * and handle->errbuf has been set to an appropriate
1257
     * error message.
1258
     */
1259
0
    status = ret;
1260
0
    goto fail;
1261
0
  }
1262
0
  if (ret > 0) {
1263
    /*
1264
     * We got a warning; return that, as handle->errbuf
1265
     * might have been overwritten by this warning.
1266
     */
1267
0
    status = ret;
1268
0
  }
1269
1270
  /*
1271
   * Success (possibly with a warning).
1272
   *
1273
   * First, try to allocate an event FD for breakloop, if
1274
   * we're not going to start in non-blocking mode.
1275
   */
1276
0
  if (!handle->opt.nonblock) {
1277
0
    handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK);
1278
0
    if (handlep->poll_breakloop_fd == -1) {
1279
      /*
1280
       * Failed.
1281
       */
1282
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
1283
0
          PCAP_ERRBUF_SIZE, errno, "could not open eventfd");
1284
0
      status = PCAP_ERROR;
1285
0
      goto fail;
1286
0
    }
1287
0
  }
1288
1289
  /*
1290
   * Succeeded.
1291
   * Try to set up memory-mapped access.
1292
   */
1293
0
  ret = setup_mmapped(handle);
1294
0
  if (ret < 0) {
1295
    /*
1296
     * We failed to set up to use it, or the
1297
     * kernel supports it, but we failed to
1298
     * enable it.  The return value is the
1299
     * error status to return and, if it's
1300
     * PCAP_ERROR, handle->errbuf contains
1301
     * the error message.
1302
     */
1303
0
    status = ret;
1304
0
    goto fail;
1305
0
  }
1306
0
  if (ret > 0) {
1307
    /*
1308
     * We got a warning; return that, as handle->errbuf
1309
     * might have been overwritten by this warning.
1310
     */
1311
0
    status = ret;
1312
0
  }
1313
1314
  /*
1315
   * We succeeded.  status has been set to the status to return,
1316
   * which might be 0, or might be a PCAP_WARNING_ value.
1317
   */
1318
  /*
1319
   * Now that we have activated the mmap ring, we can
1320
   * set the correct protocol.
1321
   */
1322
0
  if ((ret = iface_bind(handle->fd, handlep->ifindex,
1323
0
      handle->errbuf, pcap_protocol(handle))) != 0) {
1324
0
    status = ret;
1325
0
    goto fail;
1326
0
  }
1327
1328
0
  handle->inject_op = pcap_inject_linux;
1329
0
  handle->setfilter_op = pcap_setfilter_linux;
1330
0
  handle->setdirection_op = pcap_setdirection_linux;
1331
0
  handle->set_datalink_op = pcap_set_datalink_linux;
1332
0
  handle->setnonblock_op = pcap_setnonblock_linux;
1333
0
  handle->getnonblock_op = pcap_getnonblock_linux;
1334
0
  handle->cleanup_op = pcap_cleanup_linux;
1335
0
  handle->stats_op = pcap_stats_linux;
1336
0
  handle->breakloop_op = pcap_breakloop_linux;
1337
1338
0
  switch (handlep->tp_version) {
1339
1340
0
  case TPACKET_V2:
1341
0
    handle->read_op = pcap_read_linux_mmap_v2;
1342
0
    break;
1343
0
#ifdef HAVE_TPACKET3
1344
0
  case TPACKET_V3:
1345
0
    handle->read_op = pcap_read_linux_mmap_v3;
1346
0
    break;
1347
0
#endif
1348
0
  }
1349
0
  handle->oneshot_callback = pcapint_oneshot_linux;
1350
0
  handle->selectable_fd = handle->fd;
1351
1352
0
  return status;
1353
1354
0
fail:
1355
0
  pcap_cleanup_linux(handle);
1356
0
  return status;
1357
0
}
1358
1359
static int
1360
pcap_set_datalink_linux(pcap_t *handle, int dlt)
1361
0
{
1362
0
  handle->linktype = dlt;
1363
1364
  /*
1365
   * Update the offset at which to insert VLAN tags for the
1366
   * new link-layer type.
1367
   */
1368
0
  set_vlan_offset(handle);
1369
1370
0
  return 0;
1371
0
}
1372
1373
/*
1374
 * linux_check_direction()
1375
 *
1376
 * Do checks based on packet direction.
1377
 */
1378
static inline int
1379
linux_check_direction(const pcap_t *handle, const struct sockaddr_ll *sll)
1380
0
{
1381
0
  struct pcap_linux *handlep = handle->priv;
1382
1383
0
  if (sll->sll_pkttype == PACKET_OUTGOING) {
1384
    /*
1385
     * Outgoing packet.
1386
     * If this is from the loopback device, reject it;
1387
     * we'll see the packet as an incoming packet as well,
1388
     * and we don't want to see it twice.
1389
     */
1390
0
    if (sll->sll_ifindex == handlep->lo_ifindex)
1391
0
      return 0;
1392
1393
    /*
1394
     * If this is an outgoing CAN frame, and the user doesn't
1395
     * want only outgoing packets, reject it; CAN devices
1396
     * and drivers, and the CAN stack, always arrange to
1397
     * loop back transmitted packets, so they also appear
1398
     * as incoming packets.  We don't want duplicate packets,
1399
     * and we can't easily distinguish packets looped back
1400
     * by the CAN layer than those received by the CAN layer,
1401
     * so we eliminate this packet instead.
1402
     *
1403
     * We check whether this is a CAN frame by checking whether
1404
     * the device's hardware type is ARPHRD_CAN.
1405
     */
1406
0
    if (sll->sll_hatype == ARPHRD_CAN &&
1407
0
         handle->direction != PCAP_D_OUT)
1408
0
      return 0;
1409
1410
    /*
1411
     * If the user only wants incoming packets, reject it.
1412
     */
1413
0
    if (handle->direction == PCAP_D_IN)
1414
0
      return 0;
1415
0
  } else {
1416
    /*
1417
     * Incoming packet.
1418
     * If the user only wants outgoing packets, reject it.
1419
     */
1420
0
    if (handle->direction == PCAP_D_OUT)
1421
0
      return 0;
1422
0
  }
1423
0
  return 1;
1424
0
}
1425
1426
/*
1427
 * Check whether the device to which the pcap_t is bound still exists.
1428
 * We do so by asking what address the socket is bound to, and checking
1429
 * whether the ifindex in the address is -1, meaning "that device is gone",
1430
 * or some other value, meaning "that device still exists".
1431
 */
1432
static int
1433
device_still_exists(pcap_t *handle)
1434
0
{
1435
0
  struct pcap_linux *handlep = handle->priv;
1436
0
  struct sockaddr_ll addr;
1437
0
  socklen_t addr_len;
1438
1439
  /*
1440
   * If handlep->ifindex is -1, the socket isn't bound, meaning
1441
   * we're capturing on the "any" device; that device never
1442
   * disappears.  (It should also never be configured down, so
1443
   * we shouldn't even get here, but let's make sure.)
1444
   */
1445
0
  if (handlep->ifindex == -1)
1446
0
    return (1); /* it's still here */
1447
1448
  /*
1449
   * OK, now try to get the address for the socket.
1450
   */
1451
0
  addr_len = sizeof (addr);
1452
0
  if (getsockname(handle->fd, (struct sockaddr *) &addr, &addr_len) == -1) {
1453
    /*
1454
     * Error - report an error and return -1.
1455
     */
1456
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
1457
0
        errno, "getsockname failed");
1458
0
    return (-1);
1459
0
  }
1460
0
  if (addr.sll_ifindex == -1) {
1461
    /*
1462
     * This means the device went away.
1463
     */
1464
0
    return (0);
1465
0
  }
1466
1467
  /*
1468
   * The device presumably just went down.
1469
   */
1470
0
  return (1);
1471
0
}
1472
1473
static int
1474
pcap_inject_linux(pcap_t *handle, const void *buf, int size)
1475
0
{
1476
0
  struct pcap_linux *handlep = handle->priv;
1477
0
  int ret;
1478
1479
0
  if (handlep->ifindex == -1) {
1480
    /*
1481
     * We don't support sending on the "any" device.
1482
     */
1483
0
    pcapint_strlcpy(handle->errbuf,
1484
0
        "Sending packets isn't supported on the \"any\" device",
1485
0
        PCAP_ERRBUF_SIZE);
1486
0
    return (-1);
1487
0
  }
1488
1489
0
  if (handlep->cooked) {
1490
    /*
1491
     * We don't support sending on cooked-mode sockets.
1492
     *
1493
     * XXX - how do you send on a bound cooked-mode
1494
     * socket?
1495
     * Is a "sendto()" required there?
1496
     */
1497
0
    pcapint_strlcpy(handle->errbuf,
1498
0
        "Sending packets isn't supported in cooked mode",
1499
0
        PCAP_ERRBUF_SIZE);
1500
0
    return (-1);
1501
0
  }
1502
1503
0
  ret = (int)send(handle->fd, buf, size, 0);
1504
0
  if (ret == -1) {
1505
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
1506
0
        errno, "send");
1507
0
    return (-1);
1508
0
  }
1509
0
  return (ret);
1510
0
}
1511
1512
/*
1513
 *  Get the statistics for the given packet capture handle.
1514
 */
1515
static int
1516
pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats)
1517
0
{
1518
0
  struct pcap_linux *handlep = handle->priv;
1519
0
#ifdef HAVE_TPACKET3
1520
  /*
1521
   * For sockets using TPACKET_V2, the extra stuff at the end
1522
   * of a struct tpacket_stats_v3 will not be filled in, and
1523
   * we don't look at it so this is OK even for those sockets.
1524
   * In addition, the PF_PACKET socket code in the kernel only
1525
   * uses the length parameter to compute how much data to
1526
   * copy out and to indicate how much data was copied out, so
1527
   * it's OK to base it on the size of a struct tpacket_stats.
1528
   *
1529
   * XXX - it's probably OK, in fact, to just use a
1530
   * struct tpacket_stats for V3 sockets, as we don't
1531
   * care about the tp_freeze_q_cnt stat.
1532
   */
1533
0
  struct tpacket_stats_v3 kstats;
1534
#else /* HAVE_TPACKET3 */
1535
  struct tpacket_stats kstats;
1536
#endif /* HAVE_TPACKET3 */
1537
0
  socklen_t len = sizeof (struct tpacket_stats);
1538
1539
0
  long long if_dropped = 0;
1540
1541
  /*
1542
   * To fill in ps_ifdrop, we parse
1543
   * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors
1544
   * for the numbers
1545
   */
1546
0
  if (handle->opt.promisc)
1547
0
  {
1548
    /*
1549
     * XXX - is there any reason to do this by remembering
1550
     * the last counts value, subtracting it from the
1551
     * current counts value, and adding that to stat.ps_ifdrop,
1552
     * maintaining stat.ps_ifdrop as a count, rather than just
1553
     * saving the *initial* counts value and setting
1554
     * stat.ps_ifdrop to the difference between the current
1555
     * value and the initial value?
1556
     *
1557
     * One reason might be to handle the count wrapping
1558
     * around, on platforms where the count is 32 bits
1559
     * and where you might get more than 2^32 dropped
1560
     * packets; is there any other reason?
1561
     *
1562
     * (We maintain the count as a long long int so that,
1563
     * if the kernel maintains the counts as 64-bit even
1564
     * on 32-bit platforms, we can handle the real count.
1565
     *
1566
     * Unfortunately, we can't report 64-bit counts; we
1567
     * need a better API for reporting statistics, such as
1568
     * one that reports them in a style similar to the
1569
     * pcapng Interface Statistics Block, so that 1) the
1570
     * counts are 64-bit, 2) it's easier to add new statistics
1571
     * without breaking the ABI, and 3) it's easier to
1572
     * indicate to a caller that wants one particular
1573
     * statistic that it's not available by just not supplying
1574
     * it.)
1575
     */
1576
0
    if_dropped = handlep->sysfs_dropped;
1577
0
    handlep->sysfs_dropped = linux_if_drops(handlep->device);
1578
0
    handlep->stat.ps_ifdrop += (u_int)(handlep->sysfs_dropped - if_dropped);
1579
0
  }
1580
1581
  /*
1582
   * Try to get the packet counts from the kernel.
1583
   */
1584
0
  if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS,
1585
0
      &kstats, &len) > -1) {
1586
    /*
1587
     * "ps_recv" counts only packets that *passed* the
1588
     * filter, not packets that didn't pass the filter.
1589
     * This includes packets later dropped because we
1590
     * ran out of buffer space.
1591
     *
1592
     * "ps_drop" counts packets dropped because we ran
1593
     * out of buffer space.  It doesn't count packets
1594
     * dropped by the interface driver.  It counts only
1595
     * packets that passed the filter.
1596
     *
1597
     * See above for ps_ifdrop.
1598
     *
1599
     * Both statistics include packets not yet read from
1600
     * the kernel by libpcap, and thus not yet seen by
1601
     * the application.
1602
     *
1603
     * In "linux/net/packet/af_packet.c", at least in 2.6.27
1604
     * through 5.6 kernels, "tp_packets" is incremented for
1605
     * every packet that passes the packet filter *and* is
1606
     * successfully copied to the ring buffer; "tp_drops" is
1607
     * incremented for every packet dropped because there's
1608
     * not enough free space in the ring buffer.
1609
     *
1610
     * When the statistics are returned for a PACKET_STATISTICS
1611
     * "getsockopt()" call, "tp_drops" is added to "tp_packets",
1612
     * so that "tp_packets" counts all packets handed to
1613
     * the PF_PACKET socket, including packets dropped because
1614
     * there wasn't room on the socket buffer - but not
1615
     * including packets that didn't pass the filter.
1616
     *
1617
     * In the BSD BPF, the count of received packets is
1618
     * incremented for every packet handed to BPF, regardless
1619
     * of whether it passed the filter.
1620
     *
1621
     * We can't make "pcap_stats()" work the same on both
1622
     * platforms, but the best approximation is to return
1623
     * "tp_packets" as the count of packets and "tp_drops"
1624
     * as the count of drops.
1625
     *
1626
     * Keep a running total because each call to
1627
     *    getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, ....
1628
     * resets the counters to zero.
1629
     */
1630
0
    handlep->stat.ps_recv += kstats.tp_packets;
1631
0
    handlep->stat.ps_drop += kstats.tp_drops;
1632
0
    *stats = handlep->stat;
1633
0
    return 0;
1634
0
  }
1635
1636
0
  pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, errno,
1637
0
      "failed to get statistics from socket");
1638
0
  return -1;
1639
0
}
1640
1641
/*
1642
 * A PF_PACKET socket can be bound to any network interface.
1643
 */
1644
static int
1645
can_be_bound(const char *name _U_)
1646
0
{
1647
0
  return (1);
1648
0
}
1649
1650
/*
1651
 * Get a socket to use with various interface ioctls.
1652
 */
1653
static int
1654
get_if_ioctl_socket(void)
1655
0
{
1656
0
  int fd;
1657
1658
  /*
1659
   * This is a bit ugly.
1660
   *
1661
   * There isn't a socket type that's guaranteed to work.
1662
   *
1663
   * AF_NETLINK will work *if* you have Netlink configured into the
1664
   * kernel (can it be configured out if you have any networking
1665
   * support at all?) *and* if you're running a sufficiently recent
1666
   * kernel, but not all the kernels we support are sufficiently
1667
   * recent - that feature was introduced in Linux 4.6.
1668
   *
1669
   * AF_UNIX will work *if* you have UNIX-domain sockets configured
1670
   * into the kernel and *if* you're not on a system that doesn't
1671
   * allow them - some SELinux systems don't allow you create them.
1672
   * Most systems probably have them configured in, but not all systems
1673
   * have them configured in and allow them to be created.
1674
   *
1675
   * AF_INET will work *if* you have IPv4 configured into the kernel,
1676
   * but, apparently, some systems have network adapters but have
1677
   * kernels without IPv4 support.
1678
   *
1679
   * AF_INET6 will work *if* you have IPv6 configured into the
1680
   * kernel, but if you don't have AF_INET, you might not have
1681
   * AF_INET6, either (that is, independently on its own grounds).
1682
   *
1683
   * AF_PACKET would work, except that some of these calls should
1684
   * work even if you *don't* have capture permission (you should be
1685
   * able to enumerate interfaces and get information about them
1686
   * without capture permission; you shouldn't get a failure until
1687
   * you try pcap_activate()).  (If you don't allow programs to
1688
   * get as much information as possible about interfaces if you
1689
   * don't have permission to capture, you run the risk of users
1690
   * asking "why isn't it showing XXX" - or, worse, if you don't
1691
   * show interfaces *at all* if you don't have permission to
1692
   * capture on them, "why do no interfaces show up?" - when the
1693
   * real problem is a permissions problem.  Error reports of that
1694
   * type require a lot more back-and-forth to debug, as evidenced
1695
   * by many Wireshark bugs/mailing list questions/Q&A questions.)
1696
   *
1697
   * So:
1698
   *
1699
   * we first try an AF_NETLINK socket, where "try" includes
1700
   * "try to do a device ioctl on it", as, in the future, once
1701
   * pre-4.6 kernels are sufficiently rare, that will probably
1702
   * be the mechanism most likely to work;
1703
   *
1704
   * if that fails, we try an AF_UNIX socket, as that's less
1705
   * likely to be configured out on a networking-capable system
1706
   * than is IP;
1707
   *
1708
   * if that fails, we try an AF_INET6 socket;
1709
   *
1710
   * if that fails, we try an AF_INET socket.
1711
   */
1712
0
  fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
1713
0
  if (fd != -1) {
1714
    /*
1715
     * OK, let's make sure we can do an SIOCGIFNAME
1716
     * ioctl.
1717
     */
1718
0
    struct ifreq ifr;
1719
1720
0
    memset(&ifr, 0, sizeof(ifr));
1721
0
    if (ioctl(fd, SIOCGIFNAME, &ifr) == 0 ||
1722
0
        errno != EOPNOTSUPP) {
1723
      /*
1724
       * It succeeded, or failed for some reason
1725
       * other than "netlink sockets don't support
1726
       * device ioctls".  Go with the AF_NETLINK
1727
       * socket.
1728
       */
1729
0
      return (fd);
1730
0
    }
1731
1732
    /*
1733
     * OK, that didn't work, so it's as bad as "netlink
1734
     * sockets aren't available".  Close the socket and
1735
     * drive on.
1736
     */
1737
0
    close(fd);
1738
0
  }
1739
1740
  /*
1741
   * Now try an AF_UNIX socket.
1742
   */
1743
0
  fd = socket(AF_UNIX, SOCK_RAW, 0);
1744
0
  if (fd != -1) {
1745
    /*
1746
     * OK, we got it!
1747
     */
1748
0
    return (fd);
1749
0
  }
1750
1751
  /*
1752
   * Now try an AF_INET6 socket.
1753
   */
1754
0
  fd = socket(AF_INET6, SOCK_DGRAM, 0);
1755
0
  if (fd != -1) {
1756
0
    return (fd);
1757
0
  }
1758
1759
  /*
1760
   * Now try an AF_INET socket.
1761
   *
1762
   * XXX - if that fails, is there anything else we should try?
1763
   * AF_CAN, for embedded systems in vehicles, in case they're
1764
   * built without Internet protocol support?  Any other socket
1765
   * types popular in non-Internet embedded systems?
1766
   */
1767
0
  return (socket(AF_INET, SOCK_DGRAM, 0));
1768
0
}
1769
1770
/*
1771
 * Get additional flags for a device, using SIOCETHTOOL.
1772
 */
1773
static int
1774
get_if_flags(const char *name, bpf_u_int32 *flags, char *errbuf)
1775
0
{
1776
0
  int sock;
1777
0
  FILE *fh;
1778
0
  unsigned int arptype = ARPHRD_VOID;
1779
0
  struct ifreq ifr;
1780
0
  struct ethtool_value info;
1781
1782
0
  if (*flags & PCAP_IF_LOOPBACK) {
1783
    /*
1784
     * Loopback devices aren't wireless, and "connected"/
1785
     * "disconnected" doesn't apply to them.
1786
     */
1787
0
    *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE;
1788
0
    return 0;
1789
0
  }
1790
1791
0
  sock = get_if_ioctl_socket();
1792
0
  if (sock == -1) {
1793
0
    pcapint_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, errno,
1794
0
        "Can't create socket to get ethtool information for %s",
1795
0
        name);
1796
0
    return -1;
1797
0
  }
1798
1799
  /*
1800
   * OK, what type of network is this?
1801
   * In particular, is it wired or wireless?
1802
   */
1803
0
  if (is_wifi(name)) {
1804
    /*
1805
     * Wi-Fi, hence wireless.
1806
     */
1807
0
    *flags |= PCAP_IF_WIRELESS;
1808
0
  } else {
1809
    /*
1810
     * OK, what does /sys/class/net/{if_name}/type contain?
1811
     * (We don't use that for Wi-Fi, as it'll report
1812
     * "Ethernet", i.e. ARPHRD_ETHER, for non-monitor-
1813
     * mode devices.)
1814
     */
1815
0
    char *pathstr;
1816
1817
0
    if (asprintf(&pathstr, "/sys/class/net/%s/type", name) == -1) {
1818
0
      snprintf(errbuf, PCAP_ERRBUF_SIZE,
1819
0
          "%s: Can't generate path name string for /sys/class/net device",
1820
0
          name);
1821
0
      close(sock);
1822
0
      return -1;
1823
0
    }
1824
0
    fh = fopen(pathstr, "r");
1825
0
    if (fh != NULL) {
1826
0
      if (fscanf(fh, "%u", &arptype) == 1) {
1827
        /*
1828
         * OK, we got an ARPHRD_ type; what is it?
1829
         */
1830
0
        switch (arptype) {
1831
1832
0
        case ARPHRD_LOOPBACK:
1833
          /*
1834
           * These are types to which
1835
           * "connected" and "disconnected"
1836
           * don't apply, so don't bother
1837
           * asking about it.
1838
           *
1839
           * XXX - add other types?
1840
           */
1841
0
          close(sock);
1842
0
          fclose(fh);
1843
0
          free(pathstr);
1844
0
          return 0;
1845
1846
0
        case ARPHRD_IRDA:
1847
0
        case ARPHRD_IEEE80211:
1848
0
        case ARPHRD_IEEE80211_PRISM:
1849
0
        case ARPHRD_IEEE80211_RADIOTAP:
1850
0
        case ARPHRD_IEEE802154:
1851
0
        case ARPHRD_IEEE802154_MONITOR:
1852
0
        case ARPHRD_6LOWPAN:
1853
          /*
1854
           * Various wireless types.
1855
           */
1856
0
          *flags |= PCAP_IF_WIRELESS;
1857
0
          break;
1858
0
        }
1859
0
      }
1860
0
      fclose(fh);
1861
0
    }
1862
0
    free(pathstr);
1863
0
  }
1864
1865
0
#ifdef ETHTOOL_GLINK
1866
0
  memset(&ifr, 0, sizeof(ifr));
1867
0
  pcapint_strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
1868
0
  info.cmd = ETHTOOL_GLINK;
1869
  /*
1870
   * XXX - while Valgrind handles SIOCETHTOOL and knows that
1871
   * the ETHTOOL_GLINK command sets the .data member of the
1872
   * structure, Memory Sanitizer doesn't yet do so:
1873
   *
1874
   *    https://bugs.llvm.org/show_bug.cgi?id=45814
1875
   *
1876
   * For now, we zero it out to squelch warnings; if the bug
1877
   * in question is fixed, we can remove this.
1878
   */
1879
0
  info.data = 0;
1880
0
  ifr.ifr_data = (caddr_t)&info;
1881
0
  if (ioctl(sock, SIOCETHTOOL, &ifr) == -1) {
1882
0
    int save_errno = errno;
1883
1884
0
    switch (save_errno) {
1885
1886
0
    case EOPNOTSUPP:
1887
0
    case EINVAL:
1888
      /*
1889
       * OK, this OS version or driver doesn't support
1890
       * asking for this information.
1891
       * XXX - distinguish between "this doesn't
1892
       * support ethtool at all because it's not
1893
       * that type of device" vs. "this doesn't
1894
       * support ethtool even though it's that
1895
       * type of device", and return "unknown".
1896
       */
1897
0
      *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE;
1898
0
      close(sock);
1899
0
      return 0;
1900
1901
0
    case ENODEV:
1902
      /*
1903
       * OK, no such device.
1904
       * The user will find that out when they try to
1905
       * activate the device; just say "OK" and
1906
       * don't set anything.
1907
       */
1908
0
      close(sock);
1909
0
      return 0;
1910
1911
0
    case EPERM:
1912
      /*
1913
       * OK, this version of the kernel requires
1914
       * CAP_NET_ADMIN privileges for this, and
1915
       * we don't have those privileges.
1916
       *
1917
       * Just leave it as PCAP_IF_CONNECTION_STATUS_UNKNOWN,
1918
       * as we can't determine the status; just say "OK" and
1919
       * don't set anything.
1920
       */
1921
0
      close(sock);
1922
0
      return 0;
1923
1924
0
    default:
1925
      /*
1926
       * Other error.
1927
       */
1928
0
      pcapint_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE,
1929
0
          save_errno,
1930
0
          "%s: SIOCETHTOOL(ETHTOOL_GLINK) ioctl failed",
1931
0
          name);
1932
0
      close(sock);
1933
0
      return -1;
1934
0
    }
1935
0
  }
1936
1937
  /*
1938
   * Is it connected?
1939
   */
1940
0
  if (info.data) {
1941
    /*
1942
     * It's connected.
1943
     */
1944
0
    *flags |= PCAP_IF_CONNECTION_STATUS_CONNECTED;
1945
0
  } else {
1946
    /*
1947
     * It's disconnected.
1948
     */
1949
0
    *flags |= PCAP_IF_CONNECTION_STATUS_DISCONNECTED;
1950
0
  }
1951
0
#endif
1952
1953
0
  close(sock);
1954
1955
#ifdef HAVE_SNF_API
1956
  // For "down" SNF devices the SNF API makes the flags more relevant.
1957
  if (arptype == ARPHRD_ETHER &&
1958
      ! (*flags & PCAP_IF_UP) &&
1959
      snf_get_if_flags(name, flags, errbuf) < 0)
1960
    return PCAP_ERROR;
1961
#endif // HAVE_SNF_API
1962
1963
0
  return 0;
1964
0
}
1965
1966
int
1967
pcapint_platform_finddevs(pcap_if_list_t *devlistp, char *errbuf)
1968
0
{
1969
  /*
1970
   * Get the list of regular interfaces first.
1971
   */
1972
0
  if (pcapint_findalldevs_interfaces(devlistp, errbuf, can_be_bound,
1973
0
      get_if_flags) == -1)
1974
0
    return (-1); /* failure */
1975
1976
  /*
1977
   * Add the "any" device.
1978
   */
1979
0
  if (pcapint_add_any_dev(devlistp, errbuf) == NULL)
1980
0
    return (-1);
1981
1982
0
  return (0);
1983
0
}
1984
1985
/*
1986
 * Set direction flag: Which packets do we accept on a forwarding
1987
 * single device? IN, OUT or both?
1988
 */
1989
static int
1990
pcap_setdirection_linux(pcap_t *handle, pcap_direction_t d)
1991
0
{
1992
  /*
1993
   * It's guaranteed, at this point, that d is a valid
1994
   * direction value.
1995
   */
1996
0
  handle->direction = d;
1997
0
  return 0;
1998
0
}
1999
2000
/*
 * Report whether "device" has a sysfs "wireless" directory, i.e.
 * whether /sys/class/net/{device}/wireless exists.
 *
 * Returns 1 if it exists, and 0 if it does not or if the path string
 * could not be built.
 */
static int
is_wifi(const char *device)
{
  struct stat sb;
  char *wireless_dir;
  int has_wireless_dir;

  if (asprintf(&wireless_dir, "/sys/class/net/%s/wireless", device) == -1) {
    /*
     * Couldn't build the path; give up and treat the
     * device as not being Wi-Fi.
     */
    return 0;
  }

  /*
   * The device counts as wireless exactly when the directory can
   * be stat()ed.
   */
  has_wireless_dir = (stat(wireless_dir, &sb) == 0) ? 1 : 0;
  free(wireless_dir);

  return has_wireless_dir;
}
2024
2025
/*
2026
 *  Linux uses the ARP hardware type to identify the type of an
2027
 *  interface. pcap uses the DLT_xxx constants for this. This
2028
 *  function takes a pointer to a "pcap_t", and an ARPHRD_xxx
2029
 *  constant, as arguments, and sets "handle->linktype" to the
2030
 *  appropriate DLT_XXX constant and sets "handle->offset" to
2031
 *  the appropriate value (to make "handle->offset" plus link-layer
2032
 *  header length be a multiple of 4, so that the link-layer payload
2033
 *  will be aligned on a 4-byte boundary when capturing packets).
2034
 *  (If the offset isn't set here, it'll be 0; add code as appropriate
2035
 *  for cases where it shouldn't be 0.)
2036
 *
2037
 *  If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture
2038
 *  in cooked mode; otherwise, we can't use cooked mode, so we have
2039
 *  to pick some type that works in raw mode, or fail.
2040
 *
2041
 *  Sets the link type to -1 if unable to map the type.
2042
 *
2043
 *  Returns 0 on success or a PCAP_ERROR_ value on error.
2044
 */
2045
static int map_arphrd_to_dlt(pcap_t *handle, int arptype,
2046
           const char *device, int cooked_ok)
2047
0
{
2048
0
  static const char cdma_rmnet[] = "cdma_rmnet";
2049
2050
0
  switch (arptype) {
2051
2052
0
  case ARPHRD_ETHER:
2053
    /*
2054
     * For various annoying reasons having to do with DHCP
2055
     * software, some versions of Android give the mobile-
2056
     * phone-network interface an ARPHRD_ value of
2057
     * ARPHRD_ETHER, even though the packets supplied by
2058
     * that interface have no link-layer header, and begin
2059
     * with an IP header, so that the ARPHRD_ value should
2060
     * be ARPHRD_NONE.
2061
     *
2062
     * Detect those devices by checking the device name, and
2063
     * use DLT_RAW for them.
2064
     */
2065
0
    if (strncmp(device, cdma_rmnet, sizeof cdma_rmnet - 1) == 0) {
2066
0
      handle->linktype = DLT_RAW;
2067
0
      return 0;
2068
0
    }
2069
2070
    /*
2071
     * Is this a real Ethernet device?  If so, give it a
2072
     * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so
2073
     * that an application can let you choose it, in case you're
2074
     * capturing DOCSIS traffic that a Cisco Cable Modem
2075
     * Termination System is putting out onto an Ethernet (it
2076
     * doesn't put an Ethernet header onto the wire, it puts raw
2077
     * DOCSIS frames out on the wire inside the low-level
2078
     * Ethernet framing).
2079
     *
2080
     * XXX - are there any other sorts of "fake Ethernet" that
2081
     * have ARPHRD_ETHER but that shouldn't offer DLT_DOCSIS as
2082
     * a Cisco CMTS won't put traffic onto it or get traffic
2083
     * bridged onto it?  ISDN is handled in "setup_socket()",
2084
     * as we fall back on cooked mode there, and we use
2085
     * is_wifi() to check for 802.11 devices; are there any
2086
     * others?
2087
     */
2088
0
    if (!is_wifi(device)) {
2089
0
      int ret;
2090
2091
      /*
2092
       * This is not a Wi-Fi device but it could be
2093
       * a DSA master/management network device.
2094
       */
2095
0
      ret = iface_dsa_get_proto_info(device, handle);
2096
0
      if (ret < 0)
2097
0
        return ret;
2098
2099
0
      if (ret == 1) {
2100
        /*
2101
         * This is a DSA master/management network
2102
         * device, linktype is already set by
2103
         * iface_dsa_get_proto_info(), set an
2104
         * appropriate offset here.
2105
         */
2106
0
        handle->offset = 2;
2107
0
        break;
2108
0
      }
2109
2110
      /*
2111
       * It's not a Wi-Fi device; offer DOCSIS.
2112
       */
2113
0
      handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
2114
0
      if (handle->dlt_list == NULL) {
2115
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
2116
0
            PCAP_ERRBUF_SIZE, errno, "malloc");
2117
0
        return (PCAP_ERROR);
2118
0
      }
2119
0
      handle->dlt_list[0] = DLT_EN10MB;
2120
0
      handle->dlt_list[1] = DLT_DOCSIS;
2121
0
      handle->dlt_count = 2;
2122
0
    }
2123
    /* FALLTHROUGH */
2124
2125
0
  case ARPHRD_METRICOM:
2126
0
  case ARPHRD_LOOPBACK:
2127
0
    handle->linktype = DLT_EN10MB;
2128
0
    handle->offset = 2;
2129
0
    break;
2130
2131
0
  case ARPHRD_EETHER:
2132
0
    handle->linktype = DLT_EN3MB;
2133
0
    break;
2134
2135
0
  case ARPHRD_AX25:
2136
0
    handle->linktype = DLT_AX25_KISS;
2137
0
    break;
2138
2139
0
  case ARPHRD_PRONET:
2140
0
    handle->linktype = DLT_PRONET;
2141
0
    break;
2142
2143
0
  case ARPHRD_CHAOS:
2144
0
    handle->linktype = DLT_CHAOS;
2145
0
    break;
2146
2147
0
  case ARPHRD_CAN:
2148
0
    handle->linktype = DLT_CAN_SOCKETCAN;
2149
0
    break;
2150
2151
0
  case ARPHRD_IEEE802_TR:
2152
0
  case ARPHRD_IEEE802:
2153
0
    handle->linktype = DLT_IEEE802;
2154
0
    handle->offset = 2;
2155
0
    break;
2156
2157
0
  case ARPHRD_ARCNET:
2158
0
    handle->linktype = DLT_ARCNET_LINUX;
2159
0
    break;
2160
2161
0
  case ARPHRD_FDDI:
2162
0
    handle->linktype = DLT_FDDI;
2163
0
    handle->offset = 3;
2164
0
    break;
2165
2166
0
  case ARPHRD_ATM:
2167
    /*
2168
     * The Classical IP implementation in ATM for Linux
2169
     * supports both what RFC 1483 calls "LLC Encapsulation",
2170
     * in which each packet has an LLC header, possibly
2171
     * with a SNAP header as well, prepended to it, and
2172
     * what RFC 1483 calls "VC Based Multiplexing", in which
2173
     * different virtual circuits carry different network
2174
     * layer protocols, and no header is prepended to packets.
2175
     *
2176
     * They both have an ARPHRD_ type of ARPHRD_ATM, so
2177
     * you can't use the ARPHRD_ type to find out whether
2178
     * captured packets will have an LLC header, and,
2179
     * while there's a socket ioctl to *set* the encapsulation
2180
     * type, there's no ioctl to *get* the encapsulation type.
2181
     *
2182
     * This means that
2183
     *
2184
     *  programs that dissect Linux Classical IP frames
2185
     *  would have to check for an LLC header and,
2186
     *  depending on whether they see one or not, dissect
2187
     *  the frame as LLC-encapsulated or as raw IP (I
2188
     *  don't know whether there's any traffic other than
2189
     *  IP that would show up on the socket, or whether
2190
     *  there's any support for IPv6 in the Linux
2191
     *  Classical IP code);
2192
     *
2193
     *  filter expressions would have to compile into
2194
     *  code that checks for an LLC header and does
2195
     *  the right thing.
2196
     *
2197
     * Both of those are a nuisance - and, at least on systems
2198
     * that support PF_PACKET sockets, we don't have to put
2199
     * up with those nuisances; instead, we can just capture
2200
     * in cooked mode.  That's what we'll do, if we can.
2201
     * Otherwise, we'll just fail.
2202
     */
2203
0
    if (cooked_ok)
2204
0
      handle->linktype = DLT_LINUX_SLL;
2205
0
    else
2206
0
      handle->linktype = -1;
2207
0
    break;
2208
2209
0
  case ARPHRD_IEEE80211:
2210
0
    handle->linktype = DLT_IEEE802_11;
2211
0
    break;
2212
2213
0
  case ARPHRD_IEEE80211_PRISM:
2214
0
    handle->linktype = DLT_PRISM_HEADER;
2215
0
    break;
2216
2217
0
  case ARPHRD_IEEE80211_RADIOTAP:
2218
0
    handle->linktype = DLT_IEEE802_11_RADIO;
2219
0
    break;
2220
2221
0
  case ARPHRD_PPP:
2222
    /*
2223
     * Some PPP code in the kernel supplies no link-layer
2224
     * header whatsoever to PF_PACKET sockets; other PPP
2225
     * code supplies PPP link-layer headers ("syncppp.c");
2226
     * some PPP code might supply random link-layer
2227
     * headers (PPP over ISDN - there's code in Wireshark,
2228
     * for example, to cope with PPP-over-ISDN captures
2229
     * with which the Wireshark developers have had to cope,
2230
     * heuristically trying to determine which of the
2231
     * oddball link-layer headers particular packets have).
2232
     *
2233
     * As such, we just punt, and run all PPP interfaces
2234
     * in cooked mode, if we can; otherwise, we just treat
2235
     * it as DLT_RAW, for now - if somebody needs to capture,
2236
     * on a 2.0[.x] kernel, on PPP devices that supply a
2237
     * link-layer header, they'll have to add code here to
2238
     * map to the appropriate DLT_ type (possibly adding a
2239
     * new DLT_ type, if necessary).
2240
     */
2241
0
    if (cooked_ok)
2242
0
      handle->linktype = DLT_LINUX_SLL;
2243
0
    else {
2244
      /*
2245
       * XXX - handle ISDN types here?  We can't fall
2246
       * back on cooked sockets, so we'd have to
2247
       * figure out from the device name what type of
2248
       * link-layer encapsulation it's using, and map
2249
       * that to an appropriate DLT_ value, meaning
2250
       * we'd map "isdnN" devices to DLT_RAW (they
2251
       * supply raw IP packets with no link-layer
2252
       * header) and "isdY" devices to a new DLT_I4L_IP
2253
       * type that has only an Ethernet packet type as
2254
       * a link-layer header.
2255
       *
2256
       * But sometimes we seem to get random crap
2257
       * in the link-layer header when capturing on
2258
       * ISDN devices....
2259
       */
2260
0
      handle->linktype = DLT_RAW;
2261
0
    }
2262
0
    break;
2263
2264
0
  case ARPHRD_CISCO:
2265
0
    handle->linktype = DLT_C_HDLC;
2266
0
    break;
2267
2268
  /* Not sure if this is correct for all tunnels, but it
2269
   * works for CIPE */
2270
0
  case ARPHRD_TUNNEL:
2271
0
  case ARPHRD_SIT:
2272
0
  case ARPHRD_CSLIP:
2273
0
  case ARPHRD_SLIP6:
2274
0
  case ARPHRD_CSLIP6:
2275
0
  case ARPHRD_ADAPT:
2276
0
  case ARPHRD_SLIP:
2277
0
  case ARPHRD_RAWHDLC:
2278
0
  case ARPHRD_DLCI:
2279
    /*
2280
     * XXX - should some of those be mapped to DLT_LINUX_SLL
2281
     * instead?  Should we just map all of them to DLT_LINUX_SLL?
2282
     */
2283
0
    handle->linktype = DLT_RAW;
2284
0
    break;
2285
2286
0
  case ARPHRD_FRAD:
2287
0
    handle->linktype = DLT_FRELAY;
2288
0
    break;
2289
2290
0
  case ARPHRD_LOCALTLK:
2291
0
    handle->linktype = DLT_LTALK;
2292
0
    break;
2293
2294
0
  case 18:
2295
    /*
2296
     * RFC 4338 defines an encapsulation for IP and ARP
2297
     * packets that's compatible with the RFC 2625
2298
     * encapsulation, but that uses a different ARP
2299
     * hardware type and hardware addresses.  That
2300
     * ARP hardware type is 18; Linux doesn't define
2301
     * any ARPHRD_ value as 18, but if it ever officially
2302
     * supports RFC 4338-style IP-over-FC, it should define
2303
     * one.
2304
     *
2305
     * For now, we map it to DLT_IP_OVER_FC, in the hopes
2306
     * that this will encourage its use in the future,
2307
     * should Linux ever officially support RFC 4338-style
2308
     * IP-over-FC.
2309
     */
2310
0
    handle->linktype = DLT_IP_OVER_FC;
2311
0
    break;
2312
2313
0
  case ARPHRD_FCPP:
2314
0
  case ARPHRD_FCAL:
2315
0
  case ARPHRD_FCPL:
2316
0
  case ARPHRD_FCFABRIC:
2317
    /*
2318
     * Back in 2002, Donald Lee at Cray wanted a DLT_ for
2319
     * IP-over-FC:
2320
     *
2321
     *  https://www.mail-archive.com/tcpdump-workers@sandelman.ottawa.on.ca/msg01043.html
2322
     *
2323
     * and one was assigned.
2324
     *
2325
     * In a later private discussion (spun off from a message
2326
     * on the ethereal-users list) on how to get that DLT_
2327
     * value in libpcap on Linux, I ended up deciding that
2328
     * the best thing to do would be to have him tweak the
2329
     * driver to set the ARPHRD_ value to some ARPHRD_FCxx
2330
     * type, and map all those types to DLT_IP_OVER_FC:
2331
     *
2332
     *  I've checked into the libpcap and tcpdump CVS tree
2333
     *  support for DLT_IP_OVER_FC.  In order to use that,
2334
     *  you'd have to modify your modified driver to return
2335
     *  one of the ARPHRD_FCxxx types, in "fcLINUXfcp.c" -
2336
     *  change it to set "dev->type" to ARPHRD_FCFABRIC, for
2337
     *  example (the exact value doesn't matter, it can be
2338
     *  any of ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, or
2339
     *  ARPHRD_FCFABRIC).
2340
     *
2341
     * 11 years later, Christian Svensson wanted to map
2342
     * various ARPHRD_ values to DLT_FC_2 and
2343
     * DLT_FC_2_WITH_FRAME_DELIMS for raw Fibre Channel
2344
     * frames:
2345
     *
2346
     *  https://github.com/mcr/libpcap/pull/29
2347
     *
2348
     * There don't seem to be any network drivers that use
2349
     * any of the ARPHRD_FC* values for IP-over-FC, and
2350
     * it's not exactly clear what the "Dummy types for non
2351
     * ARP hardware" are supposed to mean (link-layer
2352
     * header type?  Physical network type?), so it's
2353
     * not exactly clear why the ARPHRD_FC* types exist
2354
     * in the first place.
2355
     *
2356
     * For now, we map them to DLT_FC_2, and provide an
2357
     * option of DLT_FC_2_WITH_FRAME_DELIMS, as well as
2358
     * DLT_IP_OVER_FC just in case there's some old
2359
     * driver out there that uses one of those types for
2360
     * IP-over-FC on which somebody wants to capture
2361
     * packets.
2362
     */
2363
0
    handle->linktype = DLT_FC_2;
2364
0
    handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 3);
2365
0
    if (handle->dlt_list == NULL) {
2366
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
2367
0
          PCAP_ERRBUF_SIZE, errno, "malloc");
2368
0
      return (PCAP_ERROR);
2369
0
    }
2370
0
    handle->dlt_list[0] = DLT_FC_2;
2371
0
    handle->dlt_list[1] = DLT_FC_2_WITH_FRAME_DELIMS;
2372
0
    handle->dlt_list[2] = DLT_IP_OVER_FC;
2373
0
    handle->dlt_count = 3;
2374
0
    break;
2375
2376
0
  case ARPHRD_IRDA:
2377
    /* Don't expect IP packet out of this interfaces... */
2378
0
    handle->linktype = DLT_LINUX_IRDA;
2379
    /* We need to save packet direction for IrDA decoding,
2380
     * so let's use "Linux-cooked" mode. Jean II
2381
     *
2382
     * XXX - this is handled in setup_socket(). */
2383
    /* handlep->cooked = 1; */
2384
0
    break;
2385
2386
0
  case ARPHRD_LAPD:
2387
    /* Don't expect IP packet out of this interfaces... */
2388
0
    handle->linktype = DLT_LINUX_LAPD;
2389
0
    break;
2390
2391
0
  case ARPHRD_NONE:
2392
    /*
2393
     * No link-layer header; packets are just IP
2394
     * packets, so use DLT_RAW.
2395
     */
2396
0
    handle->linktype = DLT_RAW;
2397
0
    break;
2398
2399
0
  case ARPHRD_IEEE802154:
2400
0
    handle->linktype = DLT_IEEE802_15_4_NOFCS;
2401
0
    break;
2402
2403
0
  case ARPHRD_NETLINK:
2404
0
    handle->linktype = DLT_NETLINK;
2405
    /*
2406
     * We need to use cooked mode, so that in sll_protocol we
2407
     * pick up the netlink protocol type such as NETLINK_ROUTE,
2408
     * NETLINK_GENERIC, NETLINK_FIB_LOOKUP, etc.
2409
     *
2410
     * XXX - this is handled in setup_socket().
2411
     */
2412
    /* handlep->cooked = 1; */
2413
0
    break;
2414
2415
0
  case ARPHRD_VSOCKMON:
2416
0
    handle->linktype = DLT_VSOCK;
2417
0
    break;
2418
2419
0
  default:
2420
0
    handle->linktype = -1;
2421
0
    break;
2422
0
  }
2423
0
  return (0);
2424
0
}
2425
2426
/*
2427
 * Try to set up a PF_PACKET socket.
2428
 * Returns 0 or a PCAP_WARNING_ value on success and a PCAP_ERROR_ value
2429
 * on failure.
2430
 */
2431
static int
2432
setup_socket(pcap_t *handle, int is_any_device)
2433
0
{
2434
0
  struct pcap_linux *handlep = handle->priv;
2435
0
  const char    *device = handle->opt.device;
2436
0
  int     status = 0;
2437
0
  int     sock_fd, arptype;
2438
0
  int     val;
2439
0
  int     err = 0;
2440
0
  struct packet_mreq  mr;
2441
2442
  /*
2443
   * Open a socket with protocol family packet. If cooked is true,
2444
   * we open a SOCK_DGRAM socket for the cooked interface, otherwise
2445
   * we open a SOCK_RAW socket for the raw interface.
2446
   *
2447
   * The protocol is set to 0.  This means we will receive no
2448
   * packets until we "bind" the socket with a non-zero
2449
   * protocol.  This allows us to setup the ring buffers without
2450
   * dropping any packets.
2451
   */
2452
0
  sock_fd = is_any_device ?
2453
0
    socket(PF_PACKET, SOCK_DGRAM, 0) :
2454
0
    socket(PF_PACKET, SOCK_RAW, 0);
2455
2456
0
  if (sock_fd == -1) {
2457
0
    if (errno == EPERM || errno == EACCES) {
2458
      /*
2459
       * You don't have permission to open the
2460
       * socket.
2461
       */
2462
0
      status = PCAP_ERROR_PERM_DENIED;
2463
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2464
0
          "Attempt to create packet socket failed - CAP_NET_RAW may be required");
2465
0
    } else if (errno == EAFNOSUPPORT) {
2466
      /*
2467
       * PF_PACKET sockets not supported.
2468
       * Perhaps we're running on the WSL1 module
2469
       * in the Windows NT kernel rather than on
2470
       * a real Linux kernel.
2471
       */
2472
0
      status = PCAP_ERROR_CAPTURE_NOTSUP;
2473
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2474
0
          "PF_PACKET sockets not supported - is this WSL1?");
2475
0
    } else {
2476
      /*
2477
       * Other error.
2478
       */
2479
0
      status = PCAP_ERROR;
2480
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
2481
0
          PCAP_ERRBUF_SIZE, errno, "socket");
2482
0
    }
2483
0
    return status;
2484
0
  }
2485
2486
  /*
2487
   * Get the interface index of the loopback device.
2488
   * If the attempt fails, don't fail, just set the
2489
   * "handlep->lo_ifindex" to -1.
2490
   *
2491
   * XXX - can there be more than one device that loops
2492
   * packets back, i.e. devices other than "lo"?  If so,
2493
   * we'd need to find them all, and have an array of
2494
   * indices for them, and check all of them in
2495
   * "pcap_read_packet()".
2496
   */
2497
0
  handlep->lo_ifindex = iface_get_id(sock_fd, "lo", handle->errbuf);
2498
2499
  /*
2500
   * Default value for offset to align link-layer payload
2501
   * on a 4-byte boundary.
2502
   */
2503
0
  handle->offset   = 0;
2504
2505
  /*
2506
   * What kind of frames do we have to deal with? Fall back
2507
   * to cooked mode if we have an unknown interface type
2508
   * or a type we know doesn't work well in raw mode.
2509
   */
2510
0
  if (!is_any_device) {
2511
    /* Assume for now we don't need cooked mode. */
2512
0
    handlep->cooked = 0;
2513
2514
0
    if (handle->opt.rfmon) {
2515
      /*
2516
       * We were asked to turn on monitor mode.
2517
       * Do so before we get the link-layer type,
2518
       * because entering monitor mode could change
2519
       * the link-layer type.
2520
       */
2521
0
      err = enter_rfmon_mode(handle, sock_fd, device);
2522
0
      if (err < 0) {
2523
        /* Hard failure */
2524
0
        close(sock_fd);
2525
0
        return err;
2526
0
      }
2527
0
      if (err == 0) {
2528
        /*
2529
         * Nothing worked for turning monitor mode
2530
         * on.
2531
         */
2532
0
        close(sock_fd);
2533
2534
0
        return PCAP_ERROR_RFMON_NOTSUP;
2535
0
      }
2536
2537
      /*
2538
       * Either monitor mode has been turned on for
2539
       * the device, or we've been given a different
2540
       * device to open for monitor mode.  If we've
2541
       * been given a different device, use it.
2542
       */
2543
0
      if (handlep->mondevice != NULL)
2544
0
        device = handlep->mondevice;
2545
0
    }
2546
0
    arptype = iface_get_arptype(sock_fd, device, handle->errbuf);
2547
0
    if (arptype < 0) {
2548
0
      close(sock_fd);
2549
0
      return arptype;
2550
0
    }
2551
0
    status = map_arphrd_to_dlt(handle, arptype, device, 1);
2552
0
    if (status < 0) {
2553
0
      close(sock_fd);
2554
0
      return status;
2555
0
    }
2556
0
    if (handle->linktype == -1 ||
2557
0
        handle->linktype == DLT_LINUX_SLL ||
2558
0
        handle->linktype == DLT_LINUX_IRDA ||
2559
0
        handle->linktype == DLT_LINUX_LAPD ||
2560
0
        handle->linktype == DLT_NETLINK ||
2561
0
        (handle->linktype == DLT_EN10MB &&
2562
0
         (strncmp("isdn", device, 4) == 0 ||
2563
0
          strncmp("isdY", device, 4) == 0))) {
2564
      /*
2565
       * Unknown interface type (-1), or a
2566
       * device we explicitly chose to run
2567
       * in cooked mode (e.g., PPP devices),
2568
       * or an ISDN device (whose link-layer
2569
       * type we can only determine by using
2570
       * APIs that may be different on different
2571
       * kernels) - reopen in cooked mode.
2572
       *
2573
       * If the type is unknown, return a warning;
2574
       * map_arphrd_to_dlt() has already set the
2575
       * warning message.
2576
       */
2577
0
      if (close(sock_fd) == -1) {
2578
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
2579
0
            PCAP_ERRBUF_SIZE, errno, "close");
2580
0
        return PCAP_ERROR;
2581
0
      }
2582
0
      sock_fd = socket(PF_PACKET, SOCK_DGRAM, 0);
2583
0
      if (sock_fd < 0) {
2584
        /*
2585
         * Fatal error.  We treat this as
2586
         * a generic error; we already know
2587
         * that we were able to open a
2588
         * PF_PACKET/SOCK_RAW socket, so
2589
         * any failure is a "this shouldn't
2590
         * happen" case.
2591
         */
2592
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
2593
0
            PCAP_ERRBUF_SIZE, errno, "socket");
2594
0
        return PCAP_ERROR;
2595
0
      }
2596
0
      handlep->cooked = 1;
2597
2598
      /*
2599
       * Get rid of any link-layer type list
2600
       * we allocated - this only supports cooked
2601
       * capture.
2602
       */
2603
0
      if (handle->dlt_list != NULL) {
2604
0
        free(handle->dlt_list);
2605
0
        handle->dlt_list = NULL;
2606
0
        handle->dlt_count = 0;
2607
0
      }
2608
2609
0
      if (handle->linktype == -1) {
2610
        /*
2611
         * Warn that we're falling back on
2612
         * cooked mode; we may want to
2613
         * update "map_arphrd_to_dlt()"
2614
         * to handle the new type.
2615
         */
2616
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2617
0
          "arptype %d not "
2618
0
          "supported by libpcap - "
2619
0
          "falling back to cooked "
2620
0
          "socket",
2621
0
          arptype);
2622
0
        status = PCAP_WARNING;
2623
0
      }
2624
2625
      /*
2626
       * IrDA capture is not a real "cooked" capture,
2627
       * it's IrLAP frames, not IP packets.  The
2628
       * same applies to LAPD capture.
2629
       */
2630
0
      if (handle->linktype != DLT_LINUX_IRDA &&
2631
0
          handle->linktype != DLT_LINUX_LAPD &&
2632
0
          handle->linktype != DLT_NETLINK)
2633
0
        handle->linktype = DLT_LINUX_SLL;
2634
0
    }
2635
2636
0
    handlep->ifindex = iface_get_id(sock_fd, device,
2637
0
        handle->errbuf);
2638
0
    if (handlep->ifindex == -1) {
2639
0
      close(sock_fd);
2640
0
      return PCAP_ERROR;
2641
0
    }
2642
2643
0
    if ((err = iface_bind(sock_fd, handlep->ifindex,
2644
0
        handle->errbuf, 0)) != 0) {
2645
0
      close(sock_fd);
2646
0
      return err;
2647
0
    }
2648
0
  } else {
2649
    /*
2650
     * The "any" device.
2651
     */
2652
0
    if (handle->opt.rfmon) {
2653
      /*
2654
       * It doesn't support monitor mode.
2655
       */
2656
0
      close(sock_fd);
2657
0
      return PCAP_ERROR_RFMON_NOTSUP;
2658
0
    }
2659
2660
    /*
2661
     * It uses cooked mode.
2662
     * Support both DLT_LINUX_SLL and DLT_LINUX_SLL2.
2663
     */
2664
0
    handlep->cooked = 1;
2665
0
    handle->linktype = DLT_LINUX_SLL;
2666
0
    handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
2667
0
    if (handle->dlt_list == NULL) {
2668
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
2669
0
          PCAP_ERRBUF_SIZE, errno, "malloc");
2670
0
      close(sock_fd);
2671
0
      return (PCAP_ERROR);
2672
0
    }
2673
0
    handle->dlt_list[0] = DLT_LINUX_SLL;
2674
0
    handle->dlt_list[1] = DLT_LINUX_SLL2;
2675
0
    handle->dlt_count = 2;
2676
2677
    /*
2678
     * We're not bound to a device.
2679
     * For now, we're using this as an indication
2680
     * that we can't transmit; stop doing that only
2681
     * if we figure out how to transmit in cooked
2682
     * mode.
2683
     */
2684
0
    handlep->ifindex = -1;
2685
0
  }
2686
2687
  /*
2688
   * Select promiscuous mode on if "promisc" is set.
2689
   *
2690
   * Do not turn allmulti mode on if we don't select
2691
   * promiscuous mode - on some devices (e.g., Orinoco
2692
   * wireless interfaces), allmulti mode isn't supported
2693
   * and the driver implements it by turning promiscuous
2694
   * mode on, and that screws up the operation of the
2695
   * card as a normal networking interface, and on no
2696
   * other platform I know of does starting a non-
2697
   * promiscuous capture affect which multicast packets
2698
   * are received by the interface.
2699
   */
2700
2701
  /*
2702
   * Hmm, how can we set promiscuous mode on all interfaces?
2703
   * I am not sure if that is possible at all.  For now, we
2704
   * silently ignore attempts to turn promiscuous mode on
2705
   * for the "any" device (so you don't have to explicitly
2706
   * disable it in programs such as tcpdump).
2707
   */
2708
2709
0
  if (!is_any_device && handle->opt.promisc) {
2710
0
    memset(&mr, 0, sizeof(mr));
2711
0
    mr.mr_ifindex = handlep->ifindex;
2712
0
    mr.mr_type    = PACKET_MR_PROMISC;
2713
0
    if (setsockopt(sock_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
2714
0
        &mr, sizeof(mr)) == -1) {
2715
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
2716
0
          PCAP_ERRBUF_SIZE, errno, "setsockopt (PACKET_ADD_MEMBERSHIP)");
2717
0
      close(sock_fd);
2718
0
      return PCAP_ERROR;
2719
0
    }
2720
0
  }
2721
2722
  /*
2723
   * Enable auxiliary data and reserve room for reconstructing
2724
   * VLAN headers.
2725
   *
2726
   * XXX - is enabling auxiliary data necessary, now that we
2727
   * only support memory-mapped capture?  The kernel's memory-mapped
2728
   * capture code doesn't seem to check whether auxiliary data
2729
   * is enabled, it seems to provide it whether it is or not.
2730
   */
2731
0
  val = 1;
2732
0
  if (setsockopt(sock_fd, SOL_PACKET, PACKET_AUXDATA, &val,
2733
0
           sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2734
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2735
0
        errno, "setsockopt (PACKET_AUXDATA)");
2736
0
    close(sock_fd);
2737
0
    return PCAP_ERROR;
2738
0
  }
2739
0
  handle->offset += VLAN_TAG_LEN;
2740
2741
  /*
2742
   * If we're in cooked mode, make the snapshot length
2743
   * large enough to hold a "cooked mode" header plus
2744
   * 1 byte of packet data (so we don't pass a byte
2745
   * count of 0 to "recvfrom()").
2746
   * XXX - we don't know whether this will be DLT_LINUX_SLL
2747
   * or DLT_LINUX_SLL2, so make sure it's big enough for
2748
   * a DLT_LINUX_SLL2 "cooked mode" header; a snapshot length
2749
   * that small is silly anyway.
2750
   */
2751
0
  if (handlep->cooked) {
2752
0
    if (handle->snapshot < SLL2_HDR_LEN + 1)
2753
0
      handle->snapshot = SLL2_HDR_LEN + 1;
2754
0
  }
2755
0
  handle->bufsize = handle->snapshot;
2756
2757
  /*
2758
   * Set the offset at which to insert VLAN tags.
2759
   */
2760
0
  set_vlan_offset(handle);
2761
2762
0
  if (handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO) {
2763
0
    int nsec_tstamps = 1;
2764
2765
0
    if (setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPNS, &nsec_tstamps, sizeof(nsec_tstamps)) < 0) {
2766
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "setsockopt: unable to set SO_TIMESTAMPNS");
2767
0
      close(sock_fd);
2768
0
      return PCAP_ERROR;
2769
0
    }
2770
0
  }
2771
2772
  /*
2773
   * We've succeeded. Save the socket FD in the pcap structure.
2774
   */
2775
0
  handle->fd = sock_fd;
2776
2777
  /*
2778
   * Any supported Linux version implements at least four auxiliary
2779
   * data items (SKF_AD_PROTOCOL, SKF_AD_PKTTYPE, SKF_AD_IFINDEX and
2780
   * SKF_AD_NLATTR).  Set a flag so the code generator can use these
2781
   * items if necessary.
2782
   */
2783
0
  handle->bpf_codegen_flags |= BPF_SPECIAL_BASIC_HANDLING;
2784
2785
  /*
2786
   * Can we generate special code for VLAN checks?
2787
   * (XXX - what if we need the special code but it's not supported
2788
   * by the OS?  Is that possible?)
2789
   *
2790
   * This depends on both a runtime condition (the running Linux kernel
2791
   * must support at least SKF_AD_VLAN_TAG_PRESENT in the auxiliary data
2792
   * and must support SO_BPF_EXTENSIONS in order to tell the userland
2793
   * process what it supports) and a compile-time condition (the OS
2794
   * headers must define both constants in order to compile libpcap code
2795
   * that asks the kernel about the support).
2796
   */
2797
0
#if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT)
2798
0
  int bpf_extensions;
2799
0
  socklen_t len = sizeof(bpf_extensions);
2800
0
  if (getsockopt(sock_fd, SOL_SOCKET, SO_BPF_EXTENSIONS,
2801
0
      &bpf_extensions, &len) == 0) {
2802
0
    if (bpf_extensions >= SKF_AD_VLAN_TAG_PRESENT) {
2803
      /*
2804
       * Yes, we can.  Request that we do so.
2805
       */
2806
0
      handle->bpf_codegen_flags |= BPF_SPECIAL_VLAN_HANDLING;
2807
0
    }
2808
0
  }
2809
0
#endif // defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT)
2810
2811
0
  return status;
2812
0
}
2813
2814
/*
2815
 * Attempt to setup memory-mapped access.
2816
 *
2817
 * On success, returns 0 if there are no warnings or a PCAP_WARNING_ code
2818
 * if there is a warning.
2819
 *
2820
 * On error, returns the appropriate error code; if that is PCAP_ERROR,
2821
 * sets handle->errbuf to the appropriate message.
2822
 */
2823
static int
2824
setup_mmapped(pcap_t *handle)
2825
0
{
2826
0
  struct pcap_linux *handlep = handle->priv;
2827
0
  int flags = MAP_ANONYMOUS | MAP_PRIVATE;
2828
0
  int status;
2829
2830
  /*
2831
   * Attempt to allocate a buffer to hold the contents of one
2832
   * packet, for use by the oneshot callback.
2833
   */
2834
0
#ifdef MAP_32BIT
2835
0
  if (pcapint_mmap_32bit) flags |= MAP_32BIT;
2836
0
#endif
2837
0
  handlep->oneshot_buffer = mmap(0, handle->snapshot, PROT_READ | PROT_WRITE, flags, -1, 0);
2838
0
  if (handlep->oneshot_buffer == MAP_FAILED) {
2839
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2840
0
        errno, "can't allocate oneshot buffer");
2841
0
    return PCAP_ERROR;
2842
0
  }
2843
2844
0
  if (handle->opt.buffer_size == 0) {
2845
    /* by default request 2M for the ring buffer */
2846
0
    handle->opt.buffer_size = 2*1024*1024;
2847
0
  }
2848
0
  status = prepare_tpacket_socket(handle);
2849
0
  if (status == -1) {
2850
0
    munmap(handlep->oneshot_buffer, handle->snapshot);
2851
0
    handlep->oneshot_buffer = NULL;
2852
0
    return PCAP_ERROR;
2853
0
  }
2854
0
  status = create_ring(handle);
2855
0
  if (status < 0) {
2856
    /*
2857
     * Error attempting to enable memory-mapped capture;
2858
     * fail.  The return value is the status to return.
2859
     */
2860
0
    munmap(handlep->oneshot_buffer, handle->snapshot);
2861
0
    handlep->oneshot_buffer = NULL;
2862
0
    return status;
2863
0
  }
2864
2865
  /*
2866
   * Success.  status has been set either to 0 if there are no
2867
   * warnings or to a PCAP_WARNING_ value if there is a warning.
2868
   *
2869
   * handle->offset is used to get the current position into the rx ring.
2870
   * handle->cc is used to store the ring size.
2871
   */
2872
2873
  /*
2874
   * Set the timeout to use in poll() before returning.
2875
   */
2876
0
  set_poll_timeout(handlep);
2877
2878
0
  return status;
2879
0
}
2880
2881
/*
2882
 * Attempt to set the socket to the specified version of the memory-mapped
2883
 * header.
2884
 *
2885
 * Return 0 if we succeed; return 1 if we fail because that version isn't
2886
 * supported; return -1 on any other error, and set handle->errbuf.
2887
 */
2888
static int
2889
init_tpacket(pcap_t *handle, int version, const char *version_str)
2890
0
{
2891
0
  struct pcap_linux *handlep = handle->priv;
2892
0
  int val = version;
2893
0
  socklen_t len = sizeof(val);
2894
2895
  /*
2896
   * Probe whether kernel supports the specified TPACKET version;
2897
   * this also gets the length of the header for that version.
2898
   *
2899
   * This socket option was introduced in 2.6.27, which was
2900
   * also the first release with TPACKET_V2 support.
2901
   */
2902
0
  if (getsockopt(handle->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
2903
0
    if (errno == EINVAL) {
2904
      /*
2905
       * EINVAL means this specific version of TPACKET
2906
       * is not supported. Tell the caller they can try
2907
       * with a different one; if they've run out of
2908
       * others to try, let them set the error message
2909
       * appropriately.
2910
       */
2911
0
      return 1;
2912
0
    }
2913
2914
    /*
2915
     * All other errors are fatal.
2916
     */
2917
0
    if (errno == ENOPROTOOPT) {
2918
      /*
2919
       * PACKET_HDRLEN isn't supported, which means
2920
       * that memory-mapped capture isn't supported.
2921
       * Indicate that in the message.
2922
       */
2923
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2924
0
          "Kernel doesn't support memory-mapped capture; a 2.6.27 or later 2.x kernel is required, with CONFIG_PACKET_MMAP specified for 2.x kernels");
2925
0
    } else {
2926
      /*
2927
       * Some unexpected error.
2928
       */
2929
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2930
0
          errno, "can't get %s header len on packet socket",
2931
0
          version_str);
2932
0
    }
2933
0
    return -1;
2934
0
  }
2935
0
  handlep->tp_hdrlen = val;
2936
2937
0
  val = version;
2938
0
  if (setsockopt(handle->fd, SOL_PACKET, PACKET_VERSION, &val,
2939
0
         sizeof(val)) < 0) {
2940
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2941
0
        errno, "can't activate %s on packet socket", version_str);
2942
0
    return -1;
2943
0
  }
2944
0
  handlep->tp_version = version;
2945
2946
0
  return 0;
2947
0
}
2948
2949
/*
2950
 * Attempt to set the socket to version 3 of the memory-mapped header and,
2951
 * if that fails because version 3 isn't supported, attempt to fall
2952
 * back to version 2.  If version 2 isn't supported, just fail.
2953
 *
2954
 * Return 0 if we succeed and -1 on any other error, and set handle->errbuf.
2955
 */
2956
static int
2957
prepare_tpacket_socket(pcap_t *handle)
2958
0
{
2959
0
  int ret;
2960
2961
0
#ifdef HAVE_TPACKET3
2962
  /*
2963
   * Try setting the version to TPACKET_V3.
2964
   *
2965
   * The only mode in which buffering is done on PF_PACKET
2966
   * sockets, so that packets might not be delivered
2967
   * immediately, is TPACKET_V3 mode.
2968
   *
2969
   * The buffering cannot be disabled in that mode, so
2970
   * if the user has requested immediate mode, we don't
2971
   * use TPACKET_V3.
2972
   */
2973
0
  if (!handle->opt.immediate) {
2974
0
    ret = init_tpacket(handle, TPACKET_V3, "TPACKET_V3");
2975
0
    if (ret == 0) {
2976
      /*
2977
       * Success.
2978
       */
2979
0
      return 0;
2980
0
    }
2981
0
    if (ret == -1) {
2982
      /*
2983
       * We failed for some reason other than "the
2984
       * kernel doesn't support TPACKET_V3".
2985
       */
2986
0
      return -1;
2987
0
    }
2988
2989
    /*
2990
     * This means it returned 1, which means "the kernel
2991
     * doesn't support TPACKET_V3"; try TPACKET_V2.
2992
     */
2993
0
  }
2994
0
#endif /* HAVE_TPACKET3 */
2995
2996
  /*
2997
   * Try setting the version to TPACKET_V2.
2998
   */
2999
0
  ret = init_tpacket(handle, TPACKET_V2, "TPACKET_V2");
3000
0
  if (ret == 0) {
3001
    /*
3002
     * Success.
3003
     */
3004
0
    return 0;
3005
0
  }
3006
3007
0
  if (ret == 1) {
3008
    /*
3009
     * OK, the kernel supports memory-mapped capture, but
3010
     * not TPACKET_V2.  Set the error message appropriately.
3011
     */
3012
0
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3013
0
        "Kernel doesn't support TPACKET_V2; a 2.6.27 or later kernel is required");
3014
0
  }
3015
3016
  /*
3017
   * We failed.
3018
   */
3019
0
  return -1;
3020
0
}
3021
3022
/*
3023
 * Attempt to set up memory-mapped access.
3024
 *
3025
 * On success, returns 0 if there are no warnings or to a PCAP_WARNING_ code
3026
 * if there is a warning.
3027
 *
3028
 * On error, returns the appropriate error code; if that is PCAP_ERROR,
3029
 * sets handle->errbuf to the appropriate message.
3030
 */
3031
static int
3032
create_ring(pcap_t *handle)
3033
0
{
3034
0
  struct pcap_linux *handlep = handle->priv;
3035
0
  unsigned i, j, frames_per_block;
3036
0
  int flags = MAP_SHARED;
3037
0
#ifdef HAVE_TPACKET3
3038
  /*
3039
   * For sockets using TPACKET_V2, the extra stuff at the end of a
3040
   * struct tpacket_req3 will be ignored, so this is OK even for
3041
   * those sockets.
3042
   */
3043
0
  struct tpacket_req3 req;
3044
#else
3045
  struct tpacket_req req;
3046
#endif
3047
0
  socklen_t len;
3048
0
  unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff;
3049
0
  unsigned int frame_size;
3050
0
  int status;
3051
3052
  /*
3053
   * Start out assuming no warnings.
3054
   */
3055
0
  status = 0;
3056
3057
  /*
3058
   * Reserve space for VLAN tag reconstruction.
3059
   */
3060
0
  tp_reserve = VLAN_TAG_LEN;
3061
3062
  /*
3063
   * If we're capturing in cooked mode, reserve space for
3064
   * a DLT_LINUX_SLL2 header; we don't know yet whether
3065
   * we'll be using DLT_LINUX_SLL or DLT_LINUX_SLL2, as
3066
   * that can be changed on an open device, so we reserve
3067
   * space for the larger of the two.
3068
   *
3069
   * XXX - we assume that the kernel is still adding
3070
   * 16 bytes of extra space, so we subtract 16 from
3071
   * SLL2_HDR_LEN to get the additional space needed.
3072
   * (Are they doing that for DLT_LINUX_SLL, the link-
3073
   * layer header for which is 16 bytes?)
3074
   *
3075
   * XXX - should we use TPACKET_ALIGN(SLL2_HDR_LEN - 16)?
3076
   */
3077
0
  if (handlep->cooked)
3078
0
    tp_reserve += SLL2_HDR_LEN - 16;
3079
3080
  /*
3081
   * Try to request that amount of reserve space.
3082
   * This must be done before creating the ring buffer.
3083
   */
3084
0
  len = sizeof(tp_reserve);
3085
0
  if (setsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE,
3086
0
      &tp_reserve, len) < 0) {
3087
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf,
3088
0
        PCAP_ERRBUF_SIZE, errno,
3089
0
        "setsockopt (PACKET_RESERVE)");
3090
0
    return PCAP_ERROR;
3091
0
  }
3092
3093
0
  switch (handlep->tp_version) {
3094
3095
0
  case TPACKET_V2:
3096
    /* Note that with large snapshot length (say 256K, which is
3097
     * the default for recent versions of tcpdump, Wireshark,
3098
     * TShark, dumpcap or 64K, the value that "-s 0" has given for
3099
     * a long time with tcpdump), if we use the snapshot
3100
     * length to calculate the frame length, only a few frames
3101
     * will be available in the ring even with pretty
3102
     * large ring size (and a lot of memory will be unused).
3103
     *
3104
     * Ideally, we should choose a frame length based on the
3105
     * minimum of the specified snapshot length and the maximum
3106
     * packet size.  That's not as easy as it sounds; consider,
3107
     * for example, an 802.11 interface in monitor mode, where
3108
     * the frame would include a radiotap header, where the
3109
     * maximum radiotap header length is device-dependent.
3110
     *
3111
     * So, for now, we just do this for Ethernet devices, where
3112
     * there's no metadata header, and the link-layer header is
3113
     * fixed length.  We can get the maximum packet size by
3114
     * adding 18, the Ethernet header length plus the CRC length
3115
     * (just in case we happen to get the CRC in the packet), to
3116
     * the MTU of the interface; we fetch the MTU in the hopes
3117
     * that it reflects support for jumbo frames.  (Even if the
3118
     * interface is just being used for passive snooping, the
3119
     * driver might set the size of buffers in the receive ring
3120
     * based on the MTU, so that the MTU limits the maximum size
3121
     * of packets that we can receive.)
3122
     *
3123
     * If segmentation/fragmentation or receive offload are
3124
     * enabled, we can get reassembled/aggregated packets larger
3125
     * than MTU, but bounded to 65535 plus the Ethernet overhead,
3126
     * due to kernel and protocol constraints */
3127
0
    frame_size = handle->snapshot;
3128
0
    if (handle->linktype == DLT_EN10MB) {
3129
0
      unsigned int max_frame_len;
3130
0
      int mtu;
3131
0
      int offload;
3132
3133
0
      mtu = iface_get_mtu(handle->fd, handle->opt.device,
3134
0
          handle->errbuf);
3135
0
      if (mtu == -1)
3136
0
        return PCAP_ERROR;
3137
0
      offload = iface_get_offload(handle);
3138
0
      if (offload == -1)
3139
0
        return PCAP_ERROR;
3140
0
      if (offload)
3141
0
        max_frame_len = max(mtu, 65535);
3142
0
      else
3143
0
        max_frame_len = mtu;
3144
0
      max_frame_len += 18;
3145
3146
0
      if (frame_size > max_frame_len)
3147
0
        frame_size = max_frame_len;
3148
0
    }
3149
3150
    /* NOTE: calculus matching those in tpacket_rcv()
3151
     * in linux-2.6/net/packet/af_packet.c
3152
     */
3153
0
    len = sizeof(sk_type);
3154
0
    if (getsockopt(handle->fd, SOL_SOCKET, SO_TYPE, &sk_type,
3155
0
        &len) < 0) {
3156
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
3157
0
          PCAP_ERRBUF_SIZE, errno, "getsockopt (SO_TYPE)");
3158
0
      return PCAP_ERROR;
3159
0
    }
3160
0
    maclen = (sk_type == SOCK_DGRAM) ? 0 : MAX_LINKHEADER_SIZE;
3161
      /* XXX: in the kernel maclen is calculated from
3162
       * LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len
3163
       * in:  packet_snd()           in linux-2.6/net/packet/af_packet.c
3164
       * then packet_alloc_skb()     in linux-2.6/net/packet/af_packet.c
3165
       * then sock_alloc_send_pskb() in linux-2.6/net/core/sock.c
3166
       * but I see no way to get those sizes in userspace,
3167
       * like for instance with an ifreq ioctl();
3168
       * the best thing I've found so far is MAX_HEADER in
3169
       * the kernel part of linux-2.6/include/linux/netdevice.h
3170
       * which goes up to 128+48=176; since pcap-linux.c
3171
       * defines a MAX_LINKHEADER_SIZE of 256 which is
3172
       * greater than that, let's use it.. maybe is it even
3173
       * large enough to directly replace macoff..
3174
       */
3175
0
    tp_hdrlen = TPACKET_ALIGN(handlep->tp_hdrlen) + sizeof(struct sockaddr_ll) ;
3176
0
    netoff = TPACKET_ALIGN(tp_hdrlen + (maclen < 16 ? 16 : maclen)) + tp_reserve;
3177
      /* NOTE: AFAICS tp_reserve may break the TPACKET_ALIGN
3178
       * of netoff, which contradicts
3179
       * linux-2.6/Documentation/networking/packet_mmap.txt
3180
       * documenting that:
3181
       * "- Gap, chosen so that packet data (Start+tp_net)
3182
       * aligns to TPACKET_ALIGNMENT=16"
3183
       */
3184
      /* NOTE: in linux-2.6/include/linux/skbuff.h:
3185
       * "CPUs often take a performance hit
3186
       *  when accessing unaligned memory locations"
3187
       */
3188
0
    macoff = netoff - maclen;
3189
0
    req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size);
3190
    /*
3191
     * Round the buffer size up to a multiple of the
3192
     * frame size (rather than rounding down, which
3193
     * would give a buffer smaller than our caller asked
3194
     * for, and possibly give zero frames if the requested
3195
     * buffer size is too small for one frame).
3196
     */
3197
0
    req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size;
3198
0
    break;
3199
3200
0
#ifdef HAVE_TPACKET3
3201
0
  case TPACKET_V3:
3202
    /* The "frames" for this are actually buffers that
3203
     * contain multiple variable-sized frames.
3204
     *
3205
     * We pick a "frame" size of MAXIMUM_SNAPLEN to leave
3206
     * enough room for at least one reasonably-sized packet
3207
     * in the "frame". */
3208
0
    req.tp_frame_size = MAXIMUM_SNAPLEN;
3209
    /*
3210
     * Round the buffer size up to a multiple of the
3211
     * "frame" size (rather than rounding down, which
3212
     * would give a buffer smaller than our caller asked
3213
     * for, and possibly give zero "frames" if the requested
3214
     * buffer size is too small for one "frame").
3215
     */
3216
0
    req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size;
3217
0
    break;
3218
0
#endif
3219
0
  default:
3220
0
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3221
0
        "Internal error: unknown TPACKET_ value %u",
3222
0
        handlep->tp_version);
3223
0
    return PCAP_ERROR;
3224
0
  }
3225
3226
  /* compute the minimum block size that will handle this frame.
3227
   * The block has to be page size aligned.
3228
   * The max block size allowed by the kernel is arch-dependent and
3229
   * it's not explicitly checked here. */
3230
0
  req.tp_block_size = getpagesize();
3231
0
  while (req.tp_block_size < req.tp_frame_size)
3232
0
    req.tp_block_size <<= 1;
3233
3234
0
  frames_per_block = req.tp_block_size/req.tp_frame_size;
3235
3236
  /*
3237
   * PACKET_TIMESTAMP was added after linux/net_tstamp.h was,
3238
   * so we check for PACKET_TIMESTAMP.  We check for
3239
   * linux/net_tstamp.h just in case a system somehow has
3240
   * PACKET_TIMESTAMP but not linux/net_tstamp.h; that might
3241
   * be unnecessary.
3242
   *
3243
   * SIOCSHWTSTAMP was introduced in the patch that introduced
3244
   * linux/net_tstamp.h, so we don't bother checking whether
3245
   * SIOCSHWTSTAMP is defined (if your Linux system has
3246
   * linux/net_tstamp.h but doesn't define SIOCSHWTSTAMP, your
3247
   * Linux system is badly broken).
3248
   */
3249
0
#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
3250
  /*
3251
   * If we were told to do so, ask the kernel and the driver
3252
   * to use hardware timestamps.
3253
   *
3254
   * Hardware timestamps are only supported with mmapped
3255
   * captures.
3256
   */
3257
0
  if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER ||
3258
0
      handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER_UNSYNCED) {
3259
0
    struct hwtstamp_config hwconfig;
3260
0
    struct ifreq ifr;
3261
0
    int timesource;
3262
3263
    /*
3264
     * Ask for hardware time stamps on all packets,
3265
     * including transmitted packets.
3266
     */
3267
0
    memset(&hwconfig, 0, sizeof(hwconfig));
3268
0
    hwconfig.tx_type = HWTSTAMP_TX_ON;
3269
0
    hwconfig.rx_filter = HWTSTAMP_FILTER_ALL;
3270
3271
0
    memset(&ifr, 0, sizeof(ifr));
3272
0
    pcapint_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name));
3273
0
    ifr.ifr_data = (void *)&hwconfig;
3274
3275
    /*
3276
     * This may require CAP_NET_ADMIN.
3277
     */
3278
0
    if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) {
3279
0
      switch (errno) {
3280
3281
0
      case EPERM:
3282
        /*
3283
         * Treat this as an error, as the
3284
         * user should try to run this
3285
         * with the appropriate privileges -
3286
         * and, if they can't, shouldn't
3287
         * try requesting hardware time stamps.
3288
         */
3289
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3290
0
            "Attempt to set hardware timestamp failed - CAP_NET_ADMIN may be required");
3291
0
        return PCAP_ERROR_PERM_DENIED;
3292
3293
0
      case EOPNOTSUPP:
3294
0
      case ERANGE:
3295
        /*
3296
         * Treat this as a warning, as the
3297
         * only way to fix the warning is to
3298
         * get an adapter that supports hardware
3299
         * time stamps for *all* packets.
3300
         * (ERANGE means "we support hardware
3301
         * time stamps, but for packets matching
3302
         * that particular filter", so it means
3303
         * "we don't support hardware time stamps
3304
         * for all incoming packets" here.)
3305
         *
3306
         * We'll just fall back on the standard
3307
         * host time stamps.
3308
         */
3309
0
        status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP;
3310
0
        break;
3311
3312
0
      default:
3313
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
3314
0
            PCAP_ERRBUF_SIZE, errno,
3315
0
            "SIOCSHWTSTAMP failed");
3316
0
        return PCAP_ERROR;
3317
0
      }
3318
0
    } else {
3319
      /*
3320
       * Well, that worked.  Now specify the type of
3321
       * hardware time stamp we want for this
3322
       * socket.
3323
       */
3324
0
      if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER) {
3325
        /*
3326
         * Hardware timestamp, synchronized
3327
         * with the system clock.
3328
         */
3329
0
        timesource = SOF_TIMESTAMPING_SYS_HARDWARE;
3330
0
      } else {
3331
        /*
3332
         * PCAP_TSTAMP_ADAPTER_UNSYNCED - hardware
3333
         * timestamp, not synchronized with the
3334
         * system clock.
3335
         */
3336
0
        timesource = SOF_TIMESTAMPING_RAW_HARDWARE;
3337
0
      }
3338
0
      if (setsockopt(handle->fd, SOL_PACKET, PACKET_TIMESTAMP,
3339
0
        (void *)&timesource, sizeof(timesource))) {
3340
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
3341
0
            PCAP_ERRBUF_SIZE, errno,
3342
0
            "can't set PACKET_TIMESTAMP");
3343
0
        return PCAP_ERROR;
3344
0
      }
3345
0
    }
3346
0
  }
3347
0
#endif /* HAVE_LINUX_NET_TSTAMP_H && PACKET_TIMESTAMP */
3348
3349
  /* ask the kernel to create the ring */
3350
0
retry:
3351
0
  req.tp_block_nr = req.tp_frame_nr / frames_per_block;
3352
3353
  /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */
3354
0
  req.tp_frame_nr = req.tp_block_nr * frames_per_block;
3355
3356
0
#ifdef HAVE_TPACKET3
3357
  /* timeout value to retire block - use the configured buffering timeout, or default if <0. */
3358
0
  if (handlep->timeout > 0) {
3359
    /* Use the user specified timeout as the block timeout */
3360
0
    req.tp_retire_blk_tov = handlep->timeout;
3361
0
  } else if (handlep->timeout == 0) {
3362
    /*
3363
     * In pcap, this means "infinite timeout"; TPACKET_V3
3364
     * doesn't support that, so just set it to UINT_MAX
3365
     * milliseconds.  In the TPACKET_V3 loop, if the
3366
     * timeout is 0, and we haven't yet seen any packets,
3367
     * and we block and still don't have any packets, we
3368
     * keep blocking until we do.
3369
     */
3370
0
    req.tp_retire_blk_tov = UINT_MAX;
3371
0
  } else {
3372
    /*
3373
     * XXX - this is not valid; use 0, meaning "have the
3374
     * kernel pick a default", for now.
3375
     */
3376
0
    req.tp_retire_blk_tov = 0;
3377
0
  }
3378
  /* private data not used */
3379
0
  req.tp_sizeof_priv = 0;
3380
  /* Rx ring - feature request bits - none (rxhash will not be filled) */
3381
0
  req.tp_feature_req_word = 0;
3382
0
#endif
3383
3384
0
  if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,
3385
0
          (void *) &req, sizeof(req))) {
3386
0
    if ((errno == ENOMEM) && (req.tp_block_nr > 1)) {
3387
      /*
3388
       * Memory failure; try to reduce the requested ring
3389
       * size.
3390
       *
3391
       * We used to reduce this by half -- do 5% instead.
3392
       * That may result in more iterations and a longer
3393
       * startup, but the user will be much happier with
3394
       * the resulting buffer size.
3395
       */
3396
0
      if (req.tp_frame_nr < 20)
3397
0
        req.tp_frame_nr -= 1;
3398
0
      else
3399
0
        req.tp_frame_nr -= req.tp_frame_nr/20;
3400
0
      goto retry;
3401
0
    }
3402
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
3403
0
        errno, "can't create rx ring on packet socket");
3404
0
    return PCAP_ERROR;
3405
0
  }
3406
3407
  /* memory map the rx ring */
3408
0
  handlep->mmapbuflen = req.tp_block_nr * req.tp_block_size;
3409
0
#ifdef MAP_32BIT
3410
0
  if (pcapint_mmap_32bit) flags |= MAP_32BIT;
3411
0
#endif
3412
0
  handlep->mmapbuf = mmap(0, handlep->mmapbuflen, PROT_READ | PROT_WRITE, flags, handle->fd, 0);
3413
0
  if (handlep->mmapbuf == MAP_FAILED) {
3414
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
3415
0
        errno, "can't mmap rx ring");
3416
3417
    /* clear the allocated ring on error*/
3418
0
    destroy_ring(handle);
3419
0
    return PCAP_ERROR;
3420
0
  }
3421
3422
  /* allocate a ring for each frame header pointer*/
3423
0
  handle->cc = req.tp_frame_nr;
3424
0
  handle->buffer = malloc(handle->cc * sizeof(union thdr *));
3425
0
  if (!handle->buffer) {
3426
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
3427
0
        errno, "can't allocate ring of frame headers");
3428
3429
0
    destroy_ring(handle);
3430
0
    return PCAP_ERROR;
3431
0
  }
3432
3433
  /* fill the header ring with proper frame ptr*/
3434
0
  handle->offset = 0;
3435
0
  for (i=0; i<req.tp_block_nr; ++i) {
3436
0
    u_char *base = &handlep->mmapbuf[i*req.tp_block_size];
3437
0
    for (j=0; j<frames_per_block; ++j, ++handle->offset) {
3438
0
      RING_GET_CURRENT_FRAME(handle) = base;
3439
0
      base += req.tp_frame_size;
3440
0
    }
3441
0
  }
3442
3443
0
  handle->bufsize = req.tp_frame_size;
3444
0
  handle->offset = 0;
3445
0
  return status;
3446
0
}
3447
3448
/* free all ring related resources*/
3449
static void
3450
destroy_ring(pcap_t *handle)
3451
0
{
3452
0
  struct pcap_linux *handlep = handle->priv;
3453
3454
  /*
3455
   * Tell the kernel to destroy the ring.
3456
   * We don't check for setsockopt failure, as 1) we can't recover
3457
   * from an error and 2) we might not yet have set it up in the
3458
   * first place.
3459
   */
3460
0
  struct tpacket_req req;
3461
0
  memset(&req, 0, sizeof(req));
3462
0
  (void)setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,
3463
0
        (void *) &req, sizeof(req));
3464
3465
  /* if ring is mapped, unmap it*/
3466
0
  if (handlep->mmapbuf) {
3467
    /* do not test for mmap failure, as we can't recover from any error */
3468
0
    (void)munmap(handlep->mmapbuf, handlep->mmapbuflen);
3469
0
    handlep->mmapbuf = NULL;
3470
0
  }
3471
0
}
3472
3473
/*
3474
 * Special one-shot callback, used for pcap_next() and pcap_next_ex(),
3475
 * for Linux mmapped capture.
3476
 *
3477
 * The problem is that pcap_next() and pcap_next_ex() expect the packet
3478
 * data handed to the callback to be valid after the callback returns,
3479
 * but pcap_read_linux_mmap() has to release that packet as soon as
3480
 * the callback returns (otherwise, the kernel thinks there's still
3481
 * at least one unprocessed packet available in the ring, so a select()
3482
 * will immediately return indicating that there's data to process), so,
3483
 * in the callback, we have to make a copy of the packet.
3484
 *
3485
 * Yes, this means that, if the capture is using the ring buffer, using
3486
 * pcap_next() or pcap_next_ex() requires more copies than using
3487
 * pcap_loop() or pcap_dispatch().  If that bothers you, don't use
3488
 * pcap_next() or pcap_next_ex().
3489
 */
3490
static void
3491
pcapint_oneshot_linux(u_char *user, const struct pcap_pkthdr *h,
3492
    const u_char *bytes)
3493
0
{
3494
0
  struct oneshot_userdata *sp = (struct oneshot_userdata *)user;
3495
0
  pcap_t *handle = sp->pd;
3496
0
  struct pcap_linux *handlep = handle->priv;
3497
3498
0
  *sp->hdr = *h;
3499
0
  memcpy(handlep->oneshot_buffer, bytes, h->caplen);
3500
0
  *sp->pkt = handlep->oneshot_buffer;
3501
0
}
3502
3503
static int
3504
pcap_getnonblock_linux(pcap_t *handle)
3505
0
{
3506
0
  struct pcap_linux *handlep = handle->priv;
3507
3508
  /* use negative value of timeout to indicate non blocking ops */
3509
0
  return (handlep->timeout<0);
3510
0
}
3511
3512
static int
3513
pcap_setnonblock_linux(pcap_t *handle, int nonblock)
3514
0
{
3515
0
  struct pcap_linux *handlep = handle->priv;
3516
3517
  /*
3518
   * Set the file descriptor to the requested mode, as we use
3519
   * it for sending packets.
3520
   */
3521
0
  if (pcapint_setnonblock_fd(handle, nonblock) == -1)
3522
0
    return -1;
3523
3524
  /*
3525
   * Map each value to their corresponding negation to
3526
   * preserve the timeout value provided with pcap_set_timeout.
3527
   */
3528
0
  if (nonblock) {
3529
    /*
3530
     * We're setting the mode to non-blocking mode.
3531
     */
3532
0
    if (handlep->timeout >= 0) {
3533
      /*
3534
       * Indicate that we're switching to
3535
       * non-blocking mode.
3536
       */
3537
0
      handlep->timeout = ~handlep->timeout;
3538
0
    }
3539
0
    if (handlep->poll_breakloop_fd != -1) {
3540
      /* Close the eventfd; we do not need it in nonblock mode. */
3541
0
      close(handlep->poll_breakloop_fd);
3542
0
      handlep->poll_breakloop_fd = -1;
3543
0
    }
3544
0
  } else {
3545
    /*
3546
     * We're setting the mode to blocking mode.
3547
     */
3548
0
    if (handlep->poll_breakloop_fd == -1) {
3549
      /* If we did not have an eventfd, open one now that we are blocking. */
3550
0
      if ( ( handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK) ) == -1 ) {
3551
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
3552
0
            PCAP_ERRBUF_SIZE, errno,
3553
0
            "could not open eventfd");
3554
0
        return -1;
3555
0
      }
3556
0
    }
3557
0
    if (handlep->timeout < 0) {
3558
0
      handlep->timeout = ~handlep->timeout;
3559
0
    }
3560
0
  }
3561
  /* Update the timeout to use in poll(). */
3562
0
  set_poll_timeout(handlep);
3563
0
  return 0;
3564
0
}
3565
3566
/*
3567
 * Get the status field of the ring buffer frame at a specified offset.
3568
 */
3569
static inline u_int
3570
pcap_get_ring_frame_status(pcap_t *handle, u_int offset)
3571
0
{
3572
0
  struct pcap_linux *handlep = handle->priv;
3573
0
  union thdr h;
3574
3575
0
  h.raw = RING_GET_FRAME_AT(handle, offset);
3576
0
  switch (handlep->tp_version) {
3577
0
  case TPACKET_V2:
3578
0
    return __atomic_load_n(&h.h2->tp_status, __ATOMIC_ACQUIRE);
3579
0
#ifdef HAVE_TPACKET3
3580
0
  case TPACKET_V3:
3581
0
    return __atomic_load_n(&h.h3->hdr.bh1.block_status, __ATOMIC_ACQUIRE);
3582
0
#endif
3583
0
  }
3584
  /* This should not happen. */
3585
0
  return 0;
3586
0
}
3587
3588
/*
3589
 * Block waiting for frames to be available.
3590
 */
3591
static int pcap_wait_for_frames_mmap(pcap_t *handle)
3592
0
{
3593
0
  struct pcap_linux *handlep = handle->priv;
3594
0
  int timeout;
3595
0
  struct ifreq ifr;
3596
0
  int ret;
3597
0
  struct pollfd pollinfo[2];
3598
0
  int numpollinfo;
3599
0
  pollinfo[0].fd = handle->fd;
3600
0
  pollinfo[0].events = POLLIN;
3601
0
  if ( handlep->poll_breakloop_fd == -1 ) {
3602
0
    numpollinfo = 1;
3603
0
    pollinfo[1].revents = 0;
3604
    /*
3605
     * We set pollinfo[1].revents to zero, even though
3606
     * numpollinfo = 1 meaning that poll() doesn't see
3607
     * pollinfo[1], so that we do not have to add a
3608
     * conditional of numpollinfo > 1 below when we
3609
     * test pollinfo[1].revents.
3610
     */
3611
0
  } else {
3612
0
    pollinfo[1].fd = handlep->poll_breakloop_fd;
3613
0
    pollinfo[1].events = POLLIN;
3614
0
    numpollinfo = 2;
3615
0
  }
3616
3617
  /*
3618
   * Keep polling until we either get some packets to read, see
3619
   * that we got told to break out of the loop, get a fatal error,
3620
   * or discover that the device went away.
3621
   *
3622
   * In non-blocking mode, we must still do one poll() to catch
3623
   * any pending error indications, but the poll() has a timeout
3624
   * of 0, so that it doesn't block, and we quit after that one
3625
   * poll().
3626
   *
3627
   * If we've seen an ENETDOWN, it might be the first indication
3628
   * that the device went away, or it might just be that it was
3629
   * configured down.  Unfortunately, there's no guarantee that
3630
   * the device has actually been removed as an interface, because:
3631
   *
3632
   * 1) if, as appears to be the case at least some of the time,
3633
   * the PF_PACKET socket code first gets a NETDEV_DOWN indication
3634
   * for the device and then gets a NETDEV_UNREGISTER indication
3635
   * for it, the first indication will cause a wakeup with ENETDOWN
3636
   * but won't set the packet socket's field for the interface index
3637
   * to -1, and the second indication won't cause a wakeup (because
3638
   * the first indication also caused the protocol hook to be
3639
   * unregistered) but will set the packet socket's field for the
3640
   * interface index to -1;
3641
   *
3642
   * 2) even if just a NETDEV_UNREGISTER indication is registered,
3643
   * the packet socket's field for the interface index only gets
3644
   * set to -1 after the wakeup, so there's a small but non-zero
3645
   * risk that a thread blocked waiting for the wakeup will get
3646
   * to the "fetch the socket name" code before the interface index
3647
   * gets set to -1, so it'll get the old interface index.
3648
   *
3649
   * Therefore, if we got an ENETDOWN and haven't seen a packet
3650
   * since then, we assume that we might be waiting for the interface
3651
   * to disappear, and poll with a timeout to try again in a short
3652
   * period of time.  If we *do* see a packet, the interface has
3653
   * come back up again, and is *definitely* still there, so we
3654
   * don't need to poll.
3655
   */
3656
0
  for (;;) {
3657
    /*
3658
     * Yes, we do this even in non-blocking mode, as it's
3659
     * the only way to get error indications from a
3660
     * tpacket socket.
3661
     *
3662
     * The timeout is 0 in non-blocking mode, so poll()
3663
     * returns immediately.
3664
     */
3665
0
    timeout = handlep->poll_timeout;
3666
3667
    /*
3668
     * If we got an ENETDOWN and haven't gotten an indication
3669
     * that the device has gone away or that the device is up,
3670
     * we don't yet know for certain whether the device has
3671
     * gone away or not, do a poll() with a 1-millisecond timeout,
3672
     * as we have to poll indefinitely for "device went away"
3673
     * indications until we either get one or see that the
3674
     * device is up.
3675
     */
3676
0
    if (handlep->netdown) {
3677
0
      if (timeout != 0)
3678
0
        timeout = 1;
3679
0
    }
3680
0
    ret = poll(pollinfo, numpollinfo, timeout);
3681
0
    if (ret < 0) {
3682
      /*
3683
       * Error.  If it's not EINTR, report it.
3684
       */
3685
0
      if (errno != EINTR) {
3686
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
3687
0
            PCAP_ERRBUF_SIZE, errno,
3688
0
            "can't poll on packet socket");
3689
0
        return PCAP_ERROR;
3690
0
      }
3691
3692
      /*
3693
       * It's EINTR; if we were told to break out of
3694
       * the loop, do so.
3695
       */
3696
0
      if (handle->break_loop) {
3697
0
        handle->break_loop = 0;
3698
0
        return PCAP_ERROR_BREAK;
3699
0
      }
3700
0
    } else if (ret > 0) {
3701
      /*
3702
       * OK, some descriptor is ready.
3703
       * Check the socket descriptor first.
3704
       *
3705
       * As I read the Linux man page, pollinfo[0].revents
3706
       * will either be POLLIN, POLLERR, POLLHUP, or POLLNVAL.
3707
       */
3708
0
      if (pollinfo[0].revents == POLLIN) {
3709
        /*
3710
         * OK, we may have packets to
3711
         * read.
3712
         */
3713
0
        break;
3714
0
      }
3715
0
      if (pollinfo[0].revents != 0) {
3716
        /*
3717
         * There's some indication other than
3718
         * "you can read on this descriptor" on
3719
         * the descriptor.
3720
         */
3721
0
        if (pollinfo[0].revents & POLLNVAL) {
3722
0
          snprintf(handle->errbuf,
3723
0
              PCAP_ERRBUF_SIZE,
3724
0
              "Invalid polling request on packet socket");
3725
0
          return PCAP_ERROR;
3726
0
        }
3727
0
        if (pollinfo[0].revents & (POLLHUP | POLLRDHUP)) {
3728
0
          snprintf(handle->errbuf,
3729
0
              PCAP_ERRBUF_SIZE,
3730
0
              "Hangup on packet socket");
3731
0
          return PCAP_ERROR;
3732
0
        }
3733
0
        if (pollinfo[0].revents & POLLERR) {
3734
          /*
3735
           * Get the error.
3736
           */
3737
0
          int err;
3738
0
          socklen_t errlen;
3739
3740
0
          errlen = sizeof(err);
3741
0
          if (getsockopt(handle->fd, SOL_SOCKET,
3742
0
              SO_ERROR, &err, &errlen) == -1) {
3743
            /*
3744
             * The call *itself* returned
3745
             * an error; make *that*
3746
             * the error.
3747
             */
3748
0
            err = errno;
3749
0
          }
3750
3751
          /*
3752
           * OK, we have the error.
3753
           */
3754
0
          if (err == ENETDOWN) {
3755
            /*
3756
             * The device on which we're
3757
             * capturing went away or the
3758
             * interface was taken down.
3759
             *
3760
             * We don't know for certain
3761
             * which happened, and the
3762
             * next poll() may indicate
3763
             * that there are packets
3764
             * to be read, so just set
3765
             * a flag to get us to do
3766
             * checks later, and set
3767
             * the required select
3768
             * timeout to 1 millisecond
3769
             * so that event loops that
3770
             * check our socket descriptor
3771
             * also time out so that
3772
             * they can call us and we
3773
             * can do the checks.
3774
             */
3775
0
            handlep->netdown = 1;
3776
0
            handle->required_select_timeout = &netdown_timeout;
3777
0
          } else if (err == 0) {
3778
            /*
3779
             * This shouldn't happen, so
3780
             * report a special indication
3781
             * that it did.
3782
             */
3783
0
            snprintf(handle->errbuf,
3784
0
                PCAP_ERRBUF_SIZE,
3785
0
                "Error condition on packet socket: Reported error was 0");
3786
0
            return PCAP_ERROR;
3787
0
          } else {
3788
0
            pcapint_fmt_errmsg_for_errno(handle->errbuf,
3789
0
                PCAP_ERRBUF_SIZE,
3790
0
                err,
3791
0
                "Error condition on packet socket");
3792
0
            return PCAP_ERROR;
3793
0
          }
3794
0
        }
3795
0
      }
3796
      /*
3797
       * Now check the event device.
3798
       */
3799
0
      if (pollinfo[1].revents & POLLIN) {
3800
0
        ssize_t nread;
3801
0
        uint64_t value;
3802
3803
        /*
3804
         * This should never fail, but, just
3805
         * in case....
3806
         */
3807
0
        nread = read(handlep->poll_breakloop_fd, &value,
3808
0
            sizeof(value));
3809
0
        if (nread == -1) {
3810
0
          pcapint_fmt_errmsg_for_errno(handle->errbuf,
3811
0
              PCAP_ERRBUF_SIZE,
3812
0
              errno,
3813
0
              "Error reading from event FD");
3814
0
          return PCAP_ERROR;
3815
0
        }
3816
3817
        /*
3818
         * According to the Linux read(2) man
3819
         * page, read() will transfer at most
3820
         * 2^31-1 bytes, so the return value is
3821
         * either -1 or a value between 0
3822
         * and 2^31-1, so it's non-negative.
3823
         *
3824
         * Cast it to size_t to squelch
3825
         * warnings from the compiler; add this
3826
         * comment to squelch warnings from
3827
         * humans reading the code. :-)
3828
         *
3829
         * Don't treat an EOF as an error, but
3830
         * *do* treat a short read as an error;
3831
         * that "shouldn't happen", but....
3832
         */
3833
0
        if (nread != 0 &&
3834
0
            (size_t)nread < sizeof(value)) {
3835
0
          snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3836
0
              "Short read from event FD: expected %zu, got %zd",
3837
0
              sizeof(value), nread);
3838
0
          return PCAP_ERROR;
3839
0
        }
3840
3841
        /*
3842
         * This event gets signaled by a
3843
         * pcap_breakloop() call; if we were told
3844
         * to break out of the loop, do so.
3845
         */
3846
0
        if (handle->break_loop) {
3847
0
          handle->break_loop = 0;
3848
0
          return PCAP_ERROR_BREAK;
3849
0
        }
3850
0
      }
3851
0
    }
3852
3853
    /*
3854
     * Either:
3855
     *
3856
     *   1) we got neither an error from poll() nor any
3857
     *      readable descriptors, in which case there
3858
     *      are no packets waiting to read
3859
     *
3860
     * or
3861
     *
3862
     *   2) We got readable descriptors but the PF_PACKET
3863
     *      socket wasn't one of them, in which case there
3864
     *      are no packets waiting to read
3865
     *
3866
     * so, if we got an ENETDOWN, we've drained whatever
3867
     * packets were available to read at the point of the
3868
     * ENETDOWN.
3869
     *
3870
     * So, if we got an ENETDOWN and haven't gotten an indication
3871
     * that the device has gone away or that the device is up,
3872
     * we don't yet know for certain whether the device has
3873
     * gone away or not, check whether the device exists and is
3874
     * up.
3875
     */
3876
0
    if (handlep->netdown) {
3877
0
      if (!device_still_exists(handle)) {
3878
        /*
3879
         * The device doesn't exist any more;
3880
         * report that.
3881
         *
3882
         * XXX - we should really return an
3883
         * appropriate error for that, but
3884
         * pcap_dispatch() etc. aren't documented
3885
         * as having error returns other than
3886
         * PCAP_ERROR or PCAP_ERROR_BREAK.
3887
         */
3888
0
        snprintf(handle->errbuf,  PCAP_ERRBUF_SIZE,
3889
0
            "The interface disappeared");
3890
0
        return PCAP_ERROR;
3891
0
      }
3892
3893
      /*
3894
       * The device still exists; try to see if it's up.
3895
       */
3896
0
      memset(&ifr, 0, sizeof(ifr));
3897
0
      pcapint_strlcpy(ifr.ifr_name, handlep->device,
3898
0
          sizeof(ifr.ifr_name));
3899
0
      if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
3900
0
        if (errno == ENXIO || errno == ENODEV) {
3901
          /*
3902
           * OK, *now* it's gone.
3903
           *
3904
           * XXX - see above comment.
3905
           */
3906
0
          snprintf(handle->errbuf,
3907
0
              PCAP_ERRBUF_SIZE,
3908
0
              "The interface disappeared");
3909
0
          return PCAP_ERROR;
3910
0
        } else {
3911
0
          pcapint_fmt_errmsg_for_errno(handle->errbuf,
3912
0
              PCAP_ERRBUF_SIZE, errno,
3913
0
              "%s: Can't get flags",
3914
0
              handlep->device);
3915
0
          return PCAP_ERROR;
3916
0
        }
3917
0
      }
3918
0
      if (ifr.ifr_flags & IFF_UP) {
3919
        /*
3920
         * It's up, so it definitely still exists.
3921
         * Cancel the ENETDOWN indication - we
3922
         * presumably got it due to the interface
3923
         * going down rather than the device going
3924
         * away - and revert to "no required select
3925
         * timeout.
3926
         */
3927
0
        handlep->netdown = 0;
3928
0
        handle->required_select_timeout = NULL;
3929
0
      }
3930
0
    }
3931
3932
    /*
3933
     * If we're in non-blocking mode, just quit now, rather
3934
     * than spinning in a loop doing poll()s that immediately
3935
     * time out if there's no indication on any descriptor.
3936
     */
3937
0
    if (handlep->poll_timeout == 0)
3938
0
      break;
3939
0
  }
3940
0
  return 0;
3941
0
}
3942
3943
/* handle a single memory mapped packet */
3944
static int pcap_handle_packet_mmap(
3945
    pcap_t *handle,
3946
    pcap_handler callback,
3947
    u_char *user,
3948
    unsigned char *frame,
3949
    unsigned int tp_len,
3950
    unsigned int tp_mac,
3951
    unsigned int tp_snaplen,
3952
    unsigned int tp_sec,
3953
    unsigned int tp_usec,
3954
    int tp_vlan_tci_valid,
3955
    __u16 tp_vlan_tci,
3956
    __u16 tp_vlan_tpid)
3957
0
{
3958
0
  struct pcap_linux *handlep = handle->priv;
3959
0
  unsigned char *bp;
3960
0
  struct sockaddr_ll *sll;
3961
0
  struct pcap_pkthdr pcaphdr;
3962
0
  unsigned int snaplen = tp_snaplen;
3963
0
  struct utsname utsname;
3964
3965
  /* perform sanity check on internal offset. */
3966
0
  if (tp_mac + tp_snaplen > handle->bufsize) {
3967
    /*
3968
     * Report some system information as a debugging aid.
3969
     */
3970
0
    if (uname(&utsname) != -1) {
3971
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3972
0
        "corrupted frame on kernel ring mac "
3973
0
        "offset %u + caplen %u > frame len %d "
3974
0
        "(kernel %.32s version %s, machine %.16s)",
3975
0
        tp_mac, tp_snaplen, handle->bufsize,
3976
0
        utsname.release, utsname.version,
3977
0
        utsname.machine);
3978
0
    } else {
3979
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3980
0
        "corrupted frame on kernel ring mac "
3981
0
        "offset %u + caplen %u > frame len %d",
3982
0
        tp_mac, tp_snaplen, handle->bufsize);
3983
0
    }
3984
0
    return -1;
3985
0
  }
3986
3987
  /* run filter on received packet
3988
   * If the kernel filtering is enabled we need to run the
3989
   * filter until all the frames present into the ring
3990
   * at filter creation time are processed.
3991
   * In this case, blocks_to_filter_in_userland is used
3992
   * as a counter for the packet we need to filter.
3993
   * Note: alternatively it could be possible to stop applying
3994
   * the filter when the ring became empty, but it can possibly
3995
   * happen a lot later... */
3996
0
  bp = frame + tp_mac;
3997
3998
  /* if required build in place the sll header*/
3999
0
  sll = (void *)(frame + TPACKET_ALIGN(handlep->tp_hdrlen));
4000
0
  if (handlep->cooked) {
4001
0
    if (handle->linktype == DLT_LINUX_SLL2) {
4002
0
      struct sll2_header *hdrp;
4003
4004
      /*
4005
       * The kernel should have left us with enough
4006
       * space for an sll header; back up the packet
4007
       * data pointer into that space, as that'll be
4008
       * the beginning of the packet we pass to the
4009
       * callback.
4010
       */
4011
0
      bp -= SLL2_HDR_LEN;
4012
4013
      /*
4014
       * Let's make sure that's past the end of
4015
       * the tpacket header, i.e. >=
4016
       * ((u_char *)thdr + TPACKET_HDRLEN), so we
4017
       * don't step on the header when we construct
4018
       * the sll header.
4019
       */
4020
0
      if (bp < (u_char *)frame +
4021
0
             TPACKET_ALIGN(handlep->tp_hdrlen) +
4022
0
             sizeof(struct sockaddr_ll)) {
4023
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4024
0
          "cooked-mode frame doesn't have room for sll header");
4025
0
        return -1;
4026
0
      }
4027
4028
      /*
4029
       * OK, that worked; construct the sll header.
4030
       */
4031
0
      hdrp = (struct sll2_header *)bp;
4032
0
      hdrp->sll2_protocol = sll->sll_protocol;
4033
0
      hdrp->sll2_reserved_mbz = 0;
4034
0
      hdrp->sll2_if_index = htonl(sll->sll_ifindex);
4035
0
      hdrp->sll2_hatype = htons(sll->sll_hatype);
4036
0
      hdrp->sll2_pkttype = sll->sll_pkttype;
4037
0
      hdrp->sll2_halen = sll->sll_halen;
4038
0
      memcpy(hdrp->sll2_addr, sll->sll_addr, SLL_ADDRLEN);
4039
4040
0
      snaplen += sizeof(struct sll2_header);
4041
0
    } else {
4042
0
      struct sll_header *hdrp;
4043
4044
      /*
4045
       * The kernel should have left us with enough
4046
       * space for an sll header; back up the packet
4047
       * data pointer into that space, as that'll be
4048
       * the beginning of the packet we pass to the
4049
       * callback.
4050
       */
4051
0
      bp -= SLL_HDR_LEN;
4052
4053
      /*
4054
       * Let's make sure that's past the end of
4055
       * the tpacket header, i.e. >=
4056
       * ((u_char *)thdr + TPACKET_HDRLEN), so we
4057
       * don't step on the header when we construct
4058
       * the sll header.
4059
       */
4060
0
      if (bp < (u_char *)frame +
4061
0
             TPACKET_ALIGN(handlep->tp_hdrlen) +
4062
0
             sizeof(struct sockaddr_ll)) {
4063
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4064
0
          "cooked-mode frame doesn't have room for sll header");
4065
0
        return -1;
4066
0
      }
4067
4068
      /*
4069
       * OK, that worked; construct the sll header.
4070
       */
4071
0
      hdrp = (struct sll_header *)bp;
4072
0
      hdrp->sll_pkttype = htons(sll->sll_pkttype);
4073
0
      hdrp->sll_hatype = htons(sll->sll_hatype);
4074
0
      hdrp->sll_halen = htons(sll->sll_halen);
4075
0
      memcpy(hdrp->sll_addr, sll->sll_addr, SLL_ADDRLEN);
4076
0
      hdrp->sll_protocol = sll->sll_protocol;
4077
4078
0
      snaplen += sizeof(struct sll_header);
4079
0
    }
4080
0
  } else {
4081
    /*
4082
     * If this is a packet from a CAN device, so that
4083
     * sll->sll_hatype is ARPHRD_CAN, then, as we're
4084
     * not capturing in cooked mode, its link-layer
4085
     * type is DLT_CAN_SOCKETCAN.  Fix up the header
4086
     * provided by the code below us to match what
4087
     * DLT_CAN_SOCKETCAN is expected to provide.
4088
     */
4089
0
    if (sll->sll_hatype == ARPHRD_CAN) {
4090
0
      pcap_can_socketcan_hdr *canhdr = (pcap_can_socketcan_hdr *)bp;
4091
0
      pcap_can_socketcan_xl_hdr *canxl_hdr = (pcap_can_socketcan_xl_hdr *)bp;
4092
0
      uint16_t protocol = ntohs(sll->sll_protocol);
4093
4094
      /*
4095
       * Check the protocol field from the sll header.
4096
       * If it's one of the known CAN protocol types,
4097
       * make sure the appropriate flags are set, so
4098
       * that a program can tell what type of frame
4099
       * it is.
4100
       *
4101
       * These operations should not have any effect
4102
       * when reading proper CAN frames from Linux
4103
       * CAN interfaces. Enforcing these bit values
4104
       * ensures proper DLT_CAN_SOCKETCAN data even
4105
       * with malformed PF_PACKET content.
4106
       *
4107
       * The two flags are:
4108
       *
4109
       *   CANFD_FDF, which is in the fd_flags field
4110
       *   of the CAN CC/CAN FD header;
4111
       *
4112
       *   CANXL_XLF, which is in the flags field
4113
       *   of the CAN XL header, which overlaps
4114
       *   the payload_length field of the CAN CC/
4115
       *   CAN FD header. Setting CANXL_XLF in the
4116
       *   payload_length of CAN CC/FD frames would
4117
       *   intentionally break the payload length.
4118
       */
4119
0
      switch (protocol) {
4120
4121
0
      case LINUX_SLL_P_CAN:
4122
        /*
4123
         * CAN CC frame (aka Classical CAN, CAN 2.0B)
4124
         *
4125
         * Zero out the CAN FD and CAN XL flags
4126
         * so that this frame will be identified
4127
         * as a CAN CC frame.
4128
         */
4129
0
        canxl_hdr->flags &= ~CANXL_XLF;
4130
0
        canhdr->fd_flags &= ~CANFD_FDF;
4131
0
        break;
4132
4133
0
      case LINUX_SLL_P_CANFD:
4134
        /*
4135
         * CAN FD frame
4136
         *
4137
         * Set CANFD_FDF in the fd_flags field,
4138
         * and clear the CANXL_XLF bit in the
4139
         * CAN XL flags field, so that this frame
4140
         * will be identified as a CAN FD frame.
4141
         *
4142
         * The CANFD_FDF bit is not reliably
4143
         * set by the Linux kernel. But setting
4144
         * that bit for CAN FD is recommended.
4145
         */
4146
0
        canxl_hdr->flags &= ~CANXL_XLF;
4147
0
        canhdr->fd_flags |= CANFD_FDF;
4148
0
        break;
4149
4150
0
      case LINUX_SLL_P_CANXL:
4151
        /*
4152
         * CAN XL frame
4153
         *
4154
         * Set CANXL_XLF bit in the CAN XL flags
4155
         * field, so that this frame will appear
4156
         * to be a CAN XL frame.
4157
         */
4158
0
        canxl_hdr->flags |= CANXL_XLF;
4159
0
        break;
4160
0
      }
4161
4162
      /*
4163
       * Put multi-byte header fields in a byte-order
4164
       * -independent format.
4165
       */
4166
0
      if (canxl_hdr->flags & CANXL_XLF) {
4167
        /*
4168
         * This is a CAN XL frame.
4169
         *
4170
         * DLT_CAN_SOCKETCAN is specified as having
4171
         * the Priority ID/VCID field in big-
4172
         * endian byte order, and the payload length
4173
         * and Acceptance Field in little-endian byte
4174
         * order, but capturing on a CAN device
4175
         * provides them in host byte order.
4176
         * Convert them to the appropriate byte
4177
         * orders.
4178
         *
4179
         * The reason we put the first field
4180
         * into big-endian byte order is that
4181
         * older libpcap code, ignorant of
4182
         * CAN XL, treated it as the CAN ID
4183
         * field and put it into big-endian
4184
         * byte order, and we don't want to
4185
         * break code that understands CAN XL
4186
         * headers, and treats that field as
4187
         * being big-endian.
4188
         *
4189
         * The reason other fields are put in little-
4190
         * endian byte order is that older
4191
         * libpcap code, ignorant of CAN XL,
4192
         * left those fields alone, and the
4193
         * processors on which the CAN XL
4194
         * frames were captured are likely
4195
         * to be little-endian processors.
4196
         */
4197
4198
0
#if __BYTE_ORDER == __LITTLE_ENDIAN
4199
        /*
4200
         * We're capturing on a little-endian
4201
         * machine, so we put the priority/VCID
4202
         * field into big-endian byte order, and
4203
         * leave the payload length and acceptance
4204
         * field in little-endian byte order.
4205
         */
4206
        /* Byte-swap priority/VCID. */
4207
0
        canxl_hdr->priority_vcid = SWAPLONG(canxl_hdr->priority_vcid);
4208
#elif __BYTE_ORDER == __BIG_ENDIAN
4209
        /*
4210
         * We're capturing on a big-endian
4211
         * machine, so we want to leave the
4212
         * priority/VCID field alone, and byte-swap
4213
         * the payload length and acceptance
4214
         * fields to little-endian.
4215
         */
4216
        /* Byte-swap the payload length */
4217
        canxl_hdr->payload_length = SWAPSHORT(canxl_hdr->payload_length);
4218
4219
        /*
4220
         * Byte-swap the acceptance field.
4221
         *
4222
         * XXX - is it just a 4-octet string,
4223
         * not in any byte order?
4224
         */
4225
        canxl_hdr->acceptance_field = SWAPLONG(canxl_hdr->acceptance_field);
4226
#else
4227
#error "Unknown byte order"
4228
#endif
4229
0
      } else {
4230
        /*
4231
         * CAN CC or CAN FD frame.
4232
         *
4233
         * DLT_CAN_SOCKETCAN is specified as having
4234
         * the CAN ID and flags in network byte
4235
         * order, but capturing on a CAN device
4236
         * provides it in host byte order.  Convert
4237
         * it to network byte order.
4238
         */
4239
0
        canhdr->can_id = htonl(canhdr->can_id);
4240
0
      }
4241
0
    }
4242
0
  }
4243
4244
0
  if (handlep->filter_in_userland && handle->fcode.bf_insns) {
4245
0
    struct pcap_bpf_aux_data aux_data;
4246
4247
0
    aux_data.vlan_tag_present = tp_vlan_tci_valid;
4248
0
    aux_data.vlan_tag = tp_vlan_tci & 0x0fff;
4249
4250
0
    if (pcapint_filter_with_aux_data(handle->fcode.bf_insns,
4251
0
                bp,
4252
0
                tp_len,
4253
0
                snaplen,
4254
0
                &aux_data) == 0)
4255
0
      return 0;
4256
0
  }
4257
4258
0
  if (!linux_check_direction(handle, sll))
4259
0
    return 0;
4260
4261
  /*
4262
   * Get required packet info from ring header.
4263
   *
4264
   * The seconds part of the time stamp is a 32-bit
4265
   * unsigned integer; this will have a problem in 2106,
4266
   * but not in 2038.
4267
   *
4268
   * ts.tv_sec is a time_t, which is signed, and which
4269
   * may be 32-bit or 64-bit.  Pass it through; if we
4270
   * have a 32-bit signed time_t, in which values >
4271
   * 2^31-1 won't fit, then:
4272
   *
4273
   *    Writing the packet to a file will pass the bits
4274
   *    through.  If the program reading the file can
4275
   *    handle 32-bit unsigned time stamps, including
4276
   *    any conversion to local time or UTC, it will
4277
   *    properly handle the time stamps.
4278
   *
4279
   *    Reporting the packet time stamp may give
4280
   *    an error or a pre-1970 time stamp on platforms
4281
   *    with signed 32-bit time stamps, but that
4282
   *    will happen even if it's captured on a
4283
   *    platform with a 64-bit time_t.
4284
   */
4285
0
  pcaphdr.ts.tv_sec = tp_sec;
4286
0
  pcaphdr.ts.tv_usec = tp_usec;
4287
0
  pcaphdr.caplen = tp_snaplen;
4288
0
  pcaphdr.len = tp_len;
4289
4290
  /* if required build in place the sll header*/
4291
0
  if (handlep->cooked) {
4292
    /* update packet len */
4293
0
    if (handle->linktype == DLT_LINUX_SLL2) {
4294
0
      pcaphdr.caplen += SLL2_HDR_LEN;
4295
0
      pcaphdr.len += SLL2_HDR_LEN;
4296
0
    } else {
4297
0
      pcaphdr.caplen += SLL_HDR_LEN;
4298
0
      pcaphdr.len += SLL_HDR_LEN;
4299
0
    }
4300
0
  }
4301
4302
0
  if (tp_vlan_tci_valid &&
4303
0
    handlep->vlan_offset != -1 &&
4304
0
    tp_snaplen >= (unsigned int) handlep->vlan_offset)
4305
0
  {
4306
0
    struct vlan_tag *tag;
4307
4308
    /*
4309
     * Move everything in the header, except the type field,
4310
     * down VLAN_TAG_LEN bytes, to allow us to insert the
4311
     * VLAN tag between that stuff and the type field.
4312
     */
4313
0
    bp -= VLAN_TAG_LEN;
4314
0
    memmove(bp, bp + VLAN_TAG_LEN, handlep->vlan_offset);
4315
4316
    /*
4317
     * Now insert the tag.
4318
     */
4319
0
    tag = (struct vlan_tag *)(bp + handlep->vlan_offset);
4320
0
    tag->vlan_tpid = htons(tp_vlan_tpid);
4321
0
    tag->vlan_tci = htons(tp_vlan_tci);
4322
4323
    /*
4324
     * Add the tag to the packet lengths.
4325
     */
4326
0
    pcaphdr.caplen += VLAN_TAG_LEN;
4327
0
    pcaphdr.len += VLAN_TAG_LEN;
4328
0
  }
4329
4330
  /*
4331
   * The only way to tell the kernel to cut off the
4332
   * packet at a snapshot length is with a filter program;
4333
   * if there's no filter program, the kernel won't cut
4334
   * the packet off.
4335
   *
4336
   * Trim the snapshot length to be no longer than the
4337
   * specified snapshot length.
4338
   *
4339
   * XXX - an alternative is to put a filter, consisting
4340
   * of a "ret <snaplen>" instruction, on the socket
4341
   * in the activate routine, so that the truncation is
4342
   * done in the kernel even if nobody specified a filter;
4343
   * that means that less buffer space is consumed in
4344
   * the memory-mapped buffer.
4345
   */
4346
0
  if (pcaphdr.caplen > (bpf_u_int32)handle->snapshot)
4347
0
    pcaphdr.caplen = handle->snapshot;
4348
4349
  /* pass the packet to the user */
4350
0
  callback(user, &pcaphdr, bp);
4351
4352
0
  return 1;
4353
0
}
4354
4355
static int
4356
pcap_read_linux_mmap_v2(pcap_t *handle, int max_packets, pcap_handler callback,
4357
    u_char *user)
4358
0
{
4359
0
  struct pcap_linux *handlep = handle->priv;
4360
0
  union thdr h;
4361
0
  int pkts = 0;
4362
0
  int ret;
4363
4364
  /* wait for frames availability.*/
4365
0
  h.raw = RING_GET_CURRENT_FRAME(handle);
4366
0
  if (!packet_mmap_acquire(h.h2)) {
4367
    /*
4368
     * The current frame is owned by the kernel; wait for
4369
     * a frame to be handed to us.
4370
     */
4371
0
    ret = pcap_wait_for_frames_mmap(handle);
4372
0
    if (ret) {
4373
0
      return ret;
4374
0
    }
4375
0
  }
4376
4377
  /*
4378
   * This can conceivably process more than INT_MAX packets,
4379
   * which would overflow the packet count, causing it either
4380
   * to look like a negative number, and thus cause us to
4381
   * return a value that looks like an error, or overflow
4382
   * back into positive territory, and thus cause us to
4383
   * return a too-low count.
4384
   *
4385
   * Therefore, if the packet count is unlimited, we clip
4386
   * it at INT_MAX; this routine is not expected to
4387
   * process packets indefinitely, so that's not an issue.
4388
   */
4389
0
  if (PACKET_COUNT_IS_UNLIMITED(max_packets))
4390
0
    max_packets = INT_MAX;
4391
4392
0
  while (pkts < max_packets) {
4393
    /*
4394
     * Get the current ring buffer frame, and break if
4395
     * it's still owned by the kernel.
4396
     */
4397
0
    h.raw = RING_GET_CURRENT_FRAME(handle);
4398
0
    if (!packet_mmap_acquire(h.h2))
4399
0
      break;
4400
4401
0
    ret = pcap_handle_packet_mmap(
4402
0
        handle,
4403
0
        callback,
4404
0
        user,
4405
0
        h.raw,
4406
0
        h.h2->tp_len,
4407
0
        h.h2->tp_mac,
4408
0
        h.h2->tp_snaplen,
4409
0
        h.h2->tp_sec,
4410
0
        handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? h.h2->tp_nsec : h.h2->tp_nsec / 1000,
4411
0
        VLAN_VALID(h.h2, h.h2),
4412
0
        h.h2->tp_vlan_tci,
4413
0
        VLAN_TPID(h.h2, h.h2));
4414
0
    if (ret == 1) {
4415
0
      pkts++;
4416
0
    } else if (ret < 0) {
4417
0
      return ret;
4418
0
    }
4419
4420
    /*
4421
     * Hand this block back to the kernel, and, if we're
4422
     * counting blocks that need to be filtered in userland
4423
     * after having been filtered by the kernel, count
4424
     * the one we've just processed.
4425
     */
4426
0
    packet_mmap_release(h.h2);
4427
0
    if (handlep->blocks_to_filter_in_userland != 0) {
4428
0
      handlep->blocks_to_filter_in_userland--;
4429
0
      if (handlep->blocks_to_filter_in_userland == 0) {
4430
        /*
4431
         * No more blocks need to be filtered
4432
         * in userland.
4433
         */
4434
0
        handlep->filter_in_userland = 0;
4435
0
      }
4436
0
    }
4437
4438
    /* next block */
4439
0
    if (++handle->offset >= handle->cc)
4440
0
      handle->offset = 0;
4441
4442
    /* check for break loop condition*/
4443
0
    if (handle->break_loop) {
4444
0
      handle->break_loop = 0;
4445
0
      return PCAP_ERROR_BREAK;
4446
0
    }
4447
0
  }
4448
0
  return pkts;
4449
0
}
4450
4451
#ifdef HAVE_TPACKET3
4452
static int
4453
pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback,
4454
    u_char *user)
4455
0
{
4456
0
  struct pcap_linux *handlep = handle->priv;
4457
0
  union thdr h;
4458
0
  int pkts = 0;
4459
0
  int ret;
4460
4461
0
again:
4462
0
  if (handlep->current_packet == NULL) {
4463
    /* wait for frames availability.*/
4464
0
    h.raw = RING_GET_CURRENT_FRAME(handle);
4465
0
    if (!packet_mmap_v3_acquire(h.h3)) {
4466
      /*
4467
       * The current frame is owned by the kernel; wait
4468
       * for a frame to be handed to us.
4469
       */
4470
0
      ret = pcap_wait_for_frames_mmap(handle);
4471
0
      if (ret) {
4472
0
        return ret;
4473
0
      }
4474
0
    }
4475
0
  }
4476
0
  h.raw = RING_GET_CURRENT_FRAME(handle);
4477
0
  if (!packet_mmap_v3_acquire(h.h3)) {
4478
0
    if (pkts == 0 && handlep->timeout == 0) {
4479
      /* Block until we see a packet. */
4480
0
      goto again;
4481
0
    }
4482
0
    return pkts;
4483
0
  }
4484
4485
  /*
4486
   * This can conceivably process more than INT_MAX packets,
4487
   * which would overflow the packet count, causing it either
4488
   * to look like a negative number, and thus cause us to
4489
   * return a value that looks like an error, or overflow
4490
   * back into positive territory, and thus cause us to
4491
   * return a too-low count.
4492
   *
4493
   * Therefore, if the packet count is unlimited, we clip
4494
   * it at INT_MAX; this routine is not expected to
4495
   * process packets indefinitely, so that's not an issue.
4496
   */
4497
0
  if (PACKET_COUNT_IS_UNLIMITED(max_packets))
4498
0
    max_packets = INT_MAX;
4499
4500
0
  while (pkts < max_packets) {
4501
0
    int packets_to_read;
4502
4503
0
    if (handlep->current_packet == NULL) {
4504
0
      h.raw = RING_GET_CURRENT_FRAME(handle);
4505
0
      if (!packet_mmap_v3_acquire(h.h3))
4506
0
        break;
4507
4508
0
      handlep->current_packet = h.raw + h.h3->hdr.bh1.offset_to_first_pkt;
4509
0
      handlep->packets_left = h.h3->hdr.bh1.num_pkts;
4510
0
    }
4511
0
    packets_to_read = handlep->packets_left;
4512
4513
0
    if (packets_to_read > (max_packets - pkts)) {
4514
      /*
4515
       * There are more packets in the buffer than
4516
       * the number of packets we have left to
4517
       * process to get up to the maximum number
4518
       * of packets to process.  Only process enough
4519
       * of them to get us up to that maximum.
4520
       */
4521
0
      packets_to_read = max_packets - pkts;
4522
0
    }
4523
4524
0
    while (packets_to_read-- && !handle->break_loop) {
4525
0
      struct tpacket3_hdr* tp3_hdr = (struct tpacket3_hdr*) handlep->current_packet;
4526
0
      ret = pcap_handle_packet_mmap(
4527
0
          handle,
4528
0
          callback,
4529
0
          user,
4530
0
          handlep->current_packet,
4531
0
          tp3_hdr->tp_len,
4532
0
          tp3_hdr->tp_mac,
4533
0
          tp3_hdr->tp_snaplen,
4534
0
          tp3_hdr->tp_sec,
4535
0
          handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? tp3_hdr->tp_nsec : tp3_hdr->tp_nsec / 1000,
4536
0
          VLAN_VALID(tp3_hdr, &tp3_hdr->hv1),
4537
0
          tp3_hdr->hv1.tp_vlan_tci,
4538
0
          VLAN_TPID(tp3_hdr, &tp3_hdr->hv1));
4539
0
      if (ret == 1) {
4540
0
        pkts++;
4541
0
      } else if (ret < 0) {
4542
0
        handlep->current_packet = NULL;
4543
0
        return ret;
4544
0
      }
4545
0
      handlep->current_packet += tp3_hdr->tp_next_offset;
4546
0
      handlep->packets_left--;
4547
0
    }
4548
4549
0
    if (handlep->packets_left <= 0) {
4550
      /*
4551
       * Hand this block back to the kernel, and, if
4552
       * we're counting blocks that need to be
4553
       * filtered in userland after having been
4554
       * filtered by the kernel, count the one we've
4555
       * just processed.
4556
       */
4557
0
      packet_mmap_v3_release(h.h3);
4558
0
      if (handlep->blocks_to_filter_in_userland != 0) {
4559
0
        handlep->blocks_to_filter_in_userland--;
4560
0
        if (handlep->blocks_to_filter_in_userland == 0) {
4561
          /*
4562
           * No more blocks need to be filtered
4563
           * in userland.
4564
           */
4565
0
          handlep->filter_in_userland = 0;
4566
0
        }
4567
0
      }
4568
4569
      /* next block */
4570
0
      if (++handle->offset >= handle->cc)
4571
0
        handle->offset = 0;
4572
4573
0
      handlep->current_packet = NULL;
4574
0
    }
4575
4576
    /* check for break loop condition*/
4577
0
    if (handle->break_loop) {
4578
0
      handle->break_loop = 0;
4579
0
      return PCAP_ERROR_BREAK;
4580
0
    }
4581
0
  }
4582
0
  if (pkts == 0 && handlep->timeout == 0) {
4583
    /* Block until we see a packet. */
4584
0
    goto again;
4585
0
  }
4586
0
  return pkts;
4587
0
}
4588
#endif /* HAVE_TPACKET3 */
4589
4590
/*
4591
 *  Attach the given BPF code to the packet capture device.
4592
 */
4593
static int
4594
pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter)
4595
0
{
4596
0
  struct pcap_linux *handlep;
4597
0
  struct sock_fprog fcode;
4598
0
  int     can_filter_in_kernel;
4599
0
  int     err = 0;
4600
0
  u_int     n, offset;
4601
4602
0
  if (!handle)
4603
0
    return -1;
4604
0
  if (!filter) {
4605
0
          pcapint_strlcpy(handle->errbuf, "setfilter: No filter specified",
4606
0
      PCAP_ERRBUF_SIZE);
4607
0
    return -1;
4608
0
  }
4609
4610
0
  handlep = handle->priv;
4611
4612
  /* Make our private copy of the filter */
4613
4614
0
  if (pcapint_install_bpf_program(handle, filter) < 0)
4615
    /* pcapint_install_bpf_program() filled in errbuf */
4616
0
    return -1;
4617
4618
  /*
4619
   * Run user level packet filter by default. Will be overridden if
4620
   * installing a kernel filter succeeds.
4621
   */
4622
0
  handlep->filter_in_userland = 1;
4623
4624
  /* Install kernel level filter if possible */
4625
4626
0
  if (handle->fcode.bf_len > USHRT_MAX) {
4627
    /*
4628
     * fcode.len is an unsigned short for current kernel.
4629
     * I have yet to see BPF-Code with that much
4630
     * instructions but still it is possible. So for the
4631
     * sake of correctness I added this check.
4632
     */
4633
0
    fprintf(stderr, "Warning: Filter too complex for kernel\n");
4634
0
    fcode.len = 0;
4635
0
    fcode.filter = NULL;
4636
0
    can_filter_in_kernel = 0;
4637
0
  } else {
4638
    /*
4639
     * Oh joy, the Linux kernel uses struct sock_fprog instead
4640
     * of struct bpf_program and of course the length field is
4641
     * of different size. Pointed out by Sebastian
4642
     *
4643
     * Oh, and we also need to fix it up so that all "ret"
4644
     * instructions with non-zero operands have MAXIMUM_SNAPLEN
4645
     * as the operand if we're not capturing in memory-mapped
4646
     * mode, and so that, if we're in cooked mode, all memory-
4647
     * reference instructions use special magic offsets in
4648
     * references to the link-layer header and assume that the
4649
     * link-layer payload begins at 0; "fix_program()" will do
4650
     * that.
4651
     */
4652
0
    switch (fix_program(handle, &fcode)) {
4653
4654
0
    case -1:
4655
0
    default:
4656
      /*
4657
       * Fatal error; just quit.
4658
       * (The "default" case shouldn't happen; we
4659
       * return -1 for that reason.)
4660
       */
4661
0
      return -1;
4662
4663
0
    case 0:
4664
      /*
4665
       * The program performed checks that we can't make
4666
       * work in the kernel.
4667
       */
4668
0
      can_filter_in_kernel = 0;
4669
0
      break;
4670
4671
0
    case 1:
4672
      /*
4673
       * We have a filter that'll work in the kernel.
4674
       */
4675
0
      can_filter_in_kernel = 1;
4676
0
      break;
4677
0
    }
4678
0
  }
4679
4680
  /*
4681
   * NOTE: at this point, we've set both the "len" and "filter"
4682
   * fields of "fcode".  As of the 2.6.32.4 kernel, at least,
4683
   * those are the only members of the "sock_fprog" structure,
4684
   * so we initialize every member of that structure.
4685
   *
4686
   * If there is anything in "fcode" that is not initialized,
4687
   * it is either a field added in a later kernel, or it's
4688
   * padding.
4689
   *
4690
   * If a new field is added, this code needs to be updated
4691
   * to set it correctly.
4692
   *
4693
   * If there are no other fields, then:
4694
   *
4695
   *  if the Linux kernel looks at the padding, it's
4696
   *  buggy;
4697
   *
4698
   *  if the Linux kernel doesn't look at the padding,
4699
   *  then if some tool complains that we're passing
4700
   *  uninitialized data to the kernel, then the tool
4701
   *  is buggy and needs to understand that it's just
4702
   *  padding.
4703
   */
4704
0
  if (can_filter_in_kernel) {
4705
0
    if ((err = set_kernel_filter(handle, &fcode)) == 0)
4706
0
    {
4707
      /*
4708
       * Installation succeeded - using kernel filter,
4709
       * so userland filtering not needed.
4710
       */
4711
0
      handlep->filter_in_userland = 0;
4712
0
    }
4713
0
    else if (err == -1) /* Non-fatal error */
4714
0
    {
4715
      /*
4716
       * Print a warning if we weren't able to install
4717
       * the filter for a reason other than "this kernel
4718
       * isn't configured to support socket filters.
4719
       */
4720
0
      if (errno == ENOMEM) {
4721
        /*
4722
         * Either a kernel memory allocation
4723
         * failure occurred, or there's too
4724
         * much "other/option memory" allocated
4725
         * for this socket.  Suggest that they
4726
         * increase the "other/option memory"
4727
         * limit.
4728
         */
4729
0
        fprintf(stderr,
4730
0
            "Warning: Couldn't allocate kernel memory for filter: try increasing net.core.optmem_max with sysctl\n");
4731
0
      } else if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) {
4732
0
        fprintf(stderr,
4733
0
            "Warning: Kernel filter failed: %s\n",
4734
0
          pcap_strerror(errno));
4735
0
      }
4736
0
    }
4737
0
  }
4738
4739
  /*
4740
   * If we're not using the kernel filter, get rid of any kernel
4741
   * filter that might've been there before, e.g. because the
4742
   * previous filter could work in the kernel, or because some other
4743
   * code attached a filter to the socket by some means other than
4744
   * calling "pcap_setfilter()".  Otherwise, the kernel filter may
4745
   * filter out packets that would pass the new userland filter.
4746
   */
4747
0
  if (handlep->filter_in_userland) {
4748
0
    if (reset_kernel_filter(handle) == -1) {
4749
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
4750
0
          PCAP_ERRBUF_SIZE, errno,
4751
0
          "can't remove kernel filter");
4752
0
      err = -2; /* fatal error */
4753
0
    }
4754
0
  }
4755
4756
  /*
4757
   * Free up the copy of the filter that was made by "fix_program()".
4758
   */
4759
0
  if (fcode.filter != NULL)
4760
0
    free(fcode.filter);
4761
4762
0
  if (err == -2)
4763
    /* Fatal error */
4764
0
    return -1;
4765
4766
  /*
4767
   * If we're filtering in userland, there's nothing to do;
4768
   * the new filter will be used for the next packet.
4769
   */
4770
0
  if (handlep->filter_in_userland)
4771
0
    return 0;
4772
4773
  /*
4774
   * We're filtering in the kernel; the packets present in
4775
   * all blocks currently in the ring were already filtered
4776
   * by the old filter, and so will need to be filtered in
4777
   * userland by the new filter.
4778
   *
4779
   * Get an upper bound for the number of such blocks; first,
4780
   * walk the ring backward and count the free blocks.
4781
   */
4782
0
  offset = handle->offset;
4783
0
  if (offset == 0)
4784
0
    offset = handle->cc;
4785
0
  offset--;
4786
0
  for (n=0; n < handle->cc; ++n) {
4787
0
    if (offset == 0)
4788
0
      offset = handle->cc;
4789
0
    offset--;
4790
0
    if (pcap_get_ring_frame_status(handle, offset) != TP_STATUS_KERNEL)
4791
0
      break;
4792
0
  }
4793
4794
  /*
4795
   * If we found free blocks, decrement the count of free
4796
   * blocks by 1, just in case we lost a race with another
4797
   * thread of control that was adding a packet while
4798
   * we were counting and that had run the filter before
4799
   * we changed it.
4800
   *
4801
   * XXX - could there be more than one block added in
4802
   * this fashion?
4803
   *
4804
   * XXX - is there a way to avoid that race, e.g. somehow
4805
   * wait for all packets that passed the old filter to
4806
   * be added to the ring?
4807
   */
4808
0
  if (n != 0)
4809
0
    n--;
4810
4811
  /*
4812
   * Set the count of blocks worth of packets to filter
4813
   * in userland to the total number of blocks in the
4814
   * ring minus the number of free blocks we found, and
4815
   * turn on userland filtering.  (The count of blocks
4816
   * worth of packets to filter in userland is guaranteed
4817
   * not to be zero - n, above, couldn't be set to a
4818
   * value > handle->cc, and if it were equal to
4819
   * handle->cc, it wouldn't be zero, and thus would
4820
   * be decremented to handle->cc - 1.)
4821
   */
4822
0
  handlep->blocks_to_filter_in_userland = handle->cc - n;
4823
0
  handlep->filter_in_userland = 1;
4824
4825
0
  return 0;
4826
0
}
4827
4828
/*
4829
 *  Return the index of the given device name. Fill ebuf and return
4830
 *  -1 on failure.
4831
 */
4832
static int
4833
iface_get_id(int fd, const char *device, char *ebuf)
4834
0
{
4835
0
  struct ifreq  ifr;
4836
4837
0
  memset(&ifr, 0, sizeof(ifr));
4838
0
  pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
4839
4840
0
  if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
4841
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4842
0
        errno, "SIOCGIFINDEX");
4843
0
    return -1;
4844
0
  }
4845
4846
0
  return ifr.ifr_ifindex;
4847
0
}
4848
4849
/*
4850
 *  Bind the socket associated with FD to the given device.
4851
 *  Return 0 on success or a PCAP_ERROR_ value on a hard error.
4852
 */
4853
static int
4854
iface_bind(int fd, int ifindex, char *ebuf, int protocol)
4855
0
{
4856
0
  struct sockaddr_ll  sll;
4857
0
  int     ret, err;
4858
0
  socklen_t   errlen = sizeof(err);
4859
4860
0
  memset(&sll, 0, sizeof(sll));
4861
0
  sll.sll_family    = AF_PACKET;
4862
0
  sll.sll_ifindex   = ifindex < 0 ? 0 : ifindex;
4863
0
  sll.sll_protocol  = protocol;
4864
4865
0
  if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) {
4866
0
    if (errno == ENETDOWN) {
4867
      /*
4868
       * Return a "network down" indication, so that
4869
       * the application can report that rather than
4870
       * saying we had a mysterious failure and
4871
       * suggest that they report a problem to the
4872
       * libpcap developers.
4873
       */
4874
0
      return PCAP_ERROR_IFACE_NOT_UP;
4875
0
    }
4876
0
    if (errno == ENODEV) {
4877
      /*
4878
       * There's nothing more to say, so clear the
4879
       * error message.
4880
       */
4881
0
      ebuf[0] = '\0';
4882
0
      ret = PCAP_ERROR_NO_SUCH_DEVICE;
4883
0
    } else {
4884
0
      ret = PCAP_ERROR;
4885
0
      pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4886
0
          errno, "bind");
4887
0
    }
4888
0
    return ret;
4889
0
  }
4890
4891
  /* Any pending errors, e.g., network is down? */
4892
4893
0
  if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
4894
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4895
0
        errno, "getsockopt (SO_ERROR)");
4896
0
    return PCAP_ERROR;
4897
0
  }
4898
4899
0
  if (err == ENETDOWN) {
4900
    /*
4901
     * Return a "network down" indication, so that
4902
     * the application can report that rather than
4903
     * saying we had a mysterious failure and
4904
     * suggest that they report a problem to the
4905
     * libpcap developers.
4906
     */
4907
0
    return PCAP_ERROR_IFACE_NOT_UP;
4908
0
  } else if (err > 0) {
4909
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4910
0
        err, "bind");
4911
0
    return PCAP_ERROR;
4912
0
  }
4913
4914
0
  return 0;
4915
0
}
4916
4917
/*
4918
 * Try to enter monitor mode.
4919
 * If we have libnl, try to create a new monitor-mode device and
4920
 * capture on that; otherwise, just say "not supported".
4921
 */
4922
#ifdef HAVE_LIBNL
4923
static int
4924
enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device)
4925
{
4926
  struct pcap_linux *handlep = handle->priv;
4927
  int ret;
4928
  char phydev_path[PATH_MAX+1];
4929
  struct nl80211_state nlstate;
4930
  struct ifreq ifr;
4931
  u_int n;
4932
4933
  /*
4934
   * Is this a mac80211 device?
4935
   */
4936
  ret = get_mac80211_phydev(handle, device, phydev_path, PATH_MAX);
4937
  if (ret < 0)
4938
    return ret; /* error */
4939
  if (ret == 0)
4940
    return 0; /* no error, but not mac80211 device */
4941
4942
  ret = nl80211_init(handle, &nlstate, device);
4943
  if (ret != 0)
4944
    return ret;
4945
4946
  /*
4947
   * Is this already a monN device?
4948
   * If so, we're done.
4949
   */
4950
  int type;
4951
  ret = get_if_type(handle, sock_fd, &nlstate, device, &type);
4952
  if (ret <= 0) {
4953
    /*
4954
     * < 0 is a Hard failure.  Just return ret; handle->errbuf
4955
     * has already been set.
4956
     *
4957
     * 0 is "device not available"; the caller should retry later.
4958
     */
4959
    nl80211_cleanup(&nlstate);
4960
    return ret;
4961
  }
4962
  if (type == NL80211_IFTYPE_MONITOR) {
4963
    /*
4964
     * OK, it's already a monitor mode device; just use it.
4965
     * There's no point in creating another monitor device
4966
     * that will have to be cleaned up.
4967
     */
4968
    nl80211_cleanup(&nlstate);
4969
    return ret;
4970
  }
4971
4972
  /*
4973
   * OK, it's apparently a mac80211 device but not a monitor device.
4974
   * Try to find an unused monN device for it.
4975
   */
4976
  for (n = 0; n < UINT_MAX; n++) {
4977
    /*
4978
     * Try mon{n}.
4979
     */
4980
    char mondevice[3+10+1]; /* mon{UINT_MAX}\0 */
4981
4982
    snprintf(mondevice, sizeof mondevice, "mon%u", n);
4983
    ret = add_mon_if(handle, sock_fd, &nlstate, device, mondevice);
4984
    if (ret == 1) {
4985
      /*
4986
       * Success.  We don't clean up the libnl state
4987
       * yet, as we'll be using it later.
4988
       */
4989
      goto added;
4990
    }
4991
    if (ret < 0) {
4992
      /*
4993
       * Hard failure.  Just return ret; handle->errbuf
4994
       * has already been set.
4995
       */
4996
      nl80211_cleanup(&nlstate);
4997
      return ret;
4998
    }
4999
  }
5000
5001
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
5002
      "%s: No free monN interfaces", device);
5003
  nl80211_cleanup(&nlstate);
5004
  return PCAP_ERROR;
5005
5006
added:
5007
5008
#if 0
5009
  /*
5010
   * Sleep for .1 seconds.
5011
   */
5012
  delay.tv_sec = 0;
5013
  delay.tv_nsec = 500000000;
5014
  nanosleep(&delay, NULL);
5015
#endif
5016
5017
  /*
5018
   * If we haven't already done so, arrange to have
5019
   * "pcap_close_all()" called when we exit.
5020
   */
5021
  if (!pcapint_do_addexit(handle)) {
5022
    /*
5023
     * "atexit()" failed; don't put the interface
5024
     * in rfmon mode, just give up.
5025
     * handle->errbuf has already been filled.
5026
     */
5027
    del_mon_if(handle, sock_fd, &nlstate, device,
5028
        handlep->mondevice);
5029
    nl80211_cleanup(&nlstate);
5030
    return PCAP_ERROR;
5031
  }
5032
5033
  /*
5034
   * Now configure the monitor interface up.
5035
   */
5036
  memset(&ifr, 0, sizeof(ifr));
5037
  pcapint_strlcpy(ifr.ifr_name, handlep->mondevice, sizeof(ifr.ifr_name));
5038
  if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) {
5039
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5040
        errno, "%s: Can't get flags for %s", device,
5041
        handlep->mondevice);
5042
    del_mon_if(handle, sock_fd, &nlstate, device,
5043
        handlep->mondevice);
5044
    nl80211_cleanup(&nlstate);
5045
    return PCAP_ERROR;
5046
  }
5047
  ifr.ifr_flags |= IFF_UP|IFF_RUNNING;
5048
  if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) {
5049
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5050
        errno, "%s: Can't set flags for %s", device,
5051
        handlep->mondevice);
5052
    del_mon_if(handle, sock_fd, &nlstate, device,
5053
        handlep->mondevice);
5054
    nl80211_cleanup(&nlstate);
5055
    return PCAP_ERROR;
5056
  }
5057
5058
  /*
5059
   * Success.  Clean up the libnl state.
5060
   */
5061
  nl80211_cleanup(&nlstate);
5062
5063
  /*
5064
   * Note that we have to delete the monitor device when we close
5065
   * the handle.
5066
   */
5067
  handlep->must_do_on_close |= MUST_DELETE_MONIF;
5068
5069
  /*
5070
   * Add this to the list of pcaps to close when we exit.
5071
   */
5072
  pcapint_add_to_pcaps_to_close(handle);
5073
5074
  return 1;
5075
}
5076
#else /* HAVE_LIBNL */
5077
static int
5078
enter_rfmon_mode(pcap_t *handle _U_, int sock_fd _U_, const char *device _U_)
5079
0
{
5080
  /*
5081
   * We don't have libnl, so we can't do monitor mode.
5082
   */
5083
0
  return 0;
5084
0
}
5085
#endif /* HAVE_LIBNL */
5086
5087
#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
5088
/*
5089
 * Map SOF_TIMESTAMPING_ values to PCAP_TSTAMP_ values.
5090
 */
5091
static const struct {
5092
  int soft_timestamping_val;
5093
  int pcap_tstamp_val;
5094
} sof_ts_type_map[3] = {
5095
  { SOF_TIMESTAMPING_SOFTWARE, PCAP_TSTAMP_HOST },
5096
  { SOF_TIMESTAMPING_SYS_HARDWARE, PCAP_TSTAMP_ADAPTER },
5097
  { SOF_TIMESTAMPING_RAW_HARDWARE, PCAP_TSTAMP_ADAPTER_UNSYNCED }
5098
};
5099
0
#define NUM_SOF_TIMESTAMPING_TYPES  (sizeof sof_ts_type_map / sizeof sof_ts_type_map[0])
5100
5101
/*
5102
 * Set the list of time stamping types to include all types.
5103
 */
5104
static int
5105
iface_set_all_ts_types(pcap_t *handle, char *ebuf)
5106
0
{
5107
0
  u_int i;
5108
5109
0
  handle->tstamp_type_list = malloc(NUM_SOF_TIMESTAMPING_TYPES * sizeof(u_int));
5110
0
  if (handle->tstamp_type_list == NULL) {
5111
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5112
0
        errno, "malloc");
5113
0
    return -1;
5114
0
  }
5115
0
  for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++)
5116
0
    handle->tstamp_type_list[i] = sof_ts_type_map[i].pcap_tstamp_val;
5117
0
  handle->tstamp_type_count = NUM_SOF_TIMESTAMPING_TYPES;
5118
0
  return 0;
5119
0
}
5120
5121
/*
5122
 * Get a list of time stamp types.
5123
 */
5124
#ifdef ETHTOOL_GET_TS_INFO
5125
static int
5126
iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf)
5127
0
{
5128
0
  int fd;
5129
0
  struct ifreq ifr;
5130
0
  struct ethtool_ts_info info;
5131
0
  int num_ts_types;
5132
0
  u_int i, j;
5133
5134
  /*
5135
   * This doesn't apply to the "any" device; you can't say "turn on
5136
   * hardware time stamping for all devices that exist now and arrange
5137
   * that it be turned on for any device that appears in the future",
5138
   * and not all devices even necessarily *support* hardware time
5139
   * stamping, so don't report any time stamp types.
5140
   */
5141
0
  if (strcmp(device, "any") == 0) {
5142
0
    handle->tstamp_type_list = NULL;
5143
0
    return 0;
5144
0
  }
5145
5146
  /*
5147
   * Create a socket from which to fetch time stamping capabilities.
5148
   */
5149
0
  fd = get_if_ioctl_socket();
5150
0
  if (fd < 0) {
5151
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5152
0
        errno, "socket for SIOCETHTOOL(ETHTOOL_GET_TS_INFO)");
5153
0
    return -1;
5154
0
  }
5155
5156
0
  memset(&ifr, 0, sizeof(ifr));
5157
0
  pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
5158
0
  memset(&info, 0, sizeof(info));
5159
0
  info.cmd = ETHTOOL_GET_TS_INFO;
5160
0
  ifr.ifr_data = (caddr_t)&info;
5161
0
  if (ioctl(fd, SIOCETHTOOL, &ifr) == -1) {
5162
0
    int save_errno = errno;
5163
5164
0
    close(fd);
5165
0
    switch (save_errno) {
5166
5167
0
    case EOPNOTSUPP:
5168
0
    case EINVAL:
5169
      /*
5170
       * OK, this OS version or driver doesn't support
5171
       * asking for the time stamping types, so let's
5172
       * just return all the possible types.
5173
       */
5174
0
      if (iface_set_all_ts_types(handle, ebuf) == -1)
5175
0
        return -1;
5176
0
      return 0;
5177
5178
0
    case ENODEV:
5179
      /*
5180
       * OK, no such device.
5181
       * The user will find that out when they try to
5182
       * activate the device; just return an empty
5183
       * list of time stamp types.
5184
       */
5185
0
      handle->tstamp_type_list = NULL;
5186
0
      return 0;
5187
5188
0
    default:
5189
      /*
5190
       * Other error.
5191
       */
5192
0
      pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5193
0
          save_errno,
5194
0
          "%s: SIOCETHTOOL(ETHTOOL_GET_TS_INFO) ioctl failed",
5195
0
          device);
5196
0
      return -1;
5197
0
    }
5198
0
  }
5199
0
  close(fd);
5200
5201
  /*
5202
   * Do we support hardware time stamping of *all* packets?
5203
   */
5204
0
  if (!(info.rx_filters & (1 << HWTSTAMP_FILTER_ALL))) {
5205
    /*
5206
     * No, so don't report any time stamp types.
5207
     *
5208
     * XXX - some devices either don't report
5209
     * HWTSTAMP_FILTER_ALL when they do support it, or
5210
     * report HWTSTAMP_FILTER_ALL but map it to only
5211
     * time stamping a few PTP packets.  See
5212
     * http://marc.info/?l=linux-netdev&m=146318183529571&w=2
5213
     *
5214
     * Maybe that got fixed later.
5215
     */
5216
0
    handle->tstamp_type_list = NULL;
5217
0
    return 0;
5218
0
  }
5219
5220
0
  num_ts_types = 0;
5221
0
  for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) {
5222
0
    if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val)
5223
0
      num_ts_types++;
5224
0
  }
5225
0
  if (num_ts_types != 0) {
5226
0
    handle->tstamp_type_list = malloc(num_ts_types * sizeof(u_int));
5227
0
    if (handle->tstamp_type_list == NULL) {
5228
0
      pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5229
0
          errno, "malloc");
5230
0
      return -1;
5231
0
    }
5232
0
    for (i = 0, j = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) {
5233
0
      if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) {
5234
0
        handle->tstamp_type_list[j] = sof_ts_type_map[i].pcap_tstamp_val;
5235
0
        j++;
5236
0
      }
5237
0
    }
5238
0
    handle->tstamp_type_count = num_ts_types;
5239
0
  } else
5240
0
    handle->tstamp_type_list = NULL;
5241
5242
0
  return 0;
5243
0
}
5244
#else /* ETHTOOL_GET_TS_INFO */
5245
static int
5246
iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf)
5247
{
5248
  /*
5249
   * This doesn't apply to the "any" device; you can't say "turn on
5250
   * hardware time stamping for all devices that exist now and arrange
5251
   * that it be turned on for any device that appears in the future",
5252
   * and not all devices even necessarily *support* hardware time
5253
   * stamping, so don't report any time stamp types.
5254
   */
5255
  if (strcmp(device, "any") == 0) {
5256
    handle->tstamp_type_list = NULL;
5257
    return 0;
5258
  }
5259
5260
  /*
5261
   * We don't have an ioctl to use to ask what's supported,
5262
   * so say we support everything.
5263
   */
5264
  if (iface_set_all_ts_types(handle, ebuf) == -1)
5265
    return -1;
5266
  return 0;
5267
}
5268
#endif /* ETHTOOL_GET_TS_INFO */
5269
#else  /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */
5270
static int
5271
iface_get_ts_types(const char *device _U_, pcap_t *p _U_, char *ebuf _U_)
5272
{
5273
  /*
5274
   * Nothing to fetch, so it always "succeeds".
5275
   */
5276
  return 0;
5277
}
5278
#endif /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */
5279
5280
/*
5281
 * Find out if we have any form of fragmentation/reassembly offloading.
5282
 *
5283
 * We do so using SIOCETHTOOL checking for various types of offloading;
5284
 * if SIOCETHTOOL isn't defined, or we don't have any #defines for any
5285
 * of the types of offloading, there's nothing we can do to check, so
5286
 * we just say "no, we don't".
5287
 *
5288
 * We treat EOPNOTSUPP, EINVAL and, if eperm_ok is true, EPERM as
5289
 * indications that the operation isn't supported.  We do EPERM
5290
 * weirdly because the SIOCETHTOOL code in later kernels 1) doesn't
5291
 * support ETHTOOL_GUFO, 2) also doesn't include it in the list
5292
 * of ethtool operations that don't require CAP_NET_ADMIN privileges,
5293
 * and 3) does the "is this permitted" check before doing the "is
5294
 * this even supported" check, so it fails with "this is not permitted"
5295
 * rather than "this is not even supported".  To work around this
5296
 * annoyance, we only treat EPERM as an error for the first feature,
5297
 * and assume that they all do the same permission checks, so if the
5298
 * first one is allowed all the others are allowed if supported.
5299
 */
5300
#if defined(SIOCETHTOOL) && (defined(ETHTOOL_GTSO) || defined(ETHTOOL_GUFO) || defined(ETHTOOL_GGSO) || defined(ETHTOOL_GFLAGS) || defined(ETHTOOL_GGRO))
5301
static int
5302
iface_ethtool_flag_ioctl(pcap_t *handle, int cmd, const char *cmdname,
5303
    int eperm_ok)
5304
0
{
5305
0
  struct ifreq  ifr;
5306
0
  struct ethtool_value eval;
5307
5308
0
  memset(&ifr, 0, sizeof(ifr));
5309
0
  pcapint_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name));
5310
0
  eval.cmd = cmd;
5311
0
  eval.data = 0;
5312
0
  ifr.ifr_data = (caddr_t)&eval;
5313
0
  if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) {
5314
0
    if (errno == EOPNOTSUPP || errno == EINVAL ||
5315
0
        (errno == EPERM && eperm_ok)) {
5316
      /*
5317
       * OK, let's just return 0, which, in our
5318
       * case, either means "no, what we're asking
5319
       * about is not enabled" or "all the flags
5320
       * are clear (i.e., nothing is enabled)".
5321
       */
5322
0
      return 0;
5323
0
    }
5324
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5325
0
        errno, "%s: SIOCETHTOOL(%s) ioctl failed",
5326
0
        handle->opt.device, cmdname);
5327
0
    return -1;
5328
0
  }
5329
0
  return eval.data;
5330
0
}
5331
5332
/*
5333
 * XXX - it's annoying that we have to check for offloading at all, but,
5334
 * given that we have to, it's still annoying that we have to check for
5335
 * particular types of offloading, especially that shiny new types of
5336
 * offloading may be added - and, worse, may not be checkable with
5337
 * a particular ETHTOOL_ operation; ETHTOOL_GFEATURES would, in
5338
 * theory, give those to you, but the actual flags being used are
5339
 * opaque (defined in a non-uapi header), and there doesn't seem to
5340
 * be any obvious way to ask the kernel what all the offloading flags
5341
 * are - at best, you can ask for a set of strings(!) to get *names*
5342
 * for various flags.  (That whole mechanism appears to have been
5343
 * designed for the sole purpose of letting ethtool report flags
5344
 * by name and set flags by name, with the names having no semantics
5345
 * ethtool understands.)
5346
 */
5347
static int
5348
iface_get_offload(pcap_t *handle)
5349
0
{
5350
0
  int ret;
5351
5352
0
#ifdef ETHTOOL_GTSO
5353
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO", 0);
5354
0
  if (ret == -1)
5355
0
    return -1;
5356
0
  if (ret)
5357
0
    return 1; /* TCP segmentation offloading on */
5358
0
#endif
5359
5360
0
#ifdef ETHTOOL_GGSO
5361
  /*
5362
   * XXX - will this cause large unsegmented packets to be
5363
   * handed to PF_PACKET sockets on transmission?  If not,
5364
   * this need not be checked.
5365
   */
5366
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO", 0);
5367
0
  if (ret == -1)
5368
0
    return -1;
5369
0
  if (ret)
5370
0
    return 1; /* generic segmentation offloading on */
5371
0
#endif
5372
5373
0
#ifdef ETHTOOL_GFLAGS
5374
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS", 0);
5375
0
  if (ret == -1)
5376
0
    return -1;
5377
0
  if (ret & ETH_FLAG_LRO)
5378
0
    return 1; /* large receive offloading on */
5379
0
#endif
5380
5381
0
#ifdef ETHTOOL_GGRO
5382
  /*
5383
   * XXX - will this cause large reassembled packets to be
5384
   * handed to PF_PACKET sockets on receipt?  If not,
5385
   * this need not be checked.
5386
   */
5387
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO", 0);
5388
0
  if (ret == -1)
5389
0
    return -1;
5390
0
  if (ret)
5391
0
    return 1; /* generic (large) receive offloading on */
5392
0
#endif
5393
5394
0
#ifdef ETHTOOL_GUFO
5395
  /*
5396
   * Do this one last, as support for it was removed in later
5397
   * kernels, and it fails with EPERM on those kernels rather
5398
   * than with EOPNOTSUPP (see explanation in comment for
5399
   * iface_ethtool_flag_ioctl()).
5400
   */
5401
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO", 1);
5402
0
  if (ret == -1)
5403
0
    return -1;
5404
0
  if (ret)
5405
0
    return 1; /* UDP fragmentation offloading on */
5406
0
#endif
5407
5408
0
  return 0;
5409
0
}
5410
#else /* SIOCETHTOOL */
5411
static int
5412
iface_get_offload(pcap_t *handle _U_)
5413
{
5414
  /*
5415
   * XXX - do we need to get this information if we don't
5416
   * have the ethtool ioctls?  If so, how do we do that?
5417
   */
5418
  return 0;
5419
}
5420
#endif /* SIOCETHTOOL */
5421
5422
/*
5423
 * As per
5424
 *
5425
 *    https://www.kernel.org/doc/html/latest/networking/dsa/dsa.html#switch-tagging-protocols
5426
 *
5427
 * Type 1 means that the tag is prepended to the Ethernet packet.
5428
 *
5429
 * Type 2 means that the tag is inserted into the Ethernet header
5430
 * after the source address and before the type/length field.
5431
 *
5432
 * Type 3 means that the tag is a packet trailer.
5433
 *
5434
 * Every element in the array below uses a DLT.  Because a DSA-tagged frame is
5435
 * not a standard IEEE 802.3 Ethernet frame, the array elements must not use
5436
 * DLT_EN10MB.  It is safe, albeit only barely useful, to use DLT_DEBUG_ONLY,
5437
 * which is also the implicit default for any DSA tag that is not present in
5438
 * the array.  To implement proper support for a particular DSA tag of
5439
 * interest, please do as much of the following as is reasonably practicable:
5440
 *
5441
 * 1. Using recent versions of tcpdump and libpcap on a Linux host with a
5442
 *    network interface that implements the required DSA tag, capture packets
5443
 *    on the interface and study the hex dumps.
5444
 * 2. Using the hex dumps and any other available supporting materials, produce
5445
 *    a sufficiently detailed description of the DSA tag structure, complete
5446
 *    with a full comment indicating whether it's type 1, 2, or 3, and, for
5447
 *    type 2, indicating whether it has an EtherType and, if so, what that type
5448
 *    is, and whether it's registered with the IEEE or not.  Refer to the
5449
 *    specification(s), existing implementation(s), or any other relevant
5450
 *    resources.
5451
 * 3. Using the description, request and obtain a new DLT for the DSA tag.
5452
 * 4. Associate the new DLT with the DSA tag in the array below.
5453
 * 5. Using the updated libpcap, capture packets again, produce a .pcap file
5454
 *    and confirm it uses the new DLT.
5455
 * 6. Using the .pcap file as a test, prepare additional changes to tcpdump to
5456
 *    enable decoding of packets for the new DLT.
5457
 * 7. Using the .pcap file as a test, prepare additional changes to libpcap to
5458
 *    enable filtering of packets for the new DLT.
5459
 *
5460
 * For working examples of such support, see the existing DLTs other than
5461
 * DLT_DEBUG_ONLY in the array below.
5462
 */
5463
static struct dsa_proto {
5464
  const char *name;
5465
  bpf_u_int32 linktype;
5466
} dsa_protos[] = {
5467
  /*
5468
   * Type 1. See
5469
   *
5470
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ar9331.c
5471
   */
5472
  { "ar9331", DLT_DEBUG_ONLY },
5473
5474
  /*
5475
   * Type 2, without an EtherType at the beginning.
5476
   */
5477
  { "brcm", DLT_DSA_TAG_BRCM },
5478
5479
  /*
5480
   * Type 2, with EtherType 0x8874, assigned to Broadcom.
5481
   */
5482
  { "brcm-legacy", DLT_DEBUG_ONLY },
5483
5484
  /*
5485
   * Type 1.
5486
   */
5487
  { "brcm-prepend", DLT_DSA_TAG_BRCM_PREPEND },
5488
5489
  /*
5490
   * Type 2, without an EtherType at the beginning.
5491
   */
5492
  { "dsa", DLT_DSA_TAG_DSA },
5493
5494
  /*
5495
   * Type 2, with an EtherType field, but without
5496
   * an assigned EtherType value that can be relied
5497
   * on.
5498
   */
5499
  { "edsa", DLT_DSA_TAG_EDSA },
5500
5501
  /*
5502
   * Type 1, with different transmit and receive headers,
5503
   * so can't really be handled well with the current
5504
   * libpcap API and with pcap files.
5505
   *
5506
   * See
5507
   *
5508
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_gswip.c
5509
   */
5510
  { "gswip", DLT_DEBUG_ONLY },
5511
5512
  /*
5513
   * Type 3. See
5514
   *
5515
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_hellcreek.c
5516
   */
5517
  { "hellcreek", DLT_DEBUG_ONLY },
5518
5519
  /*
5520
   * Type 3, with different transmit and receive headers,
5521
   * so can't really be handled well with the current
5522
   * libpcap API and with pcap files.
5523
   *
5524
   * See
5525
   *
5526
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L102
5527
   */
5528
  { "ksz8795", DLT_DEBUG_ONLY },
5529
5530
  /*
5531
   * Type 3, with different transmit and receive headers,
5532
   * so can't really be handled well with the current
5533
   * libpcap API and with pcap files.
5534
   *
5535
   * See
5536
   *
5537
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L160
5538
   */
5539
  { "ksz9477", DLT_DEBUG_ONLY },
5540
5541
  /*
5542
   * Type 3, with different transmit and receive headers,
5543
   * so can't really be handled well with the current
5544
   * libpcap API and with pcap files.
5545
   *
5546
   * See
5547
   *
5548
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L341
5549
   */
5550
  { "ksz9893", DLT_DEBUG_ONLY },
5551
5552
  /*
5553
   * Type 3, with different transmit and receive headers,
5554
   * so can't really be handled well with the current
5555
   * libpcap API and with pcap files.
5556
   *
5557
   * See
5558
   *
5559
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L386
5560
   */
5561
  { "lan937x", DLT_DEBUG_ONLY },
5562
5563
  /*
5564
   * Type 2, with EtherType 0x8100; the VID can be interpreted
5565
   * as per
5566
   *
5567
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_lan9303.c#L24
5568
   */
5569
  { "lan9303", DLT_DEBUG_ONLY },
5570
5571
  /*
5572
   * Type 2, without an EtherType at the beginning.
5573
   *
5574
   * See
5575
   *
5576
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_mtk.c#L15
5577
   *
5578
   * Linux kernel implements this tag so that it does not indicate the frame
5579
   * encoding reliably.  The matter is, some drivers use METADATA_HW_PORT_MUX,
5580
   * which (for the switch->CPU direction only, at the time of this writing)
5581
   * means that the frame does not have a DSA tag, the frame metadata is stored
5582
   * elsewhere and libpcap receives the frame only.  Specifically, this is the
5583
   * case for drivers/net/ethernet/mediatek/mtk_eth_soc.c, but the tag visible
5584
   * in sysfs is still "mtk" even though the wire encoding is different.
5585
   */
5586
  { "mtk", DLT_DEBUG_ONLY },
5587
5588
  /*
5589
   * Type 1.
5590
   *
5591
   * See
5592
   *
5593
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ocelot.c
5594
   */
5595
  { "ocelot", DLT_DEBUG_ONLY },
5596
5597
  /*
5598
   * Type 1.
5599
   *
5600
   * See
5601
   *
5602
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ocelot.c
5603
   */
5604
  { "seville", DLT_DEBUG_ONLY },
5605
5606
  /*
5607
   * Type 2, with EtherType 0x8100; the VID can be interpreted
5608
   * as per
5609
   *
5610
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_8021q.c#L15
5611
   */
5612
  { "ocelot-8021q", DLT_DEBUG_ONLY },
5613
5614
  /*
5615
   * Type 2, without an EtherType at the beginning.
5616
   *
5617
   * See
5618
   *
5619
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_qca.c
5620
   */
5621
  { "qca", DLT_DEBUG_ONLY },
5622
5623
  /*
5624
   * Type 2, with EtherType 0x8899, assigned to Realtek;
5625
   * they use it for several on-the-Ethernet protocols
5626
   * as well, but there are fields that allow the two
5627
   * tag formats, and all the protocols in question,
5628
   * to be distinguiished from one another.
5629
   *
5630
   * See
5631
   *
5632
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_rtl4_a.c
5633
   *
5634
   *    http://realtek.info/pdf/rtl8306sd%28m%29_datasheet_1.1.pdf
5635
   *
5636
   * and various pages in tcpdump's print-realtek.c and Wireshark's
5637
   * epan/dissectors/packet-realtek.c for the other protocols.
5638
   */
5639
  { "rtl4a", DLT_DEBUG_ONLY },
5640
5641
  /*
5642
   * Type 2, with EtherType 0x8899, assigned to Realtek;
5643
   * see above.
5644
   */
5645
  { "rtl8_4", DLT_DEBUG_ONLY },
5646
5647
  /*
5648
   * Type 3, with the same tag format as rtl8_4.
5649
   */
5650
  { "rtl8_4t", DLT_DEBUG_ONLY },
5651
5652
  /*
5653
   * Type 2, with EtherType 0xe001; that's probably
5654
   * self-assigned.
5655
   *
5656
   * See
5657
   *
5658
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_rzn1_a5psw.c
5659
   */
5660
  { "a5psw", DLT_DEBUG_ONLY },
5661
5662
  /*
5663
   * Type 2, with EtherType 0x8100 or the self-assigned
5664
   * 0xdadb, so this really should have its own
5665
   * LINKTYPE_/DLT_ value; that would also allow the
5666
   * VID of the tag to be dissected as per
5667
   *
5668
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_8021q.c#L15
5669
   */
5670
  { "sja1105", DLT_DEBUG_ONLY },
5671
5672
  /*
5673
   * Type "none of the above", with both a header and trailer,
5674
   * with different transmit and receive tags.  Has
5675
   * EtherType 0xdadc, which is probably self-assigned.
5676
   */
5677
  { "sja1110", DLT_DEBUG_ONLY },
5678
5679
  /*
5680
   * Type 3, as the name suggests.
5681
   *
5682
   * See
5683
   *
5684
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_trailer.c
5685
   */
5686
  { "trailer", DLT_DEBUG_ONLY },
5687
5688
  /*
5689
   * Type 2, with EtherType 0x8100; the VID can be interpreted
5690
   * as per
5691
   *
5692
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_8021q.c#L15
5693
   */
5694
  { "vsc73xx-8021q", DLT_DEBUG_ONLY },
5695
5696
  /*
5697
   * Type 3.
5698
   *
5699
   * See
5700
   *
5701
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_xrs700x.c
5702
   */
5703
  { "xrs700x", DLT_DEBUG_ONLY },
5704
};
5705
5706
/*
5707
 * Return 1 if the interface uses DSA tagging, 0 if the interface does not use
5708
 * DSA tagging, or PCAP_ERROR on error.
5709
 */
5710
static int
5711
iface_dsa_get_proto_info(const char *device, pcap_t *handle)
5712
0
{
5713
0
  char *pathstr;
5714
0
  unsigned int i;
5715
  /*
5716
   * Make this significantly smaller than PCAP_ERRBUF_SIZE;
5717
   * the tag *shouldn't* have some huge long name, and making
5718
   * it smaller keeps newer versions of GCC from whining that
5719
   * the error message if we don't support the tag could
5720
   * overflow the error message buffer.
5721
   */
5722
0
  char buf[128];
5723
0
  ssize_t r;
5724
0
  int fd;
5725
5726
0
  fd = asprintf(&pathstr, "/sys/class/net/%s/dsa/tagging", device);
5727
0
  if (fd < 0) {
5728
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5729
0
            fd, "asprintf");
5730
0
    return PCAP_ERROR;
5731
0
  }
5732
5733
0
  fd = open(pathstr, O_RDONLY);
5734
0
  free(pathstr);
5735
  /*
5736
   * This could be not fatal: kernel >= 4.20 *might* expose this
5737
   * attribute.  However, if it exposes the attribute, but the read has
5738
   * failed due to another reason (ENFILE, EMFILE, ENOMEM...), propagate
5739
   * the failure.
5740
   */
5741
0
  if (fd < 0) {
5742
0
    if (errno == ENOENT)
5743
0
      return 0;
5744
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5745
0
                                 errno, "open");
5746
0
    return PCAP_ERROR;
5747
0
  }
5748
5749
0
  r = read(fd, buf, sizeof(buf) - 1);
5750
0
  if (r <= 0) {
5751
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5752
0
            errno, "read");
5753
0
    close(fd);
5754
0
    return PCAP_ERROR;
5755
0
  }
5756
0
  close(fd);
5757
5758
  /*
5759
   * Buffer should be LF terminated.
5760
   */
5761
0
  if (buf[r - 1] == '\n')
5762
0
    r--;
5763
0
  buf[r] = '\0';
5764
5765
  /*
5766
   * The string "none" indicates that the interface does not have
5767
   * any tagging protocol configured, and is therefore a standard
5768
   * Ethernet interface.
5769
   */
5770
0
  if (strcmp(buf, "none") == 0)
5771
0
    return 0;
5772
5773
  /*
5774
   * Every element in the array stands for a DSA-tagged interface.  Using
5775
   * DLT_EN10MB (the standard IEEE 802.3 Ethernet) for such an interface
5776
   * may seem a good idea at first, but doing so would certainly cause
5777
   * major problems in areas that are already complicated and depend on
5778
   * DLT_EN10MB meaning the standard IEEE 802.3 Ethernet only, namely:
5779
   *
5780
   * - live capturing of packets on Linux, and
5781
   * - live kernel filtering of packets on Linux, and
5782
   * - live userspace filtering of packets on Linux, and
5783
   * - offline filtering of packets on all supported OSes, and
5784
   * - identification of savefiles on all OSes.
5785
   *
5786
   * Therefore use a default DLT value that does not block capturing and
5787
   * hexdumping of unsupported DSA encodings (in case the tag is not in
5788
   * the array) and enforce the non-use of DLT_EN10MB (in case the tag is
5789
   * in the array, but is incorrectly declared).
5790
   */
5791
0
  handle->linktype = DLT_DEBUG_ONLY;
5792
0
  for (i = 0; i < sizeof(dsa_protos) / sizeof(dsa_protos[0]); i++) {
5793
0
    if (strcmp(buf, dsa_protos[i].name) == 0) {
5794
0
      if (dsa_protos[i].linktype != DLT_EN10MB)
5795
0
        handle->linktype = dsa_protos[i].linktype;
5796
0
      break;
5797
0
    }
5798
0
  }
5799
0
  return 1;
5800
0
}
5801
5802
/*
5803
 *  Query the kernel for the MTU of the given interface.
5804
 */
5805
static int
5806
iface_get_mtu(int fd, const char *device, char *ebuf)
5807
0
{
5808
0
  struct ifreq  ifr;
5809
5810
0
  if (!device)
5811
0
    return BIGGER_THAN_ALL_MTUS;
5812
5813
0
  memset(&ifr, 0, sizeof(ifr));
5814
0
  pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
5815
5816
0
  if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) {
5817
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5818
0
        errno, "SIOCGIFMTU");
5819
0
    return -1;
5820
0
  }
5821
5822
0
  return ifr.ifr_mtu;
5823
0
}
5824
5825
/*
5826
 *  Get the hardware type of the given interface as ARPHRD_xxx constant.
5827
 */
5828
static int
5829
iface_get_arptype(int fd, const char *device, char *ebuf)
5830
0
{
5831
0
  struct ifreq  ifr;
5832
0
  int   ret;
5833
5834
0
  memset(&ifr, 0, sizeof(ifr));
5835
0
  pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
5836
5837
0
  if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
5838
0
    if (errno == ENODEV) {
5839
      /*
5840
       * No such device.
5841
       *
5842
       * There's nothing more to say, so clear
5843
       * the error message.
5844
       */
5845
0
      ret = PCAP_ERROR_NO_SUCH_DEVICE;
5846
0
      ebuf[0] = '\0';
5847
0
    } else {
5848
0
      ret = PCAP_ERROR;
5849
0
      pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5850
0
          errno, "SIOCGIFHWADDR");
5851
0
    }
5852
0
    return ret;
5853
0
  }
5854
5855
0
  return ifr.ifr_hwaddr.sa_family;
5856
0
}
5857
5858
/*
5859
 * In a DLT_CAN_SOCKETCAN frame the first four bytes are a 32-bit integer
5860
 * value in host byte order if the filter program is running in the kernel and
5861
 * in network byte order if in userland.  This applies to all of CC, FD and XL
5862
 * frames, see pcap_handle_packet_mmap() for the rationale.  Return 1 iff the
5863
 * [possibly modified] filter program can work correctly in the kernel.
5864
 */
5865
#if __BYTE_ORDER == __LITTLE_ENDIAN
5866
static int
5867
fix_dlt_can_socketcan(const u_int len, struct bpf_insn insn[])
5868
0
{
5869
0
  for (u_int i = 0; i < len; ++i) {
5870
0
    switch (insn[i].code) {
5871
0
    case BPF_LD|BPF_B|BPF_ABS: // ldb [k]
5872
0
    case BPF_LDX|BPF_MSH|BPF_B: // ldxb 4*([k]&0xf)
5873
0
      if (insn[i].k < 4)
5874
0
        insn[i].k = 3 - insn[i].k; // Fixed now.
5875
0
      break;
5876
0
    case BPF_LD|BPF_H|BPF_ABS: // ldh [k]
5877
0
    case BPF_LD|BPF_W|BPF_ABS: // ld [k]
5878
      /*
5879
       * A halfword or a word load cannot be fixed by just
5880
       * changing k, even if every required byte is within
5881
       * the byte-swapped part of the frame, even if the
5882
       * load is aligned.  The fix would require either
5883
       * rewriting the filter program extensively or
5884
       * generating it differently in the first place.
5885
       */
5886
0
    case BPF_LD|BPF_B|BPF_IND: // ldb [x + k]
5887
0
    case BPF_LD|BPF_H|BPF_IND: // ldh [x + k]
5888
0
    case BPF_LD|BPF_W|BPF_IND: // ld [x + k]
5889
      /*
5890
       * In addition to the above, a variable offset load
5891
       * cannot be fixed because x can have any value, thus
5892
       * x + k can have any value, but only the first four
5893
       * bytes are swapped.  An easy way to demonstrate it
5894
       * is to compile "link[link[4]] == 0", which will use
5895
       * "ldb [x + 0]" to access one of the first four bytes
5896
       * of the frame iff CAN CC/FD payload length is less
5897
       * than 4.
5898
       */
5899
0
      if (insn[i].k < 4)
5900
0
        return 0; // Userland filtering only.
5901
0
      break;
5902
0
    }
5903
0
  }
5904
0
  return 1;
5905
0
}
5906
#else
5907
static int
5908
fix_dlt_can_socketcan(const u_int len _U_, struct bpf_insn insn[] _U_)
5909
{
5910
  return 1;
5911
}
5912
#endif // __BYTE_ORDER == __LITTLE_ENDIAN
5913
5914
static int
5915
fix_program(pcap_t *handle, struct sock_fprog *fcode)
5916
0
{
5917
0
  struct pcap_linux *handlep = handle->priv;
5918
0
  size_t prog_size;
5919
0
  int i;
5920
0
  struct bpf_insn *p;
5921
0
  struct bpf_insn *f;
5922
0
  int len;
5923
5924
  /*
5925
   * Make a copy of the filter, and modify that copy if
5926
   * necessary.
5927
   */
5928
0
  prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len;
5929
0
  len = handle->fcode.bf_len;
5930
0
  f = (struct bpf_insn *)malloc(prog_size);
5931
0
  if (f == NULL) {
5932
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5933
0
        errno, "malloc");
5934
0
    return -1;
5935
0
  }
5936
0
  memcpy(f, handle->fcode.bf_insns, prog_size);
5937
0
  fcode->len = len;
5938
0
  fcode->filter = (struct sock_filter *) f;
5939
5940
0
  switch (handle->linktype) {
5941
0
  case DLT_CAN_SOCKETCAN:
5942
    /*
5943
     * If a similar fix needs to be done for CAN frames that
5944
     * appear on the "any" pseudo-interface, it needs to be done
5945
     * differently because that would be within DLT_LINUX_SLL or
5946
     * DLT_LINUX_SLL2.
5947
     */
5948
0
    return fix_dlt_can_socketcan(len, f);
5949
0
  }
5950
5951
0
  for (i = 0; i < len; ++i) {
5952
0
    p = &f[i];
5953
    /*
5954
     * What type of instruction is this?
5955
     */
5956
0
    switch (BPF_CLASS(p->code)) {
5957
5958
0
    case BPF_LD:
5959
0
    case BPF_LDX:
5960
      /*
5961
       * It's a load instruction; is it loading
5962
       * from the packet?
5963
       */
5964
0
      switch (BPF_MODE(p->code)) {
5965
5966
0
      case BPF_ABS:
5967
0
      case BPF_IND:
5968
0
      case BPF_MSH:
5969
        /*
5970
         * Yes; are we in cooked mode?
5971
         */
5972
0
        if (handlep->cooked) {
5973
          /*
5974
           * Yes, so we need to fix this
5975
           * instruction.
5976
           */
5977
0
          if (fix_offset(handle, p) < 0) {
5978
            /*
5979
             * We failed to do so.
5980
             * Return 0, so our caller
5981
             * knows to punt to userland.
5982
             */
5983
0
            return 0;
5984
0
          }
5985
0
        }
5986
0
        break;
5987
0
      }
5988
0
      break;
5989
0
    }
5990
0
  }
5991
0
  return 1; /* we succeeded */
5992
0
}
5993
5994
static int
5995
fix_offset(pcap_t *handle, struct bpf_insn *p)
5996
0
{
5997
  /*
5998
   * Existing references to auxiliary data shouldn't be adjusted.
5999
   *
6000
   * Note that SKF_AD_OFF is negative, but p->k is unsigned, so
6001
   * we use >= and cast SKF_AD_OFF to unsigned.
6002
   */
6003
0
  if (p->k >= (bpf_u_int32)SKF_AD_OFF)
6004
0
    return 0;
6005
0
  if (handle->linktype == DLT_LINUX_SLL2) {
6006
    /*
6007
     * What's the offset?
6008
     */
6009
0
    if (p->k >= SLL2_HDR_LEN) {
6010
      /*
6011
       * It's within the link-layer payload; that starts
6012
       * at an offset of 0, as far as the kernel packet
6013
       * filter is concerned, so subtract the length of
6014
       * the link-layer header.
6015
       */
6016
0
      p->k -= SLL2_HDR_LEN;
6017
0
    } else if (p->k == 0) {
6018
      /*
6019
       * It's the protocol field; map it to the
6020
       * special magic kernel offset for that field.
6021
       */
6022
0
      p->k = SKF_AD_OFF + SKF_AD_PROTOCOL;
6023
0
    } else if (p->k == 4) {
6024
      /*
6025
       * It's the ifindex field; map it to the
6026
       * special magic kernel offset for that field.
6027
       */
6028
0
      p->k = SKF_AD_OFF + SKF_AD_IFINDEX;
6029
0
    } else if (p->k == 10) {
6030
      /*
6031
       * It's the packet type field; map it to the
6032
       * special magic kernel offset for that field.
6033
       */
6034
0
      p->k = SKF_AD_OFF + SKF_AD_PKTTYPE;
6035
0
    } else if ((bpf_int32)(p->k) > 0) {
6036
      /*
6037
       * It's within the header, but it's not one of
6038
       * those fields; we can't do that in the kernel,
6039
       * so punt to userland.
6040
       */
6041
0
      return -1;
6042
0
    }
6043
0
  } else {
6044
    /*
6045
     * What's the offset?
6046
     */
6047
0
    if (p->k >= SLL_HDR_LEN) {
6048
      /*
6049
       * It's within the link-layer payload; that starts
6050
       * at an offset of 0, as far as the kernel packet
6051
       * filter is concerned, so subtract the length of
6052
       * the link-layer header.
6053
       */
6054
0
      p->k -= SLL_HDR_LEN;
6055
0
    } else if (p->k == 0) {
6056
      /*
6057
       * It's the packet type field; map it to the
6058
       * special magic kernel offset for that field.
6059
       */
6060
0
      p->k = SKF_AD_OFF + SKF_AD_PKTTYPE;
6061
0
    } else if (p->k == 14) {
6062
      /*
6063
       * It's the protocol field; map it to the
6064
       * special magic kernel offset for that field.
6065
       */
6066
0
      p->k = SKF_AD_OFF + SKF_AD_PROTOCOL;
6067
0
    } else if ((bpf_int32)(p->k) > 0) {
6068
      /*
6069
       * It's within the header, but it's not one of
6070
       * those fields; we can't do that in the kernel,
6071
       * so punt to userland.
6072
       */
6073
0
      return -1;
6074
0
    }
6075
0
  }
6076
0
  return 0;
6077
0
}
6078
6079
static int
6080
set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode)
6081
0
{
6082
0
  int total_filter_on = 0;
6083
0
  int save_mode;
6084
0
  int ret;
6085
0
  int save_errno;
6086
6087
  /*
6088
   * The socket filter code doesn't discard all packets queued
6089
   * up on the socket when the filter is changed; this means
6090
   * that packets that don't match the new filter may show up
6091
   * after the new filter is put onto the socket, if those
6092
   * packets haven't yet been read.
6093
   *
6094
   * This means, for example, that if you do a tcpdump capture
6095
   * with a filter, the first few packets in the capture might
6096
   * be packets that wouldn't have passed the filter.
6097
   *
6098
   * We therefore discard all packets queued up on the socket
6099
   * when setting a kernel filter.  (This isn't an issue for
6100
   * userland filters, as the userland filtering is done after
6101
   * packets are queued up.)
6102
   *
6103
   * To flush those packets, we put the socket in read-only mode,
6104
   * and read packets from the socket until there are no more to
6105
   * read.
6106
   *
6107
   * In order to keep that from being an infinite loop - i.e.,
6108
   * to keep more packets from arriving while we're draining
6109
   * the queue - we put the "total filter", which is a filter
6110
   * that rejects all packets, onto the socket before draining
6111
   * the queue.
6112
   *
6113
   * This code deliberately ignores any errors, so that you may
6114
   * get bogus packets if an error occurs, rather than having
6115
   * the filtering done in userland even if it could have been
6116
   * done in the kernel.
6117
   */
6118
0
  if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
6119
0
           &total_fcode, sizeof(total_fcode)) == 0) {
6120
0
    char drain[1];
6121
6122
    /*
6123
     * Note that we've put the total filter onto the socket.
6124
     */
6125
0
    total_filter_on = 1;
6126
6127
    /*
6128
     * Save the socket's current mode, and put it in
6129
     * non-blocking mode; we drain it by reading packets
6130
     * until we get an error (which is normally a
6131
     * "nothing more to be read" error).
6132
     */
6133
0
    save_mode = fcntl(handle->fd, F_GETFL, 0);
6134
0
    if (save_mode == -1) {
6135
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
6136
0
          PCAP_ERRBUF_SIZE, errno,
6137
0
          "can't get FD flags when changing filter");
6138
0
      return -2;
6139
0
    }
6140
0
    if (fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) < 0) {
6141
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
6142
0
          PCAP_ERRBUF_SIZE, errno,
6143
0
          "can't set nonblocking mode when changing filter");
6144
0
      return -2;
6145
0
    }
6146
0
    while (recv(handle->fd, &drain, sizeof drain, MSG_TRUNC) >= 0)
6147
0
      ;
6148
0
    save_errno = errno;
6149
0
    if (save_errno != EAGAIN) {
6150
      /*
6151
       * Fatal error.
6152
       *
6153
       * If we can't restore the mode or reset the
6154
       * kernel filter, there's nothing we can do.
6155
       */
6156
0
      (void)fcntl(handle->fd, F_SETFL, save_mode);
6157
0
      (void)reset_kernel_filter(handle);
6158
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
6159
0
          PCAP_ERRBUF_SIZE, save_errno,
6160
0
          "recv failed when changing filter");
6161
0
      return -2;
6162
0
    }
6163
0
    if (fcntl(handle->fd, F_SETFL, save_mode) == -1) {
6164
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
6165
0
          PCAP_ERRBUF_SIZE, errno,
6166
0
          "can't restore FD flags when changing filter");
6167
0
      return -2;
6168
0
    }
6169
0
  }
6170
6171
  /*
6172
   * Now attach the new filter.
6173
   */
6174
0
  ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
6175
0
       fcode, sizeof(*fcode));
6176
0
  if (ret == -1 && total_filter_on) {
6177
    /*
6178
     * Well, we couldn't set that filter on the socket,
6179
     * but we could set the total filter on the socket.
6180
     *
6181
     * This could, for example, mean that the filter was
6182
     * too big to put into the kernel, so we'll have to
6183
     * filter in userland; in any case, we'll be doing
6184
     * filtering in userland, so we need to remove the
6185
     * total filter so we see packets.
6186
     */
6187
0
    save_errno = errno;
6188
6189
    /*
6190
     * If this fails, we're really screwed; we have the
6191
     * total filter on the socket, and it won't come off.
6192
     * Report it as a fatal error.
6193
     */
6194
0
    if (reset_kernel_filter(handle) == -1) {
6195
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
6196
0
          PCAP_ERRBUF_SIZE, errno,
6197
0
          "can't remove kernel total filter");
6198
0
      return -2;  /* fatal error */
6199
0
    }
6200
6201
0
    errno = save_errno;
6202
0
  }
6203
0
  return ret;
6204
0
}
6205
6206
static int
6207
reset_kernel_filter(pcap_t *handle)
6208
0
{
6209
0
  int ret;
6210
  /*
6211
   * setsockopt() barfs unless it get a dummy parameter.
6212
   * valgrind whines unless the value is initialized,
6213
   * as it has no idea that setsockopt() ignores its
6214
   * parameter.
6215
   */
6216
0
  int dummy = 0;
6217
6218
0
  ret = setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER,
6219
0
           &dummy, sizeof(dummy));
6220
  /*
6221
   * Ignore ENOENT - it means "we don't have a filter", so there
6222
   * was no filter to remove, and there's still no filter.
6223
   *
6224
   * Also ignore ENONET, as a lot of kernel versions had a
6225
   * typo where ENONET, rather than ENOENT, was returned.
6226
   */
6227
0
  if (ret == -1 && errno != ENOENT && errno != ENONET)
6228
0
    return -1;
6229
0
  return 0;
6230
0
}
6231
6232
int
6233
pcap_set_protocol_linux(pcap_t *p, int protocol)
6234
0
{
6235
0
  if (pcapint_check_activated(p))
6236
0
    return (PCAP_ERROR_ACTIVATED);
6237
0
  p->opt.protocol = protocol;
6238
0
  return (0);
6239
0
}
6240
6241
/*
6242
 * Libpcap version string.
6243
 */
6244
/*
 * ADDITIONAL_INFO_STRING names the TPACKET version in use (V3 if
 * HAVE_TPACKET3 is defined, V2 otherwise) and mentions netmap if
 * PCAP_SUPPORT_NETMAP is defined.
 */
#ifdef HAVE_TPACKET3
  #ifdef PCAP_SUPPORT_NETMAP
    #define ADDITIONAL_INFO_STRING  "with TPACKET_V3 and netmap"
  #else
    #define ADDITIONAL_INFO_STRING  "with TPACKET_V3"
  #endif
#else
  #ifdef PCAP_SUPPORT_NETMAP
    #define ADDITIONAL_INFO_STRING  "with TPACKET_V2 and netmap"
  #else
    #define ADDITIONAL_INFO_STRING  "with TPACKET_V2"
  #endif
#endif

/*
 * Return the library version string, built at compile time from
 * ADDITIONAL_INFO_STRING.
 */
const char *
pcap_lib_version(void)
{
  return (PCAP_VERSION_STRING_WITH_ADDITIONAL_INFO(ADDITIONAL_INFO_STRING));
}