Coverage Report

Created: 2025-07-12 07:02

/src/libpcap/pcap-linux.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  pcap-linux.c: Packet capture interface to the Linux kernel
3
 *
4
 *  Copyright (c) 2000 Torsten Landschoff <torsten@debian.org>
5
 *           Sebastian Krahmer  <krahmer@cs.uni-potsdam.de>
6
 *
7
 *  License: BSD
8
 *
9
 *  Redistribution and use in source and binary forms, with or without
10
 *  modification, are permitted provided that the following conditions
11
 *  are met:
12
 *
13
 *  1. Redistributions of source code must retain the above copyright
14
 *     notice, this list of conditions and the following disclaimer.
15
 *  2. Redistributions in binary form must reproduce the above copyright
16
 *     notice, this list of conditions and the following disclaimer in
17
 *     the documentation and/or other materials provided with the
18
 *     distribution.
19
 *  3. The names of the authors may not be used to endorse or promote
20
 *     products derived from this software without specific prior
21
 *     written permission.
22
 *
23
 *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24
 *  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25
 *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26
 *
27
 *  Modifications:     Added PACKET_MMAP support
28
 *                     Paolo Abeni <paolo.abeni@email.it>
29
 *                     Added TPACKET_V3 support
30
 *                     Gabor Tatarka <gabor.tatarka@ericsson.com>
31
 *
32
 *                     based on previous works of:
33
 *                     Simon Patarin <patarin@cs.unibo.it>
34
 *                     Phil Wood <cpw@lanl.gov>
35
 *
36
 * Monitor-mode support for mac80211 includes code taken from the iw
37
 * command; the copyright notice for that code is
38
 *
39
 * Copyright (c) 2007, 2008 Johannes Berg
40
 * Copyright (c) 2007   Andy Lutomirski
41
 * Copyright (c) 2007   Mike Kershaw
42
 * Copyright (c) 2008   Gábor Stefanik
43
 *
44
 * All rights reserved.
45
 *
46
 * Redistribution and use in source and binary forms, with or without
47
 * modification, are permitted provided that the following conditions
48
 * are met:
49
 * 1. Redistributions of source code must retain the above copyright
50
 *    notice, this list of conditions and the following disclaimer.
51
 * 2. Redistributions in binary form must reproduce the above copyright
52
 *    notice, this list of conditions and the following disclaimer in the
53
 *    documentation and/or other materials provided with the distribution.
54
 * 3. The name of the author may not be used to endorse or promote products
55
 *    derived from this software without specific prior written permission.
56
 *
57
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
58
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
59
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
60
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
61
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
62
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
63
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
64
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
65
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67
 * SUCH DAMAGE.
68
 */
69
70
71
#ifndef _GNU_SOURCE
72
#define _GNU_SOURCE
73
#endif
74
75
#include <config.h>
76
77
#include <errno.h>
78
#include <stdio.h>
79
#include <stdlib.h>
80
#include <unistd.h>
81
#include <fcntl.h>
82
#include <string.h>
83
#include <limits.h>
84
#include <endian.h>
85
#include <sys/stat.h>
86
#include <sys/socket.h>
87
#include <sys/ioctl.h>
88
#include <sys/utsname.h>
89
#include <sys/mman.h>
90
#include <linux/if.h>
91
#include <linux/if_packet.h>
92
#include <linux/sockios.h>
93
#include <linux/ethtool.h>
94
#include <netinet/in.h>
95
#include <linux/if_ether.h>
96
#include <linux/netlink.h>
97
98
#include <linux/if_arp.h>
99
#ifndef ARPHRD_IEEE802154
100
  // Linux before 2.6.31
101
  #define ARPHRD_IEEE802154 804
102
#endif
103
#ifndef ARPHRD_IEEE802154_MONITOR
104
  // Linux before 3.5
105
  #define ARPHRD_IEEE802154_MONITOR 805
106
#endif
107
#ifndef ARPHRD_NETLINK
108
  // Linux before 3.11
109
  #define ARPHRD_NETLINK 824
110
#endif
111
#ifndef ARPHRD_6LOWPAN
112
  // Linux before 3.14
113
  #define ARPHRD_6LOWPAN 825
114
#endif
115
#ifndef ARPHRD_VSOCKMON
116
  // Linux before 4.12
117
  #define ARPHRD_VSOCKMON 826
118
#endif
119
#ifndef ARPHRD_LAPD
120
  /*
121
   * ARPHRD_LAPD is unofficial and randomly allocated, if reallocation
122
   * is needed, please report it to <daniele@orlandi.com>
123
   */
124
0
  #define ARPHRD_LAPD 8445
125
#endif
126
127
#include <poll.h>
128
#include <dirent.h>
129
#include <sys/eventfd.h>
130
131
#include "pcap-int.h"
132
#include "pcap-util.h"
133
#include "pcap-snf.h"
134
#include "pcap/sll.h"
135
#include "pcap/vlan.h"
136
#include "pcap/can_socketcan.h"
137
138
#include "diag-control.h"
139
140
/*
141
 * We require TPACKET_V2 support.
142
 */
143
#ifndef TPACKET2_HDRLEN
144
#error "Libpcap will only work if TPACKET_V2 is supported; you must build for a 2.6.27 or later kernel"
145
#endif
146
147
/* check for memory mapped access availability. We assume every needed
148
 * struct is defined if the macro TPACKET_HDRLEN is defined, because it
149
 * uses many ring related structs and macros */
150
#ifdef TPACKET3_HDRLEN
151
# define HAVE_TPACKET3
152
#endif /* TPACKET3_HDRLEN */
153
154
/*
155
 * Not all compilers that are used to compile code to run on Linux have
156
 * these builtins.  For example, older versions of GCC don't, and at
157
 * least some people are doing cross-builds for MIPS with older versions
158
 * of GCC.
159
 */
160
#ifndef HAVE___ATOMIC_LOAD_N
161
#define __atomic_load_n(ptr, memory_model)    (*(ptr))
162
#endif
163
#ifndef HAVE___ATOMIC_STORE_N
164
#define __atomic_store_n(ptr, val, memory_model)  *(ptr) = (val)
165
#endif
166
167
/*
 * Ring-slot ownership helpers.
 *
 * The *_acquire() forms load the slot's status word with acquire ordering
 * and evaluate to non-zero when it is anything other than TP_STATUS_KERNEL.
 * The *_release() forms store TP_STATUS_KERNEL into the status word with
 * release ordering.
 *
 * The v2 forms use the tp_status member of the pointed-to header; the v3
 * forms use hdr.bh1.block_status of the pointed-to block descriptor.
 *
 * The pkt argument is parenthesized so that an expression argument
 * (for example "base + i") is dereferenced as a whole.
 */
#define packet_mmap_acquire(pkt) \
  (__atomic_load_n(&(pkt)->tp_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL)
#define packet_mmap_release(pkt) \
  (__atomic_store_n(&(pkt)->tp_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE))
#define packet_mmap_v3_acquire(pkt) \
  (__atomic_load_n(&(pkt)->hdr.bh1.block_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL)
#define packet_mmap_v3_release(pkt) \
  (__atomic_store_n(&(pkt)->hdr.bh1.block_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE))
175
176
#include <linux/types.h>
177
#include <linux/filter.h>
178
179
#ifdef HAVE_LINUX_NET_TSTAMP_H
180
#include <linux/net_tstamp.h>
181
#endif
182
183
/*
184
 * For checking whether a device is a bonding device.
185
 */
186
#include <linux/if_bonding.h>
187
188
/*
189
 * Got libnl?
190
 */
191
#ifdef HAVE_LIBNL
192
#include <linux/nl80211.h>
193
194
#include <netlink/genl/genl.h>
195
#include <netlink/genl/family.h>
196
#include <netlink/genl/ctrl.h>
197
#include <netlink/msg.h>
198
#include <netlink/attr.h>
199
#endif /* HAVE_LIBNL */
200
201
#ifndef HAVE_SOCKLEN_T
202
typedef int   socklen_t;
203
#endif
204
205
0
#define MAX_LINKHEADER_SIZE 256
206
207
/*
208
 * When capturing on all interfaces we use this as the buffer size.
209
 * Should be bigger than all MTUs that occur in real life.
210
 * 64kB should be enough for now.
211
 */
212
0
#define BIGGER_THAN_ALL_MTUS  (64*1024)
213
214
/*
215
 * Private data for capturing on Linux PF_PACKET sockets.
216
 */
217
struct pcap_linux {
218
  long long sysfs_dropped; /* packets reported dropped by /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors */
219
  struct pcap_stat stat; /* NOTE(review): presumably the running statistics handed back by pcap_stats_linux() - confirm */
220
221
  char  *device;  /* device name */
222
  int filter_in_userland; /* must filter in userland */
223
  u_int blocks_to_filter_in_userland; /* NOTE(review): presumably a count of ring blocks that still need userland filtering - confirm against the setfilter/read code */
224
  int must_do_on_close; /* stuff we must do when we close (MUST_* flags; 0 means nothing to do) */
225
  int timeout;  /* timeout for buffering */
226
  int cooked;   /* using SOCK_DGRAM rather than SOCK_RAW */
227
  int ifindex;  /* interface index of device we're bound to */
228
  int lo_ifindex; /* interface index of the loopback device */
229
  int netdown;  /* we got an ENETDOWN and haven't resolved it */
230
  bpf_u_int32 oldmode;  /* mode to restore when turning monitor mode off */
231
  char  *mondevice; /* mac80211 monitor device we created */
232
  u_char  *mmapbuf; /* memory-mapped region pointer */
233
  size_t  mmapbuflen; /* size of region */
234
  int vlan_offset;  /* offset at which to insert vlan tags; if -1, don't insert */
235
  u_int tp_version; /* version of tpacket_hdr for mmapped ring */
236
  u_int tp_hdrlen;  /* hdrlen of tpacket_hdr for mmapped ring */
237
  u_char  *oneshot_buffer; /* buffer for copy of packet */
238
  int poll_timeout; /* timeout to use in poll() */
239
#ifdef HAVE_TPACKET3
240
  unsigned char *current_packet; /* Current packet within the TPACKET_V3 block. Move to next block if NULL. */
241
  int packets_left; /* Unhandled packets left within the block from previous call to pcap_read_linux_mmap_v3 in case of TPACKET_V3. */
242
#endif
243
  int poll_breakloop_fd; /* fd to an eventfd to break from blocking operations; -1 while not open */
244
};
245
246
/*
247
 * Stuff to do when we close.
248
 */
249
#define MUST_DELETE_MONIF 0x00000001  /* delete monitor-mode interface */
250
251
/*
252
 * Prototypes for internal functions and methods.
253
 */
254
static int is_wifi(const char *);
255
static int pcap_activate_linux(pcap_t *);
256
static int setup_socket(pcap_t *, int);
257
static int setup_mmapped(pcap_t *);
258
static int pcap_can_set_rfmon_linux(pcap_t *);
259
static int pcap_inject_linux(pcap_t *, const void *, int);
260
static int pcap_stats_linux(pcap_t *, struct pcap_stat *);
261
static int pcap_setfilter_linux(pcap_t *, struct bpf_program *);
262
static int pcap_setdirection_linux(pcap_t *, pcap_direction_t);
263
static int pcap_set_datalink_linux(pcap_t *, int);
264
265
/*
 * One pointer, viewed either as a TPACKET_V2 frame header, as a
 * TPACKET_V3 block descriptor (only when HAVE_TPACKET3 is defined), or
 * as raw bytes.
 */
union thdr {
266
  struct tpacket2_hdr   *h2; /* TPACKET_V2 header view */
267
#ifdef HAVE_TPACKET3
268
  struct tpacket_block_desc *h3; /* TPACKET_V3 block descriptor view */
269
#endif
270
  u_char        *raw; /* untyped byte view */
271
};
272
273
0
/*
 * Treat h->buffer as an array of u_char pointers and fetch the entry at
 * the given index, or at h->offset for the "current" form.
 *
 * The h argument is parenthesized so that an expression argument is
 * dereferenced as a whole.
 */
#define RING_GET_FRAME_AT(h, offset) (((u_char **)(h)->buffer)[(offset)])
#define RING_GET_CURRENT_FRAME(h) RING_GET_FRAME_AT((h), (h)->offset)
275
276
static void destroy_ring(pcap_t *handle);
277
static int create_ring(pcap_t *handle);
278
static int prepare_tpacket_socket(pcap_t *handle);
279
static int pcap_read_linux_mmap_v2(pcap_t *, int, pcap_handler , u_char *);
280
#ifdef HAVE_TPACKET3
281
static int pcap_read_linux_mmap_v3(pcap_t *, int, pcap_handler , u_char *);
282
#endif
283
static int pcap_setnonblock_linux(pcap_t *p, int nonblock);
284
static int pcap_getnonblock_linux(pcap_t *p);
285
static void pcapint_oneshot_linux(u_char *user, const struct pcap_pkthdr *h,
286
    const u_char *bytes);
287
288
/*
289
 * In pre-3.0 kernels, the tp_vlan_tci field is set to whatever the
290
 * vlan_tci field in the skbuff is.  0 can either mean "not on a VLAN"
291
 * or "on VLAN 0".  There is no flag set in the tp_status field to
292
 * distinguish between them.
293
 *
294
 * In 3.0 and later kernels, if there's a VLAN tag present, the tp_vlan_tci
295
 * field is set to the VLAN tag, and the TP_STATUS_VLAN_VALID flag is set
296
 * in the tp_status field, otherwise the tp_vlan_tci field is set to 0 and
297
 * the TP_STATUS_VLAN_VALID flag isn't set in the tp_status field.
298
 *
299
 * With a pre-3.0 kernel, we cannot distinguish between packets with no
300
 * VLAN tag and packets on VLAN 0, so we will mishandle some packets, and
301
 * there's nothing we can do about that.
302
 *
303
 * So, on those systems, which never set the TP_STATUS_VLAN_VALID flag, we
304
 * continue the behavior of earlier libpcaps, wherein we treated packets
305
 * with a VLAN tag of 0 as being packets without a VLAN tag rather than packets
306
 * on VLAN 0.  We do this by treating packets with a tp_vlan_tci of 0 and
307
 * with the TP_STATUS_VLAN_VALID flag not set in tp_status as not having
308
 * VLAN tags.  This does the right thing on 3.0 and later kernels, and
309
 * continues the old unfixably-imperfect behavior on pre-3.0 kernels.
310
 *
311
 * If TP_STATUS_VLAN_VALID isn't defined, we test it as the 0x10 bit; it
312
 * has that value in 3.0 and later kernels.
313
 */
314
#ifdef TP_STATUS_VLAN_VALID
315
0
  #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & TP_STATUS_VLAN_VALID))
316
#else
317
  /*
318
   * This is being compiled on a system that lacks TP_STATUS_VLAN_VALID,
319
   * so we test with the value it has in the 3.0 and later kernels, so
320
   * we can test it if we're running on a system that has it.  (If we're
321
   * running on a system that doesn't have it, it won't be set in the
322
   * tp_status field, so the tests of it will always fail; that means
323
   * we behave the way we did before we introduced this macro.)
324
   */
325
  #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & 0x10))
326
#endif
327
328
#ifdef TP_STATUS_VLAN_TPID_VALID
329
0
# define VLAN_TPID(hdr, hv) (((hv)->tp_vlan_tpid || ((hdr)->tp_status & TP_STATUS_VLAN_TPID_VALID)) ? (hv)->tp_vlan_tpid : ETH_P_8021Q)
330
#else
331
# define VLAN_TPID(hdr, hv) ETH_P_8021Q
332
#endif
333
334
/*
335
 * Required select timeout if we're polling for an "interface disappeared"
336
 * indication - 1 millisecond.
337
 */
338
static const struct timeval netdown_timeout = {
339
  0, 1000   /* 1000 microseconds = 1 millisecond */
340
};
341
342
/*
343
 * Wrap some ioctl calls
344
 */
345
static int  iface_get_id(int fd, const char *device, char *ebuf);
346
static int  iface_get_mtu(int fd, const char *device, char *ebuf);
347
static int  iface_get_arptype(int fd, const char *device, char *ebuf);
348
static int  iface_bind(int fd, int ifindex, char *ebuf, int protocol);
349
static int  enter_rfmon_mode(pcap_t *handle, int sock_fd,
350
    const char *device);
351
static int  iface_get_ts_types(const char *device, pcap_t *handle,
352
    char *ebuf);
353
static int  iface_get_offload(pcap_t *handle);
354
355
static int  fix_program(pcap_t *handle, struct sock_fprog *fcode);
356
static int  fix_offset(pcap_t *handle, struct bpf_insn *p);
357
static int  set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode);
358
static int  reset_kernel_filter(pcap_t *handle);
359
360
static struct sock_filter total_insn
361
  = BPF_STMT(BPF_RET | BPF_K, 0);
362
static struct sock_fprog  total_fcode
363
  = { 1, &total_insn };
364
365
static int  iface_dsa_get_proto_info(const char *device, pcap_t *handle);
366
367
pcap_t *
368
pcapint_create_interface(const char *device, char *ebuf)
369
0
{
370
0
  pcap_t *handle;
371
372
0
  handle = PCAP_CREATE_COMMON(ebuf, struct pcap_linux);
373
0
  if (handle == NULL)
374
0
    return NULL;
375
376
0
  handle->activate_op = pcap_activate_linux;
377
0
  handle->can_set_rfmon_op = pcap_can_set_rfmon_linux;
378
379
  /*
380
   * See what time stamp types we support.
381
   */
382
0
  if (iface_get_ts_types(device, handle, ebuf) == -1) {
383
0
    pcap_close(handle);
384
0
    return NULL;
385
0
  }
386
387
  /*
388
   * We claim that we support microsecond and nanosecond time
389
   * stamps.
390
   *
391
   * XXX - with adapter-supplied time stamps, can we choose
392
   * microsecond or nanosecond time stamps on arbitrary
393
   * adapters?
394
   */
395
0
  handle->tstamp_precision_list = malloc(2 * sizeof(u_int));
396
0
  if (handle->tstamp_precision_list == NULL) {
397
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
398
0
        errno, "malloc");
399
0
    pcap_close(handle);
400
0
    return NULL;
401
0
  }
402
0
  handle->tstamp_precision_list[0] = PCAP_TSTAMP_PRECISION_MICRO;
403
0
  handle->tstamp_precision_list[1] = PCAP_TSTAMP_PRECISION_NANO;
404
0
  handle->tstamp_precision_count = 2;
405
406
  /*
407
   * Start out with the breakloop handle not open; we don't
408
   * need it until we're activated and ready to capture.
409
   */
410
0
  struct pcap_linux *handlep = handle->priv;
411
0
  handlep->poll_breakloop_fd = -1;
412
413
0
  return handle;
414
0
}
415
416
#ifdef HAVE_LIBNL
417
/*
418
 * If interface {if_name} is a mac80211 driver, the file
419
 * /sys/class/net/{if_name}/phy80211 is a symlink to
420
 * /sys/class/ieee80211/{phydev_name}, for some {phydev_name}.
421
 *
422
 * On Fedora 9, with a 2.6.26.3-29 kernel, my Zydas stick, at
423
 * least, has a "wmaster0" device and a "wlan0" device; the
424
 * latter is the one with the IP address.  Both show up in
425
 * "tcpdump -D" output.  Capturing on the wmaster0 device
426
 * captures with 802.11 headers.
427
 *
428
 * airmon-ng searches through /sys/class/net for devices named
429
 * monN, starting with mon0; as soon as one *doesn't* exist,
430
 * it chooses that as the monitor device name.  If the "iw"
431
 * command exists, it does
432
 *
433
 *    iw dev {if_name} interface add {monif_name} type monitor
434
 *
435
 * where {monif_name} is the monitor device.  It then (sigh) sleeps
436
 * .1 second, and then configures the device up.  Otherwise, if
437
 * /sys/class/ieee80211/{phydev_name}/add_iface is a file, it writes
438
 * {mondev_name}, without a newline, to that file, and again (sigh)
439
 * sleeps .1 second, and then iwconfig's that device into monitor
440
 * mode and configures it up.  Otherwise, you can't do monitor mode.
441
 *
442
 * All these devices are "glued" together by having the
443
 * /sys/class/net/{if_name}/phy80211 links pointing to the same
444
 * place, so, given a wmaster, wlan, or mon device, you can
445
 * find the other devices by looking for devices with
446
 * the same phy80211 link.
447
 *
448
 * To turn monitor mode off, delete the monitor interface,
449
 * either with
450
 *
451
 *    iw dev {monif_name} interface del
452
 *
453
 * or by sending {monif_name}, with no NL, down
454
 * /sys/class/ieee80211/{phydev_name}/remove_iface
455
 *
456
 * Note: if you try to create a monitor device named "monN", and
457
 * there's already a "monN" device, it fails, at least with
458
 * the netlink interface (which is what iw uses), with a return
459
 * value of -ENFILE.  (Return values are negative errnos.)  We
460
 * could probably use that to find an unused device.
461
 *
462
 * Yes, you can have multiple monitor devices for a given
463
 * physical device.
464
 */
465
466
/*
467
 * Is this a mac80211 device?  If so, fill in the physical device path and
468
 * return 1; if not, return 0.  On an error, fill in handle->errbuf and
469
 * return PCAP_ERROR.
470
 */
471
static int
472
get_mac80211_phydev(pcap_t *handle, const char *device, char *phydev_path,
473
    size_t phydev_max_pathlen)
474
{
475
  char *pathstr;
476
  ssize_t bytes_read;
477
478
  /*
479
   * Generate the path string for the symlink to the physical device.
480
   */
481
  if (asprintf(&pathstr, "/sys/class/net/%s/phy80211", device) == -1) {
482
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
483
        "%s: Can't generate path name string for /sys/class/net device",
484
        device);
485
    return PCAP_ERROR;
486
  }
487
  bytes_read = readlink(pathstr, phydev_path, phydev_max_pathlen);
488
  if (bytes_read == -1) {
489
    if (errno == ENOENT) {
490
      /*
491
       * This either means that the directory
492
       * /sys/class/net/{device} exists but doesn't
493
       * have anything named "phy80211" in it,
494
       * in which case it's not a mac80211 device,
495
       * or that the directory doesn't exist,
496
       * in which case the device doesn't exist.
497
       *
498
       * Directly check whether the directory
499
       * exists.
500
       */
501
      struct stat statb;
502
503
      free(pathstr);
504
      if (asprintf(&pathstr, "/sys/class/net/%s", device) == -1) {
505
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
506
            "%s: Can't generate path name string for /sys/class/net device",
507
            device);
508
        return PCAP_ERROR;
509
      }
510
      if (stat(pathstr, &statb) == -1) {
511
        if (errno == ENOENT) {
512
          /*
513
           * No such device.
514
           */
515
          snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
516
              "%s: %s doesn't exist",
517
              device, pathstr);
518
          free(pathstr);
519
          return PCAP_ERROR_NO_SUCH_DEVICE;
520
        }
521
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
522
            "%s: Can't stat %s: %s",
523
            device, pathstr, strerror(errno));
524
        free(pathstr);
525
        return PCAP_ERROR;
526
      }
527
528
      /*
529
       * Path to the directory that would contain
530
       * "phy80211" exists, but "phy80211" doesn't
531
       * exist; that means it's not a mac80211
532
       * device.
533
       */
534
      free(pathstr);
535
      return 0;
536
    }
537
    if (errno == EINVAL) {
538
      /*
539
       * Exists, but it's not a symlink; assume that
540
       * means it's not a mac80211 device.
541
       */
542
      free(pathstr);
543
      return 0;
544
    }
545
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
546
        errno, "%s: Can't readlink %s", device, pathstr);
547
    free(pathstr);
548
    return PCAP_ERROR;
549
  }
550
  free(pathstr);
551
  phydev_path[bytes_read] = '\0';
552
  return 1;
553
}
554
555
/*
 * libnl objects set up by nl80211_init() and released by
 * nl80211_cleanup().
 */
struct nl80211_state {
556
  struct nl_sock *nl_sock; /* socket from nl_socket_alloc(), connected with genl_connect() */
557
  struct nl_cache *nl_cache; /* cache from genl_ctrl_alloc_cache() */
558
  struct genl_family *nl80211; /* result of looking up "nl80211" in nl_cache */
559
};
560
561
static int
562
nl80211_init(pcap_t *handle, struct nl80211_state *state, const char *device)
563
{
564
  int err;
565
566
  state->nl_sock = nl_socket_alloc();
567
  if (!state->nl_sock) {
568
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
569
        "%s: failed to allocate netlink handle", device);
570
    return PCAP_ERROR;
571
  }
572
573
  if (genl_connect(state->nl_sock)) {
574
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
575
        "%s: failed to connect to generic netlink", device);
576
    goto out_handle_destroy;
577
  }
578
579
  err = genl_ctrl_alloc_cache(state->nl_sock, &state->nl_cache);
580
  if (err < 0) {
581
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
582
        "%s: failed to allocate generic netlink cache: %s",
583
        device, nl_geterror(-err));
584
    goto out_handle_destroy;
585
  }
586
587
  state->nl80211 = genl_ctrl_search_by_name(state->nl_cache, "nl80211");
588
  if (!state->nl80211) {
589
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
590
        "%s: nl80211 not found", device);
591
    goto out_cache_free;
592
  }
593
594
  return 0;
595
596
out_cache_free:
597
  nl_cache_free(state->nl_cache);
598
out_handle_destroy:
599
  nl_socket_free(state->nl_sock);
600
  return PCAP_ERROR;
601
}
602
603
static void
604
nl80211_cleanup(struct nl80211_state *state)
605
{
606
  genl_family_put(state->nl80211);
607
  nl_cache_free(state->nl_cache);
608
  nl_socket_free(state->nl_sock);
609
}
610
611
static int
612
del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
613
    const char *device, const char *mondevice);
614
615
static int
616
if_type_cb(struct nl_msg *msg, void* arg)
617
{
618
  struct nlmsghdr* ret_hdr = nlmsg_hdr(msg);
619
  struct nlattr *tb_msg[NL80211_ATTR_MAX + 1];
620
  int *type = (int*)arg;
621
622
  struct genlmsghdr *gnlh = (struct genlmsghdr*) nlmsg_data(ret_hdr);
623
624
  nla_parse(tb_msg, NL80211_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
625
    genlmsg_attrlen(gnlh, 0), NULL);
626
627
  /*
628
   * We sent a message asking for info about a single index.
629
   * To be really paranoid, we could check if the index matched
630
   * by examining nla_get_u32(tb_msg[NL80211_ATTR_IFINDEX]).
631
   */
632
633
  if (tb_msg[NL80211_ATTR_IFTYPE]) {
634
    *type = nla_get_u32(tb_msg[NL80211_ATTR_IFTYPE]);
635
  }
636
637
  return NL_SKIP;
638
}
639
640
static int
641
get_if_type(pcap_t *handle, int sock_fd, struct nl80211_state *state,
642
    const char *device, int *type)
643
{
644
  int ifindex;
645
  struct nl_msg *msg;
646
  int err;
647
648
  ifindex = iface_get_id(sock_fd, device, handle->errbuf);
649
  if (ifindex == -1)
650
    return PCAP_ERROR;
651
652
  msg = nlmsg_alloc();
653
  if (!msg) {
654
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
655
        "%s: failed to allocate netlink msg", device);
656
    return PCAP_ERROR;
657
  }
658
659
  genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ,
660
        genl_family_get_id(state->nl80211), 0,
661
        0, NL80211_CMD_GET_INTERFACE, 0);
662
  NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex);
663
664
  err = nl_send_auto(state->nl_sock, msg);
665
  nlmsg_free(msg);
666
  if (err < 0) {
667
    if (err == -NLE_FAILURE) {
668
      /*
669
       * Device not available; our caller should just
670
       * keep trying.  (libnl 2.x maps ENFILE to
671
       * NLE_FAILURE; it can also map other errors
672
       * to that, but there's not much we can do
673
       * about that.)
674
       */
675
      return 0;
676
    } else {
677
      /*
678
       * Real failure, not just "that device is not
679
       * available.
680
       */
681
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
682
          "%s: nl_send_auto failed getting interface type: %s",
683
          device, nl_geterror(-err));
684
      return PCAP_ERROR;
685
    }
686
  }
687
688
  struct nl_cb *cb = nl_cb_alloc(NL_CB_DEFAULT);
689
  nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, if_type_cb, (void*)type);
690
  err = nl_recvmsgs(state->nl_sock, cb);
691
  nl_cb_put(cb);
692
693
  if (err < 0) {
694
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
695
        "%s: nl_recvmsgs failed getting interface type: %s",
696
        device, nl_geterror(-err));
697
    return PCAP_ERROR;
698
  }
699
700
  /*
701
  * If this is a mac80211 device not in monitor mode, nl_sock will be
702
  * reused for add_mon_if. So we must wait for the ACK here so that
703
  * add_mon_if does not receive it instead and incorrectly interpret
704
  * the ACK as its NEW_INTERFACE command succeeding, even when it fails.
705
  */
706
  err = nl_wait_for_ack(state->nl_sock);
707
  if (err < 0) {
708
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
709
        "%s: nl_wait_for_ack failed getting interface type: %s",
710
        device, nl_geterror(-err));
711
    return PCAP_ERROR;
712
  }
713
714
  /*
715
   * Success.
716
   */
717
  return 1;
718
719
nla_put_failure:
720
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
721
      "%s: nl_put failed getting interface type",
722
      device);
723
  nlmsg_free(msg);
724
  // Do not call nl_cb_put(): nl_cb_alloc() has not been called.
725
  return PCAP_ERROR;
726
}
727
728
static int
729
add_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
730
    const char *device, const char *mondevice)
731
{
732
  struct pcap_linux *handlep = handle->priv;
733
  int ifindex;
734
  struct nl_msg *msg;
735
  int err;
736
737
  ifindex = iface_get_id(sock_fd, device, handle->errbuf);
738
  if (ifindex == -1)
739
    return PCAP_ERROR;
740
741
  msg = nlmsg_alloc();
742
  if (!msg) {
743
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
744
        "%s: failed to allocate netlink msg", device);
745
    return PCAP_ERROR;
746
  }
747
748
  genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ,
749
        genl_family_get_id(state->nl80211), 0,
750
        0, NL80211_CMD_NEW_INTERFACE, 0);
751
  NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex);
752
DIAG_OFF_NARROWING
753
  NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, mondevice);
754
DIAG_ON_NARROWING
755
  NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, NL80211_IFTYPE_MONITOR);
756
757
  err = nl_send_sync(state->nl_sock, msg); // calls nlmsg_free()
758
  if (err < 0) {
759
    switch (err) {
760
761
    case -NLE_FAILURE:
762
    case -NLE_AGAIN:
763
      /*
764
       * Device not available; our caller should just
765
       * keep trying.  (libnl 2.x maps ENFILE to
766
       * NLE_FAILURE; it can also map other errors
767
       * to that, but there's not much we can do
768
       * about that.)
769
       */
770
      return 0;
771
772
    case -NLE_OPNOTSUPP:
773
      /*
774
       * Device is a mac80211 device but adding it as a
775
       * monitor mode device isn't supported.  Report our
776
       * error.
777
       */
778
      return PCAP_ERROR_RFMON_NOTSUP;
779
780
    default:
781
      /*
782
       * Real failure, not just "that device is not
783
       * available."  Report a generic error, using the
784
       * error message from libnl.
785
       */
786
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
787
          "%s: nl_send_sync failed adding %s interface: %s",
788
          device, mondevice, nl_geterror(-err));
789
      return PCAP_ERROR;
790
    }
791
  }
792
793
  /*
794
   * Success.
795
   */
796
797
  /*
798
   * Try to remember the monitor device.
799
   */
800
  handlep->mondevice = strdup(mondevice);
801
  if (handlep->mondevice == NULL) {
802
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
803
        errno, "strdup");
804
    /*
805
     * Get rid of the monitor device.
806
     */
807
    del_mon_if(handle, sock_fd, state, device, mondevice);
808
    return PCAP_ERROR;
809
  }
810
  return 1;
811
812
nla_put_failure:
813
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
814
      "%s: nl_put failed adding %s interface",
815
      device, mondevice);
816
  nlmsg_free(msg);
817
  return PCAP_ERROR;
818
}
819
820
static int
821
del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
822
    const char *device, const char *mondevice)
823
{
824
  int ifindex;
825
  struct nl_msg *msg;
826
  int err;
827
828
  ifindex = iface_get_id(sock_fd, mondevice, handle->errbuf);
829
  if (ifindex == -1)
830
    return PCAP_ERROR;
831
832
  msg = nlmsg_alloc();
833
  if (!msg) {
834
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
835
        "%s: failed to allocate netlink msg", device);
836
    return PCAP_ERROR;
837
  }
838
839
  genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ,
840
        genl_family_get_id(state->nl80211), 0,
841
        0, NL80211_CMD_DEL_INTERFACE, 0);
842
  NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex);
843
844
  err = nl_send_sync(state->nl_sock, msg); // calls nlmsg_free()
845
  if (err < 0) {
846
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
847
        "%s: nl_send_sync failed deleting %s interface: %s",
848
        device, mondevice, nl_geterror(-err));
849
    return PCAP_ERROR;
850
  }
851
852
  /*
853
   * Success.
854
   */
855
  return 1;
856
857
nla_put_failure:
858
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
859
      "%s: nl_put failed deleting %s interface",
860
      device, mondevice);
861
  nlmsg_free(msg);
862
  return PCAP_ERROR;
863
}
864
#endif /* HAVE_LIBNL */
865
866
/*
 * Return the handle's configured protocol, or ETH_P_ALL if it is 0,
 * passed through htons().
 */
static int pcap_protocol(pcap_t *handle)
{
  int proto = handle->opt.protocol;

  if (proto != 0)
    return htons(proto);
  return htons(ETH_P_ALL);
}
876
877
static int
878
pcap_can_set_rfmon_linux(pcap_t *handle)
879
0
{
880
#ifdef HAVE_LIBNL
881
  char phydev_path[PATH_MAX+1];
882
  int ret;
883
#endif
884
885
0
  if (strcmp(handle->opt.device, "any") == 0) {
886
    /*
887
     * Monitor mode makes no sense on the "any" device.
888
     */
889
0
    return 0;
890
0
  }
891
892
#ifdef HAVE_LIBNL
893
  /*
894
   * Bleah.  There doesn't seem to be a way to ask a mac80211
895
   * device, through libnl, whether it supports monitor mode;
896
   * we'll just check whether the device appears to be a
897
   * mac80211 device and, if so, assume the device supports
898
   * monitor mode.
899
   */
900
  ret = get_mac80211_phydev(handle, handle->opt.device, phydev_path,
901
      PATH_MAX);
902
  if (ret < 0)
903
    return ret; /* error */
904
  if (ret == 1)
905
    return 1; /* mac80211 device */
906
#endif
907
908
0
  return 0;
909
0
}
910
911
/*
 * Read one interface counter from
 * /sys/class/net/{if_name}/statistics/{stat} and return it as a number.
 *
 * This is used for rx_{missed,fifo}_errors; compared to /proc/net/dev it
 * avoids counting software drops, but the counter may be unimplemented
 * and just read as 0.  The author has found no straightforward way to
 * check for support.
 *
 * Returns 0 if the file can't be opened or read; otherwise returns the
 * result of parsing the file's contents as a base-10 integer.
 */
static long long int
linux_get_stat(const char * if_name, const char * stat) {
  char scratch[PATH_MAX];
  int stat_fd;
  ssize_t nread;

  /* The buffer first holds the path name ... */
  snprintf(scratch, sizeof(scratch), "/sys/class/net/%s/statistics/%s", if_name, stat);
  stat_fd = open(scratch, O_RDONLY);
  if (stat_fd == -1)
    return 0;

  /* ... and is then reused for the file's contents. */
  nread = read(stat_fd, scratch, sizeof(scratch) - 1);
  close(stat_fd);
  if (nread == -1)
    return 0;

  /* One byte was held back above for this terminator. */
  scratch[nread] = '\0';
  return strtoll(scratch, NULL, 10);
}
938
939
/*
 * Return the sum of the rx_missed_errors and rx_fifo_errors counters
 * for the named interface, as read by linux_get_stat().
 */
static long long int
linux_if_drops(const char * if_name)
{
  long long int total;

  total = linux_get_stat(if_name, "rx_missed_errors");
  total += linux_get_stat(if_name, "rx_fifo_errors");
  return total;
}
946
947
948
/*
949
 * Monitor mode is kind of interesting because we have to reset the
950
 * interface before exiting. The problem can't really be solved without
951
 * some daemon taking care of managing usage counts.  If we put the
952
 * interface into monitor mode, we set a flag indicating that we must
953
 * take it out of that mode when the interface is closed, and, when
954
 * closing the interface, if that flag is set we take it out of monitor
955
 * mode.
956
 */
957
958
static void pcap_cleanup_linux( pcap_t *handle )
959
0
{
960
0
  struct pcap_linux *handlep = handle->priv;
961
#ifdef HAVE_LIBNL
962
  struct nl80211_state nlstate;
963
  int ret;
964
#endif /* HAVE_LIBNL */
965
966
0
  if (handlep->must_do_on_close != 0) {
967
    /*
968
     * There's something we have to do when closing this
969
     * pcap_t.
970
     */
971
#ifdef HAVE_LIBNL
972
    if (handlep->must_do_on_close & MUST_DELETE_MONIF) {
973
      ret = nl80211_init(handle, &nlstate, handlep->device);
974
      if (ret >= 0) {
975
        ret = del_mon_if(handle, handle->fd, &nlstate,
976
            handlep->device, handlep->mondevice);
977
        nl80211_cleanup(&nlstate);
978
      }
979
      if (ret < 0) {
980
        fprintf(stderr,
981
            "Can't delete monitor interface %s (%s).\n"
982
            "Please delete manually.\n",
983
            handlep->mondevice, handle->errbuf);
984
      }
985
    }
986
#endif /* HAVE_LIBNL */
987
988
    /*
989
     * Take this pcap out of the list of pcaps for which we
990
     * have to take the interface out of some mode.
991
     */
992
0
    pcapint_remove_from_pcaps_to_close(handle);
993
0
  }
994
995
0
  if (handle->fd != -1) {
996
    /*
997
     * Destroy the ring buffer (assuming we've set it up),
998
     * and unmap it if it's mapped.
999
     */
1000
0
    destroy_ring(handle);
1001
0
  }
1002
1003
0
  if (handlep->oneshot_buffer != NULL) {
1004
0
    munmap(handlep->oneshot_buffer, handle->snapshot);
1005
0
    handlep->oneshot_buffer = NULL;
1006
0
  }
1007
1008
0
  if (handlep->mondevice != NULL) {
1009
0
    free(handlep->mondevice);
1010
0
    handlep->mondevice = NULL;
1011
0
  }
1012
0
  if (handlep->device != NULL) {
1013
0
    free(handlep->device);
1014
0
    handlep->device = NULL;
1015
0
  }
1016
1017
0
  if (handlep->poll_breakloop_fd != -1) {
1018
0
    close(handlep->poll_breakloop_fd);
1019
0
    handlep->poll_breakloop_fd = -1;
1020
0
  }
1021
0
  pcapint_cleanup_live_common(handle);
1022
0
}
1023
1024
#ifdef HAVE_TPACKET3
1025
/*
 * Some versions of TPACKET_V3 have annoying bugs/misfeatures that have
 * to be worked around.  Decide, from the kernel release string reported
 * by uname(), whether this kernel is one of them.
 *
 * 3.19 is the first release with a fixed TPACKET_V3; anything before
 * that is treated as not fixed (which may really mean it has *no*
 * TPACKET_V3).
 *
 * Returns 0 if the release is 3.19 or later, 1 otherwise - including
 * when uname() fails or the release string can't be parsed.
 */
static int has_broken_tpacket_v3(void)
{
  struct utsname kernel_info;
  long ver_major, ver_minor;
  int parsed_len;
  char next_ch;

  /* No version information: assume broken. */
  if (uname(&kernel_info) == -1)
    return 1;

  /* A malformed version: ditto. */
  if (sscanf(kernel_info.release, "%ld.%ld%n",
      &ver_major, &ver_minor, &parsed_len) != 2)
    return 1;

  /* "major.minor" must be followed by another '.' or end the string. */
  next_ch = kernel_info.release[parsed_len];
  if (next_ch != '.' && next_ch != '\0')
    return 1;

  /* OK, a fixed version. */
  if (ver_major > 3)
    return 0;
  if (ver_major == 3 && ver_minor >= 19)
    return 0;

  /* Too old. */
  return 1;
}
1060
#endif
1061
1062
/*
1063
 * Set the timeout to be used in poll() with memory-mapped packet capture.
1064
 */
1065
static void
1066
set_poll_timeout(struct pcap_linux *handlep)
1067
0
{
1068
0
#ifdef HAVE_TPACKET3
1069
0
  int broken_tpacket_v3 = has_broken_tpacket_v3();
1070
0
#endif
1071
0
  if (handlep->timeout == 0) {
1072
0
#ifdef HAVE_TPACKET3
1073
    /*
1074
     * XXX - due to a set of (mis)features in the TPACKET_V3
1075
     * kernel code prior to the 3.19 kernel, blocking forever
1076
     * with a TPACKET_V3 socket can, if few packets are
1077
     * arriving and passing the socket filter, cause most
1078
     * packets to be dropped.  See libpcap issue #335 for the
1079
     * full painful story.
1080
     *
1081
     * The workaround is to have poll() time out very quickly,
1082
     * so we grab the frames handed to us, and return them to
1083
     * the kernel, ASAP.
1084
     */
1085
0
    if (handlep->tp_version == TPACKET_V3 && broken_tpacket_v3)
1086
0
      handlep->poll_timeout = 1; /* don't block for very long */
1087
0
    else
1088
0
#endif
1089
0
      handlep->poll_timeout = -1; /* block forever */
1090
0
  } else if (handlep->timeout > 0) {
1091
0
#ifdef HAVE_TPACKET3
1092
    /*
1093
     * For TPACKET_V3, the timeout is handled by the kernel,
1094
     * so block forever; that way, we don't get extra timeouts.
1095
     * Don't do that if we have a broken TPACKET_V3, though.
1096
     */
1097
0
    if (handlep->tp_version == TPACKET_V3 && !broken_tpacket_v3)
1098
0
      handlep->poll_timeout = -1; /* block forever, let TPACKET_V3 wake us up */
1099
0
    else
1100
0
#endif
1101
0
      handlep->poll_timeout = handlep->timeout; /* block for that amount of time */
1102
0
  } else {
1103
    /*
1104
     * Non-blocking mode; we call poll() to pick up error
1105
     * indications, but we don't want it to wait for
1106
     * anything.
1107
     */
1108
0
    handlep->poll_timeout = 0;
1109
0
  }
1110
0
}
1111
1112
/*
 * Break out of a capture loop: do the common breakloop processing and
 * then, if this handle has a breakloop descriptor, write a 64-bit 1 to it.
 */
static void pcap_breakloop_linux(pcap_t *handle)
{
  struct pcap_linux *priv;
  uint64_t wakeup = 1;

  pcapint_breakloop_common(handle);
  priv = handle->priv;

  /* No breakloop descriptor was set up; nothing more to do. */
  if (priv->poll_breakloop_fd == -1)
    return;

  /*
   * XXX - pcap_breakloop() doesn't have a return value, so a failed
   * write can't be reported; the result is deliberately ignored.
   */
DIAG_OFF_WARN_UNUSED_RESULT
  (void)write(priv->poll_breakloop_fd, &wakeup, sizeof(wakeup));
DIAG_ON_WARN_UNUSED_RESULT
}
1129
1130
/*
1131
 * Set the offset at which to insert VLAN tags.
1132
 * That should be the offset of the type field.
1133
 */
1134
static void
1135
set_vlan_offset(pcap_t *handle)
1136
0
{
1137
0
  struct pcap_linux *handlep = handle->priv;
1138
1139
0
  switch (handle->linktype) {
1140
1141
0
  case DLT_EN10MB:
1142
    /*
1143
     * The type field is after the destination and source
1144
     * MAC address.
1145
     */
1146
0
    handlep->vlan_offset = 2 * ETH_ALEN;
1147
0
    break;
1148
1149
0
  case DLT_LINUX_SLL:
1150
    /*
1151
     * The type field is in the last 2 bytes of the
1152
     * DLT_LINUX_SLL header.
1153
     */
1154
0
    handlep->vlan_offset = SLL_HDR_LEN - 2;
1155
0
    break;
1156
1157
0
  default:
1158
0
    handlep->vlan_offset = -1; /* unknown */
1159
0
    break;
1160
0
  }
1161
0
}
1162
1163
static int
1164
pcap_activate_linux(pcap_t *handle)
1165
0
{
1166
0
  struct pcap_linux *handlep = handle->priv;
1167
0
  const char  *device;
1168
0
  int   is_any_device;
1169
0
  struct ifreq  ifr;
1170
0
  int   status;
1171
0
  int   ret;
1172
1173
0
  device = handle->opt.device;
1174
1175
  /*
1176
   * Start out assuming no warnings.
1177
   */
1178
0
  status = 0;
1179
1180
  /*
1181
   * Make sure the name we were handed will fit into the ioctls we
1182
   * might perform on the device; if not, return a "No such device"
1183
   * indication, as the Linux kernel shouldn't support creating
1184
   * a device whose name won't fit into those ioctls.
1185
   *
1186
   * "Will fit" means "will fit, complete with a null terminator",
1187
   * so if the length, which does *not* include the null terminator,
1188
   * is greater than *or equal to* the size of the field into which
1189
   * we'll be copying it, that won't fit.
1190
   */
1191
0
  if (strlen(device) >= sizeof(ifr.ifr_name)) {
1192
    /*
1193
     * There's nothing more to say, so clear the error
1194
     * message.
1195
     */
1196
0
    handle->errbuf[0] = '\0';
1197
0
    status = PCAP_ERROR_NO_SUCH_DEVICE;
1198
0
    goto fail;
1199
0
  }
1200
1201
  /*
1202
   * Turn a negative snapshot value (invalid), a snapshot value of
1203
   * 0 (unspecified), or a value bigger than the normal maximum
1204
   * value, into the maximum allowed value.
1205
   *
1206
   * If some application really *needs* a bigger snapshot
1207
   * length, we should just increase MAXIMUM_SNAPLEN.
1208
   */
1209
0
  if (handle->snapshot <= 0 || handle->snapshot > MAXIMUM_SNAPLEN)
1210
0
    handle->snapshot = MAXIMUM_SNAPLEN;
1211
1212
0
  handlep->device = strdup(device);
1213
0
  if (handlep->device == NULL) {
1214
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
1215
0
        errno, "strdup");
1216
0
    status = PCAP_ERROR;
1217
0
    goto fail;
1218
0
  }
1219
1220
  /*
1221
   * The "any" device is a special device which causes us not
1222
   * to bind to a particular device and thus to look at all
1223
   * devices.
1224
   */
1225
0
  is_any_device = (strcmp(device, "any") == 0);
1226
0
  if (is_any_device) {
1227
0
    if (handle->opt.promisc) {
1228
0
      handle->opt.promisc = 0;
1229
      /* Just a warning. */
1230
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
1231
0
          "Promiscuous mode not supported on the \"any\" device");
1232
0
      status = PCAP_WARNING_PROMISC_NOTSUP;
1233
0
    }
1234
0
  }
1235
1236
  /* copy timeout value */
1237
0
  handlep->timeout = handle->opt.timeout;
1238
1239
  /*
1240
   * If we're in promiscuous mode, then we probably want
1241
   * to see when the interface drops packets too, so get an
1242
   * initial count from
1243
   * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors
1244
   */
1245
0
  if (handle->opt.promisc)
1246
0
    handlep->sysfs_dropped = linux_if_drops(handlep->device);
1247
1248
  /*
1249
   * If the "any" device is specified, try to open a SOCK_DGRAM.
1250
   * Otherwise, open a SOCK_RAW.
1251
   */
1252
0
  ret = setup_socket(handle, is_any_device);
1253
0
  if (ret < 0) {
1254
    /*
1255
     * Fatal error; the return value is the error code,
1256
     * and handle->errbuf has been set to an appropriate
1257
     * error message.
1258
     */
1259
0
    status = ret;
1260
0
    goto fail;
1261
0
  }
1262
0
  if (ret > 0) {
1263
    /*
1264
     * We got a warning; return that, as handle->errbuf
1265
     * might have been overwritten by this warning.
1266
     */
1267
0
    status = ret;
1268
0
  }
1269
1270
  /*
1271
   * Success (possibly with a warning).
1272
   *
1273
   * First, try to allocate an event FD for breakloop, if
1274
   * we're not going to start in non-blocking mode.
1275
   */
1276
0
  if (!handle->opt.nonblock) {
1277
0
    handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK);
1278
0
    if (handlep->poll_breakloop_fd == -1) {
1279
      /*
1280
       * Failed.
1281
       */
1282
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
1283
0
          PCAP_ERRBUF_SIZE, errno, "could not open eventfd");
1284
0
      status = PCAP_ERROR;
1285
0
      goto fail;
1286
0
    }
1287
0
  }
1288
1289
  /*
1290
   * Succeeded.
1291
   * Try to set up memory-mapped access.
1292
   */
1293
0
  ret = setup_mmapped(handle);
1294
0
  if (ret < 0) {
1295
    /*
1296
     * We failed to set up to use it, or the
1297
     * kernel supports it, but we failed to
1298
     * enable it.  The return value is the
1299
     * error status to return and, if it's
1300
     * PCAP_ERROR, handle->errbuf contains
1301
     * the error message.
1302
     */
1303
0
    status = ret;
1304
0
    goto fail;
1305
0
  }
1306
0
  if (ret > 0) {
1307
    /*
1308
     * We got a warning; return that, as handle->errbuf
1309
     * might have been overwritten by this warning.
1310
     */
1311
0
    status = ret;
1312
0
  }
1313
1314
  /*
1315
   * We succeeded.  status has been set to the status to return,
1316
   * which might be 0, or might be a PCAP_WARNING_ value.
1317
   */
1318
  /*
1319
   * Now that we have activated the mmap ring, we can
1320
   * set the correct protocol.
1321
   */
1322
0
  if ((ret = iface_bind(handle->fd, handlep->ifindex,
1323
0
      handle->errbuf, pcap_protocol(handle))) != 0) {
1324
0
    status = ret;
1325
0
    goto fail;
1326
0
  }
1327
1328
0
  handle->inject_op = pcap_inject_linux;
1329
0
  handle->setfilter_op = pcap_setfilter_linux;
1330
0
  handle->setdirection_op = pcap_setdirection_linux;
1331
0
  handle->set_datalink_op = pcap_set_datalink_linux;
1332
0
  handle->setnonblock_op = pcap_setnonblock_linux;
1333
0
  handle->getnonblock_op = pcap_getnonblock_linux;
1334
0
  handle->cleanup_op = pcap_cleanup_linux;
1335
0
  handle->stats_op = pcap_stats_linux;
1336
0
  handle->breakloop_op = pcap_breakloop_linux;
1337
1338
0
  switch (handlep->tp_version) {
1339
1340
0
  case TPACKET_V2:
1341
0
    handle->read_op = pcap_read_linux_mmap_v2;
1342
0
    break;
1343
0
#ifdef HAVE_TPACKET3
1344
0
  case TPACKET_V3:
1345
0
    handle->read_op = pcap_read_linux_mmap_v3;
1346
0
    break;
1347
0
#endif
1348
0
  }
1349
0
  handle->oneshot_callback = pcapint_oneshot_linux;
1350
0
  handle->selectable_fd = handle->fd;
1351
1352
0
  return status;
1353
1354
0
fail:
1355
0
  pcap_cleanup_linux(handle);
1356
0
  return status;
1357
0
}
1358
1359
static int
1360
pcap_set_datalink_linux(pcap_t *handle, int dlt)
1361
0
{
1362
0
  handle->linktype = dlt;
1363
1364
  /*
1365
   * Update the offset at which to insert VLAN tags for the
1366
   * new link-layer type.
1367
   */
1368
0
  set_vlan_offset(handle);
1369
1370
0
  return 0;
1371
0
}
1372
1373
/*
1374
 * linux_check_direction()
1375
 *
1376
 * Do checks based on packet direction.
1377
 */
1378
static inline int
1379
linux_check_direction(const pcap_t *handle, const struct sockaddr_ll *sll)
1380
0
{
1381
0
  struct pcap_linux *handlep = handle->priv;
1382
1383
0
  if (sll->sll_pkttype == PACKET_OUTGOING) {
1384
    /*
1385
     * Outgoing packet.
1386
     * If this is from the loopback device, reject it;
1387
     * we'll see the packet as an incoming packet as well,
1388
     * and we don't want to see it twice.
1389
     */
1390
0
    if (sll->sll_ifindex == handlep->lo_ifindex)
1391
0
      return 0;
1392
1393
    /*
1394
     * If this is an outgoing CAN frame, and the user doesn't
1395
     * want only outgoing packets, reject it; CAN devices
1396
     * and drivers, and the CAN stack, always arrange to
1397
     * loop back transmitted packets, so they also appear
1398
     * as incoming packets.  We don't want duplicate packets,
1399
     * and we can't easily distinguish packets looped back
1400
     * by the CAN layer than those received by the CAN layer,
1401
     * so we eliminate this packet instead.
1402
     *
1403
     * We check whether this is a CAN frame by checking whether
1404
     * the device's hardware type is ARPHRD_CAN.
1405
     */
1406
0
    if (sll->sll_hatype == ARPHRD_CAN &&
1407
0
         handle->direction != PCAP_D_OUT)
1408
0
      return 0;
1409
1410
    /*
1411
     * If the user only wants incoming packets, reject it.
1412
     */
1413
0
    if (handle->direction == PCAP_D_IN)
1414
0
      return 0;
1415
0
  } else {
1416
    /*
1417
     * Incoming packet.
1418
     * If the user only wants outgoing packets, reject it.
1419
     */
1420
0
    if (handle->direction == PCAP_D_OUT)
1421
0
      return 0;
1422
0
  }
1423
0
  return 1;
1424
0
}
1425
1426
/*
1427
 * Check whether the device to which the pcap_t is bound still exists.
1428
 * We do so by asking what address the socket is bound to, and checking
1429
 * whether the ifindex in the address is -1, meaning "that device is gone",
1430
 * or some other value, meaning "that device still exists".
1431
 */
1432
static int
1433
device_still_exists(pcap_t *handle)
1434
0
{
1435
0
  struct pcap_linux *handlep = handle->priv;
1436
0
  struct sockaddr_ll addr;
1437
0
  socklen_t addr_len;
1438
1439
  /*
1440
   * If handlep->ifindex is -1, the socket isn't bound, meaning
1441
   * we're capturing on the "any" device; that device never
1442
   * disappears.  (It should also never be configured down, so
1443
   * we shouldn't even get here, but let's make sure.)
1444
   */
1445
0
  if (handlep->ifindex == -1)
1446
0
    return (1); /* it's still here */
1447
1448
  /*
1449
   * OK, now try to get the address for the socket.
1450
   */
1451
0
  addr_len = sizeof (addr);
1452
0
  if (getsockname(handle->fd, (struct sockaddr *) &addr, &addr_len) == -1) {
1453
    /*
1454
     * Error - report an error and return -1.
1455
     */
1456
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
1457
0
        errno, "getsockname failed");
1458
0
    return (-1);
1459
0
  }
1460
0
  if (addr.sll_ifindex == -1) {
1461
    /*
1462
     * This means the device went away.
1463
     */
1464
0
    return (0);
1465
0
  }
1466
1467
  /*
1468
   * The device presumably just went down.
1469
   */
1470
0
  return (1);
1471
0
}
1472
1473
static int
1474
pcap_inject_linux(pcap_t *handle, const void *buf, int size)
1475
0
{
1476
0
  struct pcap_linux *handlep = handle->priv;
1477
0
  int ret;
1478
1479
0
  if (handlep->ifindex == -1) {
1480
    /*
1481
     * We don't support sending on the "any" device.
1482
     */
1483
0
    pcapint_strlcpy(handle->errbuf,
1484
0
        "Sending packets isn't supported on the \"any\" device",
1485
0
        PCAP_ERRBUF_SIZE);
1486
0
    return (-1);
1487
0
  }
1488
1489
0
  if (handlep->cooked) {
1490
    /*
1491
     * We don't support sending on cooked-mode sockets.
1492
     *
1493
     * XXX - how do you send on a bound cooked-mode
1494
     * socket?
1495
     * Is a "sendto()" required there?
1496
     */
1497
0
    pcapint_strlcpy(handle->errbuf,
1498
0
        "Sending packets isn't supported in cooked mode",
1499
0
        PCAP_ERRBUF_SIZE);
1500
0
    return (-1);
1501
0
  }
1502
1503
0
  ret = (int)send(handle->fd, buf, size, 0);
1504
0
  if (ret == -1) {
1505
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
1506
0
        errno, "send");
1507
0
    return (-1);
1508
0
  }
1509
0
  return (ret);
1510
0
}
1511
1512
/*
1513
 *  Get the statistics for the given packet capture handle.
1514
 */
1515
static int
1516
pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats)
1517
0
{
1518
0
  struct pcap_linux *handlep = handle->priv;
1519
0
#ifdef HAVE_TPACKET3
1520
  /*
1521
   * For sockets using TPACKET_V2, the extra stuff at the end
1522
   * of a struct tpacket_stats_v3 will not be filled in, and
1523
   * we don't look at it so this is OK even for those sockets.
1524
   * In addition, the PF_PACKET socket code in the kernel only
1525
   * uses the length parameter to compute how much data to
1526
   * copy out and to indicate how much data was copied out, so
1527
   * it's OK to base it on the size of a struct tpacket_stats.
1528
   *
1529
   * XXX - it's probably OK, in fact, to just use a
1530
   * struct tpacket_stats for V3 sockets, as we don't
1531
   * care about the tp_freeze_q_cnt stat.
1532
   */
1533
0
  struct tpacket_stats_v3 kstats;
1534
#else /* HAVE_TPACKET3 */
1535
  struct tpacket_stats kstats;
1536
#endif /* HAVE_TPACKET3 */
1537
0
  socklen_t len = sizeof (struct tpacket_stats);
1538
1539
0
  long long if_dropped = 0;
1540
1541
  /*
1542
   * To fill in ps_ifdrop, we parse
1543
   * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors
1544
   * for the numbers
1545
   */
1546
0
  if (handle->opt.promisc)
1547
0
  {
1548
    /*
1549
     * XXX - is there any reason to do this by remembering
1550
     * the last counts value, subtracting it from the
1551
     * current counts value, and adding that to stat.ps_ifdrop,
1552
     * maintaining stat.ps_ifdrop as a count, rather than just
1553
     * saving the *initial* counts value and setting
1554
     * stat.ps_ifdrop to the difference between the current
1555
     * value and the initial value?
1556
     *
1557
     * One reason might be to handle the count wrapping
1558
     * around, on platforms where the count is 32 bits
1559
     * and where you might get more than 2^32 dropped
1560
     * packets; is there any other reason?
1561
     *
1562
     * (We maintain the count as a long long int so that,
1563
     * if the kernel maintains the counts as 64-bit even
1564
     * on 32-bit platforms, we can handle the real count.
1565
     *
1566
     * Unfortunately, we can't report 64-bit counts; we
1567
     * need a better API for reporting statistics, such as
1568
     * one that reports them in a style similar to the
1569
     * pcapng Interface Statistics Block, so that 1) the
1570
     * counts are 64-bit, 2) it's easier to add new statistics
1571
     * without breaking the ABI, and 3) it's easier to
1572
     * indicate to a caller that wants one particular
1573
     * statistic that it's not available by just not supplying
1574
     * it.)
1575
     */
1576
0
    if_dropped = handlep->sysfs_dropped;
1577
0
    handlep->sysfs_dropped = linux_if_drops(handlep->device);
1578
0
    handlep->stat.ps_ifdrop += (u_int)(handlep->sysfs_dropped - if_dropped);
1579
0
  }
1580
1581
  /*
1582
   * Try to get the packet counts from the kernel.
1583
   */
1584
0
  if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS,
1585
0
      &kstats, &len) > -1) {
1586
    /*
1587
     * "ps_recv" counts only packets that *passed* the
1588
     * filter, not packets that didn't pass the filter.
1589
     * This includes packets later dropped because we
1590
     * ran out of buffer space.
1591
     *
1592
     * "ps_drop" counts packets dropped because we ran
1593
     * out of buffer space.  It doesn't count packets
1594
     * dropped by the interface driver.  It counts only
1595
     * packets that passed the filter.
1596
     *
1597
     * See above for ps_ifdrop.
1598
     *
1599
     * Both statistics include packets not yet read from
1600
     * the kernel by libpcap, and thus not yet seen by
1601
     * the application.
1602
     *
1603
     * In "linux/net/packet/af_packet.c", at least in 2.6.27
1604
     * through 5.6 kernels, "tp_packets" is incremented for
1605
     * every packet that passes the packet filter *and* is
1606
     * successfully copied to the ring buffer; "tp_drops" is
1607
     * incremented for every packet dropped because there's
1608
     * not enough free space in the ring buffer.
1609
     *
1610
     * When the statistics are returned for a PACKET_STATISTICS
1611
     * "getsockopt()" call, "tp_drops" is added to "tp_packets",
1612
     * so that "tp_packets" counts all packets handed to
1613
     * the PF_PACKET socket, including packets dropped because
1614
     * there wasn't room on the socket buffer - but not
1615
     * including packets that didn't pass the filter.
1616
     *
1617
     * In the BSD BPF, the count of received packets is
1618
     * incremented for every packet handed to BPF, regardless
1619
     * of whether it passed the filter.
1620
     *
1621
     * We can't make "pcap_stats()" work the same on both
1622
     * platforms, but the best approximation is to return
1623
     * "tp_packets" as the count of packets and "tp_drops"
1624
     * as the count of drops.
1625
     *
1626
     * Keep a running total because each call to
1627
     *    getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, ....
1628
     * resets the counters to zero.
1629
     */
1630
0
    handlep->stat.ps_recv += kstats.tp_packets;
1631
0
    handlep->stat.ps_drop += kstats.tp_drops;
1632
0
    *stats = handlep->stat;
1633
0
    return 0;
1634
0
  }
1635
1636
0
  pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, errno,
1637
0
      "failed to get statistics from socket");
1638
0
  return -1;
1639
0
}
1640
1641
/*
1642
 * A PF_PACKET socket can be bound to any network interface.
1643
 */
1644
static int
1645
can_be_bound(const char *name _U_)
1646
0
{
1647
0
  return (1);
1648
0
}
1649
1650
/*
1651
 * Get a socket to use with various interface ioctls.
1652
 */
1653
static int
1654
get_if_ioctl_socket(void)
1655
0
{
1656
0
  int fd;
1657
1658
  /*
1659
   * This is a bit ugly.
1660
   *
1661
   * There isn't a socket type that's guaranteed to work.
1662
   *
1663
   * AF_NETLINK will work *if* you have Netlink configured into the
1664
   * kernel (can it be configured out if you have any networking
1665
   * support at all?) *and* if you're running a sufficiently recent
1666
   * kernel, but not all the kernels we support are sufficiently
1667
   * recent - that feature was introduced in Linux 4.6.
1668
   *
1669
   * AF_UNIX will work *if* you have UNIX-domain sockets configured
1670
   * into the kernel and *if* you're not on a system that doesn't
1671
   * allow them - some SELinux systems don't allow you create them.
1672
   * Most systems probably have them configured in, but not all systems
1673
   * have them configured in and allow them to be created.
1674
   *
1675
   * AF_INET will work *if* you have IPv4 configured into the kernel,
1676
   * but, apparently, some systems have network adapters but have
1677
   * kernels without IPv4 support.
1678
   *
1679
   * AF_INET6 will work *if* you have IPv6 configured into the
1680
   * kernel, but if you don't have AF_INET, you might not have
1681
   * AF_INET6, either (that is, independently on its own grounds).
1682
   *
1683
   * AF_PACKET would work, except that some of these calls should
1684
   * work even if you *don't* have capture permission (you should be
1685
   * able to enumerate interfaces and get information about them
1686
   * without capture permission; you shouldn't get a failure until
1687
   * you try pcap_activate()).  (If you don't allow programs to
1688
   * get as much information as possible about interfaces if you
1689
   * don't have permission to capture, you run the risk of users
1690
   * asking "why isn't it showing XXX" - or, worse, if you don't
1691
   * show interfaces *at all* if you don't have permission to
1692
   * capture on them, "why do no interfaces show up?" - when the
1693
   * real problem is a permissions problem.  Error reports of that
1694
   * type require a lot more back-and-forth to debug, as evidenced
1695
   * by many Wireshark bugs/mailing list questions/Q&A questions.)
1696
   *
1697
   * So:
1698
   *
1699
   * we first try an AF_NETLINK socket, where "try" includes
1700
   * "try to do a device ioctl on it", as, in the future, once
1701
   * pre-4.6 kernels are sufficiently rare, that will probably
1702
   * be the mechanism most likely to work;
1703
   *
1704
   * if that fails, we try an AF_UNIX socket, as that's less
1705
   * likely to be configured out on a networking-capable system
1706
   * than is IP;
1707
   *
1708
   * if that fails, we try an AF_INET6 socket;
1709
   *
1710
   * if that fails, we try an AF_INET socket.
1711
   */
1712
0
  fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
1713
0
  if (fd != -1) {
1714
    /*
1715
     * OK, let's make sure we can do an SIOCGIFNAME
1716
     * ioctl.
1717
     */
1718
0
    struct ifreq ifr;
1719
1720
0
    memset(&ifr, 0, sizeof(ifr));
1721
0
    if (ioctl(fd, SIOCGIFNAME, &ifr) == 0 ||
1722
0
        errno != EOPNOTSUPP) {
1723
      /*
1724
       * It succeeded, or failed for some reason
1725
       * other than "netlink sockets don't support
1726
       * device ioctls".  Go with the AF_NETLINK
1727
       * socket.
1728
       */
1729
0
      return (fd);
1730
0
    }
1731
1732
    /*
1733
     * OK, that didn't work, so it's as bad as "netlink
1734
     * sockets aren't available".  Close the socket and
1735
     * drive on.
1736
     */
1737
0
    close(fd);
1738
0
  }
1739
1740
  /*
1741
   * Now try an AF_UNIX socket.
1742
   */
1743
0
  fd = socket(AF_UNIX, SOCK_RAW, 0);
1744
0
  if (fd != -1) {
1745
    /*
1746
     * OK, we got it!
1747
     */
1748
0
    return (fd);
1749
0
  }
1750
1751
  /*
1752
   * Now try an AF_INET6 socket.
1753
   */
1754
0
  fd = socket(AF_INET6, SOCK_DGRAM, 0);
1755
0
  if (fd != -1) {
1756
0
    return (fd);
1757
0
  }
1758
1759
  /*
1760
   * Now try an AF_INET socket.
1761
   *
1762
   * XXX - if that fails, is there anything else we should try?
1763
   * AF_CAN, for embedded systems in vehicles, in case they're
1764
   * built without Internet protocol support?  Any other socket
1765
   * types popular in non-Internet embedded systems?
1766
   */
1767
0
  return (socket(AF_INET, SOCK_DGRAM, 0));
1768
0
}
1769
1770
/*
1771
 * Get additional flags for a device, using SIOCETHTOOL.
1772
 */
1773
static int
1774
get_if_flags(const char *name, bpf_u_int32 *flags, char *errbuf)
1775
0
{
1776
0
  int sock;
1777
0
  FILE *fh;
1778
0
  unsigned int arptype = ARPHRD_VOID;
1779
0
  struct ifreq ifr;
1780
0
  struct ethtool_value info;
1781
1782
0
  if (*flags & PCAP_IF_LOOPBACK) {
1783
    /*
1784
     * Loopback devices aren't wireless, and "connected"/
1785
     * "disconnected" doesn't apply to them.
1786
     */
1787
0
    *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE;
1788
0
    return 0;
1789
0
  }
1790
1791
0
  sock = get_if_ioctl_socket();
1792
0
  if (sock == -1) {
1793
0
    pcapint_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, errno,
1794
0
        "Can't create socket to get ethtool information for %s",
1795
0
        name);
1796
0
    return -1;
1797
0
  }
1798
1799
  /*
1800
   * OK, what type of network is this?
1801
   * In particular, is it wired or wireless?
1802
   */
1803
0
  if (is_wifi(name)) {
1804
    /*
1805
     * Wi-Fi, hence wireless.
1806
     */
1807
0
    *flags |= PCAP_IF_WIRELESS;
1808
0
  } else {
1809
    /*
1810
     * OK, what does /sys/class/net/{if_name}/type contain?
1811
     * (We don't use that for Wi-Fi, as it'll report
1812
     * "Ethernet", i.e. ARPHRD_ETHER, for non-monitor-
1813
     * mode devices.)
1814
     */
1815
0
    char *pathstr;
1816
1817
0
    if (asprintf(&pathstr, "/sys/class/net/%s/type", name) == -1) {
1818
0
      snprintf(errbuf, PCAP_ERRBUF_SIZE,
1819
0
          "%s: Can't generate path name string for /sys/class/net device",
1820
0
          name);
1821
0
      close(sock);
1822
0
      return -1;
1823
0
    }
1824
0
    fh = fopen(pathstr, "r");
1825
0
    if (fh != NULL) {
1826
0
      if (fscanf(fh, "%u", &arptype) == 1) {
1827
        /*
1828
         * OK, we got an ARPHRD_ type; what is it?
1829
         */
1830
0
        switch (arptype) {
1831
1832
0
        case ARPHRD_LOOPBACK:
1833
          /*
1834
           * These are types to which
1835
           * "connected" and "disconnected"
1836
           * don't apply, so don't bother
1837
           * asking about it.
1838
           *
1839
           * XXX - add other types?
1840
           */
1841
0
          close(sock);
1842
0
          fclose(fh);
1843
0
          free(pathstr);
1844
0
          return 0;
1845
1846
0
        case ARPHRD_IRDA:
1847
0
        case ARPHRD_IEEE80211:
1848
0
        case ARPHRD_IEEE80211_PRISM:
1849
0
        case ARPHRD_IEEE80211_RADIOTAP:
1850
0
        case ARPHRD_IEEE802154:
1851
0
        case ARPHRD_IEEE802154_MONITOR:
1852
0
        case ARPHRD_6LOWPAN:
1853
          /*
1854
           * Various wireless types.
1855
           */
1856
0
          *flags |= PCAP_IF_WIRELESS;
1857
0
          break;
1858
0
        }
1859
0
      }
1860
0
      fclose(fh);
1861
0
    }
1862
0
    free(pathstr);
1863
0
  }
1864
1865
0
#ifdef ETHTOOL_GLINK
1866
0
  memset(&ifr, 0, sizeof(ifr));
1867
0
  pcapint_strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
1868
0
  info.cmd = ETHTOOL_GLINK;
1869
  /*
1870
   * XXX - while Valgrind handles SIOCETHTOOL and knows that
1871
   * the ETHTOOL_GLINK command sets the .data member of the
1872
   * structure, Memory Sanitizer doesn't yet do so:
1873
   *
1874
   *    https://bugs.llvm.org/show_bug.cgi?id=45814
1875
   *
1876
   * For now, we zero it out to squelch warnings; if the bug
1877
   * in question is fixed, we can remove this.
1878
   */
1879
0
  info.data = 0;
1880
0
  ifr.ifr_data = (caddr_t)&info;
1881
0
  if (ioctl(sock, SIOCETHTOOL, &ifr) == -1) {
1882
0
    int save_errno = errno;
1883
1884
0
    switch (save_errno) {
1885
1886
0
    case EOPNOTSUPP:
1887
0
    case EINVAL:
1888
      /*
1889
       * OK, this OS version or driver doesn't support
1890
       * asking for this information.
1891
       * XXX - distinguish between "this doesn't
1892
       * support ethtool at all because it's not
1893
       * that type of device" vs. "this doesn't
1894
       * support ethtool even though it's that
1895
       * type of device", and return "unknown".
1896
       */
1897
0
      *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE;
1898
0
      close(sock);
1899
0
      return 0;
1900
1901
0
    case ENODEV:
1902
      /*
1903
       * OK, no such device.
1904
       * The user will find that out when they try to
1905
       * activate the device; just say "OK" and
1906
       * don't set anything.
1907
       */
1908
0
      close(sock);
1909
0
      return 0;
1910
1911
0
    default:
1912
      /*
1913
       * Other error.
1914
       */
1915
0
      pcapint_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE,
1916
0
          save_errno,
1917
0
          "%s: SIOCETHTOOL(ETHTOOL_GLINK) ioctl failed",
1918
0
          name);
1919
0
      close(sock);
1920
0
      return -1;
1921
0
    }
1922
0
  }
1923
1924
  /*
1925
   * Is it connected?
1926
   */
1927
0
  if (info.data) {
1928
    /*
1929
     * It's connected.
1930
     */
1931
0
    *flags |= PCAP_IF_CONNECTION_STATUS_CONNECTED;
1932
0
  } else {
1933
    /*
1934
     * It's disconnected.
1935
     */
1936
0
    *flags |= PCAP_IF_CONNECTION_STATUS_DISCONNECTED;
1937
0
  }
1938
0
#endif
1939
1940
0
  close(sock);
1941
1942
#ifdef HAVE_SNF_API
1943
  // For "down" SNF devices the SNF API makes the flags more relevant.
1944
  if (arptype == ARPHRD_ETHER &&
1945
      ! (*flags & PCAP_IF_UP) &&
1946
      snf_get_if_flags(name, flags, errbuf) < 0)
1947
    return PCAP_ERROR;
1948
#endif // HAVE_SNF_API
1949
1950
0
  return 0;
1951
0
}
1952
1953
int
1954
pcapint_platform_finddevs(pcap_if_list_t *devlistp, char *errbuf)
1955
0
{
1956
  /*
1957
   * Get the list of regular interfaces first.
1958
   */
1959
0
  if (pcapint_findalldevs_interfaces(devlistp, errbuf, can_be_bound,
1960
0
      get_if_flags) == -1)
1961
0
    return (-1); /* failure */
1962
1963
  /*
1964
   * Add the "any" device.
1965
   */
1966
0
  if (pcapint_add_any_dev(devlistp, errbuf) == NULL)
1967
0
    return (-1);
1968
1969
0
  return (0);
1970
0
}
1971
1972
/*
1973
 * Set direction flag: Which packets do we accept on a forwarding
1974
 * single device? IN, OUT or both?
1975
 */
1976
static int
1977
pcap_setdirection_linux(pcap_t *handle, pcap_direction_t d)
1978
0
{
1979
  /*
1980
   * It's guaranteed, at this point, that d is a valid
1981
   * direction value.
1982
   */
1983
0
  handle->direction = d;
1984
0
  return 0;
1985
0
}
1986
1987
/*
 * Report whether "device" looks like a Wi-Fi interface.
 *
 * The test is whether /sys/class/net/{device}/wireless can be
 * stat()ed.  Returns 1 if it can and 0 otherwise; a failure to build
 * the path string is also reported as 0.
 */
static int
is_wifi(const char *device)
{
  struct stat sysfs_stat;
  char *wireless_path;
  int found;

  if (asprintf(&wireless_path, "/sys/class/net/%s/wireless", device) == -1) {
    /* Couldn't even build the path; just give up. */
    return 0;
  }

  /* A sysfs "wireless" entry means it's a wireless interface. */
  found = (stat(wireless_path, &sysfs_stat) == 0) ? 1 : 0;
  free(wireless_path);

  return found;
}
2011
2012
/*
2013
 *  Linux uses the ARP hardware type to identify the type of an
2014
 *  interface. pcap uses the DLT_xxx constants for this. This
2015
 *  function takes a pointer to a "pcap_t", and an ARPHRD_xxx
2016
 *  constant, as arguments, and sets "handle->linktype" to the
2017
 *  appropriate DLT_XXX constant and sets "handle->offset" to
2018
 *  the appropriate value (to make "handle->offset" plus link-layer
2019
 *  header length be a multiple of 4, so that the link-layer payload
2020
 *  will be aligned on a 4-byte boundary when capturing packets).
2021
 *  (If the offset isn't set here, it'll be 0; add code as appropriate
2022
 *  for cases where it shouldn't be 0.)
2023
 *
2024
 *  If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture
2025
 *  in cooked mode; otherwise, we can't use cooked mode, so we have
2026
 *  to pick some type that works in raw mode, or fail.
2027
 *
2028
 *  Sets the link type to -1 if unable to map the type.
2029
 *
2030
 *  Returns 0 on success or a PCAP_ERROR_ value on error.
2031
 */
2032
static int map_arphrd_to_dlt(pcap_t *handle, int arptype,
2033
           const char *device, int cooked_ok)
2034
0
{
2035
0
  static const char cdma_rmnet[] = "cdma_rmnet";
2036
2037
0
  switch (arptype) {
2038
2039
0
  case ARPHRD_ETHER:
2040
    /*
2041
     * For various annoying reasons having to do with DHCP
2042
     * software, some versions of Android give the mobile-
2043
     * phone-network interface an ARPHRD_ value of
2044
     * ARPHRD_ETHER, even though the packets supplied by
2045
     * that interface have no link-layer header, and begin
2046
     * with an IP header, so that the ARPHRD_ value should
2047
     * be ARPHRD_NONE.
2048
     *
2049
     * Detect those devices by checking the device name, and
2050
     * use DLT_RAW for them.
2051
     */
2052
0
    if (strncmp(device, cdma_rmnet, sizeof cdma_rmnet - 1) == 0) {
2053
0
      handle->linktype = DLT_RAW;
2054
0
      return 0;
2055
0
    }
2056
2057
    /*
2058
     * Is this a real Ethernet device?  If so, give it a
2059
     * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so
2060
     * that an application can let you choose it, in case you're
2061
     * capturing DOCSIS traffic that a Cisco Cable Modem
2062
     * Termination System is putting out onto an Ethernet (it
2063
     * doesn't put an Ethernet header onto the wire, it puts raw
2064
     * DOCSIS frames out on the wire inside the low-level
2065
     * Ethernet framing).
2066
     *
2067
     * XXX - are there any other sorts of "fake Ethernet" that
2068
     * have ARPHRD_ETHER but that shouldn't offer DLT_DOCSIS as
2069
     * a Cisco CMTS won't put traffic onto it or get traffic
2070
     * bridged onto it?  ISDN is handled in "setup_socket()",
2071
     * as we fall back on cooked mode there, and we use
2072
     * is_wifi() to check for 802.11 devices; are there any
2073
     * others?
2074
     */
2075
0
    if (!is_wifi(device)) {
2076
0
      int ret;
2077
2078
      /*
2079
       * This is not a Wi-Fi device but it could be
2080
       * a DSA master/management network device.
2081
       */
2082
0
      ret = iface_dsa_get_proto_info(device, handle);
2083
0
      if (ret < 0)
2084
0
        return ret;
2085
2086
0
      if (ret == 1) {
2087
        /*
2088
         * This is a DSA master/management network
2089
         * device, linktype is already set by
2090
         * iface_dsa_get_proto_info(), set an
2091
         * appropriate offset here.
2092
         */
2093
0
        handle->offset = 2;
2094
0
        break;
2095
0
      }
2096
2097
      /*
2098
       * It's not a Wi-Fi device; offer DOCSIS.
2099
       */
2100
0
      handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
2101
0
      if (handle->dlt_list == NULL) {
2102
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
2103
0
            PCAP_ERRBUF_SIZE, errno, "malloc");
2104
0
        return (PCAP_ERROR);
2105
0
      }
2106
0
      handle->dlt_list[0] = DLT_EN10MB;
2107
0
      handle->dlt_list[1] = DLT_DOCSIS;
2108
0
      handle->dlt_count = 2;
2109
0
    }
2110
    /* FALLTHROUGH */
2111
2112
0
  case ARPHRD_METRICOM:
2113
0
  case ARPHRD_LOOPBACK:
2114
0
    handle->linktype = DLT_EN10MB;
2115
0
    handle->offset = 2;
2116
0
    break;
2117
2118
0
  case ARPHRD_EETHER:
2119
0
    handle->linktype = DLT_EN3MB;
2120
0
    break;
2121
2122
0
  case ARPHRD_AX25:
2123
0
    handle->linktype = DLT_AX25_KISS;
2124
0
    break;
2125
2126
0
  case ARPHRD_PRONET:
2127
0
    handle->linktype = DLT_PRONET;
2128
0
    break;
2129
2130
0
  case ARPHRD_CHAOS:
2131
0
    handle->linktype = DLT_CHAOS;
2132
0
    break;
2133
2134
0
  case ARPHRD_CAN:
2135
0
    handle->linktype = DLT_CAN_SOCKETCAN;
2136
0
    break;
2137
2138
0
  case ARPHRD_IEEE802_TR:
2139
0
  case ARPHRD_IEEE802:
2140
0
    handle->linktype = DLT_IEEE802;
2141
0
    handle->offset = 2;
2142
0
    break;
2143
2144
0
  case ARPHRD_ARCNET:
2145
0
    handle->linktype = DLT_ARCNET_LINUX;
2146
0
    break;
2147
2148
0
  case ARPHRD_FDDI:
2149
0
    handle->linktype = DLT_FDDI;
2150
0
    handle->offset = 3;
2151
0
    break;
2152
2153
0
  case ARPHRD_ATM:
2154
    /*
2155
     * The Classical IP implementation in ATM for Linux
2156
     * supports both what RFC 1483 calls "LLC Encapsulation",
2157
     * in which each packet has an LLC header, possibly
2158
     * with a SNAP header as well, prepended to it, and
2159
     * what RFC 1483 calls "VC Based Multiplexing", in which
2160
     * different virtual circuits carry different network
2161
     * layer protocols, and no header is prepended to packets.
2162
     *
2163
     * They both have an ARPHRD_ type of ARPHRD_ATM, so
2164
     * you can't use the ARPHRD_ type to find out whether
2165
     * captured packets will have an LLC header, and,
2166
     * while there's a socket ioctl to *set* the encapsulation
2167
     * type, there's no ioctl to *get* the encapsulation type.
2168
     *
2169
     * This means that
2170
     *
2171
     *  programs that dissect Linux Classical IP frames
2172
     *  would have to check for an LLC header and,
2173
     *  depending on whether they see one or not, dissect
2174
     *  the frame as LLC-encapsulated or as raw IP (I
2175
     *  don't know whether there's any traffic other than
2176
     *  IP that would show up on the socket, or whether
2177
     *  there's any support for IPv6 in the Linux
2178
     *  Classical IP code);
2179
     *
2180
     *  filter expressions would have to compile into
2181
     *  code that checks for an LLC header and does
2182
     *  the right thing.
2183
     *
2184
     * Both of those are a nuisance - and, at least on systems
2185
     * that support PF_PACKET sockets, we don't have to put
2186
     * up with those nuisances; instead, we can just capture
2187
     * in cooked mode.  That's what we'll do, if we can.
2188
     * Otherwise, we'll just fail.
2189
     */
2190
0
    if (cooked_ok)
2191
0
      handle->linktype = DLT_LINUX_SLL;
2192
0
    else
2193
0
      handle->linktype = -1;
2194
0
    break;
2195
2196
0
  case ARPHRD_IEEE80211:
2197
0
    handle->linktype = DLT_IEEE802_11;
2198
0
    break;
2199
2200
0
  case ARPHRD_IEEE80211_PRISM:
2201
0
    handle->linktype = DLT_PRISM_HEADER;
2202
0
    break;
2203
2204
0
  case ARPHRD_IEEE80211_RADIOTAP:
2205
0
    handle->linktype = DLT_IEEE802_11_RADIO;
2206
0
    break;
2207
2208
0
  case ARPHRD_PPP:
2209
    /*
2210
     * Some PPP code in the kernel supplies no link-layer
2211
     * header whatsoever to PF_PACKET sockets; other PPP
2212
     * code supplies PPP link-layer headers ("syncppp.c");
2213
     * some PPP code might supply random link-layer
2214
     * headers (PPP over ISDN - there's code in Ethereal,
2215
     * for example, to cope with PPP-over-ISDN captures
2216
     * with which the Ethereal developers have had to cope,
2217
     * heuristically trying to determine which of the
2218
     * oddball link-layer headers particular packets have).
2219
     *
2220
     * As such, we just punt, and run all PPP interfaces
2221
     * in cooked mode, if we can; otherwise, we just treat
2222
     * it as DLT_RAW, for now - if somebody needs to capture,
2223
     * on a 2.0[.x] kernel, on PPP devices that supply a
2224
     * link-layer header, they'll have to add code here to
2225
     * map to the appropriate DLT_ type (possibly adding a
2226
     * new DLT_ type, if necessary).
2227
     */
2228
0
    if (cooked_ok)
2229
0
      handle->linktype = DLT_LINUX_SLL;
2230
0
    else {
2231
      /*
2232
       * XXX - handle ISDN types here?  We can't fall
2233
       * back on cooked sockets, so we'd have to
2234
       * figure out from the device name what type of
2235
       * link-layer encapsulation it's using, and map
2236
       * that to an appropriate DLT_ value, meaning
2237
       * we'd map "isdnN" devices to DLT_RAW (they
2238
       * supply raw IP packets with no link-layer
2239
       * header) and "isdY" devices to a new DLT_I4L_IP
2240
       * type that has only an Ethernet packet type as
2241
       * a link-layer header.
2242
       *
2243
       * But sometimes we seem to get random crap
2244
       * in the link-layer header when capturing on
2245
       * ISDN devices....
2246
       */
2247
0
      handle->linktype = DLT_RAW;
2248
0
    }
2249
0
    break;
2250
2251
0
  case ARPHRD_CISCO:
2252
0
    handle->linktype = DLT_C_HDLC;
2253
0
    break;
2254
2255
  /* Not sure if this is correct for all tunnels, but it
2256
   * works for CIPE */
2257
0
  case ARPHRD_TUNNEL:
2258
0
  case ARPHRD_SIT:
2259
0
  case ARPHRD_CSLIP:
2260
0
  case ARPHRD_SLIP6:
2261
0
  case ARPHRD_CSLIP6:
2262
0
  case ARPHRD_ADAPT:
2263
0
  case ARPHRD_SLIP:
2264
0
  case ARPHRD_RAWHDLC:
2265
0
  case ARPHRD_DLCI:
2266
    /*
2267
     * XXX - should some of those be mapped to DLT_LINUX_SLL
2268
     * instead?  Should we just map all of them to DLT_LINUX_SLL?
2269
     */
2270
0
    handle->linktype = DLT_RAW;
2271
0
    break;
2272
2273
0
  case ARPHRD_FRAD:
2274
0
    handle->linktype = DLT_FRELAY;
2275
0
    break;
2276
2277
0
  case ARPHRD_LOCALTLK:
2278
0
    handle->linktype = DLT_LTALK;
2279
0
    break;
2280
2281
0
  case 18:
2282
    /*
2283
     * RFC 4338 defines an encapsulation for IP and ARP
2284
     * packets that's compatible with the RFC 2625
2285
     * encapsulation, but that uses a different ARP
2286
     * hardware type and hardware addresses.  That
2287
     * ARP hardware type is 18; Linux doesn't define
2288
     * any ARPHRD_ value as 18, but if it ever officially
2289
     * supports RFC 4338-style IP-over-FC, it should define
2290
     * one.
2291
     *
2292
     * For now, we map it to DLT_IP_OVER_FC, in the hopes
2293
     * that this will encourage its use in the future,
2294
     * should Linux ever officially support RFC 4338-style
2295
     * IP-over-FC.
2296
     */
2297
0
    handle->linktype = DLT_IP_OVER_FC;
2298
0
    break;
2299
2300
0
  case ARPHRD_FCPP:
2301
0
  case ARPHRD_FCAL:
2302
0
  case ARPHRD_FCPL:
2303
0
  case ARPHRD_FCFABRIC:
2304
    /*
2305
     * Back in 2002, Donald Lee at Cray wanted a DLT_ for
2306
     * IP-over-FC:
2307
     *
2308
     *  https://www.mail-archive.com/tcpdump-workers@sandelman.ottawa.on.ca/msg01043.html
2309
     *
2310
     * and one was assigned.
2311
     *
2312
     * In a later private discussion (spun off from a message
2313
     * on the ethereal-users list) on how to get that DLT_
2314
     * value in libpcap on Linux, I ended up deciding that
2315
     * the best thing to do would be to have him tweak the
2316
     * driver to set the ARPHRD_ value to some ARPHRD_FCxx
2317
     * type, and map all those types to DLT_IP_OVER_FC:
2318
     *
2319
     *  I've checked into the libpcap and tcpdump CVS tree
2320
     *  support for DLT_IP_OVER_FC.  In order to use that,
2321
     *  you'd have to modify your modified driver to return
2322
     *  one of the ARPHRD_FCxxx types, in "fcLINUXfcp.c" -
2323
     *  change it to set "dev->type" to ARPHRD_FCFABRIC, for
2324
     *  example (the exact value doesn't matter, it can be
2325
     *  any of ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, or
2326
     *  ARPHRD_FCFABRIC).
2327
     *
2328
     * 11 years later, Christian Svensson wanted to map
2329
     * various ARPHRD_ values to DLT_FC_2 and
2330
     * DLT_FC_2_WITH_FRAME_DELIMS for raw Fibre Channel
2331
     * frames:
2332
     *
2333
     *  https://github.com/mcr/libpcap/pull/29
2334
     *
2335
     * There doesn't seem to be any network drivers that uses
2336
     * any of the ARPHRD_FC* values for IP-over-FC, and
2337
     * it's not exactly clear what the "Dummy types for non
2338
     * ARP hardware" are supposed to mean (link-layer
2339
     * header type?  Physical network type?), so it's
2340
     * not exactly clear why the ARPHRD_FC* types exist
2341
     * in the first place.
2342
     *
2343
     * For now, we map them to DLT_FC_2, and provide an
2344
     * option of DLT_FC_2_WITH_FRAME_DELIMS, as well as
2345
     * DLT_IP_OVER_FC just in case there's some old
2346
     * driver out there that uses one of those types for
2347
     * IP-over-FC on which somebody wants to capture
2348
     * packets.
2349
     */
2350
0
    handle->linktype = DLT_FC_2;
2351
0
    handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 3);
2352
0
    if (handle->dlt_list == NULL) {
2353
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
2354
0
          PCAP_ERRBUF_SIZE, errno, "malloc");
2355
0
      return (PCAP_ERROR);
2356
0
    }
2357
0
    handle->dlt_list[0] = DLT_FC_2;
2358
0
    handle->dlt_list[1] = DLT_FC_2_WITH_FRAME_DELIMS;
2359
0
    handle->dlt_list[2] = DLT_IP_OVER_FC;
2360
0
    handle->dlt_count = 3;
2361
0
    break;
2362
2363
0
  case ARPHRD_IRDA:
2364
    /* Don't expect IP packet out of this interfaces... */
2365
0
    handle->linktype = DLT_LINUX_IRDA;
2366
    /* We need to save packet direction for IrDA decoding,
2367
     * so let's use "Linux-cooked" mode. Jean II
2368
     *
2369
     * XXX - this is handled in setup_socket(). */
2370
    /* handlep->cooked = 1; */
2371
0
    break;
2372
2373
0
  case ARPHRD_LAPD:
2374
    /* Don't expect IP packet out of this interfaces... */
2375
0
    handle->linktype = DLT_LINUX_LAPD;
2376
0
    break;
2377
2378
0
  case ARPHRD_NONE:
2379
    /*
2380
     * No link-layer header; packets are just IP
2381
     * packets, so use DLT_RAW.
2382
     */
2383
0
    handle->linktype = DLT_RAW;
2384
0
    break;
2385
2386
0
       case ARPHRD_IEEE802154:
2387
0
               handle->linktype =  DLT_IEEE802_15_4_NOFCS;
2388
0
               break;
2389
2390
0
  case ARPHRD_NETLINK:
2391
0
    handle->linktype = DLT_NETLINK;
2392
    /*
2393
     * We need to use cooked mode, so that in sll_protocol we
2394
     * pick up the netlink protocol type such as NETLINK_ROUTE,
2395
     * NETLINK_GENERIC, NETLINK_FIB_LOOKUP, etc.
2396
     *
2397
     * XXX - this is handled in setup_socket().
2398
     */
2399
    /* handlep->cooked = 1; */
2400
0
    break;
2401
2402
0
  case ARPHRD_VSOCKMON:
2403
0
    handle->linktype = DLT_VSOCK;
2404
0
    break;
2405
2406
0
  default:
2407
0
    handle->linktype = -1;
2408
0
    break;
2409
0
  }
2410
0
  return (0);
2411
0
}
2412
2413
/*
2414
 * Try to set up a PF_PACKET socket.
2415
 * Returns 0 or a PCAP_WARNING_ value on success and a PCAP_ERROR_ value
2416
 * on failure.
2417
 */
2418
static int
2419
setup_socket(pcap_t *handle, int is_any_device)
2420
0
{
2421
0
  struct pcap_linux *handlep = handle->priv;
2422
0
  const char    *device = handle->opt.device;
2423
0
  int     status = 0;
2424
0
  int     sock_fd, arptype;
2425
0
  int     val;
2426
0
  int     err = 0;
2427
0
  struct packet_mreq  mr;
2428
2429
  /*
2430
   * Open a socket with protocol family packet. If cooked is true,
2431
   * we open a SOCK_DGRAM socket for the cooked interface, otherwise
2432
   * we open a SOCK_RAW socket for the raw interface.
2433
   *
2434
   * The protocol is set to 0.  This means we will receive no
2435
   * packets until we "bind" the socket with a non-zero
2436
   * protocol.  This allows us to setup the ring buffers without
2437
   * dropping any packets.
2438
   */
2439
0
  sock_fd = is_any_device ?
2440
0
    socket(PF_PACKET, SOCK_DGRAM, 0) :
2441
0
    socket(PF_PACKET, SOCK_RAW, 0);
2442
2443
0
  if (sock_fd == -1) {
2444
0
    if (errno == EPERM || errno == EACCES) {
2445
      /*
2446
       * You don't have permission to open the
2447
       * socket.
2448
       */
2449
0
      status = PCAP_ERROR_PERM_DENIED;
2450
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2451
0
          "Attempt to create packet socket failed - CAP_NET_RAW may be required");
2452
0
    } else if (errno == EAFNOSUPPORT) {
2453
      /*
2454
       * PF_PACKET sockets not supported.
2455
       * Perhaps we're running on the WSL1 module
2456
       * in the Windows NT kernel rather than on
2457
       * a real Linux kernel.
2458
       */
2459
0
      status = PCAP_ERROR_CAPTURE_NOTSUP;
2460
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2461
0
          "PF_PACKET sockets not supported - is this WSL1?");
2462
0
    } else {
2463
      /*
2464
       * Other error.
2465
       */
2466
0
      status = PCAP_ERROR;
2467
0
    }
2468
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2469
0
        errno, "socket");
2470
0
    return status;
2471
0
  }
2472
2473
  /*
2474
   * Get the interface index of the loopback device.
2475
   * If the attempt fails, don't fail, just set the
2476
   * "handlep->lo_ifindex" to -1.
2477
   *
2478
   * XXX - can there be more than one device that loops
2479
   * packets back, i.e. devices other than "lo"?  If so,
2480
   * we'd need to find them all, and have an array of
2481
   * indices for them, and check all of them in
2482
   * "pcap_read_packet()".
2483
   */
2484
0
  handlep->lo_ifindex = iface_get_id(sock_fd, "lo", handle->errbuf);
2485
2486
  /*
2487
   * Default value for offset to align link-layer payload
2488
   * on a 4-byte boundary.
2489
   */
2490
0
  handle->offset   = 0;
2491
2492
  /*
2493
   * What kind of frames do we have to deal with? Fall back
2494
   * to cooked mode if we have an unknown interface type
2495
   * or a type we know doesn't work well in raw mode.
2496
   */
2497
0
  if (!is_any_device) {
2498
    /* Assume for now we don't need cooked mode. */
2499
0
    handlep->cooked = 0;
2500
2501
0
    if (handle->opt.rfmon) {
2502
      /*
2503
       * We were asked to turn on monitor mode.
2504
       * Do so before we get the link-layer type,
2505
       * because entering monitor mode could change
2506
       * the link-layer type.
2507
       */
2508
0
      err = enter_rfmon_mode(handle, sock_fd, device);
2509
0
      if (err < 0) {
2510
        /* Hard failure */
2511
0
        close(sock_fd);
2512
0
        return err;
2513
0
      }
2514
0
      if (err == 0) {
2515
        /*
2516
         * Nothing worked for turning monitor mode
2517
         * on.
2518
         */
2519
0
        close(sock_fd);
2520
2521
0
        return PCAP_ERROR_RFMON_NOTSUP;
2522
0
      }
2523
2524
      /*
2525
       * Either monitor mode has been turned on for
2526
       * the device, or we've been given a different
2527
       * device to open for monitor mode.  If we've
2528
       * been given a different device, use it.
2529
       */
2530
0
      if (handlep->mondevice != NULL)
2531
0
        device = handlep->mondevice;
2532
0
    }
2533
0
    arptype = iface_get_arptype(sock_fd, device, handle->errbuf);
2534
0
    if (arptype < 0) {
2535
0
      close(sock_fd);
2536
0
      return arptype;
2537
0
    }
2538
0
    status = map_arphrd_to_dlt(handle, arptype, device, 1);
2539
0
    if (status < 0) {
2540
0
      close(sock_fd);
2541
0
      return status;
2542
0
    }
2543
0
    if (handle->linktype == -1 ||
2544
0
        handle->linktype == DLT_LINUX_SLL ||
2545
0
        handle->linktype == DLT_LINUX_IRDA ||
2546
0
        handle->linktype == DLT_LINUX_LAPD ||
2547
0
        handle->linktype == DLT_NETLINK ||
2548
0
        (handle->linktype == DLT_EN10MB &&
2549
0
         (strncmp("isdn", device, 4) == 0 ||
2550
0
          strncmp("isdY", device, 4) == 0))) {
2551
      /*
2552
       * Unknown interface type (-1), or a
2553
       * device we explicitly chose to run
2554
       * in cooked mode (e.g., PPP devices),
2555
       * or an ISDN device (whose link-layer
2556
       * type we can only determine by using
2557
       * APIs that may be different on different
2558
       * kernels) - reopen in cooked mode.
2559
       *
2560
       * If the type is unknown, return a warning;
2561
       * map_arphrd_to_dlt() has already set the
2562
       * warning message.
2563
       */
2564
0
      if (close(sock_fd) == -1) {
2565
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
2566
0
            PCAP_ERRBUF_SIZE, errno, "close");
2567
0
        return PCAP_ERROR;
2568
0
      }
2569
0
      sock_fd = socket(PF_PACKET, SOCK_DGRAM, 0);
2570
0
      if (sock_fd < 0) {
2571
        /*
2572
         * Fatal error.  We treat this as
2573
         * a generic error; we already know
2574
         * that we were able to open a
2575
         * PF_PACKET/SOCK_RAW socket, so
2576
         * any failure is a "this shouldn't
2577
         * happen" case.
2578
         */
2579
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
2580
0
            PCAP_ERRBUF_SIZE, errno, "socket");
2581
0
        return PCAP_ERROR;
2582
0
      }
2583
0
      handlep->cooked = 1;
2584
2585
      /*
2586
       * Get rid of any link-layer type list
2587
       * we allocated - this only supports cooked
2588
       * capture.
2589
       */
2590
0
      if (handle->dlt_list != NULL) {
2591
0
        free(handle->dlt_list);
2592
0
        handle->dlt_list = NULL;
2593
0
        handle->dlt_count = 0;
2594
0
      }
2595
2596
0
      if (handle->linktype == -1) {
2597
        /*
2598
         * Warn that we're falling back on
2599
         * cooked mode; we may want to
2600
         * update "map_arphrd_to_dlt()"
2601
         * to handle the new type.
2602
         */
2603
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2604
0
          "arptype %d not "
2605
0
          "supported by libpcap - "
2606
0
          "falling back to cooked "
2607
0
          "socket",
2608
0
          arptype);
2609
0
        status = PCAP_WARNING;
2610
0
      }
2611
2612
      /*
2613
       * IrDA capture is not a real "cooked" capture,
2614
       * it's IrLAP frames, not IP packets.  The
2615
       * same applies to LAPD capture.
2616
       */
2617
0
      if (handle->linktype != DLT_LINUX_IRDA &&
2618
0
          handle->linktype != DLT_LINUX_LAPD &&
2619
0
          handle->linktype != DLT_NETLINK)
2620
0
        handle->linktype = DLT_LINUX_SLL;
2621
0
    }
2622
2623
0
    handlep->ifindex = iface_get_id(sock_fd, device,
2624
0
        handle->errbuf);
2625
0
    if (handlep->ifindex == -1) {
2626
0
      close(sock_fd);
2627
0
      return PCAP_ERROR;
2628
0
    }
2629
2630
0
    if ((err = iface_bind(sock_fd, handlep->ifindex,
2631
0
        handle->errbuf, 0)) != 0) {
2632
0
      close(sock_fd);
2633
0
      return err;
2634
0
    }
2635
0
  } else {
2636
    /*
2637
     * The "any" device.
2638
     */
2639
0
    if (handle->opt.rfmon) {
2640
      /*
2641
       * It doesn't support monitor mode.
2642
       */
2643
0
      close(sock_fd);
2644
0
      return PCAP_ERROR_RFMON_NOTSUP;
2645
0
    }
2646
2647
    /*
2648
     * It uses cooked mode.
2649
     * Support both DLT_LINUX_SLL and DLT_LINUX_SLL2.
2650
     */
2651
0
    handlep->cooked = 1;
2652
0
    handle->linktype = DLT_LINUX_SLL;
2653
0
    handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
2654
0
    if (handle->dlt_list == NULL) {
2655
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
2656
0
          PCAP_ERRBUF_SIZE, errno, "malloc");
2657
0
      return (PCAP_ERROR);
2658
0
    }
2659
0
    handle->dlt_list[0] = DLT_LINUX_SLL;
2660
0
    handle->dlt_list[1] = DLT_LINUX_SLL2;
2661
0
    handle->dlt_count = 2;
2662
2663
    /*
2664
     * We're not bound to a device.
2665
     * For now, we're using this as an indication
2666
     * that we can't transmit; stop doing that only
2667
     * if we figure out how to transmit in cooked
2668
     * mode.
2669
     */
2670
0
    handlep->ifindex = -1;
2671
0
  }
2672
2673
  /*
2674
   * Select promiscuous mode on if "promisc" is set.
2675
   *
2676
   * Do not turn allmulti mode on if we don't select
2677
   * promiscuous mode - on some devices (e.g., Orinoco
2678
   * wireless interfaces), allmulti mode isn't supported
2679
   * and the driver implements it by turning promiscuous
2680
   * mode on, and that screws up the operation of the
2681
   * card as a normal networking interface, and on no
2682
   * other platform I know of does starting a non-
2683
   * promiscuous capture affect which multicast packets
2684
   * are received by the interface.
2685
   */
2686
2687
  /*
2688
   * Hmm, how can we set promiscuous mode on all interfaces?
2689
   * I am not sure if that is possible at all.  For now, we
2690
   * silently ignore attempts to turn promiscuous mode on
2691
   * for the "any" device (so you don't have to explicitly
2692
   * disable it in programs such as tcpdump).
2693
   */
2694
2695
0
  if (!is_any_device && handle->opt.promisc) {
2696
0
    memset(&mr, 0, sizeof(mr));
2697
0
    mr.mr_ifindex = handlep->ifindex;
2698
0
    mr.mr_type    = PACKET_MR_PROMISC;
2699
0
    if (setsockopt(sock_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
2700
0
        &mr, sizeof(mr)) == -1) {
2701
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
2702
0
          PCAP_ERRBUF_SIZE, errno, "setsockopt (PACKET_ADD_MEMBERSHIP)");
2703
0
      close(sock_fd);
2704
0
      return PCAP_ERROR;
2705
0
    }
2706
0
  }
2707
2708
  /*
2709
   * Enable auxiliary data and reserve room for reconstructing
2710
   * VLAN headers.
2711
   *
2712
   * XXX - is enabling auxiliary data necessary, now that we
2713
   * only support memory-mapped capture?  The kernel's memory-mapped
2714
   * capture code doesn't seem to check whether auxiliary data
2715
   * is enabled, it seems to provide it whether it is or not.
2716
   */
2717
0
  val = 1;
2718
0
  if (setsockopt(sock_fd, SOL_PACKET, PACKET_AUXDATA, &val,
2719
0
           sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2720
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2721
0
        errno, "setsockopt (PACKET_AUXDATA)");
2722
0
    close(sock_fd);
2723
0
    return PCAP_ERROR;
2724
0
  }
2725
0
  handle->offset += VLAN_TAG_LEN;
2726
2727
  /*
2728
   * If we're in cooked mode, make the snapshot length
2729
   * large enough to hold a "cooked mode" header plus
2730
   * 1 byte of packet data (so we don't pass a byte
2731
   * count of 0 to "recvfrom()").
2732
   * XXX - we don't know whether this will be DLT_LINUX_SLL
2733
   * or DLT_LINUX_SLL2, so make sure it's big enough for
2734
   * a DLT_LINUX_SLL2 "cooked mode" header; a snapshot length
2735
   * that small is silly anyway.
2736
   */
2737
0
  if (handlep->cooked) {
2738
0
    if (handle->snapshot < SLL2_HDR_LEN + 1)
2739
0
      handle->snapshot = SLL2_HDR_LEN + 1;
2740
0
  }
2741
0
  handle->bufsize = handle->snapshot;
2742
2743
  /*
2744
   * Set the offset at which to insert VLAN tags.
2745
   */
2746
0
  set_vlan_offset(handle);
2747
2748
0
  if (handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO) {
2749
0
    int nsec_tstamps = 1;
2750
2751
0
    if (setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPNS, &nsec_tstamps, sizeof(nsec_tstamps)) < 0) {
2752
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "setsockopt: unable to set SO_TIMESTAMPNS");
2753
0
      close(sock_fd);
2754
0
      return PCAP_ERROR;
2755
0
    }
2756
0
  }
2757
2758
  /*
2759
   * We've succeeded. Save the socket FD in the pcap structure.
2760
   */
2761
0
  handle->fd = sock_fd;
2762
2763
  /*
2764
   * Any supported Linux version implements at least four auxiliary
2765
   * data items (SKF_AD_PROTOCOL, SKF_AD_PKTTYPE, SKF_AD_IFINDEX and
2766
   * SKF_AD_NLATTR).  Set a flag so the code generator can use these
2767
   * items if necessary.
2768
   */
2769
0
  handle->bpf_codegen_flags |= BPF_SPECIAL_BASIC_HANDLING;
2770
2771
  /*
2772
   * Can we generate special code for VLAN checks?
2773
   * (XXX - what if we need the special code but it's not supported
2774
   * by the OS?  Is that possible?)
2775
   *
2776
   * This depends on both a runtime condition (the running Linux kernel
2777
   * must support at least SKF_AD_VLAN_TAG_PRESENT in the auxiliary data
2778
   * and must support SO_BPF_EXTENSIONS in order to tell the userland
2779
   * process what it supports) and a compile-time condition (the OS
2780
   * headers must define both constants in order to compile libpcap code
2781
   * that asks the kernel about the support).
2782
   */
2783
0
#if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT)
2784
0
  int bpf_extensions;
2785
0
  socklen_t len = sizeof(bpf_extensions);
2786
0
  if (getsockopt(sock_fd, SOL_SOCKET, SO_BPF_EXTENSIONS,
2787
0
      &bpf_extensions, &len) == 0) {
2788
0
    if (bpf_extensions >= SKF_AD_VLAN_TAG_PRESENT) {
2789
      /*
2790
       * Yes, we can.  Request that we do so.
2791
       */
2792
0
      handle->bpf_codegen_flags |= BPF_SPECIAL_VLAN_HANDLING;
2793
0
    }
2794
0
  }
2795
0
#endif // defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT)
2796
2797
0
  return status;
2798
0
}
2799
2800
/*
2801
 * Attempt to setup memory-mapped access.
2802
 *
2803
 * On success, returns 0 if there are no warnings or a PCAP_WARNING_ code
2804
 * if there is a warning.
2805
 *
2806
 * On error, returns the appropriate error code; if that is PCAP_ERROR,
2807
 * sets handle->errbuf to the appropriate message.
2808
 */
2809
static int
2810
setup_mmapped(pcap_t *handle)
2811
0
{
2812
0
  struct pcap_linux *handlep = handle->priv;
2813
0
  int flags = MAP_ANONYMOUS | MAP_PRIVATE;
2814
0
  int status;
2815
2816
  /*
2817
   * Attempt to allocate a buffer to hold the contents of one
2818
   * packet, for use by the oneshot callback.
2819
   */
2820
0
#ifdef MAP_32BIT
2821
0
  if (pcapint_mmap_32bit) flags |= MAP_32BIT;
2822
0
#endif
2823
0
  handlep->oneshot_buffer = mmap(0, handle->snapshot, PROT_READ | PROT_WRITE, flags, -1, 0);
2824
0
  if (handlep->oneshot_buffer == MAP_FAILED) {
2825
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2826
0
        errno, "can't allocate oneshot buffer");
2827
0
    return PCAP_ERROR;
2828
0
  }
2829
2830
0
  if (handle->opt.buffer_size == 0) {
2831
    /* by default request 2M for the ring buffer */
2832
0
    handle->opt.buffer_size = 2*1024*1024;
2833
0
  }
2834
0
  status = prepare_tpacket_socket(handle);
2835
0
  if (status == -1) {
2836
0
    munmap(handlep->oneshot_buffer, handle->snapshot);
2837
0
    handlep->oneshot_buffer = NULL;
2838
0
    return PCAP_ERROR;
2839
0
  }
2840
0
  status = create_ring(handle);
2841
0
  if (status < 0) {
2842
    /*
2843
     * Error attempting to enable memory-mapped capture;
2844
     * fail.  The return value is the status to return.
2845
     */
2846
0
    munmap(handlep->oneshot_buffer, handle->snapshot);
2847
0
    handlep->oneshot_buffer = NULL;
2848
0
    return status;
2849
0
  }
2850
2851
  /*
2852
   * Success.  status has been set either to 0 if there are no
2853
   * warnings or to a PCAP_WARNING_ value if there is a warning.
2854
   *
2855
   * handle->offset is used to get the current position into the rx ring.
2856
   * handle->cc is used to store the ring size.
2857
   */
2858
2859
  /*
2860
   * Set the timeout to use in poll() before returning.
2861
   */
2862
0
  set_poll_timeout(handlep);
2863
2864
0
  return status;
2865
0
}
2866
2867
/*
2868
 * Attempt to set the socket to the specified version of the memory-mapped
2869
 * header.
2870
 *
2871
 * Return 0 if we succeed; return 1 if we fail because that version isn't
2872
 * supported; return -1 on any other error, and set handle->errbuf.
2873
 */
2874
static int
2875
init_tpacket(pcap_t *handle, int version, const char *version_str)
2876
0
{
2877
0
  struct pcap_linux *handlep = handle->priv;
2878
0
  int val = version;
2879
0
  socklen_t len = sizeof(val);
2880
2881
  /*
2882
   * Probe whether kernel supports the specified TPACKET version;
2883
   * this also gets the length of the header for that version.
2884
   *
2885
   * This socket option was introduced in 2.6.27, which was
2886
   * also the first release with TPACKET_V2 support.
2887
   */
2888
0
  if (getsockopt(handle->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
2889
0
    if (errno == EINVAL) {
2890
      /*
2891
       * EINVAL means this specific version of TPACKET
2892
       * is not supported. Tell the caller they can try
2893
       * with a different one; if they've run out of
2894
       * others to try, let them set the error message
2895
       * appropriately.
2896
       */
2897
0
      return 1;
2898
0
    }
2899
2900
    /*
2901
     * All other errors are fatal.
2902
     */
2903
0
    if (errno == ENOPROTOOPT) {
2904
      /*
2905
       * PACKET_HDRLEN isn't supported, which means
2906
       * that memory-mapped capture isn't supported.
2907
       * Indicate that in the message.
2908
       */
2909
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2910
0
          "Kernel doesn't support memory-mapped capture; a 2.6.27 or later 2.x kernel is required, with CONFIG_PACKET_MMAP specified for 2.x kernels");
2911
0
    } else {
2912
      /*
2913
       * Some unexpected error.
2914
       */
2915
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2916
0
          errno, "can't get %s header len on packet socket",
2917
0
          version_str);
2918
0
    }
2919
0
    return -1;
2920
0
  }
2921
0
  handlep->tp_hdrlen = val;
2922
2923
0
  val = version;
2924
0
  if (setsockopt(handle->fd, SOL_PACKET, PACKET_VERSION, &val,
2925
0
         sizeof(val)) < 0) {
2926
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2927
0
        errno, "can't activate %s on packet socket", version_str);
2928
0
    return -1;
2929
0
  }
2930
0
  handlep->tp_version = version;
2931
2932
0
  return 0;
2933
0
}
2934
2935
/*
2936
 * Attempt to set the socket to version 3 of the memory-mapped header and,
2937
 * if that fails because version 3 isn't supported, attempt to fall
2938
 * back to version 2.  If version 2 isn't supported, just fail.
2939
 *
2940
 * Return 0 if we succeed and -1 on any other error, and set handle->errbuf.
2941
 */
2942
static int
2943
prepare_tpacket_socket(pcap_t *handle)
2944
0
{
2945
0
  int ret;
2946
2947
0
#ifdef HAVE_TPACKET3
2948
  /*
2949
   * Try setting the version to TPACKET_V3.
2950
   *
2951
   * The only mode in which buffering is done on PF_PACKET
2952
   * sockets, so that packets might not be delivered
2953
   * immediately, is TPACKET_V3 mode.
2954
   *
2955
   * The buffering cannot be disabled in that mode, so
2956
   * if the user has requested immediate mode, we don't
2957
   * use TPACKET_V3.
2958
   */
2959
0
  if (!handle->opt.immediate) {
2960
0
    ret = init_tpacket(handle, TPACKET_V3, "TPACKET_V3");
2961
0
    if (ret == 0) {
2962
      /*
2963
       * Success.
2964
       */
2965
0
      return 0;
2966
0
    }
2967
0
    if (ret == -1) {
2968
      /*
2969
       * We failed for some reason other than "the
2970
       * kernel doesn't support TPACKET_V3".
2971
       */
2972
0
      return -1;
2973
0
    }
2974
2975
    /*
2976
     * This means it returned 1, which means "the kernel
2977
     * doesn't support TPACKET_V3"; try TPACKET_V2.
2978
     */
2979
0
  }
2980
0
#endif /* HAVE_TPACKET3 */
2981
2982
  /*
2983
   * Try setting the version to TPACKET_V2.
2984
   */
2985
0
  ret = init_tpacket(handle, TPACKET_V2, "TPACKET_V2");
2986
0
  if (ret == 0) {
2987
    /*
2988
     * Success.
2989
     */
2990
0
    return 0;
2991
0
  }
2992
2993
0
  if (ret == 1) {
2994
    /*
2995
     * OK, the kernel supports memory-mapped capture, but
2996
     * not TPACKET_V2.  Set the error message appropriately.
2997
     */
2998
0
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2999
0
        "Kernel doesn't support TPACKET_V2; a 2.6.27 or later kernel is required");
3000
0
  }
3001
3002
  /*
3003
   * We failed.
3004
   */
3005
0
  return -1;
3006
0
}
3007
3008
/*
3009
 * Attempt to set up memory-mapped access.
3010
 *
3011
 * On success, returns 0 if there are no warnings or to a PCAP_WARNING_ code
3012
 * if there is a warning.
3013
 *
3014
 * On error, returns the appropriate error code; if that is PCAP_ERROR,
3015
 * sets handle->errbuf to the appropriate message.
3016
 */
3017
static int
3018
create_ring(pcap_t *handle)
3019
0
{
3020
0
  struct pcap_linux *handlep = handle->priv;
3021
0
  unsigned i, j, frames_per_block;
3022
0
  int flags = MAP_SHARED;
3023
0
#ifdef HAVE_TPACKET3
3024
  /*
3025
   * For sockets using TPACKET_V2, the extra stuff at the end of a
3026
   * struct tpacket_req3 will be ignored, so this is OK even for
3027
   * those sockets.
3028
   */
3029
0
  struct tpacket_req3 req;
3030
#else
3031
  struct tpacket_req req;
3032
#endif
3033
0
  socklen_t len;
3034
0
  unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff;
3035
0
  unsigned int frame_size;
3036
0
  int status;
3037
3038
  /*
3039
   * Start out assuming no warnings.
3040
   */
3041
0
  status = 0;
3042
3043
  /*
3044
   * Reserve space for VLAN tag reconstruction.
3045
   */
3046
0
  tp_reserve = VLAN_TAG_LEN;
3047
3048
  /*
3049
   * If we're capturing in cooked mode, reserve space for
3050
   * a DLT_LINUX_SLL2 header; we don't know yet whether
3051
   * we'll be using DLT_LINUX_SLL or DLT_LINUX_SLL2, as
3052
   * that can be changed on an open device, so we reserve
3053
   * space for the larger of the two.
3054
   *
3055
   * XXX - we assume that the kernel is still adding
3056
   * 16 bytes of extra space, so we subtract 16 from
3057
   * SLL2_HDR_LEN to get the additional space needed.
3058
   * (Are they doing that for DLT_LINUX_SLL, the link-
3059
   * layer header for which is 16 bytes?)
3060
   *
3061
   * XXX - should we use TPACKET_ALIGN(SLL2_HDR_LEN - 16)?
3062
   */
3063
0
  if (handlep->cooked)
3064
0
    tp_reserve += SLL2_HDR_LEN - 16;
3065
3066
  /*
3067
   * Try to request that amount of reserve space.
3068
   * This must be done before creating the ring buffer.
3069
   */
3070
0
  len = sizeof(tp_reserve);
3071
0
  if (setsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE,
3072
0
      &tp_reserve, len) < 0) {
3073
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf,
3074
0
        PCAP_ERRBUF_SIZE, errno,
3075
0
        "setsockopt (PACKET_RESERVE)");
3076
0
    return PCAP_ERROR;
3077
0
  }
3078
3079
0
  switch (handlep->tp_version) {
3080
3081
0
  case TPACKET_V2:
3082
    /* Note that with large snapshot length (say 256K, which is
3083
     * the default for recent versions of tcpdump, Wireshark,
3084
     * TShark, dumpcap or 64K, the value that "-s 0" has given for
3085
     * a long time with tcpdump), if we use the snapshot
3086
     * length to calculate the frame length, only a few frames
3087
     * will be available in the ring even with pretty
3088
     * large ring size (and a lot of memory will be unused).
3089
     *
3090
     * Ideally, we should choose a frame length based on the
3091
     * minimum of the specified snapshot length and the maximum
3092
     * packet size.  That's not as easy as it sounds; consider,
3093
     * for example, an 802.11 interface in monitor mode, where
3094
     * the frame would include a radiotap header, where the
3095
     * maximum radiotap header length is device-dependent.
3096
     *
3097
     * So, for now, we just do this for Ethernet devices, where
3098
     * there's no metadata header, and the link-layer header is
3099
     * fixed length.  We can get the maximum packet size by
3100
     * adding 18, the Ethernet header length plus the CRC length
3101
     * (just in case we happen to get the CRC in the packet), to
3102
     * the MTU of the interface; we fetch the MTU in the hopes
3103
     * that it reflects support for jumbo frames.  (Even if the
3104
     * interface is just being used for passive snooping, the
3105
     * driver might set the size of buffers in the receive ring
3106
     * based on the MTU, so that the MTU limits the maximum size
3107
     * of packets that we can receive.)
3108
     *
3109
     * If segmentation/fragmentation or receive offload are
3110
     * enabled, we can get reassembled/aggregated packets larger
3111
     * than MTU, but bounded to 65535 plus the Ethernet overhead,
3112
     * due to kernel and protocol constraints */
3113
0
    frame_size = handle->snapshot;
3114
0
    if (handle->linktype == DLT_EN10MB) {
3115
0
      unsigned int max_frame_len;
3116
0
      int mtu;
3117
0
      int offload;
3118
3119
0
      mtu = iface_get_mtu(handle->fd, handle->opt.device,
3120
0
          handle->errbuf);
3121
0
      if (mtu == -1)
3122
0
        return PCAP_ERROR;
3123
0
      offload = iface_get_offload(handle);
3124
0
      if (offload == -1)
3125
0
        return PCAP_ERROR;
3126
0
      if (offload)
3127
0
        max_frame_len = max(mtu, 65535);
3128
0
      else
3129
0
        max_frame_len = mtu;
3130
0
      max_frame_len += 18;
3131
3132
0
      if (frame_size > max_frame_len)
3133
0
        frame_size = max_frame_len;
3134
0
    }
3135
3136
    /* NOTE: calculus matching those in tpacket_rcv()
3137
     * in linux-2.6/net/packet/af_packet.c
3138
     */
3139
0
    len = sizeof(sk_type);
3140
0
    if (getsockopt(handle->fd, SOL_SOCKET, SO_TYPE, &sk_type,
3141
0
        &len) < 0) {
3142
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
3143
0
          PCAP_ERRBUF_SIZE, errno, "getsockopt (SO_TYPE)");
3144
0
      return PCAP_ERROR;
3145
0
    }
3146
0
    maclen = (sk_type == SOCK_DGRAM) ? 0 : MAX_LINKHEADER_SIZE;
3147
      /* XXX: in the kernel maclen is calculated from
3148
       * LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len
3149
       * in:  packet_snd()           in linux-2.6/net/packet/af_packet.c
3150
       * then packet_alloc_skb()     in linux-2.6/net/packet/af_packet.c
3151
       * then sock_alloc_send_pskb() in linux-2.6/net/core/sock.c
3152
       * but I see no way to get those sizes in userspace,
3153
       * like for instance with an ifreq ioctl();
3154
       * the best thing I've found so far is MAX_HEADER in
3155
       * the kernel part of linux-2.6/include/linux/netdevice.h
3156
       * which goes up to 128+48=176; since pcap-linux.c
3157
       * defines a MAX_LINKHEADER_SIZE of 256 which is
3158
       * greater than that, let's use it.. maybe is it even
3159
       * large enough to directly replace macoff..
3160
       */
3161
0
    tp_hdrlen = TPACKET_ALIGN(handlep->tp_hdrlen) + sizeof(struct sockaddr_ll) ;
3162
0
    netoff = TPACKET_ALIGN(tp_hdrlen + (maclen < 16 ? 16 : maclen)) + tp_reserve;
3163
      /* NOTE: AFAICS tp_reserve may break the TPACKET_ALIGN
3164
       * of netoff, which contradicts
3165
       * linux-2.6/Documentation/networking/packet_mmap.txt
3166
       * documenting that:
3167
       * "- Gap, chosen so that packet data (Start+tp_net)
3168
       * aligns to TPACKET_ALIGNMENT=16"
3169
       */
3170
      /* NOTE: in linux-2.6/include/linux/skbuff.h:
3171
       * "CPUs often take a performance hit
3172
       *  when accessing unaligned memory locations"
3173
       */
3174
0
    macoff = netoff - maclen;
3175
0
    req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size);
3176
    /*
3177
     * Round the buffer size up to a multiple of the
3178
     * frame size (rather than rounding down, which
3179
     * would give a buffer smaller than our caller asked
3180
     * for, and possibly give zero frames if the requested
3181
     * buffer size is too small for one frame).
3182
     */
3183
0
    req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size;
3184
0
    break;
3185
3186
0
#ifdef HAVE_TPACKET3
3187
0
  case TPACKET_V3:
3188
    /* The "frames" for this are actually buffers that
3189
     * contain multiple variable-sized frames.
3190
     *
3191
     * We pick a "frame" size of MAXIMUM_SNAPLEN to leave
3192
     * enough room for at least one reasonably-sized packet
3193
     * in the "frame". */
3194
0
    req.tp_frame_size = MAXIMUM_SNAPLEN;
3195
    /*
3196
     * Round the buffer size up to a multiple of the
3197
     * "frame" size (rather than rounding down, which
3198
     * would give a buffer smaller than our caller asked
3199
     * for, and possibly give zero "frames" if the requested
3200
     * buffer size is too small for one "frame").
3201
     */
3202
0
    req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size;
3203
0
    break;
3204
0
#endif
3205
0
  default:
3206
0
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3207
0
        "Internal error: unknown TPACKET_ value %u",
3208
0
        handlep->tp_version);
3209
0
    return PCAP_ERROR;
3210
0
  }
3211
3212
  /* compute the minimum block size that will handle this frame.
3213
   * The block has to be page size aligned.
3214
   * The max block size allowed by the kernel is arch-dependent and
3215
   * it's not explicitly checked here. */
3216
0
  req.tp_block_size = getpagesize();
3217
0
  while (req.tp_block_size < req.tp_frame_size)
3218
0
    req.tp_block_size <<= 1;
3219
3220
0
  frames_per_block = req.tp_block_size/req.tp_frame_size;
3221
3222
  /*
3223
   * PACKET_TIMESTAMP was added after linux/net_tstamp.h was,
3224
   * so we check for PACKET_TIMESTAMP.  We check for
3225
   * linux/net_tstamp.h just in case a system somehow has
3226
   * PACKET_TIMESTAMP but not linux/net_tstamp.h; that might
3227
   * be unnecessary.
3228
   *
3229
   * SIOCSHWTSTAMP was introduced in the patch that introduced
3230
   * linux/net_tstamp.h, so we don't bother checking whether
3231
   * SIOCSHWTSTAMP is defined (if your Linux system has
3232
   * linux/net_tstamp.h but doesn't define SIOCSHWTSTAMP, your
3233
   * Linux system is badly broken).
3234
   */
3235
0
#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
3236
  /*
3237
   * If we were told to do so, ask the kernel and the driver
3238
   * to use hardware timestamps.
3239
   *
3240
   * Hardware timestamps are only supported with mmapped
3241
   * captures.
3242
   */
3243
0
  if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER ||
3244
0
      handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER_UNSYNCED) {
3245
0
    struct hwtstamp_config hwconfig;
3246
0
    struct ifreq ifr;
3247
0
    int timesource;
3248
3249
    /*
3250
     * Ask for hardware time stamps on all packets,
3251
     * including transmitted packets.
3252
     */
3253
0
    memset(&hwconfig, 0, sizeof(hwconfig));
3254
0
    hwconfig.tx_type = HWTSTAMP_TX_ON;
3255
0
    hwconfig.rx_filter = HWTSTAMP_FILTER_ALL;
3256
3257
0
    memset(&ifr, 0, sizeof(ifr));
3258
0
    pcapint_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name));
3259
0
    ifr.ifr_data = (void *)&hwconfig;
3260
3261
    /*
3262
     * This may require CAP_NET_ADMIN.
3263
     */
3264
0
    if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) {
3265
0
      switch (errno) {
3266
3267
0
      case EPERM:
3268
        /*
3269
         * Treat this as an error, as the
3270
         * user should try to run this
3271
         * with the appropriate privileges -
3272
         * and, if they can't, shouldn't
3273
         * try requesting hardware time stamps.
3274
         */
3275
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3276
0
            "Attempt to set hardware timestamp failed - CAP_NET_ADMIN may be required");
3277
0
        return PCAP_ERROR_PERM_DENIED;
3278
3279
0
      case EOPNOTSUPP:
3280
0
      case ERANGE:
3281
        /*
3282
         * Treat this as a warning, as the
3283
         * only way to fix the warning is to
3284
         * get an adapter that supports hardware
3285
         * time stamps for *all* packets.
3286
         * (ERANGE means "we support hardware
3287
         * time stamps, but for packets matching
3288
         * that particular filter", so it means
3289
         * "we don't support hardware time stamps
3290
         * for all incoming packets" here.)
3291
         *
3292
         * We'll just fall back on the standard
3293
         * host time stamps.
3294
         */
3295
0
        status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP;
3296
0
        break;
3297
3298
0
      default:
3299
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
3300
0
            PCAP_ERRBUF_SIZE, errno,
3301
0
            "SIOCSHWTSTAMP failed");
3302
0
        return PCAP_ERROR;
3303
0
      }
3304
0
    } else {
3305
      /*
3306
       * Well, that worked.  Now specify the type of
3307
       * hardware time stamp we want for this
3308
       * socket.
3309
       */
3310
0
      if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER) {
3311
        /*
3312
         * Hardware timestamp, synchronized
3313
         * with the system clock.
3314
         */
3315
0
        timesource = SOF_TIMESTAMPING_SYS_HARDWARE;
3316
0
      } else {
3317
        /*
3318
         * PCAP_TSTAMP_ADAPTER_UNSYNCED - hardware
3319
         * timestamp, not synchronized with the
3320
         * system clock.
3321
         */
3322
0
        timesource = SOF_TIMESTAMPING_RAW_HARDWARE;
3323
0
      }
3324
0
      if (setsockopt(handle->fd, SOL_PACKET, PACKET_TIMESTAMP,
3325
0
        (void *)&timesource, sizeof(timesource))) {
3326
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
3327
0
            PCAP_ERRBUF_SIZE, errno,
3328
0
            "can't set PACKET_TIMESTAMP");
3329
0
        return PCAP_ERROR;
3330
0
      }
3331
0
    }
3332
0
  }
3333
0
#endif /* HAVE_LINUX_NET_TSTAMP_H && PACKET_TIMESTAMP */
3334
3335
  /* ask the kernel to create the ring */
3336
0
retry:
3337
0
  req.tp_block_nr = req.tp_frame_nr / frames_per_block;
3338
3339
  /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */
3340
0
  req.tp_frame_nr = req.tp_block_nr * frames_per_block;
3341
3342
0
#ifdef HAVE_TPACKET3
3343
  /* timeout value to retire block - use the configured buffering timeout, or default if <0. */
3344
0
  if (handlep->timeout > 0) {
3345
    /* Use the user specified timeout as the block timeout */
3346
0
    req.tp_retire_blk_tov = handlep->timeout;
3347
0
  } else if (handlep->timeout == 0) {
3348
    /*
3349
     * In pcap, this means "infinite timeout"; TPACKET_V3
3350
     * doesn't support that, so just set it to UINT_MAX
3351
     * milliseconds.  In the TPACKET_V3 loop, if the
3352
     * timeout is 0, and we haven't yet seen any packets,
3353
     * and we block and still don't have any packets, we
3354
     * keep blocking until we do.
3355
     */
3356
0
    req.tp_retire_blk_tov = UINT_MAX;
3357
0
  } else {
3358
    /*
3359
     * XXX - this is not valid; use 0, meaning "have the
3360
     * kernel pick a default", for now.
3361
     */
3362
0
    req.tp_retire_blk_tov = 0;
3363
0
  }
3364
  /* private data not used */
3365
0
  req.tp_sizeof_priv = 0;
3366
  /* Rx ring - feature request bits - none (rxhash will not be filled) */
3367
0
  req.tp_feature_req_word = 0;
3368
0
#endif
3369
3370
0
  if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,
3371
0
          (void *) &req, sizeof(req))) {
3372
0
    if ((errno == ENOMEM) && (req.tp_block_nr > 1)) {
3373
      /*
3374
       * Memory failure; try to reduce the requested ring
3375
       * size.
3376
       *
3377
       * We used to reduce this by half -- do 5% instead.
3378
       * That may result in more iterations and a longer
3379
       * startup, but the user will be much happier with
3380
       * the resulting buffer size.
3381
       */
3382
0
      if (req.tp_frame_nr < 20)
3383
0
        req.tp_frame_nr -= 1;
3384
0
      else
3385
0
        req.tp_frame_nr -= req.tp_frame_nr/20;
3386
0
      goto retry;
3387
0
    }
3388
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
3389
0
        errno, "can't create rx ring on packet socket");
3390
0
    return PCAP_ERROR;
3391
0
  }
3392
3393
  /* memory map the rx ring */
3394
0
  handlep->mmapbuflen = req.tp_block_nr * req.tp_block_size;
3395
0
#ifdef MAP_32BIT
3396
0
  if (pcapint_mmap_32bit) flags |= MAP_32BIT;
3397
0
#endif
3398
0
  handlep->mmapbuf = mmap(0, handlep->mmapbuflen, PROT_READ | PROT_WRITE, flags, handle->fd, 0);
3399
0
  if (handlep->mmapbuf == MAP_FAILED) {
3400
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
3401
0
        errno, "can't mmap rx ring");
3402
3403
    /* clear the allocated ring on error*/
3404
0
    destroy_ring(handle);
3405
0
    return PCAP_ERROR;
3406
0
  }
3407
3408
  /* allocate a ring for each frame header pointer*/
3409
0
  handle->cc = req.tp_frame_nr;
3410
0
  handle->buffer = malloc(handle->cc * sizeof(union thdr *));
3411
0
  if (!handle->buffer) {
3412
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
3413
0
        errno, "can't allocate ring of frame headers");
3414
3415
0
    destroy_ring(handle);
3416
0
    return PCAP_ERROR;
3417
0
  }
3418
3419
  /* fill the header ring with proper frame ptr*/
3420
0
  handle->offset = 0;
3421
0
  for (i=0; i<req.tp_block_nr; ++i) {
3422
0
    u_char *base = &handlep->mmapbuf[i*req.tp_block_size];
3423
0
    for (j=0; j<frames_per_block; ++j, ++handle->offset) {
3424
0
      RING_GET_CURRENT_FRAME(handle) = base;
3425
0
      base += req.tp_frame_size;
3426
0
    }
3427
0
  }
3428
3429
0
  handle->bufsize = req.tp_frame_size;
3430
0
  handle->offset = 0;
3431
0
  return status;
3432
0
}
3433
3434
/* free all ring related resources*/
3435
static void
3436
destroy_ring(pcap_t *handle)
3437
0
{
3438
0
  struct pcap_linux *handlep = handle->priv;
3439
3440
  /*
3441
   * Tell the kernel to destroy the ring.
3442
   * We don't check for setsockopt failure, as 1) we can't recover
3443
   * from an error and 2) we might not yet have set it up in the
3444
   * first place.
3445
   */
3446
0
  struct tpacket_req req;
3447
0
  memset(&req, 0, sizeof(req));
3448
0
  (void)setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,
3449
0
        (void *) &req, sizeof(req));
3450
3451
  /* if ring is mapped, unmap it*/
3452
0
  if (handlep->mmapbuf) {
3453
    /* do not test for mmap failure, as we can't recover from any error */
3454
0
    (void)munmap(handlep->mmapbuf, handlep->mmapbuflen);
3455
0
    handlep->mmapbuf = NULL;
3456
0
  }
3457
0
}
3458
3459
/*
3460
 * Special one-shot callback, used for pcap_next() and pcap_next_ex(),
3461
 * for Linux mmapped capture.
3462
 *
3463
 * The problem is that pcap_next() and pcap_next_ex() expect the packet
3464
 * data handed to the callback to be valid after the callback returns,
3465
 * but pcap_read_linux_mmap() has to release that packet as soon as
3466
 * the callback returns (otherwise, the kernel thinks there's still
3467
 * at least one unprocessed packet available in the ring, so a select()
3468
 * will immediately return indicating that there's data to process), so,
3469
 * in the callback, we have to make a copy of the packet.
3470
 *
3471
 * Yes, this means that, if the capture is using the ring buffer, using
3472
 * pcap_next() or pcap_next_ex() requires more copies than using
3473
 * pcap_loop() or pcap_dispatch().  If that bothers you, don't use
3474
 * pcap_next() or pcap_next_ex().
3475
 */
3476
static void
3477
pcapint_oneshot_linux(u_char *user, const struct pcap_pkthdr *h,
3478
    const u_char *bytes)
3479
0
{
3480
0
  struct oneshot_userdata *sp = (struct oneshot_userdata *)user;
3481
0
  pcap_t *handle = sp->pd;
3482
0
  struct pcap_linux *handlep = handle->priv;
3483
3484
0
  *sp->hdr = *h;
3485
0
  memcpy(handlep->oneshot_buffer, bytes, h->caplen);
3486
0
  *sp->pkt = handlep->oneshot_buffer;
3487
0
}
3488
3489
static int
3490
pcap_getnonblock_linux(pcap_t *handle)
3491
0
{
3492
0
  struct pcap_linux *handlep = handle->priv;
3493
3494
  /* use negative value of timeout to indicate non blocking ops */
3495
0
  return (handlep->timeout<0);
3496
0
}
3497
3498
static int
3499
pcap_setnonblock_linux(pcap_t *handle, int nonblock)
3500
0
{
3501
0
  struct pcap_linux *handlep = handle->priv;
3502
3503
  /*
3504
   * Set the file descriptor to the requested mode, as we use
3505
   * it for sending packets.
3506
   */
3507
0
  if (pcapint_setnonblock_fd(handle, nonblock) == -1)
3508
0
    return -1;
3509
3510
  /*
3511
   * Map each value to their corresponding negation to
3512
   * preserve the timeout value provided with pcap_set_timeout.
3513
   */
3514
0
  if (nonblock) {
3515
    /*
3516
     * We're setting the mode to non-blocking mode.
3517
     */
3518
0
    if (handlep->timeout >= 0) {
3519
      /*
3520
       * Indicate that we're switching to
3521
       * non-blocking mode.
3522
       */
3523
0
      handlep->timeout = ~handlep->timeout;
3524
0
    }
3525
0
    if (handlep->poll_breakloop_fd != -1) {
3526
      /* Close the eventfd; we do not need it in nonblock mode. */
3527
0
      close(handlep->poll_breakloop_fd);
3528
0
      handlep->poll_breakloop_fd = -1;
3529
0
    }
3530
0
  } else {
3531
    /*
3532
     * We're setting the mode to blocking mode.
3533
     */
3534
0
    if (handlep->poll_breakloop_fd == -1) {
3535
      /* If we did not have an eventfd, open one now that we are blocking. */
3536
0
      if ( ( handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK) ) == -1 ) {
3537
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
3538
0
            PCAP_ERRBUF_SIZE, errno,
3539
0
            "could not open eventfd");
3540
0
        return -1;
3541
0
      }
3542
0
    }
3543
0
    if (handlep->timeout < 0) {
3544
0
      handlep->timeout = ~handlep->timeout;
3545
0
    }
3546
0
  }
3547
  /* Update the timeout to use in poll(). */
3548
0
  set_poll_timeout(handlep);
3549
0
  return 0;
3550
0
}
3551
3552
/*
3553
 * Get the status field of the ring buffer frame at a specified offset.
3554
 */
3555
static inline u_int
3556
pcap_get_ring_frame_status(pcap_t *handle, u_int offset)
3557
0
{
3558
0
  struct pcap_linux *handlep = handle->priv;
3559
0
  union thdr h;
3560
3561
0
  h.raw = RING_GET_FRAME_AT(handle, offset);
3562
0
  switch (handlep->tp_version) {
3563
0
  case TPACKET_V2:
3564
0
    return __atomic_load_n(&h.h2->tp_status, __ATOMIC_ACQUIRE);
3565
0
#ifdef HAVE_TPACKET3
3566
0
  case TPACKET_V3:
3567
0
    return __atomic_load_n(&h.h3->hdr.bh1.block_status, __ATOMIC_ACQUIRE);
3568
0
#endif
3569
0
  }
3570
  /* This should not happen. */
3571
0
  return 0;
3572
0
}
3573
3574
/*
3575
 * Block waiting for frames to be available.
3576
 */
3577
static int pcap_wait_for_frames_mmap(pcap_t *handle)
3578
0
{
3579
0
  struct pcap_linux *handlep = handle->priv;
3580
0
  int timeout;
3581
0
  struct ifreq ifr;
3582
0
  int ret;
3583
0
  struct pollfd pollinfo[2];
3584
0
  int numpollinfo;
3585
0
  pollinfo[0].fd = handle->fd;
3586
0
  pollinfo[0].events = POLLIN;
3587
0
  if ( handlep->poll_breakloop_fd == -1 ) {
3588
0
    numpollinfo = 1;
3589
0
    pollinfo[1].revents = 0;
3590
    /*
3591
     * We set pollinfo[1].revents to zero, even though
3592
     * numpollinfo = 1 meaning that poll() doesn't see
3593
     * pollinfo[1], so that we do not have to add a
3594
     * conditional of numpollinfo > 1 below when we
3595
     * test pollinfo[1].revents.
3596
     */
3597
0
  } else {
3598
0
    pollinfo[1].fd = handlep->poll_breakloop_fd;
3599
0
    pollinfo[1].events = POLLIN;
3600
0
    numpollinfo = 2;
3601
0
  }
3602
3603
  /*
3604
   * Keep polling until we either get some packets to read, see
3605
   * that we got told to break out of the loop, get a fatal error,
3606
   * or discover that the device went away.
3607
   *
3608
   * In non-blocking mode, we must still do one poll() to catch
3609
   * any pending error indications, but the poll() has a timeout
3610
   * of 0, so that it doesn't block, and we quit after that one
3611
   * poll().
3612
   *
3613
   * If we've seen an ENETDOWN, it might be the first indication
3614
   * that the device went away, or it might just be that it was
3615
   * configured down.  Unfortunately, there's no guarantee that
3616
   * the device has actually been removed as an interface, because:
3617
   *
3618
   * 1) if, as appears to be the case at least some of the time,
3619
   * the PF_PACKET socket code first gets a NETDEV_DOWN indication
3620
   * for the device and then gets a NETDEV_UNREGISTER indication
3621
   * for it, the first indication will cause a wakeup with ENETDOWN
3622
   * but won't set the packet socket's field for the interface index
3623
   * to -1, and the second indication won't cause a wakeup (because
3624
   * the first indication also caused the protocol hook to be
3625
   * unregistered) but will set the packet socket's field for the
3626
   * interface index to -1;
3627
   *
3628
   * 2) even if just a NETDEV_UNREGISTER indication is registered,
3629
   * the packet socket's field for the interface index only gets
3630
   * set to -1 after the wakeup, so there's a small but non-zero
3631
   * risk that a thread blocked waiting for the wakeup will get
3632
   * to the "fetch the socket name" code before the interface index
3633
   * gets set to -1, so it'll get the old interface index.
3634
   *
3635
   * Therefore, if we got an ENETDOWN and haven't seen a packet
3636
   * since then, we assume that we might be waiting for the interface
3637
   * to disappear, and poll with a timeout to try again in a short
3638
   * period of time.  If we *do* see a packet, the interface has
3639
   * come back up again, and is *definitely* still there, so we
3640
   * don't need to poll.
3641
   */
3642
0
  for (;;) {
3643
    /*
3644
     * Yes, we do this even in non-blocking mode, as it's
3645
     * the only way to get error indications from a
3646
     * tpacket socket.
3647
     *
3648
     * The timeout is 0 in non-blocking mode, so poll()
3649
     * returns immediately.
3650
     */
3651
0
    timeout = handlep->poll_timeout;
3652
3653
    /*
3654
     * If we got an ENETDOWN and haven't gotten an indication
3655
     * that the device has gone away or that the device is up,
3656
     * we don't yet know for certain whether the device has
3657
     * gone away or not, do a poll() with a 1-millisecond timeout,
3658
     * as we have to poll indefinitely for "device went away"
3659
     * indications until we either get one or see that the
3660
     * device is up.
3661
     */
3662
0
    if (handlep->netdown) {
3663
0
      if (timeout != 0)
3664
0
        timeout = 1;
3665
0
    }
3666
0
    ret = poll(pollinfo, numpollinfo, timeout);
3667
0
    if (ret < 0) {
3668
      /*
3669
       * Error.  If it's not EINTR, report it.
3670
       */
3671
0
      if (errno != EINTR) {
3672
0
        pcapint_fmt_errmsg_for_errno(handle->errbuf,
3673
0
            PCAP_ERRBUF_SIZE, errno,
3674
0
            "can't poll on packet socket");
3675
0
        return PCAP_ERROR;
3676
0
      }
3677
3678
      /*
3679
       * It's EINTR; if we were told to break out of
3680
       * the loop, do so.
3681
       */
3682
0
      if (handle->break_loop) {
3683
0
        handle->break_loop = 0;
3684
0
        return PCAP_ERROR_BREAK;
3685
0
      }
3686
0
    } else if (ret > 0) {
3687
      /*
3688
       * OK, some descriptor is ready.
3689
       * Check the socket descriptor first.
3690
       *
3691
       * As I read the Linux man page, pollinfo[0].revents
3692
       * will either be POLLIN, POLLERR, POLLHUP, or POLLNVAL.
3693
       */
3694
0
      if (pollinfo[0].revents == POLLIN) {
3695
        /*
3696
         * OK, we may have packets to
3697
         * read.
3698
         */
3699
0
        break;
3700
0
      }
3701
0
      if (pollinfo[0].revents != 0) {
3702
        /*
3703
         * There's some indication other than
3704
         * "you can read on this descriptor" on
3705
         * the descriptor.
3706
         */
3707
0
        if (pollinfo[0].revents & POLLNVAL) {
3708
0
          snprintf(handle->errbuf,
3709
0
              PCAP_ERRBUF_SIZE,
3710
0
              "Invalid polling request on packet socket");
3711
0
          return PCAP_ERROR;
3712
0
        }
3713
0
        if (pollinfo[0].revents & (POLLHUP | POLLRDHUP)) {
3714
0
          snprintf(handle->errbuf,
3715
0
              PCAP_ERRBUF_SIZE,
3716
0
              "Hangup on packet socket");
3717
0
          return PCAP_ERROR;
3718
0
        }
3719
0
        if (pollinfo[0].revents & POLLERR) {
3720
          /*
3721
           * Get the error.
3722
           */
3723
0
          int err;
3724
0
          socklen_t errlen;
3725
3726
0
          errlen = sizeof(err);
3727
0
          if (getsockopt(handle->fd, SOL_SOCKET,
3728
0
              SO_ERROR, &err, &errlen) == -1) {
3729
            /*
3730
             * The call *itself* returned
3731
             * an error; make *that*
3732
             * the error.
3733
             */
3734
0
            err = errno;
3735
0
          }
3736
3737
          /*
3738
           * OK, we have the error.
3739
           */
3740
0
          if (err == ENETDOWN) {
3741
            /*
3742
             * The device on which we're
3743
             * capturing went away or the
3744
             * interface was taken down.
3745
             *
3746
             * We don't know for certain
3747
             * which happened, and the
3748
             * next poll() may indicate
3749
             * that there are packets
3750
             * to be read, so just set
3751
             * a flag to get us to do
3752
             * checks later, and set
3753
             * the required select
3754
             * timeout to 1 millisecond
3755
             * so that event loops that
3756
             * check our socket descriptor
3757
             * also time out so that
3758
             * they can call us and we
3759
             * can do the checks.
3760
             */
3761
0
            handlep->netdown = 1;
3762
0
            handle->required_select_timeout = &netdown_timeout;
3763
0
          } else if (err == 0) {
3764
            /*
3765
             * This shouldn't happen, so
3766
             * report a special indication
3767
             * that it did.
3768
             */
3769
0
            snprintf(handle->errbuf,
3770
0
                PCAP_ERRBUF_SIZE,
3771
0
                "Error condition on packet socket: Reported error was 0");
3772
0
            return PCAP_ERROR;
3773
0
          } else {
3774
0
            pcapint_fmt_errmsg_for_errno(handle->errbuf,
3775
0
                PCAP_ERRBUF_SIZE,
3776
0
                err,
3777
0
                "Error condition on packet socket");
3778
0
            return PCAP_ERROR;
3779
0
          }
3780
0
        }
3781
0
      }
3782
      /*
3783
       * Now check the event device.
3784
       */
3785
0
      if (pollinfo[1].revents & POLLIN) {
3786
0
        ssize_t nread;
3787
0
        uint64_t value;
3788
3789
        /*
3790
         * This should never fail, but, just
3791
         * in case....
3792
         */
3793
0
        nread = read(handlep->poll_breakloop_fd, &value,
3794
0
            sizeof(value));
3795
0
        if (nread == -1) {
3796
0
          pcapint_fmt_errmsg_for_errno(handle->errbuf,
3797
0
              PCAP_ERRBUF_SIZE,
3798
0
              errno,
3799
0
              "Error reading from event FD");
3800
0
          return PCAP_ERROR;
3801
0
        }
3802
3803
        /*
3804
         * According to the Linux read(2) man
3805
         * page, read() will transfer at most
3806
         * 2^31-1 bytes, so the return value is
3807
         * either -1 or a value between 0
3808
         * and 2^31-1, so it's non-negative.
3809
         *
3810
         * Cast it to size_t to squelch
3811
         * warnings from the compiler; add this
3812
         * comment to squelch warnings from
3813
         * humans reading the code. :-)
3814
         *
3815
         * Don't treat an EOF as an error, but
3816
         * *do* treat a short read as an error;
3817
         * that "shouldn't happen", but....
3818
         */
3819
0
        if (nread != 0 &&
3820
0
            (size_t)nread < sizeof(value)) {
3821
0
          snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3822
0
              "Short read from event FD: expected %zu, got %zd",
3823
0
              sizeof(value), nread);
3824
0
          return PCAP_ERROR;
3825
0
        }
3826
3827
        /*
3828
         * This event gets signaled by a
3829
         * pcap_breakloop() call; if we were told
3830
         * to break out of the loop, do so.
3831
         */
3832
0
        if (handle->break_loop) {
3833
0
          handle->break_loop = 0;
3834
0
          return PCAP_ERROR_BREAK;
3835
0
        }
3836
0
      }
3837
0
    }
3838
3839
    /*
3840
     * Either:
3841
     *
3842
     *   1) we got neither an error from poll() nor any
3843
     *      readable descriptors, in which case there
3844
     *      are no packets waiting to read
3845
     *
3846
     * or
3847
     *
3848
     *   2) We got readable descriptors but the PF_PACKET
3849
     *      socket wasn't one of them, in which case there
3850
     *      are no packets waiting to read
3851
     *
3852
     * so, if we got an ENETDOWN, we've drained whatever
3853
     * packets were available to read at the point of the
3854
     * ENETDOWN.
3855
     *
3856
     * So, if we got an ENETDOWN and haven't gotten an indication
3857
     * that the device has gone away or that the device is up,
3858
     * we don't yet know for certain whether the device has
3859
     * gone away or not, check whether the device exists and is
3860
     * up.
3861
     */
3862
0
    if (handlep->netdown) {
3863
0
      if (!device_still_exists(handle)) {
3864
        /*
3865
         * The device doesn't exist any more;
3866
         * report that.
3867
         *
3868
         * XXX - we should really return an
3869
         * appropriate error for that, but
3870
         * pcap_dispatch() etc. aren't documented
3871
         * as having error returns other than
3872
         * PCAP_ERROR or PCAP_ERROR_BREAK.
3873
         */
3874
0
        snprintf(handle->errbuf,  PCAP_ERRBUF_SIZE,
3875
0
            "The interface disappeared");
3876
0
        return PCAP_ERROR;
3877
0
      }
3878
3879
      /*
3880
       * The device still exists; try to see if it's up.
3881
       */
3882
0
      memset(&ifr, 0, sizeof(ifr));
3883
0
      pcapint_strlcpy(ifr.ifr_name, handlep->device,
3884
0
          sizeof(ifr.ifr_name));
3885
0
      if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
3886
0
        if (errno == ENXIO || errno == ENODEV) {
3887
          /*
3888
           * OK, *now* it's gone.
3889
           *
3890
           * XXX - see above comment.
3891
           */
3892
0
          snprintf(handle->errbuf,
3893
0
              PCAP_ERRBUF_SIZE,
3894
0
              "The interface disappeared");
3895
0
          return PCAP_ERROR;
3896
0
        } else {
3897
0
          pcapint_fmt_errmsg_for_errno(handle->errbuf,
3898
0
              PCAP_ERRBUF_SIZE, errno,
3899
0
              "%s: Can't get flags",
3900
0
              handlep->device);
3901
0
          return PCAP_ERROR;
3902
0
        }
3903
0
      }
3904
0
      if (ifr.ifr_flags & IFF_UP) {
3905
        /*
3906
         * It's up, so it definitely still exists.
3907
         * Cancel the ENETDOWN indication - we
3908
         * presumably got it due to the interface
3909
         * going down rather than the device going
3910
         * away - and revert to "no required select
3911
         * timeout.
3912
         */
3913
0
        handlep->netdown = 0;
3914
0
        handle->required_select_timeout = NULL;
3915
0
      }
3916
0
    }
3917
3918
    /*
3919
     * If we're in non-blocking mode, just quit now, rather
3920
     * than spinning in a loop doing poll()s that immediately
3921
     * time out if there's no indication on any descriptor.
3922
     */
3923
0
    if (handlep->poll_timeout == 0)
3924
0
      break;
3925
0
  }
3926
0
  return 0;
3927
0
}
3928
3929
/* handle a single memory mapped packet */
3930
static int pcap_handle_packet_mmap(
3931
    pcap_t *handle,
3932
    pcap_handler callback,
3933
    u_char *user,
3934
    unsigned char *frame,
3935
    unsigned int tp_len,
3936
    unsigned int tp_mac,
3937
    unsigned int tp_snaplen,
3938
    unsigned int tp_sec,
3939
    unsigned int tp_usec,
3940
    int tp_vlan_tci_valid,
3941
    __u16 tp_vlan_tci,
3942
    __u16 tp_vlan_tpid)
3943
0
{
3944
0
  struct pcap_linux *handlep = handle->priv;
3945
0
  unsigned char *bp;
3946
0
  struct sockaddr_ll *sll;
3947
0
  struct pcap_pkthdr pcaphdr;
3948
0
  unsigned int snaplen = tp_snaplen;
3949
0
  struct utsname utsname;
3950
3951
  /* perform sanity check on internal offset. */
3952
0
  if (tp_mac + tp_snaplen > handle->bufsize) {
3953
    /*
3954
     * Report some system information as a debugging aid.
3955
     */
3956
0
    if (uname(&utsname) != -1) {
3957
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3958
0
        "corrupted frame on kernel ring mac "
3959
0
        "offset %u + caplen %u > frame len %d "
3960
0
        "(kernel %.32s version %s, machine %.16s)",
3961
0
        tp_mac, tp_snaplen, handle->bufsize,
3962
0
        utsname.release, utsname.version,
3963
0
        utsname.machine);
3964
0
    } else {
3965
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3966
0
        "corrupted frame on kernel ring mac "
3967
0
        "offset %u + caplen %u > frame len %d",
3968
0
        tp_mac, tp_snaplen, handle->bufsize);
3969
0
    }
3970
0
    return -1;
3971
0
  }
3972
3973
  /* run filter on received packet
3974
   * If the kernel filtering is enabled we need to run the
3975
   * filter until all the frames present into the ring
3976
   * at filter creation time are processed.
3977
   * In this case, blocks_to_filter_in_userland is used
3978
   * as a counter for the packet we need to filter.
3979
   * Note: alternatively it could be possible to stop applying
3980
   * the filter when the ring became empty, but it can possibly
3981
   * happen a lot later... */
3982
0
  bp = frame + tp_mac;
3983
3984
  /* if required build in place the sll header*/
3985
0
  sll = (void *)(frame + TPACKET_ALIGN(handlep->tp_hdrlen));
3986
0
  if (handlep->cooked) {
3987
0
    if (handle->linktype == DLT_LINUX_SLL2) {
3988
0
      struct sll2_header *hdrp;
3989
3990
      /*
3991
       * The kernel should have left us with enough
3992
       * space for an sll header; back up the packet
3993
       * data pointer into that space, as that'll be
3994
       * the beginning of the packet we pass to the
3995
       * callback.
3996
       */
3997
0
      bp -= SLL2_HDR_LEN;
3998
3999
      /*
4000
       * Let's make sure that's past the end of
4001
       * the tpacket header, i.e. >=
4002
       * ((u_char *)thdr + TPACKET_HDRLEN), so we
4003
       * don't step on the header when we construct
4004
       * the sll header.
4005
       */
4006
0
      if (bp < (u_char *)frame +
4007
0
             TPACKET_ALIGN(handlep->tp_hdrlen) +
4008
0
             sizeof(struct sockaddr_ll)) {
4009
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4010
0
          "cooked-mode frame doesn't have room for sll header");
4011
0
        return -1;
4012
0
      }
4013
4014
      /*
4015
       * OK, that worked; construct the sll header.
4016
       */
4017
0
      hdrp = (struct sll2_header *)bp;
4018
0
      hdrp->sll2_protocol = sll->sll_protocol;
4019
0
      hdrp->sll2_reserved_mbz = 0;
4020
0
      hdrp->sll2_if_index = htonl(sll->sll_ifindex);
4021
0
      hdrp->sll2_hatype = htons(sll->sll_hatype);
4022
0
      hdrp->sll2_pkttype = sll->sll_pkttype;
4023
0
      hdrp->sll2_halen = sll->sll_halen;
4024
0
      memcpy(hdrp->sll2_addr, sll->sll_addr, SLL_ADDRLEN);
4025
4026
0
      snaplen += sizeof(struct sll2_header);
4027
0
    } else {
4028
0
      struct sll_header *hdrp;
4029
4030
      /*
4031
       * The kernel should have left us with enough
4032
       * space for an sll header; back up the packet
4033
       * data pointer into that space, as that'll be
4034
       * the beginning of the packet we pass to the
4035
       * callback.
4036
       */
4037
0
      bp -= SLL_HDR_LEN;
4038
4039
      /*
4040
       * Let's make sure that's past the end of
4041
       * the tpacket header, i.e. >=
4042
       * ((u_char *)thdr + TPACKET_HDRLEN), so we
4043
       * don't step on the header when we construct
4044
       * the sll header.
4045
       */
4046
0
      if (bp < (u_char *)frame +
4047
0
             TPACKET_ALIGN(handlep->tp_hdrlen) +
4048
0
             sizeof(struct sockaddr_ll)) {
4049
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4050
0
          "cooked-mode frame doesn't have room for sll header");
4051
0
        return -1;
4052
0
      }
4053
4054
      /*
4055
       * OK, that worked; construct the sll header.
4056
       */
4057
0
      hdrp = (struct sll_header *)bp;
4058
0
      hdrp->sll_pkttype = htons(sll->sll_pkttype);
4059
0
      hdrp->sll_hatype = htons(sll->sll_hatype);
4060
0
      hdrp->sll_halen = htons(sll->sll_halen);
4061
0
      memcpy(hdrp->sll_addr, sll->sll_addr, SLL_ADDRLEN);
4062
0
      hdrp->sll_protocol = sll->sll_protocol;
4063
4064
0
      snaplen += sizeof(struct sll_header);
4065
0
    }
4066
0
  } else {
4067
    /*
4068
     * If this is a packet from a CAN device, so that
4069
     * sll->sll_hatype is ARPHRD_CAN, then, as we're
4070
     * not capturing in cooked mode, its link-layer
4071
     * type is DLT_CAN_SOCKETCAN.  Fix up the header
4072
     * provided by the code below us to match what
4073
     * DLT_CAN_SOCKETCAN is expected to provide.
4074
     */
4075
0
    if (sll->sll_hatype == ARPHRD_CAN) {
4076
0
      pcap_can_socketcan_hdr *canhdr = (pcap_can_socketcan_hdr *)bp;
4077
0
      pcap_can_socketcan_xl_hdr *canxl_hdr = (pcap_can_socketcan_xl_hdr *)bp;
4078
0
      uint16_t protocol = ntohs(sll->sll_protocol);
4079
4080
      /*
4081
       * Check the protocol field from the sll header.
4082
       * If it's one of the known CAN protocol types,
4083
       * make sure the appropriate flags are set, so
4084
       * that a program can tell what type of frame
4085
       * it is.
4086
       *
4087
       * These operations should not have any effect
4088
       * when reading proper CAN frames from Linux
4089
       * CAN interfaces. Enforcing these bit values
4090
       * ensures proper DLT_CAN_SOCKETCAN data even
4091
       * with malformed PF_PACKET content.
4092
       *
4093
       * The two flags are:
4094
       *
4095
       *   CANFD_FDF, which is in the fd_flags field
4096
       *   of the CAN CC/CAN FD header;
4097
       *
4098
       *   CANXL_XLF, which is in the flags field
4099
       *   of the CAN XL header, which overlaps
4100
       *   the payload_length field of the CAN CC/
4101
       *   CAN FD header. Setting CANXL_XLF in the
4102
       *   payload_length of CAN CC/FD frames would
4103
       *   intentionally break the payload length.
4104
       */
4105
0
      switch (protocol) {
4106
4107
0
      case LINUX_SLL_P_CAN:
4108
        /*
4109
         * CAN CC frame (aka Classical CAN, CAN 2.0B)
4110
         *
4111
         * Zero out the CAN FD and CAN XL flags
4112
         * so that this frame will be identified
4113
         * as a CAN CC frame.
4114
         */
4115
0
        canxl_hdr->flags &= ~CANXL_XLF;
4116
0
        canhdr->fd_flags &= ~CANFD_FDF;
4117
0
        break;
4118
4119
0
      case LINUX_SLL_P_CANFD:
4120
        /*
4121
         * CAN FD frame
4122
         *
4123
         * Set CANFD_FDF in the fd_flags field,
4124
         * and clear the CANXL_XLF bit in the
4125
         * CAN XL flags field, so that this frame
4126
         * will be identified as a CAN FD frame.
4127
         *
4128
         * The CANFD_FDF bit is not reliably
4129
         * set by the Linux kernel. But setting
4130
         * that bit for CAN FD is recommended.
4131
         */
4132
0
        canxl_hdr->flags &= ~CANXL_XLF;
4133
0
        canhdr->fd_flags |= CANFD_FDF;
4134
0
        break;
4135
4136
0
      case LINUX_SLL_P_CANXL:
4137
        /*
4138
         * CAN XL frame
4139
         *
4140
         * Set CANXL_XLF bit in the CAN XL flags
4141
         * field, so that this frame will appear
4142
         * to be a CAN XL frame.
4143
         */
4144
0
        canxl_hdr->flags |= CANXL_XLF;
4145
0
        break;
4146
0
      }
4147
4148
      /*
4149
       * Put multi-byte header fields in a byte-order
4150
       * -independent format.
4151
       */
4152
0
      if (canxl_hdr->flags & CANXL_XLF) {
4153
        /*
4154
         * This is a CAN XL frame.
4155
         *
4156
         * DLT_CAN_SOCKETCAN is specified as having
4157
         * the Priority ID/VCID field in big-
4158
         * endian byte order, and the payload length
4159
         * and Acceptance Field in little-endian byte
4160
         * order, but capturing on a CAN device
4161
         * provides them in host byte order.
4162
         * Convert them to the appropriate byte
4163
         * orders.
4164
         *
4165
         * The reason we put the first field
4166
         * into big-endian byte order is that
4167
         * older libpcap code, ignorant of
4168
         * CAN XL, treated it as the CAN ID
4169
         * field and put it into big-endian
4170
         * byte order, and we don't want to
4171
         * break code that understands CAN XL
4172
         * headers, and treats that field as
4173
         * being big-endian.
4174
         *
4175
         * The reason other fields are put in little-
4176
         * endian byte order is that older
4177
         * libpcap code, ignorant of CAN XL,
4178
         * left those fields alone, and the
4179
         * processors on which the CAN XL
4180
         * frames were captured are likely
4181
         * to be little-endian processors.
4182
         */
4183
4184
0
#if __BYTE_ORDER == __LITTLE_ENDIAN
4185
        /*
4186
         * We're capturing on a little-endian
4187
         * machine, so we put the priority/VCID
4188
         * field into big-endian byte order, and
4189
         * leave the payload length and acceptance
4190
         * field in little-endian byte order.
4191
         */
4192
        /* Byte-swap priority/VCID. */
4193
0
        canxl_hdr->priority_vcid = SWAPLONG(canxl_hdr->priority_vcid);
4194
#elif __BYTE_ORDER == __BIG_ENDIAN
4195
        /*
4196
         * We're capturing on a big-endian
4197
         * machine, so we want to leave the
4198
         * priority/VCID field alone, and byte-swap
4199
         * the payload length and acceptance
4200
         * fields to little-endian.
4201
         */
4202
        /* Byte-swap the payload length */
4203
        canxl_hdr->payload_length = SWAPSHORT(canxl_hdr->payload_length);
4204
4205
        /*
4206
         * Byte-swap the acceptance field.
4207
         *
4208
         * XXX - is it just a 4-octet string,
4209
         * not in any byte order?
4210
         */
4211
        canxl_hdr->acceptance_field = SWAPLONG(canxl_hdr->acceptance_field);
4212
#else
4213
#error "Unknown byte order"
4214
#endif
4215
0
      } else {
4216
        /*
4217
         * CAN CC or CAN FD frame.
4218
         *
4219
         * DLT_CAN_SOCKETCAN is specified as having
4220
         * the CAN ID and flags in network byte
4221
         * order, but capturing on a CAN device
4222
         * provides it in host byte order.  Convert
4223
         * it to network byte order.
4224
         */
4225
0
        canhdr->can_id = htonl(canhdr->can_id);
4226
0
      }
4227
0
    }
4228
0
  }
4229
4230
0
  if (handlep->filter_in_userland && handle->fcode.bf_insns) {
4231
0
    struct pcap_bpf_aux_data aux_data;
4232
4233
0
    aux_data.vlan_tag_present = tp_vlan_tci_valid;
4234
0
    aux_data.vlan_tag = tp_vlan_tci & 0x0fff;
4235
4236
0
    if (pcapint_filter_with_aux_data(handle->fcode.bf_insns,
4237
0
                bp,
4238
0
                tp_len,
4239
0
                snaplen,
4240
0
                &aux_data) == 0)
4241
0
      return 0;
4242
0
  }
4243
4244
0
  if (!linux_check_direction(handle, sll))
4245
0
    return 0;
4246
4247
  /*
4248
   * Get required packet info from ring header.
4249
   *
4250
   * The seconds part of the time stamp is a 32-bit
4251
   * unsigned integer; this will have a problem in 2106,
4252
   * but not in 2038.
4253
   *
4254
   * ts.tv_sec is a time_t, which is signed, and which
4255
   * may be 32-bit or 64-bit.  Pass it through; if we
4256
   * have a 32-bit signed time_t, in which values >
4257
   * 2^31-1 won't fit, then:
4258
   *
4259
   *    Writing the packet to a file will pass the bits
4260
   *    through.  If the program reading the file can
4261
   *    handle 32-bit unsigned time stamps, including
4262
   *    any conversion to local time or UTC, it will
4263
   *    properly handle the time stamps.
4264
   *
4265
   *    Reporting the packet time stamp may give
4266
   *    an error or a pre-1970 time stamp on platforms
4267
   *    with signed 32-bit time stamps, but that
4268
   *    will happen even if it's captured on a
4269
   *    platform with a 64-bit time_t.
4270
   */
4271
0
  pcaphdr.ts.tv_sec = tp_sec;
4272
0
  pcaphdr.ts.tv_usec = tp_usec;
4273
0
  pcaphdr.caplen = tp_snaplen;
4274
0
  pcaphdr.len = tp_len;
4275
4276
  /* if required build in place the sll header*/
4277
0
  if (handlep->cooked) {
4278
    /* update packet len */
4279
0
    if (handle->linktype == DLT_LINUX_SLL2) {
4280
0
      pcaphdr.caplen += SLL2_HDR_LEN;
4281
0
      pcaphdr.len += SLL2_HDR_LEN;
4282
0
    } else {
4283
0
      pcaphdr.caplen += SLL_HDR_LEN;
4284
0
      pcaphdr.len += SLL_HDR_LEN;
4285
0
    }
4286
0
  }
4287
4288
0
  if (tp_vlan_tci_valid &&
4289
0
    handlep->vlan_offset != -1 &&
4290
0
    tp_snaplen >= (unsigned int) handlep->vlan_offset)
4291
0
  {
4292
0
    struct vlan_tag *tag;
4293
4294
    /*
4295
     * Move everything in the header, except the type field,
4296
     * down VLAN_TAG_LEN bytes, to allow us to insert the
4297
     * VLAN tag between that stuff and the type field.
4298
     */
4299
0
    bp -= VLAN_TAG_LEN;
4300
0
    memmove(bp, bp + VLAN_TAG_LEN, handlep->vlan_offset);
4301
4302
    /*
4303
     * Now insert the tag.
4304
     */
4305
0
    tag = (struct vlan_tag *)(bp + handlep->vlan_offset);
4306
0
    tag->vlan_tpid = htons(tp_vlan_tpid);
4307
0
    tag->vlan_tci = htons(tp_vlan_tci);
4308
4309
    /*
4310
     * Add the tag to the packet lengths.
4311
     */
4312
0
    pcaphdr.caplen += VLAN_TAG_LEN;
4313
0
    pcaphdr.len += VLAN_TAG_LEN;
4314
0
  }
4315
4316
  /*
4317
   * The only way to tell the kernel to cut off the
4318
   * packet at a snapshot length is with a filter program;
4319
   * if there's no filter program, the kernel won't cut
4320
   * the packet off.
4321
   *
4322
   * Trim the snapshot length to be no longer than the
4323
   * specified snapshot length.
4324
   *
4325
   * XXX - an alternative is to put a filter, consisting
4326
   * of a "ret <snaplen>" instruction, on the socket
4327
   * in the activate routine, so that the truncation is
4328
   * done in the kernel even if nobody specified a filter;
4329
   * that means that less buffer space is consumed in
4330
   * the memory-mapped buffer.
4331
   */
4332
0
  if (pcaphdr.caplen > (bpf_u_int32)handle->snapshot)
4333
0
    pcaphdr.caplen = handle->snapshot;
4334
4335
  /* pass the packet to the user */
4336
0
  callback(user, &pcaphdr, bp);
4337
4338
0
  return 1;
4339
0
}
4340
4341
static int
4342
pcap_read_linux_mmap_v2(pcap_t *handle, int max_packets, pcap_handler callback,
4343
    u_char *user)
4344
0
{
4345
0
  struct pcap_linux *handlep = handle->priv;
4346
0
  union thdr h;
4347
0
  int pkts = 0;
4348
0
  int ret;
4349
4350
  /* wait for frames availability.*/
4351
0
  h.raw = RING_GET_CURRENT_FRAME(handle);
4352
0
  if (!packet_mmap_acquire(h.h2)) {
4353
    /*
4354
     * The current frame is owned by the kernel; wait for
4355
     * a frame to be handed to us.
4356
     */
4357
0
    ret = pcap_wait_for_frames_mmap(handle);
4358
0
    if (ret) {
4359
0
      return ret;
4360
0
    }
4361
0
  }
4362
4363
  /*
4364
   * This can conceivably process more than INT_MAX packets,
4365
   * which would overflow the packet count, causing it either
4366
   * to look like a negative number, and thus cause us to
4367
   * return a value that looks like an error, or overflow
4368
   * back into positive territory, and thus cause us to
4369
   * return a too-low count.
4370
   *
4371
   * Therefore, if the packet count is unlimited, we clip
4372
   * it at INT_MAX; this routine is not expected to
4373
   * process packets indefinitely, so that's not an issue.
4374
   */
4375
0
  if (PACKET_COUNT_IS_UNLIMITED(max_packets))
4376
0
    max_packets = INT_MAX;
4377
4378
0
  while (pkts < max_packets) {
4379
    /*
4380
     * Get the current ring buffer frame, and break if
4381
     * it's still owned by the kernel.
4382
     */
4383
0
    h.raw = RING_GET_CURRENT_FRAME(handle);
4384
0
    if (!packet_mmap_acquire(h.h2))
4385
0
      break;
4386
4387
0
    ret = pcap_handle_packet_mmap(
4388
0
        handle,
4389
0
        callback,
4390
0
        user,
4391
0
        h.raw,
4392
0
        h.h2->tp_len,
4393
0
        h.h2->tp_mac,
4394
0
        h.h2->tp_snaplen,
4395
0
        h.h2->tp_sec,
4396
0
        handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? h.h2->tp_nsec : h.h2->tp_nsec / 1000,
4397
0
        VLAN_VALID(h.h2, h.h2),
4398
0
        h.h2->tp_vlan_tci,
4399
0
        VLAN_TPID(h.h2, h.h2));
4400
0
    if (ret == 1) {
4401
0
      pkts++;
4402
0
    } else if (ret < 0) {
4403
0
      return ret;
4404
0
    }
4405
4406
    /*
4407
     * Hand this block back to the kernel, and, if we're
4408
     * counting blocks that need to be filtered in userland
4409
     * after having been filtered by the kernel, count
4410
     * the one we've just processed.
4411
     */
4412
0
    packet_mmap_release(h.h2);
4413
0
    if (handlep->blocks_to_filter_in_userland != 0) {
4414
0
      handlep->blocks_to_filter_in_userland--;
4415
0
      if (handlep->blocks_to_filter_in_userland == 0) {
4416
        /*
4417
         * No more blocks need to be filtered
4418
         * in userland.
4419
         */
4420
0
        handlep->filter_in_userland = 0;
4421
0
      }
4422
0
    }
4423
4424
    /* next block */
4425
0
    if (++handle->offset >= handle->cc)
4426
0
      handle->offset = 0;
4427
4428
    /* check for break loop condition*/
4429
0
    if (handle->break_loop) {
4430
0
      handle->break_loop = 0;
4431
0
      return PCAP_ERROR_BREAK;
4432
0
    }
4433
0
  }
4434
0
  return pkts;
4435
0
}
4436
4437
#ifdef HAVE_TPACKET3
4438
static int
4439
pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback,
4440
    u_char *user)
4441
0
{
4442
0
  struct pcap_linux *handlep = handle->priv;
4443
0
  union thdr h;
4444
0
  int pkts = 0;
4445
0
  int ret;
4446
4447
0
again:
4448
0
  if (handlep->current_packet == NULL) {
4449
    /* wait for frames availability.*/
4450
0
    h.raw = RING_GET_CURRENT_FRAME(handle);
4451
0
    if (!packet_mmap_v3_acquire(h.h3)) {
4452
      /*
4453
       * The current frame is owned by the kernel; wait
4454
       * for a frame to be handed to us.
4455
       */
4456
0
      ret = pcap_wait_for_frames_mmap(handle);
4457
0
      if (ret) {
4458
0
        return ret;
4459
0
      }
4460
0
    }
4461
0
  }
4462
0
  h.raw = RING_GET_CURRENT_FRAME(handle);
4463
0
  if (!packet_mmap_v3_acquire(h.h3)) {
4464
0
    if (pkts == 0 && handlep->timeout == 0) {
4465
      /* Block until we see a packet. */
4466
0
      goto again;
4467
0
    }
4468
0
    return pkts;
4469
0
  }
4470
4471
  /*
4472
   * This can conceivably process more than INT_MAX packets,
4473
   * which would overflow the packet count, causing it either
4474
   * to look like a negative number, and thus cause us to
4475
   * return a value that looks like an error, or overflow
4476
   * back into positive territory, and thus cause us to
4477
   * return a too-low count.
4478
   *
4479
   * Therefore, if the packet count is unlimited, we clip
4480
   * it at INT_MAX; this routine is not expected to
4481
   * process packets indefinitely, so that's not an issue.
4482
   */
4483
0
  if (PACKET_COUNT_IS_UNLIMITED(max_packets))
4484
0
    max_packets = INT_MAX;
4485
4486
0
  while (pkts < max_packets) {
4487
0
    int packets_to_read;
4488
4489
0
    if (handlep->current_packet == NULL) {
4490
0
      h.raw = RING_GET_CURRENT_FRAME(handle);
4491
0
      if (!packet_mmap_v3_acquire(h.h3))
4492
0
        break;
4493
4494
0
      handlep->current_packet = h.raw + h.h3->hdr.bh1.offset_to_first_pkt;
4495
0
      handlep->packets_left = h.h3->hdr.bh1.num_pkts;
4496
0
    }
4497
0
    packets_to_read = handlep->packets_left;
4498
4499
0
    if (packets_to_read > (max_packets - pkts)) {
4500
      /*
4501
       * There are more packets in the buffer than
4502
       * the number of packets we have left to
4503
       * process to get up to the maximum number
4504
       * of packets to process.  Only process enough
4505
       * of them to get us up to that maximum.
4506
       */
4507
0
      packets_to_read = max_packets - pkts;
4508
0
    }
4509
4510
0
    while (packets_to_read-- && !handle->break_loop) {
4511
0
      struct tpacket3_hdr* tp3_hdr = (struct tpacket3_hdr*) handlep->current_packet;
4512
0
      ret = pcap_handle_packet_mmap(
4513
0
          handle,
4514
0
          callback,
4515
0
          user,
4516
0
          handlep->current_packet,
4517
0
          tp3_hdr->tp_len,
4518
0
          tp3_hdr->tp_mac,
4519
0
          tp3_hdr->tp_snaplen,
4520
0
          tp3_hdr->tp_sec,
4521
0
          handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? tp3_hdr->tp_nsec : tp3_hdr->tp_nsec / 1000,
4522
0
          VLAN_VALID(tp3_hdr, &tp3_hdr->hv1),
4523
0
          tp3_hdr->hv1.tp_vlan_tci,
4524
0
          VLAN_TPID(tp3_hdr, &tp3_hdr->hv1));
4525
0
      if (ret == 1) {
4526
0
        pkts++;
4527
0
      } else if (ret < 0) {
4528
0
        handlep->current_packet = NULL;
4529
0
        return ret;
4530
0
      }
4531
0
      handlep->current_packet += tp3_hdr->tp_next_offset;
4532
0
      handlep->packets_left--;
4533
0
    }
4534
4535
0
    if (handlep->packets_left <= 0) {
4536
      /*
4537
       * Hand this block back to the kernel, and, if
4538
       * we're counting blocks that need to be
4539
       * filtered in userland after having been
4540
       * filtered by the kernel, count the one we've
4541
       * just processed.
4542
       */
4543
0
      packet_mmap_v3_release(h.h3);
4544
0
      if (handlep->blocks_to_filter_in_userland != 0) {
4545
0
        handlep->blocks_to_filter_in_userland--;
4546
0
        if (handlep->blocks_to_filter_in_userland == 0) {
4547
          /*
4548
           * No more blocks need to be filtered
4549
           * in userland.
4550
           */
4551
0
          handlep->filter_in_userland = 0;
4552
0
        }
4553
0
      }
4554
4555
      /* next block */
4556
0
      if (++handle->offset >= handle->cc)
4557
0
        handle->offset = 0;
4558
4559
0
      handlep->current_packet = NULL;
4560
0
    }
4561
4562
    /* check for break loop condition*/
4563
0
    if (handle->break_loop) {
4564
0
      handle->break_loop = 0;
4565
0
      return PCAP_ERROR_BREAK;
4566
0
    }
4567
0
  }
4568
0
  if (pkts == 0 && handlep->timeout == 0) {
4569
    /* Block until we see a packet. */
4570
0
    goto again;
4571
0
  }
4572
0
  return pkts;
4573
0
}
4574
#endif /* HAVE_TPACKET3 */
4575
4576
/*
4577
 *  Attach the given BPF code to the packet capture device.
4578
 */
4579
static int
4580
pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter)
4581
0
{
4582
0
  struct pcap_linux *handlep;
4583
0
  struct sock_fprog fcode;
4584
0
  int     can_filter_in_kernel;
4585
0
  int     err = 0;
4586
0
  u_int     n, offset;
4587
4588
0
  if (!handle)
4589
0
    return -1;
4590
0
  if (!filter) {
4591
0
          pcapint_strlcpy(handle->errbuf, "setfilter: No filter specified",
4592
0
      PCAP_ERRBUF_SIZE);
4593
0
    return -1;
4594
0
  }
4595
4596
0
  handlep = handle->priv;
4597
4598
  /* Make our private copy of the filter */
4599
4600
0
  if (pcapint_install_bpf_program(handle, filter) < 0)
4601
    /* pcapint_install_bpf_program() filled in errbuf */
4602
0
    return -1;
4603
4604
  /*
4605
   * Run user level packet filter by default. Will be overridden if
4606
   * installing a kernel filter succeeds.
4607
   */
4608
0
  handlep->filter_in_userland = 1;
4609
4610
  /* Install kernel level filter if possible */
4611
4612
0
  if (handle->fcode.bf_len > USHRT_MAX) {
4613
    /*
4614
     * fcode.len is an unsigned short for current kernel.
4615
     * I have yet to see BPF-Code with that much
4616
     * instructions but still it is possible. So for the
4617
     * sake of correctness I added this check.
4618
     */
4619
0
    fprintf(stderr, "Warning: Filter too complex for kernel\n");
4620
0
    fcode.len = 0;
4621
0
    fcode.filter = NULL;
4622
0
    can_filter_in_kernel = 0;
4623
0
  } else {
4624
    /*
4625
     * Oh joy, the Linux kernel uses struct sock_fprog instead
4626
     * of struct bpf_program and of course the length field is
4627
     * of different size. Pointed out by Sebastian
4628
     *
4629
     * Oh, and we also need to fix it up so that all "ret"
4630
     * instructions with non-zero operands have MAXIMUM_SNAPLEN
4631
     * as the operand if we're not capturing in memory-mapped
4632
     * mode, and so that, if we're in cooked mode, all memory-
4633
     * reference instructions use special magic offsets in
4634
     * references to the link-layer header and assume that the
4635
     * link-layer payload begins at 0; "fix_program()" will do
4636
     * that.
4637
     */
4638
0
    switch (fix_program(handle, &fcode)) {
4639
4640
0
    case -1:
4641
0
    default:
4642
      /*
4643
       * Fatal error; just quit.
4644
       * (The "default" case shouldn't happen; we
4645
       * return -1 for that reason.)
4646
       */
4647
0
      return -1;
4648
4649
0
    case 0:
4650
      /*
4651
       * The program performed checks that we can't make
4652
       * work in the kernel.
4653
       */
4654
0
      can_filter_in_kernel = 0;
4655
0
      break;
4656
4657
0
    case 1:
4658
      /*
4659
       * We have a filter that'll work in the kernel.
4660
       */
4661
0
      can_filter_in_kernel = 1;
4662
0
      break;
4663
0
    }
4664
0
  }
4665
4666
  /*
4667
   * NOTE: at this point, we've set both the "len" and "filter"
4668
   * fields of "fcode".  As of the 2.6.32.4 kernel, at least,
4669
   * those are the only members of the "sock_fprog" structure,
4670
   * so we initialize every member of that structure.
4671
   *
4672
   * If there is anything in "fcode" that is not initialized,
4673
   * it is either a field added in a later kernel, or it's
4674
   * padding.
4675
   *
4676
   * If a new field is added, this code needs to be updated
4677
   * to set it correctly.
4678
   *
4679
   * If there are no other fields, then:
4680
   *
4681
   *  if the Linux kernel looks at the padding, it's
4682
   *  buggy;
4683
   *
4684
   *  if the Linux kernel doesn't look at the padding,
4685
   *  then if some tool complains that we're passing
4686
   *  uninitialized data to the kernel, then the tool
4687
   *  is buggy and needs to understand that it's just
4688
   *  padding.
4689
   */
4690
0
  if (can_filter_in_kernel) {
4691
0
    if ((err = set_kernel_filter(handle, &fcode)) == 0)
4692
0
    {
4693
      /*
4694
       * Installation succeeded - using kernel filter,
4695
       * so userland filtering not needed.
4696
       */
4697
0
      handlep->filter_in_userland = 0;
4698
0
    }
4699
0
    else if (err == -1) /* Non-fatal error */
4700
0
    {
4701
      /*
4702
       * Print a warning if we weren't able to install
4703
       * the filter for a reason other than "this kernel
4704
       * isn't configured to support socket filters.
4705
       */
4706
0
      if (errno == ENOMEM) {
4707
        /*
4708
         * Either a kernel memory allocation
4709
         * failure occurred, or there's too
4710
         * much "other/option memory" allocated
4711
         * for this socket.  Suggest that they
4712
         * increase the "other/option memory"
4713
         * limit.
4714
         */
4715
0
        fprintf(stderr,
4716
0
            "Warning: Couldn't allocate kernel memory for filter: try increasing net.core.optmem_max with sysctl\n");
4717
0
      } else if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) {
4718
0
        fprintf(stderr,
4719
0
            "Warning: Kernel filter failed: %s\n",
4720
0
          pcap_strerror(errno));
4721
0
      }
4722
0
    }
4723
0
  }
4724
4725
  /*
4726
   * If we're not using the kernel filter, get rid of any kernel
4727
   * filter that might've been there before, e.g. because the
4728
   * previous filter could work in the kernel, or because some other
4729
   * code attached a filter to the socket by some means other than
4730
   * calling "pcap_setfilter()".  Otherwise, the kernel filter may
4731
   * filter out packets that would pass the new userland filter.
4732
   */
4733
0
  if (handlep->filter_in_userland) {
4734
0
    if (reset_kernel_filter(handle) == -1) {
4735
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
4736
0
          PCAP_ERRBUF_SIZE, errno,
4737
0
          "can't remove kernel filter");
4738
0
      err = -2; /* fatal error */
4739
0
    }
4740
0
  }
4741
4742
  /*
4743
   * Free up the copy of the filter that was made by "fix_program()".
4744
   */
4745
0
  if (fcode.filter != NULL)
4746
0
    free(fcode.filter);
4747
4748
0
  if (err == -2)
4749
    /* Fatal error */
4750
0
    return -1;
4751
4752
  /*
4753
   * If we're filtering in userland, there's nothing to do;
4754
   * the new filter will be used for the next packet.
4755
   */
4756
0
  if (handlep->filter_in_userland)
4757
0
    return 0;
4758
4759
  /*
4760
   * We're filtering in the kernel; the packets present in
4761
   * all blocks currently in the ring were already filtered
4762
   * by the old filter, and so will need to be filtered in
4763
   * userland by the new filter.
4764
   *
4765
   * Get an upper bound for the number of such blocks; first,
4766
   * walk the ring backward and count the free blocks.
4767
   */
4768
0
  offset = handle->offset;
4769
0
  if (offset == 0)
4770
0
    offset = handle->cc;
4771
0
  offset--;
4772
0
  for (n=0; n < handle->cc; ++n) {
4773
0
    if (offset == 0)
4774
0
      offset = handle->cc;
4775
0
    offset--;
4776
0
    if (pcap_get_ring_frame_status(handle, offset) != TP_STATUS_KERNEL)
4777
0
      break;
4778
0
  }
4779
4780
  /*
4781
   * If we found free blocks, decrement the count of free
4782
   * blocks by 1, just in case we lost a race with another
4783
   * thread of control that was adding a packet while
4784
   * we were counting and that had run the filter before
4785
   * we changed it.
4786
   *
4787
   * XXX - could there be more than one block added in
4788
   * this fashion?
4789
   *
4790
   * XXX - is there a way to avoid that race, e.g. somehow
4791
   * wait for all packets that passed the old filter to
4792
   * be added to the ring?
4793
   */
4794
0
  if (n != 0)
4795
0
    n--;
4796
4797
  /*
4798
   * Set the count of blocks worth of packets to filter
4799
   * in userland to the total number of blocks in the
4800
   * ring minus the number of free blocks we found, and
4801
   * turn on userland filtering.  (The count of blocks
4802
   * worth of packets to filter in userland is guaranteed
4803
   * not to be zero - n, above, couldn't be set to a
4804
   * value > handle->cc, and if it were equal to
4805
   * handle->cc, it wouldn't be zero, and thus would
4806
   * be decremented to handle->cc - 1.)
4807
   */
4808
0
  handlep->blocks_to_filter_in_userland = handle->cc - n;
4809
0
  handlep->filter_in_userland = 1;
4810
4811
0
  return 0;
4812
0
}
4813
4814
/*
4815
 *  Return the index of the given device name. Fill ebuf and return
4816
 *  -1 on failure.
4817
 */
4818
static int
4819
iface_get_id(int fd, const char *device, char *ebuf)
4820
0
{
4821
0
  struct ifreq  ifr;
4822
4823
0
  memset(&ifr, 0, sizeof(ifr));
4824
0
  pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
4825
4826
0
  if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
4827
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4828
0
        errno, "SIOCGIFINDEX");
4829
0
    return -1;
4830
0
  }
4831
4832
0
  return ifr.ifr_ifindex;
4833
0
}
4834
4835
/*
4836
 *  Bind the socket associated with FD to the given device.
4837
 *  Return 0 on success or a PCAP_ERROR_ value on a hard error.
4838
 */
4839
static int
4840
iface_bind(int fd, int ifindex, char *ebuf, int protocol)
4841
0
{
4842
0
  struct sockaddr_ll  sll;
4843
0
  int     ret, err;
4844
0
  socklen_t   errlen = sizeof(err);
4845
4846
0
  memset(&sll, 0, sizeof(sll));
4847
0
  sll.sll_family    = AF_PACKET;
4848
0
  sll.sll_ifindex   = ifindex < 0 ? 0 : ifindex;
4849
0
  sll.sll_protocol  = protocol;
4850
4851
0
  if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) {
4852
0
    if (errno == ENETDOWN) {
4853
      /*
4854
       * Return a "network down" indication, so that
4855
       * the application can report that rather than
4856
       * saying we had a mysterious failure and
4857
       * suggest that they report a problem to the
4858
       * libpcap developers.
4859
       */
4860
0
      return PCAP_ERROR_IFACE_NOT_UP;
4861
0
    }
4862
0
    if (errno == ENODEV) {
4863
      /*
4864
       * There's nothing more to say, so clear the
4865
       * error message.
4866
       */
4867
0
      ebuf[0] = '\0';
4868
0
      ret = PCAP_ERROR_NO_SUCH_DEVICE;
4869
0
    } else {
4870
0
      ret = PCAP_ERROR;
4871
0
      pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4872
0
          errno, "bind");
4873
0
    }
4874
0
    return ret;
4875
0
  }
4876
4877
  /* Any pending errors, e.g., network is down? */
4878
4879
0
  if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
4880
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4881
0
        errno, "getsockopt (SO_ERROR)");
4882
0
    return PCAP_ERROR;
4883
0
  }
4884
4885
0
  if (err == ENETDOWN) {
4886
    /*
4887
     * Return a "network down" indication, so that
4888
     * the application can report that rather than
4889
     * saying we had a mysterious failure and
4890
     * suggest that they report a problem to the
4891
     * libpcap developers.
4892
     */
4893
0
    return PCAP_ERROR_IFACE_NOT_UP;
4894
0
  } else if (err > 0) {
4895
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4896
0
        err, "bind");
4897
0
    return PCAP_ERROR;
4898
0
  }
4899
4900
0
  return 0;
4901
0
}
4902
4903
/*
4904
 * Try to enter monitor mode.
4905
 * If we have libnl, try to create a new monitor-mode device and
4906
 * capture on that; otherwise, just say "not supported".
4907
 */
4908
#ifdef HAVE_LIBNL
4909
static int
4910
enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device)
4911
{
4912
  struct pcap_linux *handlep = handle->priv;
4913
  int ret;
4914
  char phydev_path[PATH_MAX+1];
4915
  struct nl80211_state nlstate;
4916
  struct ifreq ifr;
4917
  u_int n;
4918
4919
  /*
4920
   * Is this a mac80211 device?
4921
   */
4922
  ret = get_mac80211_phydev(handle, device, phydev_path, PATH_MAX);
4923
  if (ret < 0)
4924
    return ret; /* error */
4925
  if (ret == 0)
4926
    return 0; /* no error, but not mac80211 device */
4927
4928
  ret = nl80211_init(handle, &nlstate, device);
4929
  if (ret != 0)
4930
    return ret;
4931
4932
  /*
4933
   * Is this already a monN device?
4934
   * If so, we're done.
4935
   */
4936
  int type;
4937
  ret = get_if_type(handle, sock_fd, &nlstate, device, &type);
4938
  if (ret <= 0) {
4939
    /*
4940
     * < 0 is a Hard failure.  Just return ret; handle->errbuf
4941
     * has already been set.
4942
     *
4943
     * 0 is "device not available"; the caller should retry later.
4944
     */
4945
    nl80211_cleanup(&nlstate);
4946
    return ret;
4947
  }
4948
        if (type == NL80211_IFTYPE_MONITOR) {
4949
    /*
4950
     * OK, it's already a monitor mode device; just use it.
4951
     * There's no point in creating another monitor device
4952
     * that will have to be cleaned up.
4953
     */
4954
                nl80211_cleanup(&nlstate);
4955
                return ret;
4956
        }
4957
4958
  /*
4959
   * OK, it's apparently a mac80211 device but not a monitor device.
4960
   * Try to find an unused monN device for it.
4961
   */
4962
  for (n = 0; n < UINT_MAX; n++) {
4963
    /*
4964
     * Try mon{n}.
4965
     */
4966
    char mondevice[3+10+1]; /* mon{UINT_MAX}\0 */
4967
4968
    snprintf(mondevice, sizeof mondevice, "mon%u", n);
4969
    ret = add_mon_if(handle, sock_fd, &nlstate, device, mondevice);
4970
    if (ret == 1) {
4971
      /*
4972
       * Success.  We don't clean up the libnl state
4973
       * yet, as we'll be using it later.
4974
       */
4975
      goto added;
4976
    }
4977
    if (ret < 0) {
4978
      /*
4979
       * Hard failure.  Just return ret; handle->errbuf
4980
       * has already been set.
4981
       */
4982
      nl80211_cleanup(&nlstate);
4983
      return ret;
4984
    }
4985
  }
4986
4987
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4988
      "%s: No free monN interfaces", device);
4989
  nl80211_cleanup(&nlstate);
4990
  return PCAP_ERROR;
4991
4992
added:
4993
4994
#if 0
4995
  /*
4996
   * Sleep for .1 seconds.
4997
   */
4998
  delay.tv_sec = 0;
4999
  delay.tv_nsec = 500000000;
5000
  nanosleep(&delay, NULL);
5001
#endif
5002
5003
  /*
5004
   * If we haven't already done so, arrange to have
5005
   * "pcap_close_all()" called when we exit.
5006
   */
5007
  if (!pcapint_do_addexit(handle)) {
5008
    /*
5009
     * "atexit()" failed; don't put the interface
5010
     * in rfmon mode, just give up.
5011
     * handle->errbuf has already been filled.
5012
     */
5013
    del_mon_if(handle, sock_fd, &nlstate, device,
5014
        handlep->mondevice);
5015
    nl80211_cleanup(&nlstate);
5016
    return PCAP_ERROR;
5017
  }
5018
5019
  /*
5020
   * Now configure the monitor interface up.
5021
   */
5022
  memset(&ifr, 0, sizeof(ifr));
5023
  pcapint_strlcpy(ifr.ifr_name, handlep->mondevice, sizeof(ifr.ifr_name));
5024
  if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) {
5025
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5026
        errno, "%s: Can't get flags for %s", device,
5027
        handlep->mondevice);
5028
    del_mon_if(handle, sock_fd, &nlstate, device,
5029
        handlep->mondevice);
5030
    nl80211_cleanup(&nlstate);
5031
    return PCAP_ERROR;
5032
  }
5033
  ifr.ifr_flags |= IFF_UP|IFF_RUNNING;
5034
  if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) {
5035
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5036
        errno, "%s: Can't set flags for %s", device,
5037
        handlep->mondevice);
5038
    del_mon_if(handle, sock_fd, &nlstate, device,
5039
        handlep->mondevice);
5040
    nl80211_cleanup(&nlstate);
5041
    return PCAP_ERROR;
5042
  }
5043
5044
  /*
5045
   * Success.  Clean up the libnl state.
5046
   */
5047
  nl80211_cleanup(&nlstate);
5048
5049
  /*
5050
   * Note that we have to delete the monitor device when we close
5051
   * the handle.
5052
   */
5053
  handlep->must_do_on_close |= MUST_DELETE_MONIF;
5054
5055
  /*
5056
   * Add this to the list of pcaps to close when we exit.
5057
   */
5058
  pcapint_add_to_pcaps_to_close(handle);
5059
5060
  return 1;
5061
}
5062
#else /* HAVE_LIBNL */
5063
static int
5064
enter_rfmon_mode(pcap_t *handle _U_, int sock_fd _U_, const char *device _U_)
5065
0
{
5066
  /*
5067
   * We don't have libnl, so we can't do monitor mode.
5068
   */
5069
0
  return 0;
5070
0
}
5071
#endif /* HAVE_LIBNL */
5072
5073
#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
5074
/*
5075
 * Map SOF_TIMESTAMPING_ values to PCAP_TSTAMP_ values.
5076
 */
5077
static const struct {
5078
  int soft_timestamping_val;
5079
  int pcap_tstamp_val;
5080
} sof_ts_type_map[3] = {
5081
  { SOF_TIMESTAMPING_SOFTWARE, PCAP_TSTAMP_HOST },
5082
  { SOF_TIMESTAMPING_SYS_HARDWARE, PCAP_TSTAMP_ADAPTER },
5083
  { SOF_TIMESTAMPING_RAW_HARDWARE, PCAP_TSTAMP_ADAPTER_UNSYNCED }
5084
};
5085
0
#define NUM_SOF_TIMESTAMPING_TYPES  (sizeof sof_ts_type_map / sizeof sof_ts_type_map[0])
5086
5087
/*
5088
 * Set the list of time stamping types to include all types.
5089
 */
5090
static int
5091
iface_set_all_ts_types(pcap_t *handle, char *ebuf)
5092
0
{
5093
0
  u_int i;
5094
5095
0
  handle->tstamp_type_list = malloc(NUM_SOF_TIMESTAMPING_TYPES * sizeof(u_int));
5096
0
  if (handle->tstamp_type_list == NULL) {
5097
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5098
0
        errno, "malloc");
5099
0
    return -1;
5100
0
  }
5101
0
  for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++)
5102
0
    handle->tstamp_type_list[i] = sof_ts_type_map[i].pcap_tstamp_val;
5103
0
  handle->tstamp_type_count = NUM_SOF_TIMESTAMPING_TYPES;
5104
0
  return 0;
5105
0
}
5106
5107
/*
5108
 * Get a list of time stamp types.
5109
 */
5110
#ifdef ETHTOOL_GET_TS_INFO
5111
static int
5112
iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf)
5113
0
{
5114
0
  int fd;
5115
0
  struct ifreq ifr;
5116
0
  struct ethtool_ts_info info;
5117
0
  int num_ts_types;
5118
0
  u_int i, j;
5119
5120
  /*
5121
   * This doesn't apply to the "any" device; you can't say "turn on
5122
   * hardware time stamping for all devices that exist now and arrange
5123
   * that it be turned on for any device that appears in the future",
5124
   * and not all devices even necessarily *support* hardware time
5125
   * stamping, so don't report any time stamp types.
5126
   */
5127
0
  if (strcmp(device, "any") == 0) {
5128
0
    handle->tstamp_type_list = NULL;
5129
0
    return 0;
5130
0
  }
5131
5132
  /*
5133
   * Create a socket from which to fetch time stamping capabilities.
5134
   */
5135
0
  fd = get_if_ioctl_socket();
5136
0
  if (fd < 0) {
5137
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5138
0
        errno, "socket for SIOCETHTOOL(ETHTOOL_GET_TS_INFO)");
5139
0
    return -1;
5140
0
  }
5141
5142
0
  memset(&ifr, 0, sizeof(ifr));
5143
0
  pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
5144
0
  memset(&info, 0, sizeof(info));
5145
0
  info.cmd = ETHTOOL_GET_TS_INFO;
5146
0
  ifr.ifr_data = (caddr_t)&info;
5147
0
  if (ioctl(fd, SIOCETHTOOL, &ifr) == -1) {
5148
0
    int save_errno = errno;
5149
5150
0
    close(fd);
5151
0
    switch (save_errno) {
5152
5153
0
    case EOPNOTSUPP:
5154
0
    case EINVAL:
5155
      /*
5156
       * OK, this OS version or driver doesn't support
5157
       * asking for the time stamping types, so let's
5158
       * just return all the possible types.
5159
       */
5160
0
      if (iface_set_all_ts_types(handle, ebuf) == -1)
5161
0
        return -1;
5162
0
      return 0;
5163
5164
0
    case ENODEV:
5165
      /*
5166
       * OK, no such device.
5167
       * The user will find that out when they try to
5168
       * activate the device; just return an empty
5169
       * list of time stamp types.
5170
       */
5171
0
      handle->tstamp_type_list = NULL;
5172
0
      return 0;
5173
5174
0
    default:
5175
      /*
5176
       * Other error.
5177
       */
5178
0
      pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5179
0
          save_errno,
5180
0
          "%s: SIOCETHTOOL(ETHTOOL_GET_TS_INFO) ioctl failed",
5181
0
          device);
5182
0
      return -1;
5183
0
    }
5184
0
  }
5185
0
  close(fd);
5186
5187
  /*
5188
   * Do we support hardware time stamping of *all* packets?
5189
   */
5190
0
  if (!(info.rx_filters & (1 << HWTSTAMP_FILTER_ALL))) {
5191
    /*
5192
     * No, so don't report any time stamp types.
5193
     *
5194
     * XXX - some devices either don't report
5195
     * HWTSTAMP_FILTER_ALL when they do support it, or
5196
     * report HWTSTAMP_FILTER_ALL but map it to only
5197
     * time stamping a few PTP packets.  See
5198
     * http://marc.info/?l=linux-netdev&m=146318183529571&w=2
5199
     *
5200
     * Maybe that got fixed later.
5201
     */
5202
0
    handle->tstamp_type_list = NULL;
5203
0
    return 0;
5204
0
  }
5205
5206
0
  num_ts_types = 0;
5207
0
  for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) {
5208
0
    if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val)
5209
0
      num_ts_types++;
5210
0
  }
5211
0
  if (num_ts_types != 0) {
5212
0
    handle->tstamp_type_list = malloc(num_ts_types * sizeof(u_int));
5213
0
    if (handle->tstamp_type_list == NULL) {
5214
0
      pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5215
0
          errno, "malloc");
5216
0
      return -1;
5217
0
    }
5218
0
    for (i = 0, j = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) {
5219
0
      if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) {
5220
0
        handle->tstamp_type_list[j] = sof_ts_type_map[i].pcap_tstamp_val;
5221
0
        j++;
5222
0
      }
5223
0
    }
5224
0
    handle->tstamp_type_count = num_ts_types;
5225
0
  } else
5226
0
    handle->tstamp_type_list = NULL;
5227
5228
0
  return 0;
5229
0
}
5230
#else /* ETHTOOL_GET_TS_INFO */
5231
static int
5232
iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf)
5233
{
5234
  /*
5235
   * This doesn't apply to the "any" device; you can't say "turn on
5236
   * hardware time stamping for all devices that exist now and arrange
5237
   * that it be turned on for any device that appears in the future",
5238
   * and not all devices even necessarily *support* hardware time
5239
   * stamping, so don't report any time stamp types.
5240
   */
5241
  if (strcmp(device, "any") == 0) {
5242
    handle->tstamp_type_list = NULL;
5243
    return 0;
5244
  }
5245
5246
  /*
5247
   * We don't have an ioctl to use to ask what's supported,
5248
   * so say we support everything.
5249
   */
5250
  if (iface_set_all_ts_types(handle, ebuf) == -1)
5251
    return -1;
5252
  return 0;
5253
}
5254
#endif /* ETHTOOL_GET_TS_INFO */
5255
#else  /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */
5256
static int
5257
iface_get_ts_types(const char *device _U_, pcap_t *p _U_, char *ebuf _U_)
5258
{
5259
  /*
5260
   * Nothing to fetch, so it always "succeeds".
5261
   */
5262
  return 0;
5263
}
5264
#endif /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */
5265
5266
/*
5267
 * Find out if we have any form of fragmentation/reassembly offloading.
5268
 *
5269
 * We do so using SIOCETHTOOL checking for various types of offloading;
5270
 * if SIOCETHTOOL isn't defined, or we don't have any #defines for any
5271
 * of the types of offloading, there's nothing we can do to check, so
5272
 * we just say "no, we don't".
5273
 *
5274
 * We treat EOPNOTSUPP, EINVAL and, if eperm_ok is true, EPERM as
5275
 * indications that the operation isn't supported.  We do EPERM
5276
 * weirdly because the SIOCETHTOOL code in later kernels 1) doesn't
5277
 * support ETHTOOL_GUFO, 2) also doesn't include it in the list
5278
 * of ethtool operations that don't require CAP_NET_ADMIN privileges,
5279
 * and 3) does the "is this permitted" check before doing the "is
5280
 * this even supported" check, so it fails with "this is not permitted"
5281
 * rather than "this is not even supported".  To work around this
5282
 * annoyance, we only treat EPERM as an error for the first feature,
5283
 * and assume that they all do the same permission checks, so if the
5284
 * first one is allowed all the others are allowed if supported.
5285
 */
5286
#if defined(SIOCETHTOOL) && (defined(ETHTOOL_GTSO) || defined(ETHTOOL_GUFO) || defined(ETHTOOL_GGSO) || defined(ETHTOOL_GFLAGS) || defined(ETHTOOL_GGRO))
5287
static int
5288
iface_ethtool_flag_ioctl(pcap_t *handle, int cmd, const char *cmdname,
5289
    int eperm_ok)
5290
0
{
5291
0
  struct ifreq  ifr;
5292
0
  struct ethtool_value eval;
5293
5294
0
  memset(&ifr, 0, sizeof(ifr));
5295
0
  pcapint_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name));
5296
0
  eval.cmd = cmd;
5297
0
  eval.data = 0;
5298
0
  ifr.ifr_data = (caddr_t)&eval;
5299
0
  if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) {
5300
0
    if (errno == EOPNOTSUPP || errno == EINVAL ||
5301
0
        (errno == EPERM && eperm_ok)) {
5302
      /*
5303
       * OK, let's just return 0, which, in our
5304
       * case, either means "no, what we're asking
5305
       * about is not enabled" or "all the flags
5306
       * are clear (i.e., nothing is enabled)".
5307
       */
5308
0
      return 0;
5309
0
    }
5310
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5311
0
        errno, "%s: SIOCETHTOOL(%s) ioctl failed",
5312
0
        handle->opt.device, cmdname);
5313
0
    return -1;
5314
0
  }
5315
0
  return eval.data;
5316
0
}
5317
5318
/*
5319
 * XXX - it's annoying that we have to check for offloading at all, but,
5320
 * given that we have to, it's still annoying that we have to check for
5321
 * particular types of offloading, especially that shiny new types of
5322
 * offloading may be added - and, worse, may not be checkable with
5323
 * a particular ETHTOOL_ operation; ETHTOOL_GFEATURES would, in
5324
 * theory, give those to you, but the actual flags being used are
5325
 * opaque (defined in a non-uapi header), and there doesn't seem to
5326
 * be any obvious way to ask the kernel what all the offloading flags
5327
 * are - at best, you can ask for a set of strings(!) to get *names*
5328
 * for various flags.  (That whole mechanism appears to have been
5329
 * designed for the sole purpose of letting ethtool report flags
5330
 * by name and set flags by name, with the names having no semantics
5331
 * ethtool understands.)
5332
 */
5333
static int
5334
iface_get_offload(pcap_t *handle)
5335
0
{
5336
0
  int ret;
5337
5338
0
#ifdef ETHTOOL_GTSO
5339
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO", 0);
5340
0
  if (ret == -1)
5341
0
    return -1;
5342
0
  if (ret)
5343
0
    return 1; /* TCP segmentation offloading on */
5344
0
#endif
5345
5346
0
#ifdef ETHTOOL_GGSO
5347
  /*
5348
   * XXX - will this cause large unsegmented packets to be
5349
   * handed to PF_PACKET sockets on transmission?  If not,
5350
   * this need not be checked.
5351
   */
5352
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO", 0);
5353
0
  if (ret == -1)
5354
0
    return -1;
5355
0
  if (ret)
5356
0
    return 1; /* generic segmentation offloading on */
5357
0
#endif
5358
5359
0
#ifdef ETHTOOL_GFLAGS
5360
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS", 0);
5361
0
  if (ret == -1)
5362
0
    return -1;
5363
0
  if (ret & ETH_FLAG_LRO)
5364
0
    return 1; /* large receive offloading on */
5365
0
#endif
5366
5367
0
#ifdef ETHTOOL_GGRO
5368
  /*
5369
   * XXX - will this cause large reassembled packets to be
5370
   * handed to PF_PACKET sockets on receipt?  If not,
5371
   * this need not be checked.
5372
   */
5373
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO", 0);
5374
0
  if (ret == -1)
5375
0
    return -1;
5376
0
  if (ret)
5377
0
    return 1; /* generic (large) receive offloading on */
5378
0
#endif
5379
5380
0
#ifdef ETHTOOL_GUFO
5381
  /*
5382
   * Do this one last, as support for it was removed in later
5383
   * kernels, and it fails with EPERM on those kernels rather
5384
   * than with EOPNOTSUPP (see explanation in comment for
5385
   * iface_ethtool_flag_ioctl()).
5386
   */
5387
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO", 1);
5388
0
  if (ret == -1)
5389
0
    return -1;
5390
0
  if (ret)
5391
0
    return 1; /* UDP fragmentation offloading on */
5392
0
#endif
5393
5394
0
  return 0;
5395
0
}
5396
#else /* SIOCETHTOOL */
5397
static int
5398
iface_get_offload(pcap_t *handle _U_)
5399
{
5400
  /*
5401
   * XXX - do we need to get this information if we don't
5402
   * have the ethtool ioctls?  If so, how do we do that?
5403
   */
5404
  return 0;
5405
}
5406
#endif /* SIOCETHTOOL */
5407
5408
/*
5409
 * As per
5410
 *
5411
 *    https://www.kernel.org/doc/html/latest/networking/dsa/dsa.html#switch-tagging-protocols
5412
 *
5413
 * Type 1 means that the tag is prepended to the Ethernet packet.
5414
 * LINKTYPE_ETHERNET/DLT_EN10MB doesn't work, as it would try to
5415
 * dissect the tag data as the Ethernet header.  These should get
5416
 * their own LINKTYPE_/DLT_ values.
5417
 *
5418
 * Type 2 means that the tag is inserted into the Ethernet header
5419
 * after the source address and before the type/length field.
5420
 *
5421
 * Type 3 means that the tag is a packet trailer.  LINKTYPE_ETHERNET/DLT_EN10MB
5422
 * works,  unless the next-layer protocol has no length field of its own,
5423
 * so that the tag might be treated as part of the payload. These should
5424
 * get their own LINKTYPE_/DLT_ values.
5425
 *
5426
 * If you get an "unsupported DSA tag" error, please add the tag to here,
5427
 * complete with a full comment indicating whether it's type 1, 2, or 3,
5428
 * and, for type 2, indicating whether it has an Ethertype and, if so
5429
 * what that type is, and whether it's registered with the IEEE or is
5430
 * self-assigned. Also, point to *something* that indicates the format
5431
 * of the tag.
5432
 */
5433
static struct dsa_proto {
5434
  const char *name;
5435
  bpf_u_int32 linktype;
5436
} dsa_protos[] = {
5437
  /*
5438
   * Type 1. See
5439
   *
5440
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ar9331.c
5441
   */
5442
  { "ar9331", DLT_EN10MB },
5443
5444
  /*
5445
   * Type 2, without an EtherType at the beginning,
5446
   * assigned a LINKTYPE_/DLT_ value.
5447
   */
5448
  { "brcm", DLT_DSA_TAG_BRCM },
5449
5450
  /*
5451
   * Type 2, with EtherType 0x8874, assigned to Broadcom.
5452
   *
5453
   * This does not require a LINKTYPE_/DLT_ value, it
5454
   * just requires that Ethertype 0x8874 be dissected
5455
   * properly.
5456
   */
5457
  { "brcm-legacy", DLT_EN10MB },
5458
5459
  /*
5460
   * Type 1.
5461
   */
5462
  { "brcm-prepend", DLT_DSA_TAG_BRCM_PREPEND },
5463
5464
  /*
5465
   * Type 2, without an EtherType at the beginning,
5466
   * assigned a LINKTYPE_/DLT_ value.
5467
   */
5468
  { "dsa", DLT_DSA_TAG_DSA },
5469
5470
  /*
5471
   * Type 2, with an Ethertype field, but without
5472
   * an assigned EtherType value that can be relied
5473
   * on; assigned a LINKTYPE_/DLT_ value.
5474
   */
5475
  { "edsa", DLT_DSA_TAG_EDSA },
5476
5477
  /*
5478
   * Type 1, with different transmit and receive headers,
5479
   * so can't really be handled well with the current
5480
   * libpcap API and with pcap files.  Use DLT_LINUX_SLL,
5481
   * to get the direction?
5482
   *
5483
   * See
5484
   *
5485
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_gswip.c
5486
   */
5487
  { "gswip", DLT_EN10MB },
5488
5489
  /*
5490
   * Type 3. See
5491
   *
5492
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_hellcreek.c
5493
   */
5494
  { "hellcreek", DLT_EN10MB },
5495
5496
  /*
5497
   * Type 3, with different transmit and receive headers,
5498
   * so can't really be handled well with the current
5499
   * libpcap API and with pcap files.  Use DLT_LINUX_SLL,
5500
   * to get the direction?
5501
   *
5502
   * See
5503
   *
5504
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L102
5505
   */
5506
  { "ksz8795", DLT_EN10MB },
5507
5508
  /*
5509
   * Type 3, with different transmit and receive headers,
5510
   * so can't really be handled well with the current
5511
   * libpcap API and with pcap files.  Use DLT_LINUX_SLL,
5512
   * to get the direction?
5513
   *
5514
   * See
5515
   *
5516
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L160
5517
   */
5518
  { "ksz9477", DLT_EN10MB },
5519
5520
  /*
5521
   * Type 3, with different transmit and receive headers,
5522
   * so can't really be handled well with the current
5523
   * libpcap API and with pcap files.  Use DLT_LINUX_SLL,
5524
   * to get the direction?
5525
   *
5526
   * See
5527
   *
5528
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L341
5529
   */
5530
  { "ksz9893", DLT_EN10MB },
5531
5532
  /*
5533
   * Type 3, with different transmit and receive headers,
5534
   * so can't really be handled well with the current
5535
   * libpcap API and with pcap files.  Use DLT_LINUX_SLL,
5536
   * to get the direction?
5537
   *
5538
   * See
5539
   *
5540
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L386
5541
   */
5542
  { "lan937x", DLT_EN10MB },
5543
5544
  /*
5545
   * Type 2, with EtherType 0x8100; the VID can be interpreted
5546
   * as per
5547
   *
5548
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_lan9303.c#L24
5549
   *
5550
   * so giving its own LINKTYPE_/DLT_ value would allow a
5551
   * dissector to do so.
5552
   */
5553
  { "lan9303", DLT_EN10MB },
5554
5555
  /*
5556
   * Type 2, without an EtherType at the beginning,
5557
   * should be assigned a LINKTYPE_/DLT_ value.
5558
   *
5559
   * See
5560
   *
5561
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_mtk.c#L15
5562
   */
5563
  { "mtk", DLT_EN10MB },
5564
5565
  /*
5566
   * The string "none" indicates that the interface does not have
5567
   * any tagging protocol configured, and is therefore a standard
5568
   * Ethernet interface.
5569
   */
5570
  { "none", DLT_EN10MB },
5571
5572
  /*
5573
   * Type 1.
5574
   *
5575
   * See
5576
   *
5577
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ocelot.c
5578
   */
5579
  { "ocelot", DLT_EN10MB },
5580
5581
  /*
5582
   * Type 1.
5583
   *
5584
   * See
5585
   *
5586
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ocelot.c
5587
   */
5588
  { "seville", DLT_EN10MB },
5589
5590
  /*
5591
   * Type 2, with EtherType 0x8100; the VID can be interpreted
5592
   * as per
5593
   *
5594
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_8021q.c#L15
5595
   *
5596
   * so giving its own LINKTYPE_/DLT_ value would allow a
5597
   * dissector to do so.
5598
   */
5599
  { "ocelot-8021q", DLT_EN10MB },
5600
5601
  /*
5602
   * Type 2, without an EtherType at the beginning,
5603
   * should be assigned a LINKTYPE_/DLT_ value.
5604
   *
5605
   * See
5606
   *
5607
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_qca.c
5608
   */
5609
  { "qca", DLT_EN10MB },
5610
5611
  /*
5612
   * Type 2, with EtherType 0x8899, assigned to Realtek;
5613
   * they use it for several on-the-Ethernet protocols
5614
   * as well, but there are fields that allow the two
5615
   * tag formats, and all the protocols in question,
5616
   * to be distinguiished from one another.
5617
   *
5618
   * This does not require a LINKTYPE_/DLT_ value, it
5619
   * just requires that EtherType 0x8899 be dissected
5620
   * properly.
5621
   *
5622
   * See
5623
   *
5624
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_rtl4_a.c
5625
   *
5626
   *    http://realtek.info/pdf/rtl8306sd%28m%29_datasheet_1.1.pdf
5627
   *
5628
   * and various pages in tcpdump's print-realtek.c and Wireshark's
5629
   * epan/dissectors/packet-realtek.c for the other protocols.
5630
   */
5631
  { "rtl4a", DLT_EN10MB },
5632
5633
  /*
5634
   * Type 2, with EtherType 0x8899, assigned to Realtek;
5635
   * see above.
5636
   */
5637
  { "rtl8_4", DLT_EN10MB },
5638
5639
  /*
5640
   * Type 3, with the same tag format as rtl8_4.
5641
   */
5642
  { "rtl8_4t", DLT_EN10MB },
5643
5644
  /*
5645
   * Type 2, with EtherType 0xe001; that's probably
5646
   * self-assigned, so this really should have its
5647
   * own LINKTYPE_/DLT_ value.
5648
   *
5649
   * See
5650
   *
5651
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_rzn1_a5psw.c
5652
   */
5653
  { "a5psw", DLT_EN10MB },
5654
5655
  /*
5656
   * Type 2, with EtherType 0x8100 or the self-assigned
5657
   * 0xdadb, so this really should have its own
5658
   * LINKTYPE_/DLT_ value; that would also allow the
5659
   * VID of the tag to be dissected as per
5660
   *
5661
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_8021q.c#L15
5662
   */
5663
  { "sja1105", DLT_EN10MB },
5664
5665
  /*
5666
   * Type "none of the above", with both a header and trailer,
5667
   * with different transmit and receive tags.  Has
5668
   * EtherType 0xdadc, which is probably self-assigned.
5669
   * This should really have its own LINKTYPE_/DLT_ value.
5670
   */
5671
  { "sja1110", DLT_EN10MB },
5672
5673
  /*
5674
   * Type 3, as the name suggests.
5675
   *
5676
   * See
5677
   *
5678
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_trailer.c
5679
   */
5680
  { "trailer", DLT_EN10MB },
5681
5682
  /*
5683
   * Type 2, with EtherType 0x8100; the VID can be interpreted
5684
   * as per
5685
   *
5686
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_8021q.c#L15
5687
   *
5688
   * so giving its own LINKTYPE_/DLT_ value would allow a
5689
   * dissector to do so.
5690
   */
5691
  { "vsc73xx-8021q", DLT_EN10MB },
5692
5693
  /*
5694
   * Type 3.
5695
   *
5696
   * See
5697
   *
5698
   *    https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_xrs700x.c
5699
   */
5700
  { "xrs700x", DLT_EN10MB },
5701
};
5702
5703
static int
5704
iface_dsa_get_proto_info(const char *device, pcap_t *handle)
5705
0
{
5706
0
  char *pathstr;
5707
0
  unsigned int i;
5708
  /*
5709
   * Make this significantly smaller than PCAP_ERRBUF_SIZE;
5710
   * the tag *shouldn't* have some huge long name, and making
5711
   * it smaller keeps newer versions of GCC from whining that
5712
   * the error message if we don't support the tag could
5713
   * overflow the error message buffer.
5714
   */
5715
0
  char buf[128];
5716
0
  ssize_t r;
5717
0
  int fd;
5718
5719
0
  fd = asprintf(&pathstr, "/sys/class/net/%s/dsa/tagging", device);
5720
0
  if (fd < 0) {
5721
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5722
0
            fd, "asprintf");
5723
0
    return PCAP_ERROR;
5724
0
  }
5725
5726
0
  fd = open(pathstr, O_RDONLY);
5727
0
  free(pathstr);
5728
  /*
5729
   * This is not fatal, kernel >= 4.20 *might* expose this attribute
5730
   */
5731
0
  if (fd < 0)
5732
0
    return 0;
5733
5734
0
  r = read(fd, buf, sizeof(buf) - 1);
5735
0
  if (r <= 0) {
5736
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5737
0
            errno, "read");
5738
0
    close(fd);
5739
0
    return PCAP_ERROR;
5740
0
  }
5741
0
  close(fd);
5742
5743
  /*
5744
   * Buffer should be LF terminated.
5745
   */
5746
0
  if (buf[r - 1] == '\n')
5747
0
    r--;
5748
0
  buf[r] = '\0';
5749
5750
0
  for (i = 0; i < sizeof(dsa_protos) / sizeof(dsa_protos[0]); i++) {
5751
0
    if (strlen(dsa_protos[i].name) == (size_t)r &&
5752
0
        strcmp(buf, dsa_protos[i].name) == 0) {
5753
0
      handle->linktype = dsa_protos[i].linktype;
5754
0
      switch (dsa_protos[i].linktype) {
5755
0
      case DLT_EN10MB:
5756
0
        return 0;
5757
0
      default:
5758
0
        return 1;
5759
0
      }
5760
0
    }
5761
0
  }
5762
5763
0
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
5764
0
          "unsupported DSA tag: %s", buf);
5765
5766
0
  return PCAP_ERROR;
5767
0
}
5768
5769
/*
5770
 *  Query the kernel for the MTU of the given interface.
5771
 */
5772
static int
5773
iface_get_mtu(int fd, const char *device, char *ebuf)
5774
0
{
5775
0
  struct ifreq  ifr;
5776
5777
0
  if (!device)
5778
0
    return BIGGER_THAN_ALL_MTUS;
5779
5780
0
  memset(&ifr, 0, sizeof(ifr));
5781
0
  pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
5782
5783
0
  if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) {
5784
0
    pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5785
0
        errno, "SIOCGIFMTU");
5786
0
    return -1;
5787
0
  }
5788
5789
0
  return ifr.ifr_mtu;
5790
0
}
5791
5792
/*
5793
 *  Get the hardware type of the given interface as ARPHRD_xxx constant.
5794
 */
5795
static int
5796
iface_get_arptype(int fd, const char *device, char *ebuf)
5797
0
{
5798
0
  struct ifreq  ifr;
5799
0
  int   ret;
5800
5801
0
  memset(&ifr, 0, sizeof(ifr));
5802
0
  pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
5803
5804
0
  if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
5805
0
    if (errno == ENODEV) {
5806
      /*
5807
       * No such device.
5808
       *
5809
       * There's nothing more to say, so clear
5810
       * the error message.
5811
       */
5812
0
      ret = PCAP_ERROR_NO_SUCH_DEVICE;
5813
0
      ebuf[0] = '\0';
5814
0
    } else {
5815
0
      ret = PCAP_ERROR;
5816
0
      pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5817
0
          errno, "SIOCGIFHWADDR");
5818
0
    }
5819
0
    return ret;
5820
0
  }
5821
5822
0
  return ifr.ifr_hwaddr.sa_family;
5823
0
}
5824
5825
/*
5826
 * In a DLT_CAN_SOCKETCAN frame the first four bytes are a 32-bit integer
5827
 * value in host byte order if the filter program is running in the kernel and
5828
 * in network byte order if in userland.  This applies to all of CC, FD and XL
5829
 * frames, see pcap_handle_packet_mmap() for the rationale.  Return 1 iff the
5830
 * [possibly modified] filter program can work correctly in the kernel.
5831
 */
5832
#if __BYTE_ORDER == __LITTLE_ENDIAN
5833
static int
5834
fix_dlt_can_socketcan(const u_int len, struct bpf_insn insn[])
5835
0
{
5836
0
  for (u_int i = 0; i < len; ++i) {
5837
0
    switch (insn[i].code) {
5838
0
    case BPF_LD|BPF_B|BPF_ABS: // ldb [k]
5839
0
    case BPF_LDX|BPF_MSH|BPF_B: // ldxb 4*([k]&0xf)
5840
0
      if (insn[i].k < 4)
5841
0
        insn[i].k = 3 - insn[i].k; // Fixed now.
5842
0
      break;
5843
0
    case BPF_LD|BPF_H|BPF_ABS: // ldh [k]
5844
0
    case BPF_LD|BPF_W|BPF_ABS: // ld [k]
5845
      /*
5846
       * A halfword or a word load cannot be fixed by just
5847
       * changing k, even if every required byte is within
5848
       * the byte-swapped part of the frame, even if the
5849
       * load is aligned.  The fix would require either
5850
       * rewriting the filter program extensively or
5851
       * generating it differently in the first place.
5852
       */
5853
0
    case BPF_LD|BPF_B|BPF_IND: // ldb [x + k]
5854
0
    case BPF_LD|BPF_H|BPF_IND: // ldh [x + k]
5855
0
    case BPF_LD|BPF_W|BPF_IND: // ld [x + k]
5856
      /*
5857
       * In addition to the above, a variable offset load
5858
       * cannot be fixed because x can have any value, thus
5859
       * x + k can have any value, but only the first four
5860
       * bytes are swapped.  An easy way to demonstrate it
5861
       * is to compile "link[link[4]] == 0", which will use
5862
       * "ldb [x + 0]" to access one of the first four bytes
5863
       * of the frame iff CAN CC/FD payload length is less
5864
       * than 4.
5865
       */
5866
0
      if (insn[i].k < 4)
5867
0
        return 0; // Userland filtering only.
5868
0
      break;
5869
0
    }
5870
0
  }
5871
0
  return 1;
5872
0
}
5873
#else
5874
static int
5875
fix_dlt_can_socketcan(const u_int len _U_, struct bpf_insn insn[] _U_)
5876
{
5877
  return 1;
5878
}
5879
#endif // __BYTE_ORDER == __LITTLE_ENDIAN
5880
5881
static int
5882
fix_program(pcap_t *handle, struct sock_fprog *fcode)
5883
0
{
5884
0
  struct pcap_linux *handlep = handle->priv;
5885
0
  size_t prog_size;
5886
0
  register int i;
5887
0
  register struct bpf_insn *p;
5888
0
  struct bpf_insn *f;
5889
0
  int len;
5890
5891
  /*
5892
   * Make a copy of the filter, and modify that copy if
5893
   * necessary.
5894
   */
5895
0
  prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len;
5896
0
  len = handle->fcode.bf_len;
5897
0
  f = (struct bpf_insn *)malloc(prog_size);
5898
0
  if (f == NULL) {
5899
0
    pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5900
0
        errno, "malloc");
5901
0
    return -1;
5902
0
  }
5903
0
  memcpy(f, handle->fcode.bf_insns, prog_size);
5904
0
  fcode->len = len;
5905
0
  fcode->filter = (struct sock_filter *) f;
5906
5907
0
  switch (handle->linktype) {
5908
0
  case DLT_CAN_SOCKETCAN:
5909
    /*
5910
     * If a similar fix needs to be done for CAN frames that
5911
     * appear on the "any" pseudo-interface, it needs to be done
5912
     * differently because that would be within DLT_LINUX_SLL or
5913
     * DLT_LINUX_SLL2.
5914
     */
5915
0
    return fix_dlt_can_socketcan(len, f);
5916
0
  }
5917
5918
0
  for (i = 0; i < len; ++i) {
5919
0
    p = &f[i];
5920
    /*
5921
     * What type of instruction is this?
5922
     */
5923
0
    switch (BPF_CLASS(p->code)) {
5924
5925
0
    case BPF_LD:
5926
0
    case BPF_LDX:
5927
      /*
5928
       * It's a load instruction; is it loading
5929
       * from the packet?
5930
       */
5931
0
      switch (BPF_MODE(p->code)) {
5932
5933
0
      case BPF_ABS:
5934
0
      case BPF_IND:
5935
0
      case BPF_MSH:
5936
        /*
5937
         * Yes; are we in cooked mode?
5938
         */
5939
0
        if (handlep->cooked) {
5940
          /*
5941
           * Yes, so we need to fix this
5942
           * instruction.
5943
           */
5944
0
          if (fix_offset(handle, p) < 0) {
5945
            /*
5946
             * We failed to do so.
5947
             * Return 0, so our caller
5948
             * knows to punt to userland.
5949
             */
5950
0
            return 0;
5951
0
          }
5952
0
        }
5953
0
        break;
5954
0
      }
5955
0
      break;
5956
0
    }
5957
0
  }
5958
0
  return 1; /* we succeeded */
5959
0
}
5960
5961
static int
5962
fix_offset(pcap_t *handle, struct bpf_insn *p)
5963
0
{
5964
  /*
5965
   * Existing references to auxiliary data shouldn't be adjusted.
5966
   *
5967
   * Note that SKF_AD_OFF is negative, but p->k is unsigned, so
5968
   * we use >= and cast SKF_AD_OFF to unsigned.
5969
   */
5970
0
  if (p->k >= (bpf_u_int32)SKF_AD_OFF)
5971
0
    return 0;
5972
0
  if (handle->linktype == DLT_LINUX_SLL2) {
5973
    /*
5974
     * What's the offset?
5975
     */
5976
0
    if (p->k >= SLL2_HDR_LEN) {
5977
      /*
5978
       * It's within the link-layer payload; that starts
5979
       * at an offset of 0, as far as the kernel packet
5980
       * filter is concerned, so subtract the length of
5981
       * the link-layer header.
5982
       */
5983
0
      p->k -= SLL2_HDR_LEN;
5984
0
    } else if (p->k == 0) {
5985
      /*
5986
       * It's the protocol field; map it to the
5987
       * special magic kernel offset for that field.
5988
       */
5989
0
      p->k = SKF_AD_OFF + SKF_AD_PROTOCOL;
5990
0
    } else if (p->k == 4) {
5991
      /*
5992
       * It's the ifindex field; map it to the
5993
       * special magic kernel offset for that field.
5994
       */
5995
0
      p->k = SKF_AD_OFF + SKF_AD_IFINDEX;
5996
0
    } else if (p->k == 10) {
5997
      /*
5998
       * It's the packet type field; map it to the
5999
       * special magic kernel offset for that field.
6000
       */
6001
0
      p->k = SKF_AD_OFF + SKF_AD_PKTTYPE;
6002
0
    } else if ((bpf_int32)(p->k) > 0) {
6003
      /*
6004
       * It's within the header, but it's not one of
6005
       * those fields; we can't do that in the kernel,
6006
       * so punt to userland.
6007
       */
6008
0
      return -1;
6009
0
    }
6010
0
  } else {
6011
    /*
6012
     * What's the offset?
6013
     */
6014
0
    if (p->k >= SLL_HDR_LEN) {
6015
      /*
6016
       * It's within the link-layer payload; that starts
6017
       * at an offset of 0, as far as the kernel packet
6018
       * filter is concerned, so subtract the length of
6019
       * the link-layer header.
6020
       */
6021
0
      p->k -= SLL_HDR_LEN;
6022
0
    } else if (p->k == 0) {
6023
      /*
6024
       * It's the packet type field; map it to the
6025
       * special magic kernel offset for that field.
6026
       */
6027
0
      p->k = SKF_AD_OFF + SKF_AD_PKTTYPE;
6028
0
    } else if (p->k == 14) {
6029
      /*
6030
       * It's the protocol field; map it to the
6031
       * special magic kernel offset for that field.
6032
       */
6033
0
      p->k = SKF_AD_OFF + SKF_AD_PROTOCOL;
6034
0
    } else if ((bpf_int32)(p->k) > 0) {
6035
      /*
6036
       * It's within the header, but it's not one of
6037
       * those fields; we can't do that in the kernel,
6038
       * so punt to userland.
6039
       */
6040
0
      return -1;
6041
0
    }
6042
0
  }
6043
0
  return 0;
6044
0
}
6045
6046
static int
6047
set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode)
6048
0
{
6049
0
  int total_filter_on = 0;
6050
0
  int save_mode;
6051
0
  int ret;
6052
0
  int save_errno;
6053
6054
  /*
6055
   * The socket filter code doesn't discard all packets queued
6056
   * up on the socket when the filter is changed; this means
6057
   * that packets that don't match the new filter may show up
6058
   * after the new filter is put onto the socket, if those
6059
   * packets haven't yet been read.
6060
   *
6061
   * This means, for example, that if you do a tcpdump capture
6062
   * with a filter, the first few packets in the capture might
6063
   * be packets that wouldn't have passed the filter.
6064
   *
6065
   * We therefore discard all packets queued up on the socket
6066
   * when setting a kernel filter.  (This isn't an issue for
6067
   * userland filters, as the userland filtering is done after
6068
   * packets are queued up.)
6069
   *
6070
   * To flush those packets, we put the socket in read-only mode,
6071
   * and read packets from the socket until there are no more to
6072
   * read.
6073
   *
6074
   * In order to keep that from being an infinite loop - i.e.,
6075
   * to keep more packets from arriving while we're draining
6076
   * the queue - we put the "total filter", which is a filter
6077
   * that rejects all packets, onto the socket before draining
6078
   * the queue.
6079
   *
6080
   * This code deliberately ignores any errors, so that you may
6081
   * get bogus packets if an error occurs, rather than having
6082
   * the filtering done in userland even if it could have been
6083
   * done in the kernel.
6084
   */
6085
0
  if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
6086
0
           &total_fcode, sizeof(total_fcode)) == 0) {
6087
0
    char drain[1];
6088
6089
    /*
6090
     * Note that we've put the total filter onto the socket.
6091
     */
6092
0
    total_filter_on = 1;
6093
6094
    /*
6095
     * Save the socket's current mode, and put it in
6096
     * non-blocking mode; we drain it by reading packets
6097
     * until we get an error (which is normally a
6098
     * "nothing more to be read" error).
6099
     */
6100
0
    save_mode = fcntl(handle->fd, F_GETFL, 0);
6101
0
    if (save_mode == -1) {
6102
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
6103
0
          PCAP_ERRBUF_SIZE, errno,
6104
0
          "can't get FD flags when changing filter");
6105
0
      return -2;
6106
0
    }
6107
0
    if (fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) < 0) {
6108
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
6109
0
          PCAP_ERRBUF_SIZE, errno,
6110
0
          "can't set nonblocking mode when changing filter");
6111
0
      return -2;
6112
0
    }
6113
0
    while (recv(handle->fd, &drain, sizeof drain, MSG_TRUNC) >= 0)
6114
0
      ;
6115
0
    save_errno = errno;
6116
0
    if (save_errno != EAGAIN) {
6117
      /*
6118
       * Fatal error.
6119
       *
6120
       * If we can't restore the mode or reset the
6121
       * kernel filter, there's nothing we can do.
6122
       */
6123
0
      (void)fcntl(handle->fd, F_SETFL, save_mode);
6124
0
      (void)reset_kernel_filter(handle);
6125
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
6126
0
          PCAP_ERRBUF_SIZE, save_errno,
6127
0
          "recv failed when changing filter");
6128
0
      return -2;
6129
0
    }
6130
0
    if (fcntl(handle->fd, F_SETFL, save_mode) == -1) {
6131
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
6132
0
          PCAP_ERRBUF_SIZE, errno,
6133
0
          "can't restore FD flags when changing filter");
6134
0
      return -2;
6135
0
    }
6136
0
  }
6137
6138
  /*
6139
   * Now attach the new filter.
6140
   */
6141
0
  ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
6142
0
       fcode, sizeof(*fcode));
6143
0
  if (ret == -1 && total_filter_on) {
6144
    /*
6145
     * Well, we couldn't set that filter on the socket,
6146
     * but we could set the total filter on the socket.
6147
     *
6148
     * This could, for example, mean that the filter was
6149
     * too big to put into the kernel, so we'll have to
6150
     * filter in userland; in any case, we'll be doing
6151
     * filtering in userland, so we need to remove the
6152
     * total filter so we see packets.
6153
     */
6154
0
    save_errno = errno;
6155
6156
    /*
6157
     * If this fails, we're really screwed; we have the
6158
     * total filter on the socket, and it won't come off.
6159
     * Report it as a fatal error.
6160
     */
6161
0
    if (reset_kernel_filter(handle) == -1) {
6162
0
      pcapint_fmt_errmsg_for_errno(handle->errbuf,
6163
0
          PCAP_ERRBUF_SIZE, errno,
6164
0
          "can't remove kernel total filter");
6165
0
      return -2;  /* fatal error */
6166
0
    }
6167
6168
0
    errno = save_errno;
6169
0
  }
6170
0
  return ret;
6171
0
}
6172
6173
static int
6174
reset_kernel_filter(pcap_t *handle)
6175
0
{
6176
0
  int ret;
6177
  /*
6178
   * setsockopt() barfs unless it get a dummy parameter.
6179
   * valgrind whines unless the value is initialized,
6180
   * as it has no idea that setsockopt() ignores its
6181
   * parameter.
6182
   */
6183
0
  int dummy = 0;
6184
6185
0
  ret = setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER,
6186
0
           &dummy, sizeof(dummy));
6187
  /*
6188
   * Ignore ENOENT - it means "we don't have a filter", so there
6189
   * was no filter to remove, and there's still no filter.
6190
   *
6191
   * Also ignore ENONET, as a lot of kernel versions had a
6192
   * typo where ENONET, rather than ENOENT, was returned.
6193
   */
6194
0
  if (ret == -1 && errno != ENOENT && errno != ENONET)
6195
0
    return -1;
6196
0
  return 0;
6197
0
}
6198
6199
int
6200
pcap_set_protocol_linux(pcap_t *p, int protocol)
6201
0
{
6202
0
  if (pcapint_check_activated(p))
6203
0
    return (PCAP_ERROR_ACTIVATED);
6204
0
  p->opt.protocol = protocol;
6205
0
  return (0);
6206
0
}
6207
6208
/*
6209
 * Libpcap version string.
6210
 */
6211
const char *
6212
pcap_lib_version(void)
6213
0
{
6214
0
  return (PCAP_VERSION_STRING
6215
#if defined(HAVE_TPACKET3) && defined(PCAP_SUPPORT_NETMAP)
6216
    " (with TPACKET_V3 and netmap)"
6217
#elif defined(HAVE_TPACKET3)
6218
    " (with TPACKET_V3)"
6219
#elif defined(PCAP_SUPPORT_NETMAP)
6220
    " (with TPACKET_V2 and netmap)"
6221
#else
6222
    " (with TPACKET_V2)"
6223
#endif
6224
0
  );
6225
0
}