Coverage Report

Created: 2024-05-15 07:09

/src/libpcap/pcap-linux.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  pcap-linux.c: Packet capture interface to the Linux kernel
3
 *
4
 *  Copyright (c) 2000 Torsten Landschoff <torsten@debian.org>
5
 *             Sebastian Krahmer  <krahmer@cs.uni-potsdam.de>
6
 *
7
 *  License: BSD
8
 *
9
 *  Redistribution and use in source and binary forms, with or without
10
 *  modification, are permitted provided that the following conditions
11
 *  are met:
12
 *
13
 *  1. Redistributions of source code must retain the above copyright
14
 *     notice, this list of conditions and the following disclaimer.
15
 *  2. Redistributions in binary form must reproduce the above copyright
16
 *     notice, this list of conditions and the following disclaimer in
17
 *     the documentation and/or other materials provided with the
18
 *     distribution.
19
 *  3. The names of the authors may not be used to endorse or promote
20
 *     products derived from this software without specific prior
21
 *     written permission.
22
 *
23
 *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24
 *  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25
 *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26
 *
27
 *  Modifications:     Added PACKET_MMAP support
28
 *                     Paolo Abeni <paolo.abeni@email.it>
29
 *                     Added TPACKET_V3 support
30
 *                     Gabor Tatarka <gabor.tatarka@ericsson.com>
31
 *
32
 *                     based on previous works of:
33
 *                     Simon Patarin <patarin@cs.unibo.it>
34
 *                     Phil Wood <cpw@lanl.gov>
35
 *
36
 * Monitor-mode support for mac80211 includes code taken from the iw
37
 * command; the copyright notice for that code is
38
 *
39
 * Copyright (c) 2007, 2008 Johannes Berg
40
 * Copyright (c) 2007   Andy Lutomirski
41
 * Copyright (c) 2007   Mike Kershaw
42
 * Copyright (c) 2008   Gábor Stefanik
43
 *
44
 * All rights reserved.
45
 *
46
 * Redistribution and use in source and binary forms, with or without
47
 * modification, are permitted provided that the following conditions
48
 * are met:
49
 * 1. Redistributions of source code must retain the above copyright
50
 *    notice, this list of conditions and the following disclaimer.
51
 * 2. Redistributions in binary form must reproduce the above copyright
52
 *    notice, this list of conditions and the following disclaimer in the
53
 *    documentation and/or other materials provided with the distribution.
54
 * 3. The name of the author may not be used to endorse or promote products
55
 *    derived from this software without specific prior written permission.
56
 *
57
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
58
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
59
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
60
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
61
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
62
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
63
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
64
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
65
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67
 * SUCH DAMAGE.
68
 */
69
70
71
#define _GNU_SOURCE
72
73
#ifdef HAVE_CONFIG_H
74
#include <config.h>
75
#endif
76
77
#include <errno.h>
78
#include <stdio.h>
79
#include <stdlib.h>
80
#include <unistd.h>
81
#include <fcntl.h>
82
#include <string.h>
83
#include <limits.h>
84
#include <sys/stat.h>
85
#include <sys/socket.h>
86
#include <sys/ioctl.h>
87
#include <sys/utsname.h>
88
#include <sys/mman.h>
89
#include <linux/if.h>
90
#include <linux/if_packet.h>
91
#include <linux/sockios.h>
92
#include <linux/ethtool.h>
93
#include <netinet/in.h>
94
#include <linux/if_ether.h>
95
#include <linux/if_arp.h>
96
#include <poll.h>
97
#include <dirent.h>
98
#include <sys/eventfd.h>
99
100
#include "pcap-int.h"
101
#include "pcap/sll.h"
102
#include "pcap/vlan.h"
103
#include "pcap/can_socketcan.h"
104
105
#include "diag-control.h"
106
107
/*
108
 * We require TPACKET_V2 support.
109
 */
110
#ifndef TPACKET2_HDRLEN
111
#error "Libpcap will only work if TPACKET_V2 is supported; you must build for a 2.6.27 or later kernel"
112
#endif
113
114
/* check for memory mapped access availability. We assume every needed
115
 * struct is defined if the macro TPACKET3_HDRLEN is defined, because it
116
 * uses many ring related structs and macros */
117
#ifdef TPACKET3_HDRLEN
118
# define HAVE_TPACKET3
119
#endif /* TPACKET3_HDRLEN */
120
121
/*
122
 * Not all compilers that are used to compile code to run on Linux have
123
 * these builtins.  For example, older versions of GCC don't, and at
124
 * least some people are doing cross-builds for MIPS with older versions
125
 * of GCC.
126
 */
127
/*
 * Fallbacks for compilers without the __atomic builtins: plain,
 * non-atomic accesses that ignore the memory-model argument.
 *
 * Each expansion is fully parenthesized so that it behaves as a single
 * expression no matter which operators surround the macro call.
 */
#ifndef HAVE___ATOMIC_LOAD_N
#define __atomic_load_n(ptr, memory_model)    (*(ptr))
#endif
#ifndef HAVE___ATOMIC_STORE_N
#define __atomic_store_n(ptr, val, memory_model)  (*(ptr) = (val))
#endif
133
134
/*
 * Status-word helpers for ring frames (tp_status) and TPACKET_V3
 * blocks (hdr.bh1.block_status).
 *
 * The *_acquire() forms do an acquire load of the status word and are
 * true when it holds anything other than TP_STATUS_KERNEL; the
 * *_release() forms do a release store of TP_STATUS_KERNEL.
 *
 * The pkt argument is parenthesized so that any pointer expression
 * (e.g. "base + i") can be passed, not just a plain identifier.
 */
#define packet_mmap_acquire(pkt) \
  (__atomic_load_n(&(pkt)->tp_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL)
#define packet_mmap_release(pkt) \
  (__atomic_store_n(&(pkt)->tp_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE))
#define packet_mmap_v3_acquire(pkt) \
  (__atomic_load_n(&(pkt)->hdr.bh1.block_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL)
#define packet_mmap_v3_release(pkt) \
  (__atomic_store_n(&(pkt)->hdr.bh1.block_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE))
142
143
#include <linux/types.h>
144
#include <linux/filter.h>
145
146
#ifdef HAVE_LINUX_NET_TSTAMP_H
147
#include <linux/net_tstamp.h>
148
#endif
149
150
/*
151
 * For checking whether a device is a bonding device.
152
 */
153
#include <linux/if_bonding.h>
154
155
/*
156
 * Got libnl?
157
 */
158
#ifdef HAVE_LIBNL
159
#include <linux/nl80211.h>
160
161
#include <netlink/genl/genl.h>
162
#include <netlink/genl/family.h>
163
#include <netlink/genl/ctrl.h>
164
#include <netlink/msg.h>
165
#include <netlink/attr.h>
166
#endif /* HAVE_LIBNL */
167
168
#ifndef HAVE_SOCKLEN_T
169
typedef int   socklen_t;
170
#endif
171
172
0
#define MAX_LINKHEADER_SIZE 256
173
174
/*
175
 * When capturing on all interfaces we use this as the buffer size.
176
 * Should be bigger than all MTUs that occur in real life.
177
 * 64kB should be enough for now.
178
 */
179
0
#define BIGGER_THAN_ALL_MTUS  (64*1024)
180
181
/*
182
 * Private data for capturing on Linux PF_PACKET sockets.
183
 */
184
struct pcap_linux {
185
  long long sysfs_dropped; /* packets reported dropped by /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors */
186
  struct pcap_stat stat;
187
188
  char  *device;  /* device name */
189
  int filter_in_userland; /* must filter in userland */
190
  int blocks_to_filter_in_userland;
191
  int must_do_on_close; /* stuff we must do when we close */
192
  int timeout;  /* timeout for buffering */
193
  int cooked;   /* using SOCK_DGRAM rather than SOCK_RAW */
194
  int ifindex;  /* interface index of device we're bound to */
195
  int lo_ifindex; /* interface index of the loopback device */
196
  int netdown;  /* we got an ENETDOWN and haven't resolved it */
197
  bpf_u_int32 oldmode;  /* mode to restore when turning monitor mode off */
198
  char  *mondevice; /* mac80211 monitor device we created */
199
  u_char  *mmapbuf; /* memory-mapped region pointer */
200
  size_t  mmapbuflen; /* size of region */
201
  int vlan_offset;  /* offset at which to insert vlan tags; if -1, don't insert */
202
  u_int tp_version; /* version of tpacket_hdr for mmaped ring */
203
  u_int tp_hdrlen;  /* hdrlen of tpacket_hdr for mmaped ring */
204
  u_char  *oneshot_buffer; /* buffer for copy of packet */
205
  int poll_timeout; /* timeout to use in poll() */
206
#ifdef HAVE_TPACKET3
207
  unsigned char *current_packet; /* Current packet within the TPACKET_V3 block. Move to next block if NULL. */
208
  int packets_left; /* Unhandled packets left within the block from previous call to pcap_read_linux_mmap_v3 in case of TPACKET_V3. */
209
#endif
210
  int poll_breakloop_fd; /* fd to an eventfd to break from blocking operations */
211
};
212
213
/*
214
 * Stuff to do when we close.
215
 */
216
#define MUST_CLEAR_RFMON  0x00000001  /* clear rfmon (monitor) mode */
217
#define MUST_DELETE_MONIF 0x00000002  /* delete monitor-mode interface */
218
219
/*
220
 * Prototypes for internal functions and methods.
221
 */
222
static int get_if_flags(const char *, bpf_u_int32 *, char *);
223
static int is_wifi(const char *);
224
static void map_arphrd_to_dlt(pcap_t *, int, const char *, int);
225
static int pcap_activate_linux(pcap_t *);
226
static int setup_socket(pcap_t *, int);
227
static int setup_mmapped(pcap_t *, int *);
228
static int pcap_can_set_rfmon_linux(pcap_t *);
229
static int pcap_inject_linux(pcap_t *, const void *, int);
230
static int pcap_stats_linux(pcap_t *, struct pcap_stat *);
231
static int pcap_setfilter_linux(pcap_t *, struct bpf_program *);
232
static int pcap_setdirection_linux(pcap_t *, pcap_direction_t);
233
static int pcap_set_datalink_linux(pcap_t *, int);
234
static void pcap_cleanup_linux(pcap_t *);
235
236
union thdr {
237
  struct tpacket2_hdr   *h2;
238
#ifdef HAVE_TPACKET3
239
  struct tpacket_block_desc *h3;
240
#endif
241
  u_char        *raw;
242
};
243
244
0
/*
 * Frame lookup in the ring: h->buffer is treated as an array of
 * u_char * frame pointers.  RING_GET_FRAME_AT() yields the entry at
 * the given index; RING_GET_CURRENT_FRAME() yields the entry at
 * h->offset.
 *
 * The handle argument is parenthesized so that any pointer expression
 * may be passed, not just a plain identifier.
 */
#define RING_GET_FRAME_AT(h, offset) (((u_char **)(h)->buffer)[(offset)])
#define RING_GET_CURRENT_FRAME(h) RING_GET_FRAME_AT(h, (h)->offset)
246
247
static void destroy_ring(pcap_t *handle);
248
static int create_ring(pcap_t *handle, int *status);
249
static int prepare_tpacket_socket(pcap_t *handle);
250
static int pcap_read_linux_mmap_v2(pcap_t *, int, pcap_handler , u_char *);
251
#ifdef HAVE_TPACKET3
252
static int pcap_read_linux_mmap_v3(pcap_t *, int, pcap_handler , u_char *);
253
#endif
254
static int pcap_setnonblock_linux(pcap_t *p, int nonblock);
255
static int pcap_getnonblock_linux(pcap_t *p);
256
static void pcap_oneshot_linux(u_char *user, const struct pcap_pkthdr *h,
257
    const u_char *bytes);
258
259
/*
260
 * In pre-3.0 kernels, the tp_vlan_tci field is set to whatever the
261
 * vlan_tci field in the skbuff is.  0 can either mean "not on a VLAN"
262
 * or "on VLAN 0".  There is no flag set in the tp_status field to
263
 * distinguish between them.
264
 *
265
 * In 3.0 and later kernels, if there's a VLAN tag present, the tp_vlan_tci
266
 * field is set to the VLAN tag, and the TP_STATUS_VLAN_VALID flag is set
267
 * in the tp_status field, otherwise the tp_vlan_tci field is set to 0 and
268
 * the TP_STATUS_VLAN_VALID flag isn't set in the tp_status field.
269
 *
270
 * With a pre-3.0 kernel, we cannot distinguish between packets with no
271
 * VLAN tag and packets on VLAN 0, so we will mishandle some packets, and
272
 * there's nothing we can do about that.
273
 *
274
 * So, on those systems, which never set the TP_STATUS_VLAN_VALID flag, we
275
 * continue the behavior of earlier libpcaps, wherein we treated packets
276
 * with a VLAN tag of 0 as being packets without a VLAN tag rather than packets
277
 * on VLAN 0.  We do this by treating packets with a tp_vlan_tci of 0 and
278
 * with the TP_STATUS_VLAN_VALID flag not set in tp_status as not having
279
 * VLAN tags.  This does the right thing on 3.0 and later kernels, and
280
 * continues the old unfixably-imperfect behavior on pre-3.0 kernels.
281
 *
282
 * If TP_STATUS_VLAN_VALID isn't defined, we test it as the 0x10 bit; it
283
 * has that value in 3.0 and later kernels.
284
 */
285
/*
 * Pick the tp_status bit that says "the VLAN TCI is valid".
 *
 * If this is being compiled on a system that lacks
 * TP_STATUS_VLAN_VALID, we test with the value it has in the 3.0 and
 * later kernels (0x10), so we can test it if we're running on a system
 * that has it.  (If we're running on a system that doesn't have it, it
 * won't be set in the tp_status field, so the tests of it will always
 * fail; that means we behave the way we did before we introduced this
 * macro.)
 */
#ifdef TP_STATUS_VLAN_VALID
  #define VLAN_VALID_STATUS_BIT TP_STATUS_VLAN_VALID
#else
  #define VLAN_VALID_STATUS_BIT 0x10
#endif

/*
 * True if the frame carries a VLAN tag: either the TCI is non-zero or
 * the "VLAN valid" status bit is set.
 */
#define VLAN_VALID(hdr, hv) \
  ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & VLAN_VALID_STATUS_BIT))
298
299
/*
 * TPID to use for a reconstructed VLAN tag: the header's tp_vlan_tpid
 * when it is non-zero or flagged valid in tp_status, ETH_P_8021Q
 * otherwise.  Without TP_STATUS_VLAN_TPID_VALID at build time the
 * answer is always ETH_P_8021Q.
 */
#ifdef TP_STATUS_VLAN_TPID_VALID
# define VLAN_TPID(hdr, hv) \
  (((hv)->tp_vlan_tpid || ((hdr)->tp_status & TP_STATUS_VLAN_TPID_VALID)) \
    ? (hv)->tp_vlan_tpid \
    : ETH_P_8021Q)
#else
# define VLAN_TPID(hdr, hv) ETH_P_8021Q
#endif
304
305
/*
 * Select timeout required while we're polling for an "interface
 * disappeared" indication: 1000 microseconds, i.e. 1 millisecond.
 */
static const struct timeval netdown_timeout = {
  .tv_sec = 0,
  .tv_usec = 1000
};
312
313
/*
314
 * Wrap some ioctl calls
315
 */
316
static int  iface_get_id(int fd, const char *device, char *ebuf);
317
static int  iface_get_mtu(int fd, const char *device, char *ebuf);
318
static int  iface_get_arptype(int fd, const char *device, char *ebuf);
319
static int  iface_bind(int fd, int ifindex, char *ebuf, int protocol);
320
static int  enter_rfmon_mode(pcap_t *handle, int sock_fd,
321
    const char *device);
322
static int  iface_get_ts_types(const char *device, pcap_t *handle,
323
    char *ebuf);
324
static int  iface_get_offload(pcap_t *handle);
325
326
static int  fix_program(pcap_t *handle, struct sock_fprog *fcode);
327
static int  fix_offset(pcap_t *handle, struct bpf_insn *p);
328
static int  set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode);
329
static int  reset_kernel_filter(pcap_t *handle);
330
331
/*
 * A one-instruction socket filter program: a single
 * "BPF_RET | BPF_K" statement with constant 0.
 */
static struct sock_filter total_insn = BPF_STMT(BPF_RET | BPF_K, 0);
static struct sock_fprog  total_fcode = {
  .len = 1,
  .filter = &total_insn
};
335
336
static int  iface_dsa_get_proto_info(const char *device, pcap_t *handle);
337
338
pcap_t *
339
pcap_create_interface(const char *device, char *ebuf)
340
0
{
341
0
  pcap_t *handle;
342
343
0
  handle = PCAP_CREATE_COMMON(ebuf, struct pcap_linux);
344
0
  if (handle == NULL)
345
0
    return NULL;
346
347
0
  handle->activate_op = pcap_activate_linux;
348
0
  handle->can_set_rfmon_op = pcap_can_set_rfmon_linux;
349
350
  /*
351
   * See what time stamp types we support.
352
   */
353
0
  if (iface_get_ts_types(device, handle, ebuf) == -1) {
354
0
    pcap_close(handle);
355
0
    return NULL;
356
0
  }
357
358
  /*
359
   * We claim that we support microsecond and nanosecond time
360
   * stamps.
361
   *
362
   * XXX - with adapter-supplied time stamps, can we choose
363
   * microsecond or nanosecond time stamps on arbitrary
364
   * adapters?
365
   */
366
0
  handle->tstamp_precision_list = malloc(2 * sizeof(u_int));
367
0
  if (handle->tstamp_precision_list == NULL) {
368
0
    pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
369
0
        errno, "malloc");
370
0
    pcap_close(handle);
371
0
    return NULL;
372
0
  }
373
0
  handle->tstamp_precision_list[0] = PCAP_TSTAMP_PRECISION_MICRO;
374
0
  handle->tstamp_precision_list[1] = PCAP_TSTAMP_PRECISION_NANO;
375
0
  handle->tstamp_precision_count = 2;
376
377
0
  struct pcap_linux *handlep = handle->priv;
378
0
  handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK);
379
380
0
  return handle;
381
0
}
382
383
#ifdef HAVE_LIBNL
384
/*
385
 * If interface {if_name} is a mac80211 driver, the file
386
 * /sys/class/net/{if_name}/phy80211 is a symlink to
387
 * /sys/class/ieee80211/{phydev_name}, for some {phydev_name}.
388
 *
389
 * On Fedora 9, with a 2.6.26.3-29 kernel, my Zydas stick, at
390
 * least, has a "wmaster0" device and a "wlan0" device; the
391
 * latter is the one with the IP address.  Both show up in
392
 * "tcpdump -D" output.  Capturing on the wmaster0 device
393
 * captures with 802.11 headers.
394
 *
395
 * airmon-ng searches through /sys/class/net for devices named
396
 * monN, starting with mon0; as soon as one *doesn't* exist,
397
 * it chooses that as the monitor device name.  If the "iw"
398
 * command exists, it does
399
 *
400
 *    iw dev {if_name} interface add {monif_name} type monitor
401
 *
402
 * where {monif_name} is the monitor device.  It then (sigh) sleeps
403
 * .1 second, and then configures the device up.  Otherwise, if
404
 * /sys/class/ieee80211/{phydev_name}/add_iface is a file, it writes
405
 * {mondev_name}, without a newline, to that file, and again (sigh)
406
 * sleeps .1 second, and then iwconfig's that device into monitor
407
 * mode and configures it up.  Otherwise, you can't do monitor mode.
408
 *
409
 * All these devices are "glued" together by having the
410
 * /sys/class/net/{if_name}/phy80211 links pointing to the same
411
 * place, so, given a wmaster, wlan, or mon device, you can
412
 * find the other devices by looking for devices with
413
 * the same phy80211 link.
414
 *
415
 * To turn monitor mode off, delete the monitor interface,
416
 * either with
417
 *
418
 *    iw dev {monif_name} interface del
419
 *
420
 * or by sending {monif_name}, with no NL, down
421
 * /sys/class/ieee80211/{phydev_name}/remove_iface
422
 *
423
 * Note: if you try to create a monitor device named "monN", and
424
 * there's already a "monN" device, it fails, at least with
425
 * the netlink interface (which is what iw uses), with a return
426
 * value of -ENFILE.  (Return values are negative errnos.)  We
427
 * could probably use that to find an unused device.
428
 *
429
 * Yes, you can have multiple monitor devices for a given
430
 * physical device.
431
 */
432
433
/*
434
 * Is this a mac80211 device?  If so, fill in the physical device path and
435
 * return 1; if not, return 0.  On an error, fill in handle->errbuf and
436
 * return PCAP_ERROR.
437
 */
438
static int
439
get_mac80211_phydev(pcap_t *handle, const char *device, char *phydev_path,
440
    size_t phydev_max_pathlen)
441
{
442
  char *pathstr;
443
  ssize_t bytes_read;
444
445
  /*
446
   * Generate the path string for the symlink to the physical device.
447
   */
448
  if (asprintf(&pathstr, "/sys/class/net/%s/phy80211", device) == -1) {
449
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
450
        "%s: Can't generate path name string for /sys/class/net device",
451
        device);
452
    return PCAP_ERROR;
453
  }
454
  bytes_read = readlink(pathstr, phydev_path, phydev_max_pathlen);
455
  if (bytes_read == -1) {
456
    if (errno == ENOENT || errno == EINVAL) {
457
      /*
458
       * Doesn't exist, or not a symlink; assume that
459
       * means it's not a mac80211 device.
460
       */
461
      free(pathstr);
462
      return 0;
463
    }
464
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
465
        errno, "%s: Can't readlink %s", device, pathstr);
466
    free(pathstr);
467
    return PCAP_ERROR;
468
  }
469
  free(pathstr);
470
  phydev_path[bytes_read] = '\0';
471
  return 1;
472
}
473
474
/*
 * The libnl objects set up by nl80211_init() and released by
 * nl80211_cleanup().
 */
struct nl80211_state {
  /* Netlink socket from nl_socket_alloc(). */
  struct nl_sock *nl_sock;
  /* Generic netlink cache from genl_ctrl_alloc_cache(). */
  struct nl_cache *nl_cache;
  /* The "nl80211" family looked up in that cache. */
  struct genl_family *nl80211;
};
479
480
static int
481
nl80211_init(pcap_t *handle, struct nl80211_state *state, const char *device)
482
{
483
  int err;
484
485
  state->nl_sock = nl_socket_alloc();
486
  if (!state->nl_sock) {
487
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
488
        "%s: failed to allocate netlink handle", device);
489
    return PCAP_ERROR;
490
  }
491
492
  if (genl_connect(state->nl_sock)) {
493
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
494
        "%s: failed to connect to generic netlink", device);
495
    goto out_handle_destroy;
496
  }
497
498
  err = genl_ctrl_alloc_cache(state->nl_sock, &state->nl_cache);
499
  if (err < 0) {
500
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
501
        "%s: failed to allocate generic netlink cache: %s",
502
        device, nl_geterror(-err));
503
    goto out_handle_destroy;
504
  }
505
506
  state->nl80211 = genl_ctrl_search_by_name(state->nl_cache, "nl80211");
507
  if (!state->nl80211) {
508
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
509
        "%s: nl80211 not found", device);
510
    goto out_cache_free;
511
  }
512
513
  return 0;
514
515
out_cache_free:
516
  nl_cache_free(state->nl_cache);
517
out_handle_destroy:
518
  nl_socket_free(state->nl_sock);
519
  return PCAP_ERROR;
520
}
521
522
static void
523
nl80211_cleanup(struct nl80211_state *state)
524
{
525
  genl_family_put(state->nl80211);
526
  nl_cache_free(state->nl_cache);
527
  nl_socket_free(state->nl_sock);
528
}
529
530
static int
531
del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
532
    const char *device, const char *mondevice);
533
534
static int
535
add_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
536
    const char *device, const char *mondevice)
537
{
538
  struct pcap_linux *handlep = handle->priv;
539
  int ifindex;
540
  struct nl_msg *msg;
541
  int err;
542
543
  ifindex = iface_get_id(sock_fd, device, handle->errbuf);
544
  if (ifindex == -1)
545
    return PCAP_ERROR;
546
547
  msg = nlmsg_alloc();
548
  if (!msg) {
549
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
550
        "%s: failed to allocate netlink msg", device);
551
    return PCAP_ERROR;
552
  }
553
554
  genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0,
555
        0, NL80211_CMD_NEW_INTERFACE, 0);
556
  NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex);
557
DIAG_OFF_NARROWING
558
  NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, mondevice);
559
DIAG_ON_NARROWING
560
  NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, NL80211_IFTYPE_MONITOR);
561
562
  err = nl_send_auto_complete(state->nl_sock, msg);
563
  if (err < 0) {
564
    if (err == -NLE_FAILURE) {
565
      /*
566
       * Device not available; our caller should just
567
       * keep trying.  (libnl 2.x maps ENFILE to
568
       * NLE_FAILURE; it can also map other errors
569
       * to that, but there's not much we can do
570
       * about that.)
571
       */
572
      nlmsg_free(msg);
573
      return 0;
574
    } else {
575
      /*
576
       * Real failure, not just "that device is not
577
       * available.
578
       */
579
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
580
          "%s: nl_send_auto_complete failed adding %s interface: %s",
581
          device, mondevice, nl_geterror(-err));
582
      nlmsg_free(msg);
583
      return PCAP_ERROR;
584
    }
585
  }
586
  err = nl_wait_for_ack(state->nl_sock);
587
  if (err < 0) {
588
    if (err == -NLE_FAILURE) {
589
      /*
590
       * Device not available; our caller should just
591
       * keep trying.  (libnl 2.x maps ENFILE to
592
       * NLE_FAILURE; it can also map other errors
593
       * to that, but there's not much we can do
594
       * about that.)
595
       */
596
      nlmsg_free(msg);
597
      return 0;
598
    } else {
599
      /*
600
       * Real failure, not just "that device is not
601
       * available.
602
       */
603
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
604
          "%s: nl_wait_for_ack failed adding %s interface: %s",
605
          device, mondevice, nl_geterror(-err));
606
      nlmsg_free(msg);
607
      return PCAP_ERROR;
608
    }
609
  }
610
611
  /*
612
   * Success.
613
   */
614
  nlmsg_free(msg);
615
616
  /*
617
   * Try to remember the monitor device.
618
   */
619
  handlep->mondevice = strdup(mondevice);
620
  if (handlep->mondevice == NULL) {
621
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
622
        errno, "strdup");
623
    /*
624
     * Get rid of the monitor device.
625
     */
626
    del_mon_if(handle, sock_fd, state, device, mondevice);
627
    return PCAP_ERROR;
628
  }
629
  return 1;
630
631
nla_put_failure:
632
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
633
      "%s: nl_put failed adding %s interface",
634
      device, mondevice);
635
  nlmsg_free(msg);
636
  return PCAP_ERROR;
637
}
638
639
static int
640
del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
641
    const char *device, const char *mondevice)
642
{
643
  int ifindex;
644
  struct nl_msg *msg;
645
  int err;
646
647
  ifindex = iface_get_id(sock_fd, mondevice, handle->errbuf);
648
  if (ifindex == -1)
649
    return PCAP_ERROR;
650
651
  msg = nlmsg_alloc();
652
  if (!msg) {
653
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
654
        "%s: failed to allocate netlink msg", device);
655
    return PCAP_ERROR;
656
  }
657
658
  genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0,
659
        0, NL80211_CMD_DEL_INTERFACE, 0);
660
  NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex);
661
662
  err = nl_send_auto_complete(state->nl_sock, msg);
663
  if (err < 0) {
664
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
665
        "%s: nl_send_auto_complete failed deleting %s interface: %s",
666
        device, mondevice, nl_geterror(-err));
667
    nlmsg_free(msg);
668
    return PCAP_ERROR;
669
  }
670
  err = nl_wait_for_ack(state->nl_sock);
671
  if (err < 0) {
672
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
673
        "%s: nl_wait_for_ack failed adding %s interface: %s",
674
        device, mondevice, nl_geterror(-err));
675
    nlmsg_free(msg);
676
    return PCAP_ERROR;
677
  }
678
679
  /*
680
   * Success.
681
   */
682
  nlmsg_free(msg);
683
  return 1;
684
685
nla_put_failure:
686
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
687
      "%s: nl_put failed deleting %s interface",
688
      device, mondevice);
689
  nlmsg_free(msg);
690
  return PCAP_ERROR;
691
}
692
#endif /* HAVE_LIBNL */
693
694
/*
 * Return the protocol to use for the socket, in network byte order:
 * the one set in handle->opt.protocol, or ETH_P_ALL if that is 0.
 */
static int pcap_protocol(pcap_t *handle)
{
  int wanted = handle->opt.protocol;

  return htons(wanted != 0 ? wanted : ETH_P_ALL);
}
704
705
static int
706
pcap_can_set_rfmon_linux(pcap_t *handle)
707
0
{
708
#ifdef HAVE_LIBNL
709
  char phydev_path[PATH_MAX+1];
710
  int ret;
711
#endif
712
713
0
  if (strcmp(handle->opt.device, "any") == 0) {
714
    /*
715
     * Monitor mode makes no sense on the "any" device.
716
     */
717
0
    return 0;
718
0
  }
719
720
#ifdef HAVE_LIBNL
721
  /*
722
   * Bleah.  There doesn't seem to be a way to ask a mac80211
723
   * device, through libnl, whether it supports monitor mode;
724
   * we'll just check whether the device appears to be a
725
   * mac80211 device and, if so, assume the device supports
726
   * monitor mode.
727
   */
728
  ret = get_mac80211_phydev(handle, handle->opt.device, phydev_path,
729
      PATH_MAX);
730
  if (ret < 0)
731
    return ret; /* error */
732
  if (ret == 1)
733
    return 1; /* mac80211 device */
734
#endif
735
736
0
  return 0;
737
0
}
738
739
/*
 * Grabs the number of missed packets by the interface from
 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors.
 *
 * Compared to /proc/net/dev this avoids counting software drops,
 * but may be unimplemented and just return 0.
 * The author has found no straightforward way to check for support.
 *
 * if_name: interface name, used as a path component.
 * stat:    name of the statistics file to read.
 *
 * Returns the counter value, or 0 if the path doesn't fit in the
 * buffer, the file can't be opened or read, or the contents are
 * negative or too large for a long long.
 */
static long long int
linux_get_stat(const char * if_name, const char * stat) {
  ssize_t bytes_read;
  int fd;
  int path_len;
  long long int value;
  char buffer[PATH_MAX];

  path_len = snprintf(buffer, sizeof(buffer),
      "/sys/class/net/%s/statistics/%s", if_name, stat);
  /*
   * Don't open a truncated path; it would name some other file.
   */
  if (path_len < 0 || (size_t)path_len >= sizeof(buffer))
    return 0;

  fd = open(buffer, O_RDONLY);
  if (fd == -1)
    return 0;

  /* Reuse the path buffer for the file contents. */
  bytes_read = read(fd, buffer, sizeof(buffer) - 1);
  close(fd);
  if (bytes_read == -1)
    return 0;
  buffer[bytes_read] = '\0';

  /*
   * A drop counter can't be negative, and a value clamped to
   * LLONG_MAX would overflow when the caller adds two counters
   * together; treat both as "no information".
   */
  errno = 0;
  value = strtoll(buffer, NULL, 10);
  if (errno == ERANGE || value < 0)
    return 0;

  return value;
}
766
767
/*
 * Return the sum of the interface's rx_missed_errors and
 * rx_fifo_errors counters, as read by linux_get_stat().
 */
static long long int
linux_if_drops(const char * if_name)
{
  static const char * const counters[] = {
    "rx_missed_errors",
    "rx_fifo_errors"
  };
  long long int total = 0;
  size_t i;

  for (i = 0; i < sizeof(counters) / sizeof(counters[0]); i++)
    total += linux_get_stat(if_name, counters[i]);
  return total;
}
774
775
776
/*
777
 * Monitor mode is kind of interesting because we have to reset the
778
 * interface before exiting. The problem can't really be solved without
779
 * some daemon taking care of managing usage counts.  If we put the
780
 * interface into monitor mode, we set a flag indicating that we must
781
 * take it out of that mode when the interface is closed, and, when
782
 * closing the interface, if that flag is set we take it out of monitor
783
 * mode.
784
 */
785
786
static void pcap_cleanup_linux( pcap_t *handle )
787
0
{
788
0
  struct pcap_linux *handlep = handle->priv;
789
#ifdef HAVE_LIBNL
790
  struct nl80211_state nlstate;
791
  int ret;
792
#endif /* HAVE_LIBNL */
793
794
0
  if (handlep->must_do_on_close != 0) {
795
    /*
796
     * There's something we have to do when closing this
797
     * pcap_t.
798
     */
799
#ifdef HAVE_LIBNL
800
    if (handlep->must_do_on_close & MUST_DELETE_MONIF) {
801
      ret = nl80211_init(handle, &nlstate, handlep->device);
802
      if (ret >= 0) {
803
        ret = del_mon_if(handle, handle->fd, &nlstate,
804
            handlep->device, handlep->mondevice);
805
        nl80211_cleanup(&nlstate);
806
      }
807
      if (ret < 0) {
808
        fprintf(stderr,
809
            "Can't delete monitor interface %s (%s).\n"
810
            "Please delete manually.\n",
811
            handlep->mondevice, handle->errbuf);
812
      }
813
    }
814
#endif /* HAVE_LIBNL */
815
816
    /*
817
     * Take this pcap out of the list of pcaps for which we
818
     * have to take the interface out of some mode.
819
     */
820
0
    pcap_remove_from_pcaps_to_close(handle);
821
0
  }
822
823
0
  if (handle->fd != -1) {
824
    /*
825
     * Destroy the ring buffer (assuming we've set it up),
826
     * and unmap it if it's mapped.
827
     */
828
0
    destroy_ring(handle);
829
0
  }
830
831
0
  if (handlep->oneshot_buffer != NULL) {
832
0
    munmap(handlep->oneshot_buffer, handle->snapshot);
833
0
    handlep->oneshot_buffer = NULL;
834
0
  }
835
836
0
  if (handlep->mondevice != NULL) {
837
0
    free(handlep->mondevice);
838
0
    handlep->mondevice = NULL;
839
0
  }
840
0
  if (handlep->device != NULL) {
841
0
    free(handlep->device);
842
0
    handlep->device = NULL;
843
0
  }
844
845
0
  if (handlep->poll_breakloop_fd != -1) {
846
0
    close(handlep->poll_breakloop_fd);
847
0
    handlep->poll_breakloop_fd = -1;
848
0
  }
849
0
  pcap_cleanup_live_common(handle);
850
0
}
851
852
#ifdef HAVE_TPACKET3
853
/*
 * Some versions of TPACKET_V3 have annoying bugs/misfeatures
 * around which we have to work.  Determine if we have those
 * problems or not.
 * 3.19 is the first release with a fixed version of
 * TPACKET_V3.  We treat anything before that as
 * not having a fixed version; that may really mean
 * it has *no* version.
 *
 * Returns 0 if the running kernel's release is 3.19 or later and
 * 1 otherwise, including when the release can't be fetched or
 * parsed as "major.minor" followed by '.' or the end of the string.
 */
static int has_broken_tpacket_v3(void)
{
  struct utsname kernel_info;
  long kmajor, kminor;
  int parsed_len;

  /* No version information, assume broken. */
  if (uname(&kernel_info) == -1)
    return 1;

  /* A malformed version, ditto. */
  if (sscanf(kernel_info.release, "%ld.%ld%n",
      &kmajor, &kminor, &parsed_len) != 2)
    return 1;
  switch (kernel_info.release[parsed_len]) {
  case '.':
  case '\0':
    break;
  default:
    return 1;
  }

  if (kmajor > 3)
    return 0; /* OK, a fixed version. */
  if (kmajor < 3)
    return 1; /* Too old :( */

  /* 3.x: fixed from 3.19 on. */
  return (kminor >= 19) ? 0 : 1;
}
888
#endif
889
890
/*
891
 * Set the timeout to be used in poll() with memory-mapped packet capture.
892
 */
893
static void
894
set_poll_timeout(struct pcap_linux *handlep)
895
0
{
896
0
#ifdef HAVE_TPACKET3
897
0
  int broken_tpacket_v3 = has_broken_tpacket_v3();
898
0
#endif
899
0
  if (handlep->timeout == 0) {
900
0
#ifdef HAVE_TPACKET3
901
    /*
902
     * XXX - due to a set of (mis)features in the TPACKET_V3
903
     * kernel code prior to the 3.19 kernel, blocking forever
904
     * with a TPACKET_V3 socket can, if few packets are
905
     * arriving and passing the socket filter, cause most
906
     * packets to be dropped.  See libpcap issue #335 for the
907
     * full painful story.
908
     *
909
     * The workaround is to have poll() time out very quickly,
910
     * so we grab the frames handed to us, and return them to
911
     * the kernel, ASAP.
912
     */
913
0
    if (handlep->tp_version == TPACKET_V3 && broken_tpacket_v3)
914
0
      handlep->poll_timeout = 1; /* don't block for very long */
915
0
    else
916
0
#endif
917
0
      handlep->poll_timeout = -1; /* block forever */
918
0
  } else if (handlep->timeout > 0) {
919
0
#ifdef HAVE_TPACKET3
920
    /*
921
     * For TPACKET_V3, the timeout is handled by the kernel,
922
     * so block forever; that way, we don't get extra timeouts.
923
     * Don't do that if we have a broken TPACKET_V3, though.
924
     */
925
0
    if (handlep->tp_version == TPACKET_V3 && !broken_tpacket_v3)
926
0
      handlep->poll_timeout = -1; /* block forever, let TPACKET_V3 wake us up */
927
0
    else
928
0
#endif
929
0
      handlep->poll_timeout = handlep->timeout; /* block for that amount of time */
930
0
  } else {
931
    /*
932
     * Non-blocking mode; we call poll() to pick up error
933
     * indications, but we don't want it to wait for
934
     * anything.
935
     */
936
0
    handlep->poll_timeout = 0;
937
0
  }
938
0
}
939
940
/*
 * Break out of a capture loop: do the common break-loop processing
 * and then, if a break-loop descriptor is available, write a 64-bit
 * 1 to it.
 */
static void pcap_breakloop_linux(pcap_t *handle)
{
  struct pcap_linux *handlep;
  uint64_t wakeup = 1;

  pcap_breakloop_common(handle);

  handlep = handle->priv;
  if (handlep->poll_breakloop_fd == -1)
    return;

  /* XXX - what if this fails? */
  (void)write(handlep->poll_breakloop_fd, &wakeup, sizeof(wakeup));
}
950
951
/*
952
 * Set the offset at which to insert VLAN tags.
953
 * That should be the offset of the type field.
954
 */
955
static void
956
set_vlan_offset(pcap_t *handle)
957
0
{
958
0
  struct pcap_linux *handlep = handle->priv;
959
960
0
  switch (handle->linktype) {
961
962
0
  case DLT_EN10MB:
963
    /*
964
     * The type field is after the destination and source
965
     * MAC address.
966
     */
967
0
    handlep->vlan_offset = 2 * ETH_ALEN;
968
0
    break;
969
970
0
  case DLT_LINUX_SLL:
971
    /*
972
     * The type field is in the last 2 bytes of the
973
     * DLT_LINUX_SLL header.
974
     */
975
0
    handlep->vlan_offset = SLL_HDR_LEN - 2;
976
0
    break;
977
978
0
  default:
979
0
    handlep->vlan_offset = -1; /* unknown */
980
0
    break;
981
0
  }
982
0
}
983
984
/*
985
 *  Get a handle for a live capture from the given device. You can
986
 *  pass NULL as device to get all packages (without link level
987
 *  information of course). If you pass 1 as promisc the interface
988
 *  will be set to promiscuous mode (XXX: I think this usage should
989
 *  be deprecated and functions be added to select that later allow
990
 *  modification of that values -- Torsten).
991
 */
992
static int
993
pcap_activate_linux(pcap_t *handle)
994
0
{
995
0
  struct pcap_linux *handlep = handle->priv;
996
0
  const char  *device;
997
0
  int   is_any_device;
998
0
  struct ifreq  ifr;
999
0
  int   status = 0;
1000
0
  int   status2 = 0;
1001
0
  int   ret;
1002
1003
0
  device = handle->opt.device;
1004
1005
  /*
1006
   * Make sure the name we were handed will fit into the ioctls we
1007
   * might perform on the device; if not, return a "No such device"
1008
   * indication, as the Linux kernel shouldn't support creating
1009
   * a device whose name won't fit into those ioctls.
1010
   *
1011
   * "Will fit" means "will fit, complete with a null terminator",
1012
   * so if the length, which does *not* include the null terminator,
1013
   * is greater than *or equal to* the size of the field into which
1014
   * we'll be copying it, that won't fit.
1015
   */
1016
0
  if (strlen(device) >= sizeof(ifr.ifr_name)) {
1017
    /*
1018
     * There's nothing more to say, so clear the error
1019
     * message.
1020
     */
1021
0
    handle->errbuf[0] = '\0';
1022
0
    status = PCAP_ERROR_NO_SUCH_DEVICE;
1023
0
    goto fail;
1024
0
  }
1025
1026
  /*
1027
   * Turn a negative snapshot value (invalid), a snapshot value of
1028
   * 0 (unspecified), or a value bigger than the normal maximum
1029
   * value, into the maximum allowed value.
1030
   *
1031
   * If some application really *needs* a bigger snapshot
1032
   * length, we should just increase MAXIMUM_SNAPLEN.
1033
   */
1034
0
  if (handle->snapshot <= 0 || handle->snapshot > MAXIMUM_SNAPLEN)
1035
0
    handle->snapshot = MAXIMUM_SNAPLEN;
1036
1037
0
  handlep->device = strdup(device);
1038
0
  if (handlep->device == NULL) {
1039
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
1040
0
        errno, "strdup");
1041
0
    status = PCAP_ERROR;
1042
0
    goto fail;
1043
0
  }
1044
1045
  /*
1046
   * The "any" device is a special device which causes us not
1047
   * to bind to a particular device and thus to look at all
1048
   * devices.
1049
   */
1050
0
  is_any_device = (strcmp(device, "any") == 0);
1051
0
  if (is_any_device) {
1052
0
    if (handle->opt.promisc) {
1053
0
      handle->opt.promisc = 0;
1054
      /* Just a warning. */
1055
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
1056
0
          "Promiscuous mode not supported on the \"any\" device");
1057
0
      status = PCAP_WARNING_PROMISC_NOTSUP;
1058
0
    }
1059
0
  }
1060
1061
  /* copy timeout value */
1062
0
  handlep->timeout = handle->opt.timeout;
1063
1064
  /*
1065
   * If we're in promiscuous mode, then we probably want
1066
   * to see when the interface drops packets too, so get an
1067
   * initial count from
1068
   * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors
1069
   */
1070
0
  if (handle->opt.promisc)
1071
0
    handlep->sysfs_dropped = linux_if_drops(handlep->device);
1072
1073
  /*
1074
   * If the "any" device is specified, try to open a SOCK_DGRAM.
1075
   * Otherwise, open a SOCK_RAW.
1076
   */
1077
0
  ret = setup_socket(handle, is_any_device);
1078
0
  if (ret < 0) {
1079
    /*
1080
     * Fatal error; the return value is the error code,
1081
     * and handle->errbuf has been set to an appropriate
1082
     * error message.
1083
     */
1084
0
    status = ret;
1085
0
    goto fail;
1086
0
  }
1087
  /*
1088
   * Success.
1089
   * Try to set up memory-mapped access.
1090
   */
1091
0
  ret = setup_mmapped(handle, &status);
1092
0
  if (ret == -1) {
1093
    /*
1094
     * We failed to set up to use it, or the
1095
     * kernel supports it, but we failed to
1096
     * enable it.  status has been set to the
1097
     * error status to return and, if it's
1098
     * PCAP_ERROR, handle->errbuf contains
1099
     * the error message.
1100
     */
1101
0
    goto fail;
1102
0
  }
1103
1104
  /*
1105
   * We succeeded.  status has been set to the status to return,
1106
   * which might be 0, or might be a PCAP_WARNING_ value.
1107
   */
1108
  /*
1109
   * Now that we have activated the mmap ring, we can
1110
   * set the correct protocol.
1111
   */
1112
0
  if ((status2 = iface_bind(handle->fd, handlep->ifindex,
1113
0
      handle->errbuf, pcap_protocol(handle))) != 0) {
1114
0
    status = status2;
1115
0
    goto fail;
1116
0
  }
1117
1118
0
  handle->inject_op = pcap_inject_linux;
1119
0
  handle->setfilter_op = pcap_setfilter_linux;
1120
0
  handle->setdirection_op = pcap_setdirection_linux;
1121
0
  handle->set_datalink_op = pcap_set_datalink_linux;
1122
0
  handle->setnonblock_op = pcap_setnonblock_linux;
1123
0
  handle->getnonblock_op = pcap_getnonblock_linux;
1124
0
  handle->cleanup_op = pcap_cleanup_linux;
1125
0
  handle->stats_op = pcap_stats_linux;
1126
0
  handle->breakloop_op = pcap_breakloop_linux;
1127
1128
0
  switch (handlep->tp_version) {
1129
1130
0
  case TPACKET_V2:
1131
0
    handle->read_op = pcap_read_linux_mmap_v2;
1132
0
    break;
1133
0
#ifdef HAVE_TPACKET3
1134
0
  case TPACKET_V3:
1135
0
    handle->read_op = pcap_read_linux_mmap_v3;
1136
0
    break;
1137
0
#endif
1138
0
  }
1139
0
  handle->oneshot_callback = pcap_oneshot_linux;
1140
0
  handle->selectable_fd = handle->fd;
1141
1142
0
  return status;
1143
1144
0
fail:
1145
0
  pcap_cleanup_linux(handle);
1146
0
  return status;
1147
0
}
1148
1149
static int
1150
pcap_set_datalink_linux(pcap_t *handle, int dlt)
1151
0
{
1152
0
  handle->linktype = dlt;
1153
1154
  /*
1155
   * Update the offset at which to insert VLAN tags for the
1156
   * new link-layer type.
1157
   */
1158
0
  set_vlan_offset(handle);
1159
1160
0
  return 0;
1161
0
}
1162
1163
/*
1164
 * linux_check_direction()
1165
 *
1166
 * Do checks based on packet direction.
1167
 */
1168
static inline int
1169
linux_check_direction(const pcap_t *handle, const struct sockaddr_ll *sll)
1170
0
{
1171
0
  struct pcap_linux *handlep = handle->priv;
1172
1173
0
  if (sll->sll_pkttype == PACKET_OUTGOING) {
1174
    /*
1175
     * Outgoing packet.
1176
     * If this is from the loopback device, reject it;
1177
     * we'll see the packet as an incoming packet as well,
1178
     * and we don't want to see it twice.
1179
     */
1180
0
    if (sll->sll_ifindex == handlep->lo_ifindex)
1181
0
      return 0;
1182
1183
    /*
1184
     * If this is an outgoing CAN or CAN FD frame, and
1185
     * the user doesn't only want outgoing packets,
1186
     * reject it; CAN devices and drivers, and the CAN
1187
     * stack, always arrange to loop back transmitted
1188
     * packets, so they also appear as incoming packets.
1189
     * We don't want duplicate packets, and we can't
1190
     * easily distinguish packets looped back by the CAN
1191
     * layer than those received by the CAN layer, so we
1192
     * eliminate this packet instead.
1193
     *
1194
     * We check whether this is a CAN or CAN FD frame
1195
     * by checking whether the device's hardware type
1196
     * is ARPHRD_CAN.
1197
     */
1198
0
    if (sll->sll_hatype == ARPHRD_CAN &&
1199
0
         handle->direction != PCAP_D_OUT)
1200
0
      return 0;
1201
1202
    /*
1203
     * If the user only wants incoming packets, reject it.
1204
     */
1205
0
    if (handle->direction == PCAP_D_IN)
1206
0
      return 0;
1207
0
  } else {
1208
    /*
1209
     * Incoming packet.
1210
     * If the user only wants outgoing packets, reject it.
1211
     */
1212
0
    if (handle->direction == PCAP_D_OUT)
1213
0
      return 0;
1214
0
  }
1215
0
  return 1;
1216
0
}
1217
1218
/*
1219
 * Check whether the device to which the pcap_t is bound still exists.
1220
 * We do so by asking what address the socket is bound to, and checking
1221
 * whether the ifindex in the address is -1, meaning "that device is gone",
1222
 * or some other value, meaning "that device still exists".
1223
 */
1224
static int
1225
device_still_exists(pcap_t *handle)
1226
0
{
1227
0
  struct pcap_linux *handlep = handle->priv;
1228
0
  struct sockaddr_ll addr;
1229
0
  socklen_t addr_len;
1230
1231
  /*
1232
   * If handlep->ifindex is -1, the socket isn't bound, meaning
1233
   * we're capturing on the "any" device; that device never
1234
   * disappears.  (It should also never be configured down, so
1235
   * we shouldn't even get here, but let's make sure.)
1236
   */
1237
0
  if (handlep->ifindex == -1)
1238
0
    return (1); /* it's still here */
1239
1240
  /*
1241
   * OK, now try to get the address for the socket.
1242
   */
1243
0
  addr_len = sizeof (addr);
1244
0
  if (getsockname(handle->fd, (struct sockaddr *) &addr, &addr_len) == -1) {
1245
    /*
1246
     * Error - report an error and return -1.
1247
     */
1248
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
1249
0
        errno, "getsockname failed");
1250
0
    return (-1);
1251
0
  }
1252
0
  if (addr.sll_ifindex == -1) {
1253
    /*
1254
     * This means the device went away.
1255
     */
1256
0
    return (0);
1257
0
  }
1258
1259
  /*
1260
   * The device presumably just went down.
1261
   */
1262
0
  return (1);
1263
0
}
1264
1265
static int
1266
pcap_inject_linux(pcap_t *handle, const void *buf, int size)
1267
0
{
1268
0
  struct pcap_linux *handlep = handle->priv;
1269
0
  int ret;
1270
1271
0
  if (handlep->ifindex == -1) {
1272
    /*
1273
     * We don't support sending on the "any" device.
1274
     */
1275
0
    pcap_strlcpy(handle->errbuf,
1276
0
        "Sending packets isn't supported on the \"any\" device",
1277
0
        PCAP_ERRBUF_SIZE);
1278
0
    return (-1);
1279
0
  }
1280
1281
0
  if (handlep->cooked) {
1282
    /*
1283
     * We don't support sending on cooked-mode sockets.
1284
     *
1285
     * XXX - how do you send on a bound cooked-mode
1286
     * socket?
1287
     * Is a "sendto()" required there?
1288
     */
1289
0
    pcap_strlcpy(handle->errbuf,
1290
0
        "Sending packets isn't supported in cooked mode",
1291
0
        PCAP_ERRBUF_SIZE);
1292
0
    return (-1);
1293
0
  }
1294
1295
0
  ret = (int)send(handle->fd, buf, size, 0);
1296
0
  if (ret == -1) {
1297
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
1298
0
        errno, "send");
1299
0
    return (-1);
1300
0
  }
1301
0
  return (ret);
1302
0
}
1303
1304
/*
1305
 *  Get the statistics for the given packet capture handle.
1306
 */
1307
static int
1308
pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats)
1309
0
{
1310
0
  struct pcap_linux *handlep = handle->priv;
1311
0
#ifdef HAVE_TPACKET3
1312
  /*
1313
   * For sockets using TPACKET_V2, the extra stuff at the end
1314
   * of a struct tpacket_stats_v3 will not be filled in, and
1315
   * we don't look at it so this is OK even for those sockets.
1316
   * In addition, the PF_PACKET socket code in the kernel only
1317
   * uses the length parameter to compute how much data to
1318
   * copy out and to indicate how much data was copied out, so
1319
   * it's OK to base it on the size of a struct tpacket_stats.
1320
   *
1321
   * XXX - it's probably OK, in fact, to just use a
1322
   * struct tpacket_stats for V3 sockets, as we don't
1323
   * care about the tp_freeze_q_cnt stat.
1324
   */
1325
0
  struct tpacket_stats_v3 kstats;
1326
#else /* HAVE_TPACKET3 */
1327
  struct tpacket_stats kstats;
1328
#endif /* HAVE_TPACKET3 */
1329
0
  socklen_t len = sizeof (struct tpacket_stats);
1330
1331
0
  long long if_dropped = 0;
1332
1333
  /*
1334
   * To fill in ps_ifdrop, we parse
1335
   * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors
1336
   * for the numbers
1337
   */
1338
0
  if (handle->opt.promisc)
1339
0
  {
1340
    /*
1341
     * XXX - is there any reason to do this by remembering
1342
     * the last counts value, subtracting it from the
1343
     * current counts value, and adding that to stat.ps_ifdrop,
1344
     * maintaining stat.ps_ifdrop as a count, rather than just
1345
     * saving the *initial* counts value and setting
1346
     * stat.ps_ifdrop to the difference between the current
1347
     * value and the initial value?
1348
     *
1349
     * One reason might be to handle the count wrapping
1350
     * around, on platforms where the count is 32 bits
1351
     * and where you might get more than 2^32 dropped
1352
     * packets; is there any other reason?
1353
     *
1354
     * (We maintain the count as a long long int so that,
1355
     * if the kernel maintains the counts as 64-bit even
1356
     * on 32-bit platforms, we can handle the real count.
1357
     *
1358
     * Unfortunately, we can't report 64-bit counts; we
1359
     * need a better API for reporting statistics, such as
1360
     * one that reports them in a style similar to the
1361
     * pcapng Interface Statistics Block, so that 1) the
1362
     * counts are 64-bit, 2) it's easier to add new statistics
1363
     * without breaking the ABI, and 3) it's easier to
1364
     * indicate to a caller that wants one particular
1365
     * statistic that it's not available by just not supplying
1366
     * it.)
1367
     */
1368
0
    if_dropped = handlep->sysfs_dropped;
1369
0
    handlep->sysfs_dropped = linux_if_drops(handlep->device);
1370
0
    handlep->stat.ps_ifdrop += (u_int)(handlep->sysfs_dropped - if_dropped);
1371
0
  }
1372
1373
  /*
1374
   * Try to get the packet counts from the kernel.
1375
   */
1376
0
  if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS,
1377
0
      &kstats, &len) > -1) {
1378
    /*
1379
     * "ps_recv" counts only packets that *passed* the
1380
     * filter, not packets that didn't pass the filter.
1381
     * This includes packets later dropped because we
1382
     * ran out of buffer space.
1383
     *
1384
     * "ps_drop" counts packets dropped because we ran
1385
     * out of buffer space.  It doesn't count packets
1386
     * dropped by the interface driver.  It counts only
1387
     * packets that passed the filter.
1388
     *
1389
     * See above for ps_ifdrop.
1390
     *
1391
     * Both statistics include packets not yet read from
1392
     * the kernel by libpcap, and thus not yet seen by
1393
     * the application.
1394
     *
1395
     * In "linux/net/packet/af_packet.c", at least in 2.6.27
1396
     * through 5.6 kernels, "tp_packets" is incremented for
1397
     * every packet that passes the packet filter *and* is
1398
     * successfully copied to the ring buffer; "tp_drops" is
1399
     * incremented for every packet dropped because there's
1400
     * not enough free space in the ring buffer.
1401
     *
1402
     * When the statistics are returned for a PACKET_STATISTICS
1403
     * "getsockopt()" call, "tp_drops" is added to "tp_packets",
1404
     * so that "tp_packets" counts all packets handed to
1405
     * the PF_PACKET socket, including packets dropped because
1406
     * there wasn't room on the socket buffer - but not
1407
     * including packets that didn't pass the filter.
1408
     *
1409
     * In the BSD BPF, the count of received packets is
1410
     * incremented for every packet handed to BPF, regardless
1411
     * of whether it passed the filter.
1412
     *
1413
     * We can't make "pcap_stats()" work the same on both
1414
     * platforms, but the best approximation is to return
1415
     * "tp_packets" as the count of packets and "tp_drops"
1416
     * as the count of drops.
1417
     *
1418
     * Keep a running total because each call to
1419
     *    getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, ....
1420
     * resets the counters to zero.
1421
     */
1422
0
    handlep->stat.ps_recv += kstats.tp_packets;
1423
0
    handlep->stat.ps_drop += kstats.tp_drops;
1424
0
    *stats = handlep->stat;
1425
0
    return 0;
1426
0
  }
1427
1428
0
  pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, errno,
1429
0
      "failed to get statistics from socket");
1430
0
  return -1;
1431
0
}
1432
1433
/*
1434
 * Description string for the "any" device.
1435
 */
1436
static const char any_descr[] = "Pseudo-device that captures on all interfaces";
1437
1438
/*
1439
 * A PF_PACKET socket can be bound to any network interface.
1440
 */
1441
static int
1442
can_be_bound(const char *name _U_)
1443
0
{
1444
0
  return (1);
1445
0
}
1446
1447
/*
 * Get a socket to use with various interface ioctls.
 *
 * Returns the socket descriptor, or -1 if no usable socket could be
 * created.
 *
 * This is a bit ugly, as no socket type is guaranteed to work:
 *
 *   AF_NETLINK works *if* Netlink is configured into the kernel *and*
 *   the kernel is recent enough - device ioctls on Netlink sockets
 *   were introduced in Linux 4.6, and not all supported kernels are
 *   that recent.
 *
 *   AF_UNIX works *if* UNIX-domain sockets are configured in *and*
 *   the system allows creating them; some SELinux systems don't.
 *
 *   AF_INET works *if* IPv4 is configured in, but apparently some
 *   systems with network adapters have kernels without IPv4 support.
 *
 *   AF_INET6 works *if* IPv6 is configured in, but a system without
 *   AF_INET might not have AF_INET6 either.
 *
 *   AF_PACKET would work, except that some of these calls should work
 *   even *without* capture permission: it should be possible to
 *   enumerate interfaces and get information about them without it,
 *   with no failure until pcap_activate().  Otherwise users end up
 *   asking "why isn't it showing XXX" or "why do no interfaces show
 *   up?" when the real problem is permissions, and such reports take
 *   a lot of back-and-forth to debug.
 *
 * So AF_NETLINK is tried first - where "tried" includes attempting a
 * device ioctl on it - as, once pre-4.6 kernels are rare, it will
 * probably be the mechanism most likely to work; then AF_UNIX, as
 * it's less likely than IP to be configured out of a
 * networking-capable system; then AF_INET6; then AF_INET.
 */
static int
get_if_ioctl_socket(void)
{
  /*
   * Fallbacks, in the order in which they're tried, if AF_NETLINK
   * can't be used.
   *
   * XXX - if all of these fail, is there anything else we should
   * try?  AF_CAN, for embedded systems in vehicles, in case
   * they're built without Internet protocol support?  Any other
   * socket types popular in non-Internet embedded systems?
   */
  static const struct {
    int domain;
    int type;
  } fallbacks[] = {
    { AF_UNIX,  SOCK_RAW },
    { AF_INET6, SOCK_DGRAM },
    { AF_INET,  SOCK_DGRAM },
  };
  size_t i;
  int sock;

  sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  if (sock != -1) {
    struct ifreq probe;

    /*
     * Make sure an SIOCGIFNAME ioctl can be done on it.  If the
     * ioctl succeeds, or fails for some reason other than
     * "netlink sockets don't support device ioctls", go with
     * the AF_NETLINK socket.
     */
    memset(&probe, 0, sizeof(probe));
    if (ioctl(sock, SIOCGIFNAME, &probe) == 0)
      return (sock);
    if (errno != EOPNOTSUPP)
      return (sock);

    /*
     * That's as bad as "netlink sockets aren't available";
     * close the socket and drive on.
     */
    close(sock);
  }

  /*
   * Take the first fallback that can be created; if none can,
   * this leaves the -1 from the last attempt.
   */
  sock = -1;
  for (i = 0; i < sizeof(fallbacks) / sizeof(fallbacks[0]); i++) {
    sock = socket(fallbacks[i].domain, fallbacks[i].type, 0);
    if (sock != -1)
      break;
  }
  return (sock);
}
1566
1567
/*
1568
 * Get additional flags for a device, using SIOCGIFMEDIA.
1569
 */
1570
static int
1571
get_if_flags(const char *name, bpf_u_int32 *flags, char *errbuf)
1572
0
{
1573
0
  int sock;
1574
0
  FILE *fh;
1575
0
  unsigned int arptype;
1576
0
  struct ifreq ifr;
1577
0
  struct ethtool_value info;
1578
1579
0
  if (*flags & PCAP_IF_LOOPBACK) {
1580
    /*
1581
     * Loopback devices aren't wireless, and "connected"/
1582
     * "disconnected" doesn't apply to them.
1583
     */
1584
0
    *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE;
1585
0
    return 0;
1586
0
  }
1587
1588
0
  sock = get_if_ioctl_socket();
1589
0
  if (sock == -1) {
1590
0
    pcap_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, errno,
1591
0
        "Can't create socket to get ethtool information for %s",
1592
0
        name);
1593
0
    return -1;
1594
0
  }
1595
1596
  /*
1597
   * OK, what type of network is this?
1598
   * In particular, is it wired or wireless?
1599
   */
1600
0
  if (is_wifi(name)) {
1601
    /*
1602
     * Wi-Fi, hence wireless.
1603
     */
1604
0
    *flags |= PCAP_IF_WIRELESS;
1605
0
  } else {
1606
    /*
1607
     * OK, what does /sys/class/net/{if_name}/type contain?
1608
     * (We don't use that for Wi-Fi, as it'll report
1609
     * "Ethernet", i.e. ARPHRD_ETHER, for non-monitor-
1610
     * mode devices.)
1611
     */
1612
0
    char *pathstr;
1613
1614
0
    if (asprintf(&pathstr, "/sys/class/net/%s/type", name) == -1) {
1615
0
      snprintf(errbuf, PCAP_ERRBUF_SIZE,
1616
0
          "%s: Can't generate path name string for /sys/class/net device",
1617
0
          name);
1618
0
      close(sock);
1619
0
      return -1;
1620
0
    }
1621
0
    fh = fopen(pathstr, "r");
1622
0
    if (fh != NULL) {
1623
0
      if (fscanf(fh, "%u", &arptype) == 1) {
1624
        /*
1625
         * OK, we got an ARPHRD_ type; what is it?
1626
         */
1627
0
        switch (arptype) {
1628
1629
0
        case ARPHRD_LOOPBACK:
1630
          /*
1631
           * These are types to which
1632
           * "connected" and "disconnected"
1633
           * don't apply, so don't bother
1634
           * asking about it.
1635
           *
1636
           * XXX - add other types?
1637
           */
1638
0
          close(sock);
1639
0
          fclose(fh);
1640
0
          free(pathstr);
1641
0
          return 0;
1642
1643
0
        case ARPHRD_IRDA:
1644
0
        case ARPHRD_IEEE80211:
1645
0
        case ARPHRD_IEEE80211_PRISM:
1646
0
        case ARPHRD_IEEE80211_RADIOTAP:
1647
0
#ifdef ARPHRD_IEEE802154
1648
0
        case ARPHRD_IEEE802154:
1649
0
#endif
1650
0
#ifdef ARPHRD_IEEE802154_MONITOR
1651
0
        case ARPHRD_IEEE802154_MONITOR:
1652
0
#endif
1653
0
#ifdef ARPHRD_6LOWPAN
1654
0
        case ARPHRD_6LOWPAN:
1655
0
#endif
1656
          /*
1657
           * Various wireless types.
1658
           */
1659
0
          *flags |= PCAP_IF_WIRELESS;
1660
0
          break;
1661
0
        }
1662
0
      }
1663
0
      fclose(fh);
1664
0
    }
1665
0
    free(pathstr);
1666
0
  }
1667
1668
0
#ifdef ETHTOOL_GLINK
1669
0
  memset(&ifr, 0, sizeof(ifr));
1670
0
  pcap_strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
1671
0
  info.cmd = ETHTOOL_GLINK;
1672
  /*
1673
   * XXX - while Valgrind handles SIOCETHTOOL and knows that
1674
   * the ETHTOOL_GLINK command sets the .data member of the
1675
   * structure, Memory Sanitizer doesn't yet do so:
1676
   *
1677
   *    https://bugs.llvm.org/show_bug.cgi?id=45814
1678
   *
1679
   * For now, we zero it out to squelch warnings; if the bug
1680
   * in question is fixed, we can remove this.
1681
   */
1682
0
  info.data = 0;
1683
0
  ifr.ifr_data = (caddr_t)&info;
1684
0
  if (ioctl(sock, SIOCETHTOOL, &ifr) == -1) {
1685
0
    int save_errno = errno;
1686
1687
0
    switch (save_errno) {
1688
1689
0
    case EOPNOTSUPP:
1690
0
    case EINVAL:
1691
      /*
1692
       * OK, this OS version or driver doesn't support
1693
       * asking for this information.
1694
       * XXX - distinguish between "this doesn't
1695
       * support ethtool at all because it's not
1696
       * that type of device" vs. "this doesn't
1697
       * support ethtool even though it's that
1698
       * type of device", and return "unknown".
1699
       */
1700
0
      *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE;
1701
0
      close(sock);
1702
0
      return 0;
1703
1704
0
    case ENODEV:
1705
      /*
1706
       * OK, no such device.
1707
       * The user will find that out when they try to
1708
       * activate the device; just say "OK" and
1709
       * don't set anything.
1710
       */
1711
0
      close(sock);
1712
0
      return 0;
1713
1714
0
    default:
1715
      /*
1716
       * Other error.
1717
       */
1718
0
      pcap_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE,
1719
0
          save_errno,
1720
0
          "%s: SIOCETHTOOL(ETHTOOL_GLINK) ioctl failed",
1721
0
          name);
1722
0
      close(sock);
1723
0
      return -1;
1724
0
    }
1725
0
  }
1726
1727
  /*
1728
   * Is it connected?
1729
   */
1730
0
  if (info.data) {
1731
    /*
1732
     * It's connected.
1733
     */
1734
0
    *flags |= PCAP_IF_CONNECTION_STATUS_CONNECTED;
1735
0
  } else {
1736
    /*
1737
     * It's disconnected.
1738
     */
1739
0
    *flags |= PCAP_IF_CONNECTION_STATUS_DISCONNECTED;
1740
0
  }
1741
0
#endif
1742
1743
0
  close(sock);
1744
0
  return 0;
1745
0
}
1746
1747
int
1748
pcap_platform_finddevs(pcap_if_list_t *devlistp, char *errbuf)
1749
0
{
1750
  /*
1751
   * Get the list of regular interfaces first.
1752
   */
1753
0
  if (pcap_findalldevs_interfaces(devlistp, errbuf, can_be_bound,
1754
0
      get_if_flags) == -1)
1755
0
    return (-1); /* failure */
1756
1757
  /*
1758
   * Add the "any" device.
1759
   * As it refers to all network devices, not to any particular
1760
   * network device, the notion of "connected" vs. "disconnected"
1761
   * doesn't apply.
1762
   */
1763
0
  if (add_dev(devlistp, "any",
1764
0
      PCAP_IF_UP|PCAP_IF_RUNNING|PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE,
1765
0
      any_descr, errbuf) == NULL)
1766
0
    return (-1);
1767
1768
0
  return (0);
1769
0
}
1770
1771
/*
1772
 * Set direction flag: Which packets do we accept on a forwarding
1773
 * single device? IN, OUT or both?
1774
 */
1775
static int
1776
pcap_setdirection_linux(pcap_t *handle, pcap_direction_t d)
1777
0
{
1778
  /*
1779
   * It's guaranteed, at this point, that d is a valid
1780
   * direction value.
1781
   */
1782
0
  handle->direction = d;
1783
0
  return 0;
1784
0
}
1785
1786
/*
 * Check whether a device is a wireless interface, by seeing whether
 * sysfs has a "wireless" directory for it.
 *
 * Returns 1 if /sys/class/net/{device}/wireless can be stat()ed, and
 * 0 if it can't or if the path name can't be generated.
 */
static int
is_wifi(const char *device)
{
  char *wireless_dir;
  struct stat sb;
  int found;

  if (asprintf(&wireless_dir, "/sys/class/net/%s/wireless", device) == -1) {
    /* Just give up here. */
    return 0;
  }

  found = (stat(wireless_dir, &sb) == 0);
  free(wireless_dir);

  return found;
}
1810
1811
/*
1812
 *  Linux uses the ARP hardware type to identify the type of an
1813
 *  interface. pcap uses the DLT_xxx constants for this. This
1814
 *  function takes a pointer to a "pcap_t", and an ARPHRD_xxx
1815
 *  constant, as arguments, and sets "handle->linktype" to the
1816
 *  appropriate DLT_XXX constant and sets "handle->offset" to
1817
 *  the appropriate value (to make "handle->offset" plus link-layer
1818
 *  header length be a multiple of 4, so that the link-layer payload
1819
 *  will be aligned on a 4-byte boundary when capturing packets).
1820
 *  (If the offset isn't set here, it'll be 0; add code as appropriate
1821
 *  for cases where it shouldn't be 0.)
1822
 *
1823
 *  If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture
1824
 *  in cooked mode; otherwise, we can't use cooked mode, so we have
1825
 *  to pick some type that works in raw mode, or fail.
1826
 *
1827
 *  Sets the link type to -1 if unable to map the type.
1828
 */
1829
static void map_arphrd_to_dlt(pcap_t *handle, int arptype,
1830
            const char *device, int cooked_ok)
1831
0
{
1832
0
  static const char cdma_rmnet[] = "cdma_rmnet";
1833
1834
0
  switch (arptype) {
1835
1836
0
  case ARPHRD_ETHER:
1837
    /*
1838
     * For various annoying reasons having to do with DHCP
1839
     * software, some versions of Android give the mobile-
1840
     * phone-network interface an ARPHRD_ value of
1841
     * ARPHRD_ETHER, even though the packets supplied by
1842
     * that interface have no link-layer header, and begin
1843
     * with an IP header, so that the ARPHRD_ value should
1844
     * be ARPHRD_NONE.
1845
     *
1846
     * Detect those devices by checking the device name, and
1847
     * use DLT_RAW for them.
1848
     */
1849
0
    if (strncmp(device, cdma_rmnet, sizeof cdma_rmnet - 1) == 0) {
1850
0
      handle->linktype = DLT_RAW;
1851
0
      return;
1852
0
    }
1853
1854
    /*
1855
     * Is this a real Ethernet device?  If so, give it a
1856
     * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so
1857
     * that an application can let you choose it, in case you're
1858
     * capturing DOCSIS traffic that a Cisco Cable Modem
1859
     * Termination System is putting out onto an Ethernet (it
1860
     * doesn't put an Ethernet header onto the wire, it puts raw
1861
     * DOCSIS frames out on the wire inside the low-level
1862
     * Ethernet framing).
1863
     *
1864
     * XXX - are there any other sorts of "fake Ethernet" that
1865
     * have ARPHRD_ETHER but that shouldn't offer DLT_DOCSIS as
1866
     * a Cisco CMTS won't put traffic onto it or get traffic
1867
     * bridged onto it?  ISDN is handled in "setup_socket()",
1868
     * as we fall back on cooked mode there, and we use
1869
     * is_wifi() to check for 802.11 devices; are there any
1870
     * others?
1871
     */
1872
0
    if (!is_wifi(device)) {
1873
0
      int ret;
1874
1875
      /*
1876
       * This is not a Wi-Fi device but it could be
1877
       * a DSA master/management network device.
1878
       */
1879
0
      ret = iface_dsa_get_proto_info(device, handle);
1880
0
      if (ret < 0)
1881
0
        return;
1882
1883
0
      if (ret == 1) {
1884
        /*
1885
         * This is a DSA master/management network
1886
         * device linktype is already set by
1887
         * iface_dsa_get_proto_info() set an
1888
         * appropriate offset here.
1889
         */
1890
0
        handle->offset = 2;
1891
0
        break;
1892
0
      }
1893
1894
      /*
1895
       * It's not a Wi-Fi device; offer DOCSIS.
1896
       */
1897
0
      handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
1898
      /*
1899
       * If that fails, just leave the list empty.
1900
       */
1901
0
      if (handle->dlt_list != NULL) {
1902
0
        handle->dlt_list[0] = DLT_EN10MB;
1903
0
        handle->dlt_list[1] = DLT_DOCSIS;
1904
0
        handle->dlt_count = 2;
1905
0
      }
1906
0
    }
1907
    /* FALLTHROUGH */
1908
1909
0
  case ARPHRD_METRICOM:
1910
0
  case ARPHRD_LOOPBACK:
1911
0
    handle->linktype = DLT_EN10MB;
1912
0
    handle->offset = 2;
1913
0
    break;
1914
1915
0
  case ARPHRD_EETHER:
1916
0
    handle->linktype = DLT_EN3MB;
1917
0
    break;
1918
1919
0
  case ARPHRD_AX25:
1920
0
    handle->linktype = DLT_AX25_KISS;
1921
0
    break;
1922
1923
0
  case ARPHRD_PRONET:
1924
0
    handle->linktype = DLT_PRONET;
1925
0
    break;
1926
1927
0
  case ARPHRD_CHAOS:
1928
0
    handle->linktype = DLT_CHAOS;
1929
0
    break;
1930
#ifndef ARPHRD_CAN
1931
#define ARPHRD_CAN 280
1932
#endif
1933
0
  case ARPHRD_CAN:
1934
0
    handle->linktype = DLT_CAN_SOCKETCAN;
1935
0
    break;
1936
1937
#ifndef ARPHRD_IEEE802_TR
1938
#define ARPHRD_IEEE802_TR 800 /* From Linux 2.4 */
1939
#endif
1940
0
  case ARPHRD_IEEE802_TR:
1941
0
  case ARPHRD_IEEE802:
1942
0
    handle->linktype = DLT_IEEE802;
1943
0
    handle->offset = 2;
1944
0
    break;
1945
1946
0
  case ARPHRD_ARCNET:
1947
0
    handle->linktype = DLT_ARCNET_LINUX;
1948
0
    break;
1949
1950
#ifndef ARPHRD_FDDI /* From Linux 2.2.13 */
1951
#define ARPHRD_FDDI 774
1952
#endif
1953
0
  case ARPHRD_FDDI:
1954
0
    handle->linktype = DLT_FDDI;
1955
0
    handle->offset = 3;
1956
0
    break;
1957
1958
#ifndef ARPHRD_ATM  /* FIXME: How to #include this? */
1959
#define ARPHRD_ATM 19
1960
#endif
1961
0
  case ARPHRD_ATM:
1962
    /*
1963
     * The Classical IP implementation in ATM for Linux
1964
     * supports both what RFC 1483 calls "LLC Encapsulation",
1965
     * in which each packet has an LLC header, possibly
1966
     * with a SNAP header as well, prepended to it, and
1967
     * what RFC 1483 calls "VC Based Multiplexing", in which
1968
     * different virtual circuits carry different network
1969
     * layer protocols, and no header is prepended to packets.
1970
     *
1971
     * They both have an ARPHRD_ type of ARPHRD_ATM, so
1972
     * you can't use the ARPHRD_ type to find out whether
1973
     * captured packets will have an LLC header, and,
1974
     * while there's a socket ioctl to *set* the encapsulation
1975
     * type, there's no ioctl to *get* the encapsulation type.
1976
     *
1977
     * This means that
1978
     *
1979
     *  programs that dissect Linux Classical IP frames
1980
     *  would have to check for an LLC header and,
1981
     *  depending on whether they see one or not, dissect
1982
     *  the frame as LLC-encapsulated or as raw IP (I
1983
     *  don't know whether there's any traffic other than
1984
     *  IP that would show up on the socket, or whether
1985
     *  there's any support for IPv6 in the Linux
1986
     *  Classical IP code);
1987
     *
1988
     *  filter expressions would have to compile into
1989
     *  code that checks for an LLC header and does
1990
     *  the right thing.
1991
     *
1992
     * Both of those are a nuisance - and, at least on systems
1993
     * that support PF_PACKET sockets, we don't have to put
1994
     * up with those nuisances; instead, we can just capture
1995
     * in cooked mode.  That's what we'll do, if we can.
1996
     * Otherwise, we'll just fail.
1997
     */
1998
0
    if (cooked_ok)
1999
0
      handle->linktype = DLT_LINUX_SLL;
2000
0
    else
2001
0
      handle->linktype = -1;
2002
0
    break;
2003
2004
#ifndef ARPHRD_IEEE80211  /* From Linux 2.4.6 */
2005
#define ARPHRD_IEEE80211 801
2006
#endif
2007
0
  case ARPHRD_IEEE80211:
2008
0
    handle->linktype = DLT_IEEE802_11;
2009
0
    break;
2010
2011
#ifndef ARPHRD_IEEE80211_PRISM  /* From Linux 2.4.18 */
2012
#define ARPHRD_IEEE80211_PRISM 802
2013
#endif
2014
0
  case ARPHRD_IEEE80211_PRISM:
2015
0
    handle->linktype = DLT_PRISM_HEADER;
2016
0
    break;
2017
2018
#ifndef ARPHRD_IEEE80211_RADIOTAP /* new */
2019
#define ARPHRD_IEEE80211_RADIOTAP 803
2020
#endif
2021
0
  case ARPHRD_IEEE80211_RADIOTAP:
2022
0
    handle->linktype = DLT_IEEE802_11_RADIO;
2023
0
    break;
2024
2025
0
  case ARPHRD_PPP:
2026
    /*
2027
     * Some PPP code in the kernel supplies no link-layer
2028
     * header whatsoever to PF_PACKET sockets; other PPP
2029
     * code supplies PPP link-layer headers ("syncppp.c");
2030
     * some PPP code might supply random link-layer
2031
     * headers (PPP over ISDN - there's code in Ethereal,
2032
     * for example, to cope with PPP-over-ISDN captures
2033
     * with which the Ethereal developers have had to cope,
2034
     * heuristically trying to determine which of the
2035
     * oddball link-layer headers particular packets have).
2036
     *
2037
     * As such, we just punt, and run all PPP interfaces
2038
     * in cooked mode, if we can; otherwise, we just treat
2039
     * it as DLT_RAW, for now - if somebody needs to capture,
2040
     * on a 2.0[.x] kernel, on PPP devices that supply a
2041
     * link-layer header, they'll have to add code here to
2042
     * map to the appropriate DLT_ type (possibly adding a
2043
     * new DLT_ type, if necessary).
2044
     */
2045
0
    if (cooked_ok)
2046
0
      handle->linktype = DLT_LINUX_SLL;
2047
0
    else {
2048
      /*
2049
       * XXX - handle ISDN types here?  We can't fall
2050
       * back on cooked sockets, so we'd have to
2051
       * figure out from the device name what type of
2052
       * link-layer encapsulation it's using, and map
2053
       * that to an appropriate DLT_ value, meaning
2054
       * we'd map "isdnN" devices to DLT_RAW (they
2055
       * supply raw IP packets with no link-layer
2056
       * header) and "isdY" devices to a new DLT_I4L_IP
2057
       * type that has only an Ethernet packet type as
2058
       * a link-layer header.
2059
       *
2060
       * But sometimes we seem to get random crap
2061
       * in the link-layer header when capturing on
2062
       * ISDN devices....
2063
       */
2064
0
      handle->linktype = DLT_RAW;
2065
0
    }
2066
0
    break;
2067
2068
#ifndef ARPHRD_CISCO
2069
#define ARPHRD_CISCO 513 /* previously ARPHRD_HDLC */
2070
#endif
2071
0
  case ARPHRD_CISCO:
2072
0
    handle->linktype = DLT_C_HDLC;
2073
0
    break;
2074
2075
  /* Not sure if this is correct for all tunnels, but it
2076
   * works for CIPE */
2077
0
  case ARPHRD_TUNNEL:
2078
#ifndef ARPHRD_SIT
2079
#define ARPHRD_SIT 776  /* From Linux 2.2.13 */
2080
#endif
2081
0
  case ARPHRD_SIT:
2082
0
  case ARPHRD_CSLIP:
2083
0
  case ARPHRD_SLIP6:
2084
0
  case ARPHRD_CSLIP6:
2085
0
  case ARPHRD_ADAPT:
2086
0
  case ARPHRD_SLIP:
2087
#ifndef ARPHRD_RAWHDLC
2088
#define ARPHRD_RAWHDLC 518
2089
#endif
2090
0
  case ARPHRD_RAWHDLC:
2091
#ifndef ARPHRD_DLCI
2092
#define ARPHRD_DLCI 15
2093
#endif
2094
0
  case ARPHRD_DLCI:
2095
    /*
2096
     * XXX - should some of those be mapped to DLT_LINUX_SLL
2097
     * instead?  Should we just map all of them to DLT_LINUX_SLL?
2098
     */
2099
0
    handle->linktype = DLT_RAW;
2100
0
    break;
2101
2102
#ifndef ARPHRD_FRAD
2103
#define ARPHRD_FRAD 770
2104
#endif
2105
0
  case ARPHRD_FRAD:
2106
0
    handle->linktype = DLT_FRELAY;
2107
0
    break;
2108
2109
0
  case ARPHRD_LOCALTLK:
2110
0
    handle->linktype = DLT_LTALK;
2111
0
    break;
2112
2113
0
  case 18:
2114
    /*
2115
     * RFC 4338 defines an encapsulation for IP and ARP
2116
     * packets that's compatible with the RFC 2625
2117
     * encapsulation, but that uses a different ARP
2118
     * hardware type and hardware addresses.  That
2119
     * ARP hardware type is 18; Linux doesn't define
2120
     * any ARPHRD_ value as 18, but if it ever officially
2121
     * supports RFC 4338-style IP-over-FC, it should define
2122
     * one.
2123
     *
2124
     * For now, we map it to DLT_IP_OVER_FC, in the hopes
2125
     * that this will encourage its use in the future,
2126
     * should Linux ever officially support RFC 4338-style
2127
     * IP-over-FC.
2128
     */
2129
0
    handle->linktype = DLT_IP_OVER_FC;
2130
0
    break;
2131
2132
#ifndef ARPHRD_FCPP
2133
#define ARPHRD_FCPP 784
2134
#endif
2135
0
  case ARPHRD_FCPP:
2136
#ifndef ARPHRD_FCAL
2137
#define ARPHRD_FCAL 785
2138
#endif
2139
0
  case ARPHRD_FCAL:
2140
#ifndef ARPHRD_FCPL
2141
#define ARPHRD_FCPL 786
2142
#endif
2143
0
  case ARPHRD_FCPL:
2144
#ifndef ARPHRD_FCFABRIC
2145
#define ARPHRD_FCFABRIC 787
2146
#endif
2147
0
  case ARPHRD_FCFABRIC:
2148
    /*
2149
     * Back in 2002, Donald Lee at Cray wanted a DLT_ for
2150
     * IP-over-FC:
2151
     *
2152
     *  https://www.mail-archive.com/tcpdump-workers@sandelman.ottawa.on.ca/msg01043.html
2153
     *
2154
     * and one was assigned.
2155
     *
2156
     * In a later private discussion (spun off from a message
2157
     * on the ethereal-users list) on how to get that DLT_
2158
     * value in libpcap on Linux, I ended up deciding that
2159
     * the best thing to do would be to have him tweak the
2160
     * driver to set the ARPHRD_ value to some ARPHRD_FCxx
2161
     * type, and map all those types to DLT_IP_OVER_FC:
2162
     *
2163
     *  I've checked into the libpcap and tcpdump CVS tree
2164
     *  support for DLT_IP_OVER_FC.  In order to use that,
2165
     *  you'd have to modify your modified driver to return
2166
     *  one of the ARPHRD_FCxxx types, in "fcLINUXfcp.c" -
2167
     *  change it to set "dev->type" to ARPHRD_FCFABRIC, for
2168
     *  example (the exact value doesn't matter, it can be
2169
     *  any of ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, or
2170
     *  ARPHRD_FCFABRIC).
2171
     *
2172
     * 11 years later, Christian Svensson wanted to map
2173
     * various ARPHRD_ values to DLT_FC_2 and
2174
     * DLT_FC_2_WITH_FRAME_DELIMS for raw Fibre Channel
2175
     * frames:
2176
     *
2177
     *  https://github.com/mcr/libpcap/pull/29
2178
     *
2179
     * There doesn't seem to be any network drivers that uses
2180
     * any of the ARPHRD_FC* values for IP-over-FC, and
2181
     * it's not exactly clear what the "Dummy types for non
2182
     * ARP hardware" are supposed to mean (link-layer
2183
     * header type?  Physical network type?), so it's
2184
     * not exactly clear why the ARPHRD_FC* types exist
2185
     * in the first place.
2186
     *
2187
     * For now, we map them to DLT_FC_2, and provide an
2188
     * option of DLT_FC_2_WITH_FRAME_DELIMS, as well as
2189
     * DLT_IP_OVER_FC just in case there's some old
2190
     * driver out there that uses one of those types for
2191
     * IP-over-FC on which somebody wants to capture
2192
     * packets.
2193
     */
2194
0
    handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 3);
2195
    /*
2196
     * If that fails, just leave the list empty.
2197
     */
2198
0
    if (handle->dlt_list != NULL) {
2199
0
      handle->dlt_list[0] = DLT_FC_2;
2200
0
      handle->dlt_list[1] = DLT_FC_2_WITH_FRAME_DELIMS;
2201
0
      handle->dlt_list[2] = DLT_IP_OVER_FC;
2202
0
      handle->dlt_count = 3;
2203
0
    }
2204
0
    handle->linktype = DLT_FC_2;
2205
0
    break;
2206
2207
#ifndef ARPHRD_IRDA
2208
#define ARPHRD_IRDA 783
2209
#endif
2210
0
  case ARPHRD_IRDA:
2211
    /* Don't expect IP packet out of this interfaces... */
2212
0
    handle->linktype = DLT_LINUX_IRDA;
2213
    /* We need to save packet direction for IrDA decoding,
2214
     * so let's use "Linux-cooked" mode. Jean II
2215
     *
2216
     * XXX - this is handled in setup_socket(). */
2217
    /* handlep->cooked = 1; */
2218
0
    break;
2219
2220
  /* ARPHRD_LAPD is unofficial and randomly allocated, if reallocation
2221
   * is needed, please report it to <daniele@orlandi.com> */
2222
0
#ifndef ARPHRD_LAPD
2223
0
#define ARPHRD_LAPD 8445
2224
0
#endif
2225
0
  case ARPHRD_LAPD:
2226
    /* Don't expect IP packet out of this interfaces... */
2227
0
    handle->linktype = DLT_LINUX_LAPD;
2228
0
    break;
2229
2230
#ifndef ARPHRD_NONE
2231
#define ARPHRD_NONE 0xFFFE
2232
#endif
2233
0
  case ARPHRD_NONE:
2234
    /*
2235
     * No link-layer header; packets are just IP
2236
     * packets, so use DLT_RAW.
2237
     */
2238
0
    handle->linktype = DLT_RAW;
2239
0
    break;
2240
2241
#ifndef ARPHRD_IEEE802154
2242
#define ARPHRD_IEEE802154      804
2243
#endif
2244
0
       case ARPHRD_IEEE802154:
2245
0
               handle->linktype =  DLT_IEEE802_15_4_NOFCS;
2246
0
               break;
2247
2248
#ifndef ARPHRD_NETLINK
2249
#define ARPHRD_NETLINK  824
2250
#endif
2251
0
  case ARPHRD_NETLINK:
2252
0
    handle->linktype = DLT_NETLINK;
2253
    /*
2254
     * We need to use cooked mode, so that in sll_protocol we
2255
     * pick up the netlink protocol type such as NETLINK_ROUTE,
2256
     * NETLINK_GENERIC, NETLINK_FIB_LOOKUP, etc.
2257
     *
2258
     * XXX - this is handled in setup_socket().
2259
     */
2260
    /* handlep->cooked = 1; */
2261
0
    break;
2262
2263
#ifndef ARPHRD_VSOCKMON
2264
#define ARPHRD_VSOCKMON 826
2265
#endif
2266
0
  case ARPHRD_VSOCKMON:
2267
0
    handle->linktype = DLT_VSOCK;
2268
0
    break;
2269
2270
0
  default:
2271
0
    handle->linktype = -1;
2272
0
    break;
2273
0
  }
2274
0
}
2275
2276
static void
2277
set_dlt_list_cooked(pcap_t *handle)
2278
0
{
2279
  /*
2280
   * Support both DLT_LINUX_SLL and DLT_LINUX_SLL2.
2281
   */
2282
0
  handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
2283
2284
  /*
2285
   * If that failed, just leave the list empty.
2286
   */
2287
0
  if (handle->dlt_list != NULL) {
2288
0
    handle->dlt_list[0] = DLT_LINUX_SLL;
2289
0
    handle->dlt_list[1] = DLT_LINUX_SLL2;
2290
0
    handle->dlt_count = 2;
2291
0
  }
2292
0
}
2293
2294
/*
2295
 * Try to set up a PF_PACKET socket.
2296
 * Returns 0 on success and a PCAP_ERROR_ value on failure.
2297
 */
2298
static int
2299
setup_socket(pcap_t *handle, int is_any_device)
2300
0
{
2301
0
  struct pcap_linux *handlep = handle->priv;
2302
0
  const char    *device = handle->opt.device;
2303
0
  int     status = 0;
2304
0
  int     sock_fd, arptype;
2305
0
  int     val;
2306
0
  int     err = 0;
2307
0
  struct packet_mreq  mr;
2308
0
#if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT)
2309
0
  int     bpf_extensions;
2310
0
  socklen_t   len = sizeof(bpf_extensions);
2311
0
#endif
2312
2313
  /*
2314
   * Open a socket with protocol family packet. If cooked is true,
2315
   * we open a SOCK_DGRAM socket for the cooked interface, otherwise
2316
   * we open a SOCK_RAW socket for the raw interface.
2317
   *
2318
   * The protocol is set to 0.  This means we will receive no
2319
   * packets until we "bind" the socket with a non-zero
2320
   * protocol.  This allows us to setup the ring buffers without
2321
   * dropping any packets.
2322
   */
2323
0
  sock_fd = is_any_device ?
2324
0
    socket(PF_PACKET, SOCK_DGRAM, 0) :
2325
0
    socket(PF_PACKET, SOCK_RAW, 0);
2326
2327
0
  if (sock_fd == -1) {
2328
0
    if (errno == EPERM || errno == EACCES) {
2329
      /*
2330
       * You don't have permission to open the
2331
       * socket.
2332
       */
2333
0
      status = PCAP_ERROR_PERM_DENIED;
2334
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2335
0
          "Attempt to create packet socket failed - CAP_NET_RAW may be required");
2336
0
    } else {
2337
      /*
2338
       * Other error.
2339
       */
2340
0
      status = PCAP_ERROR;
2341
0
    }
2342
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2343
0
        errno, "socket");
2344
0
    return status;
2345
0
  }
2346
2347
  /*
2348
   * Get the interface index of the loopback device.
2349
   * If the attempt fails, don't fail, just set the
2350
   * "handlep->lo_ifindex" to -1.
2351
   *
2352
   * XXX - can there be more than one device that loops
2353
   * packets back, i.e. devices other than "lo"?  If so,
2354
   * we'd need to find them all, and have an array of
2355
   * indices for them, and check all of them in
2356
   * "pcap_read_packet()".
2357
   */
2358
0
  handlep->lo_ifindex = iface_get_id(sock_fd, "lo", handle->errbuf);
2359
2360
  /*
2361
   * Default value for offset to align link-layer payload
2362
   * on a 4-byte boundary.
2363
   */
2364
0
  handle->offset   = 0;
2365
2366
  /*
2367
   * What kind of frames do we have to deal with? Fall back
2368
   * to cooked mode if we have an unknown interface type
2369
   * or a type we know doesn't work well in raw mode.
2370
   */
2371
0
  if (!is_any_device) {
2372
    /* Assume for now we don't need cooked mode. */
2373
0
    handlep->cooked = 0;
2374
2375
0
    if (handle->opt.rfmon) {
2376
      /*
2377
       * We were asked to turn on monitor mode.
2378
       * Do so before we get the link-layer type,
2379
       * because entering monitor mode could change
2380
       * the link-layer type.
2381
       */
2382
0
      err = enter_rfmon_mode(handle, sock_fd, device);
2383
0
      if (err < 0) {
2384
        /* Hard failure */
2385
0
        close(sock_fd);
2386
0
        return err;
2387
0
      }
2388
0
      if (err == 0) {
2389
        /*
2390
         * Nothing worked for turning monitor mode
2391
         * on.
2392
         */
2393
0
        close(sock_fd);
2394
0
        return PCAP_ERROR_RFMON_NOTSUP;
2395
0
      }
2396
2397
      /*
2398
       * Either monitor mode has been turned on for
2399
       * the device, or we've been given a different
2400
       * device to open for monitor mode.  If we've
2401
       * been given a different device, use it.
2402
       */
2403
0
      if (handlep->mondevice != NULL)
2404
0
        device = handlep->mondevice;
2405
0
    }
2406
0
    arptype = iface_get_arptype(sock_fd, device, handle->errbuf);
2407
0
    if (arptype < 0) {
2408
0
      close(sock_fd);
2409
0
      return arptype;
2410
0
    }
2411
0
    map_arphrd_to_dlt(handle, arptype, device, 1);
2412
0
    if (handle->linktype == -1 ||
2413
0
        handle->linktype == DLT_LINUX_SLL ||
2414
0
        handle->linktype == DLT_LINUX_IRDA ||
2415
0
        handle->linktype == DLT_LINUX_LAPD ||
2416
0
        handle->linktype == DLT_NETLINK ||
2417
0
        (handle->linktype == DLT_EN10MB &&
2418
0
         (strncmp("isdn", device, 4) == 0 ||
2419
0
          strncmp("isdY", device, 4) == 0))) {
2420
      /*
2421
       * Unknown interface type (-1), or a
2422
       * device we explicitly chose to run
2423
       * in cooked mode (e.g., PPP devices),
2424
       * or an ISDN device (whose link-layer
2425
       * type we can only determine by using
2426
       * APIs that may be different on different
2427
       * kernels) - reopen in cooked mode.
2428
       *
2429
       * If the type is unknown, return a warning;
2430
       * map_arphrd_to_dlt() has already set the
2431
       * warning message.
2432
       */
2433
0
      if (close(sock_fd) == -1) {
2434
0
        pcap_fmt_errmsg_for_errno(handle->errbuf,
2435
0
            PCAP_ERRBUF_SIZE, errno, "close");
2436
0
        return PCAP_ERROR;
2437
0
      }
2438
0
      sock_fd = socket(PF_PACKET, SOCK_DGRAM, 0);
2439
0
      if (sock_fd < 0) {
2440
        /*
2441
         * Fatal error.  We treat this as
2442
         * a generic error; we already know
2443
         * that we were able to open a
2444
         * PF_PACKET/SOCK_RAW socket, so
2445
         * any failure is a "this shouldn't
2446
         * happen" case.
2447
         */
2448
0
        pcap_fmt_errmsg_for_errno(handle->errbuf,
2449
0
            PCAP_ERRBUF_SIZE, errno, "socket");
2450
0
        return PCAP_ERROR;
2451
0
      }
2452
0
      handlep->cooked = 1;
2453
2454
      /*
2455
       * Get rid of any link-layer type list
2456
       * we allocated - this only supports cooked
2457
       * capture.
2458
       */
2459
0
      if (handle->dlt_list != NULL) {
2460
0
        free(handle->dlt_list);
2461
0
        handle->dlt_list = NULL;
2462
0
        handle->dlt_count = 0;
2463
0
        set_dlt_list_cooked(handle);
2464
0
      }
2465
2466
0
      if (handle->linktype == -1) {
2467
        /*
2468
         * Warn that we're falling back on
2469
         * cooked mode; we may want to
2470
         * update "map_arphrd_to_dlt()"
2471
         * to handle the new type.
2472
         */
2473
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2474
0
          "arptype %d not "
2475
0
          "supported by libpcap - "
2476
0
          "falling back to cooked "
2477
0
          "socket",
2478
0
          arptype);
2479
0
      }
2480
2481
      /*
2482
       * IrDA capture is not a real "cooked" capture,
2483
       * it's IrLAP frames, not IP packets.  The
2484
       * same applies to LAPD capture.
2485
       */
2486
0
      if (handle->linktype != DLT_LINUX_IRDA &&
2487
0
          handle->linktype != DLT_LINUX_LAPD &&
2488
0
          handle->linktype != DLT_NETLINK)
2489
0
        handle->linktype = DLT_LINUX_SLL;
2490
0
      if (handle->linktype == -1) {
2491
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2492
0
            "unknown arptype %d, defaulting to cooked mode",
2493
0
            arptype);
2494
0
        status = PCAP_WARNING;
2495
0
      }
2496
0
    }
2497
2498
0
    handlep->ifindex = iface_get_id(sock_fd, device,
2499
0
        handle->errbuf);
2500
0
    if (handlep->ifindex == -1) {
2501
0
      close(sock_fd);
2502
0
      return PCAP_ERROR;
2503
0
    }
2504
2505
0
    if ((err = iface_bind(sock_fd, handlep->ifindex,
2506
0
        handle->errbuf, 0)) != 0) {
2507
0
      close(sock_fd);
2508
0
      return err;
2509
0
    }
2510
0
  } else {
2511
    /*
2512
     * The "any" device.
2513
     */
2514
0
    if (handle->opt.rfmon) {
2515
      /*
2516
       * It doesn't support monitor mode.
2517
       */
2518
0
      close(sock_fd);
2519
0
      return PCAP_ERROR_RFMON_NOTSUP;
2520
0
    }
2521
2522
    /*
2523
     * It uses cooked mode.
2524
     */
2525
0
    handlep->cooked = 1;
2526
0
    handle->linktype = DLT_LINUX_SLL;
2527
0
    handle->dlt_list = NULL;
2528
0
    handle->dlt_count = 0;
2529
0
    set_dlt_list_cooked(handle);
2530
2531
    /*
2532
     * We're not bound to a device.
2533
     * For now, we're using this as an indication
2534
     * that we can't transmit; stop doing that only
2535
     * if we figure out how to transmit in cooked
2536
     * mode.
2537
     */
2538
0
    handlep->ifindex = -1;
2539
0
  }
2540
2541
  /*
2542
   * Select promiscuous mode on if "promisc" is set.
2543
   *
2544
   * Do not turn allmulti mode on if we don't select
2545
   * promiscuous mode - on some devices (e.g., Orinoco
2546
   * wireless interfaces), allmulti mode isn't supported
2547
   * and the driver implements it by turning promiscuous
2548
   * mode on, and that screws up the operation of the
2549
   * card as a normal networking interface, and on no
2550
   * other platform I know of does starting a non-
2551
   * promiscuous capture affect which multicast packets
2552
   * are received by the interface.
2553
   */
2554
2555
  /*
2556
   * Hmm, how can we set promiscuous mode on all interfaces?
2557
   * I am not sure if that is possible at all.  For now, we
2558
   * silently ignore attempts to turn promiscuous mode on
2559
   * for the "any" device (so you don't have to explicitly
2560
   * disable it in programs such as tcpdump).
2561
   */
2562
2563
0
  if (!is_any_device && handle->opt.promisc) {
2564
0
    memset(&mr, 0, sizeof(mr));
2565
0
    mr.mr_ifindex = handlep->ifindex;
2566
0
    mr.mr_type    = PACKET_MR_PROMISC;
2567
0
    if (setsockopt(sock_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
2568
0
        &mr, sizeof(mr)) == -1) {
2569
0
      pcap_fmt_errmsg_for_errno(handle->errbuf,
2570
0
          PCAP_ERRBUF_SIZE, errno, "setsockopt (PACKET_ADD_MEMBERSHIP)");
2571
0
      close(sock_fd);
2572
0
      return PCAP_ERROR;
2573
0
    }
2574
0
  }
2575
2576
  /*
2577
   * Enable auxiliary data and reserve room for reconstructing
2578
   * VLAN headers.
2579
   *
2580
   * XXX - is enabling auxiliary data necessary, now that we
2581
   * only support memory-mapped capture?  The kernel's memory-mapped
2582
   * capture code doesn't seem to check whether auxiliary data
2583
   * is enabled, it seems to provide it whether it is or not.
2584
   */
2585
0
  val = 1;
2586
0
  if (setsockopt(sock_fd, SOL_PACKET, PACKET_AUXDATA, &val,
2587
0
           sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2588
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2589
0
        errno, "setsockopt (PACKET_AUXDATA)");
2590
0
    close(sock_fd);
2591
0
    return PCAP_ERROR;
2592
0
  }
2593
0
  handle->offset += VLAN_TAG_LEN;
2594
2595
  /*
2596
   * If we're in cooked mode, make the snapshot length
2597
   * large enough to hold a "cooked mode" header plus
2598
   * 1 byte of packet data (so we don't pass a byte
2599
   * count of 0 to "recvfrom()").
2600
   * XXX - we don't know whether this will be DLT_LINUX_SLL
2601
   * or DLT_LINUX_SLL2, so make sure it's big enough for
2602
   * a DLT_LINUX_SLL2 "cooked mode" header; a snapshot length
2603
   * that small is silly anyway.
2604
   */
2605
0
  if (handlep->cooked) {
2606
0
    if (handle->snapshot < SLL2_HDR_LEN + 1)
2607
0
      handle->snapshot = SLL2_HDR_LEN + 1;
2608
0
  }
2609
0
  handle->bufsize = handle->snapshot;
2610
2611
  /*
2612
   * Set the offset at which to insert VLAN tags.
2613
   */
2614
0
  set_vlan_offset(handle);
2615
2616
0
  if (handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO) {
2617
0
    int nsec_tstamps = 1;
2618
2619
0
    if (setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPNS, &nsec_tstamps, sizeof(nsec_tstamps)) < 0) {
2620
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "setsockopt: unable to set SO_TIMESTAMPNS");
2621
0
      close(sock_fd);
2622
0
      return PCAP_ERROR;
2623
0
    }
2624
0
  }
2625
2626
  /*
2627
   * We've succeeded. Save the socket FD in the pcap structure.
2628
   */
2629
0
  handle->fd = sock_fd;
2630
2631
0
#if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT)
2632
  /*
2633
   * Can we generate special code for VLAN checks?
2634
   * (XXX - what if we need the special code but it's not supported
2635
   * by the OS?  Is that possible?)
2636
   */
2637
0
  if (getsockopt(sock_fd, SOL_SOCKET, SO_BPF_EXTENSIONS,
2638
0
      &bpf_extensions, &len) == 0) {
2639
0
    if (bpf_extensions >= SKF_AD_VLAN_TAG_PRESENT) {
2640
      /*
2641
       * Yes, we can.  Request that we do so.
2642
       */
2643
0
      handle->bpf_codegen_flags |= BPF_SPECIAL_VLAN_HANDLING;
2644
0
    }
2645
0
  }
2646
0
#endif /* defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) */
2647
2648
0
  return status;
2649
0
}
2650
2651
/*
2652
 * Attempt to setup memory-mapped access.
2653
 *
2654
 * On success, returns 1, and sets *status to 0 if there are no warnings
2655
 * or to a PCAP_WARNING_ code if there is a warning.
2656
 *
2657
 * On error, returns -1, and sets *status to the appropriate error code;
2658
 * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message.
2659
 */
2660
static int
2661
setup_mmapped(pcap_t *handle, int *status)
2662
0
{
2663
0
  struct pcap_linux *handlep = handle->priv;
2664
0
  int ret, flags = MAP_ANONYMOUS | MAP_PRIVATE;
2665
2666
  /*
2667
   * Attempt to allocate a buffer to hold the contents of one
2668
   * packet, for use by the oneshot callback.
2669
   */
2670
0
#ifdef MAP_32BIT
2671
0
  if (pcap_mmap_32bit) flags |= MAP_32BIT;
2672
0
#endif
2673
0
  handlep->oneshot_buffer = mmap(0, handle->snapshot, PROT_READ | PROT_WRITE, flags, -1, 0);
2674
0
  if (handlep->oneshot_buffer == MAP_FAILED) {
2675
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2676
0
        errno, "can't allocate oneshot buffer");
2677
0
    *status = PCAP_ERROR;
2678
0
    return -1;
2679
0
  }
2680
2681
0
  if (handle->opt.buffer_size == 0) {
2682
    /* by default request 2M for the ring buffer */
2683
0
    handle->opt.buffer_size = 2*1024*1024;
2684
0
  }
2685
0
  ret = prepare_tpacket_socket(handle);
2686
0
  if (ret == -1) {
2687
0
    munmap(handlep->oneshot_buffer, handle->snapshot);
2688
0
    handlep->oneshot_buffer = NULL;
2689
0
    *status = PCAP_ERROR;
2690
0
    return ret;
2691
0
  }
2692
0
  ret = create_ring(handle, status);
2693
0
  if (ret == -1) {
2694
    /*
2695
     * Error attempting to enable memory-mapped capture;
2696
     * fail.  create_ring() has set *status.
2697
     */
2698
0
    munmap(handlep->oneshot_buffer, handle->snapshot);
2699
0
    handlep->oneshot_buffer = NULL;
2700
0
    return -1;
2701
0
  }
2702
2703
  /*
2704
   * Success.  *status has been set either to 0 if there are no
2705
   * warnings or to a PCAP_WARNING_ value if there is a warning.
2706
   *
2707
   * handle->offset is used to get the current position into the rx ring.
2708
   * handle->cc is used to store the ring size.
2709
   */
2710
2711
  /*
2712
   * Set the timeout to use in poll() before returning.
2713
   */
2714
0
  set_poll_timeout(handlep);
2715
2716
0
  return 1;
2717
0
}
2718
2719
/*
2720
 * Attempt to set the socket to the specified version of the memory-mapped
2721
 * header.
2722
 *
2723
 * Return 0 if we succeed; return 1 if we fail because that version isn't
2724
 * supported; return -1 on any other error, and set handle->errbuf.
2725
 */
2726
static int
2727
init_tpacket(pcap_t *handle, int version, const char *version_str)
2728
0
{
2729
0
  struct pcap_linux *handlep = handle->priv;
2730
0
  int val = version;
2731
0
  socklen_t len = sizeof(val);
2732
2733
  /*
2734
   * Probe whether kernel supports the specified TPACKET version;
2735
   * this also gets the length of the header for that version.
2736
   *
2737
   * This socket option was introduced in 2.6.27, which was
2738
   * also the first release with TPACKET_V2 support.
2739
   */
2740
0
  if (getsockopt(handle->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
2741
0
    if (errno == EINVAL) {
2742
      /*
2743
       * EINVAL means this specific version of TPACKET
2744
       * is not supported. Tell the caller they can try
2745
       * with a different one; if they've run out of
2746
       * others to try, let them set the error message
2747
       * appropriately.
2748
       */
2749
0
      return 1;
2750
0
    }
2751
2752
    /*
2753
     * All other errors are fatal.
2754
     */
2755
0
    if (errno == ENOPROTOOPT) {
2756
      /*
2757
       * PACKET_HDRLEN isn't supported, which means
2758
       * that memory-mapped capture isn't supported.
2759
       * Indicate that in the message.
2760
       */
2761
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2762
0
          "Kernel doesn't support memory-mapped capture; a 2.6.27 or later 2.x kernel is required, with CONFIG_PACKET_MMAP specified for 2.x kernels");
2763
0
    } else {
2764
      /*
2765
       * Some unexpected error.
2766
       */
2767
0
      pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2768
0
          errno, "can't get %s header len on packet socket",
2769
0
          version_str);
2770
0
    }
2771
0
    return -1;
2772
0
  }
2773
0
  handlep->tp_hdrlen = val;
2774
2775
0
  val = version;
2776
0
  if (setsockopt(handle->fd, SOL_PACKET, PACKET_VERSION, &val,
2777
0
         sizeof(val)) < 0) {
2778
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
2779
0
        errno, "can't activate %s on packet socket", version_str);
2780
0
    return -1;
2781
0
  }
2782
0
  handlep->tp_version = version;
2783
2784
0
  return 0;
2785
0
}
2786
2787
/*
2788
 * Attempt to set the socket to version 3 of the memory-mapped header and,
2789
 * if that fails because version 3 isn't supported, attempt to fall
2790
 * back to version 2.  If version 2 isn't supported, just fail.
2791
 *
2792
 * Return 0 if we succeed and -1 on any other error, and set handle->errbuf.
2793
 */
2794
static int
2795
prepare_tpacket_socket(pcap_t *handle)
2796
0
{
2797
0
  int ret;
2798
2799
0
#ifdef HAVE_TPACKET3
2800
  /*
2801
   * Try setting the version to TPACKET_V3.
2802
   *
2803
   * The only mode in which buffering is done on PF_PACKET
2804
   * sockets, so that packets might not be delivered
2805
   * immediately, is TPACKET_V3 mode.
2806
   *
2807
   * The buffering cannot be disabled in that mode, so
2808
   * if the user has requested immediate mode, we don't
2809
   * use TPACKET_V3.
2810
   */
2811
0
  if (!handle->opt.immediate) {
2812
0
    ret = init_tpacket(handle, TPACKET_V3, "TPACKET_V3");
2813
0
    if (ret == 0) {
2814
      /*
2815
       * Success.
2816
       */
2817
0
      return 0;
2818
0
    }
2819
0
    if (ret == -1) {
2820
      /*
2821
       * We failed for some reason other than "the
2822
       * kernel doesn't support TPACKET_V3".
2823
       */
2824
0
      return -1;
2825
0
    }
2826
2827
    /*
2828
     * This means it returned 1, which means "the kernel
2829
     * doesn't support TPACKET_V3"; try TPACKET_V2.
2830
     */
2831
0
  }
2832
0
#endif /* HAVE_TPACKET3 */
2833
2834
  /*
2835
   * Try setting the version to TPACKET_V2.
2836
   */
2837
0
  ret = init_tpacket(handle, TPACKET_V2, "TPACKET_V2");
2838
0
  if (ret == 0) {
2839
    /*
2840
     * Success.
2841
     */
2842
0
    return 0;
2843
0
  }
2844
2845
0
  if (ret == 1) {
2846
    /*
2847
     * OK, the kernel supports memory-mapped capture, but
2848
     * not TPACKET_V2.  Set the error message appropriately.
2849
     */
2850
0
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2851
0
        "Kernel doesn't support TPACKET_V2; a 2.6.27 or later kernel is required");
2852
0
  }
2853
2854
  /*
2855
   * We failed.
2856
   */
2857
0
  return -1;
2858
0
}
2859
2860
0
/*
 * Larger of two values.  Each argument appears twice in the expansion,
 * so whichever one is selected is evaluated twice; don't pass
 * expressions with side effects.
 */
#define MAX(a,b) ((a)>(b)?(a):(b))
2861
2862
/*
2863
 * Attempt to set up memory-mapped access.
2864
 *
2865
 * On success, returns 1, and sets *status to 0 if there are no warnings
2866
 * or to a PCAP_WARNING_ code if there is a warning.
2867
 *
2868
 * On error, returns -1, and sets *status to the appropriate error code;
2869
 * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message.
2870
 */
2871
static int
2872
create_ring(pcap_t *handle, int *status)
2873
0
{
2874
0
  struct pcap_linux *handlep = handle->priv;
2875
0
  unsigned i, j, frames_per_block;
2876
0
  int flags = MAP_SHARED;
2877
0
#ifdef HAVE_TPACKET3
2878
  /*
2879
   * For sockets using TPACKET_V2, the extra stuff at the end of a
2880
   * struct tpacket_req3 will be ignored, so this is OK even for
2881
   * those sockets.
2882
   */
2883
0
  struct tpacket_req3 req;
2884
#else
2885
  struct tpacket_req req;
2886
#endif
2887
0
  socklen_t len;
2888
0
  unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff;
2889
0
  unsigned int frame_size;
2890
2891
  /*
2892
   * Start out assuming no warnings or errors.
2893
   */
2894
0
  *status = 0;
2895
2896
  /*
2897
   * Reserve space for VLAN tag reconstruction.
2898
   */
2899
0
  tp_reserve = VLAN_TAG_LEN;
2900
2901
  /*
2902
   * If we're capturing in cooked mode, reserve space for
2903
   * a DLT_LINUX_SLL2 header; we don't know yet whether
2904
   * we'll be using DLT_LINUX_SLL or DLT_LINUX_SLL2, as
2905
   * that can be changed on an open device, so we reserve
2906
   * space for the larger of the two.
2907
   *
2908
   * XXX - we assume that the kernel is still adding
2909
   * 16 bytes of extra space, so we subtract 16 from
2910
   * SLL2_HDR_LEN to get the additional space needed.
2911
   * (Are they doing that for DLT_LINUX_SLL, the link-
2912
   * layer header for which is 16 bytes?)
2913
   *
2914
   * XXX - should we use TPACKET_ALIGN(SLL2_HDR_LEN - 16)?
2915
   */
2916
0
  if (handlep->cooked)
2917
0
    tp_reserve += SLL2_HDR_LEN - 16;
2918
2919
  /*
2920
   * Try to request that amount of reserve space.
2921
   * This must be done before creating the ring buffer.
2922
   */
2923
0
  len = sizeof(tp_reserve);
2924
0
  if (setsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE,
2925
0
      &tp_reserve, len) < 0) {
2926
0
    pcap_fmt_errmsg_for_errno(handle->errbuf,
2927
0
        PCAP_ERRBUF_SIZE, errno,
2928
0
        "setsockopt (PACKET_RESERVE)");
2929
0
    *status = PCAP_ERROR;
2930
0
    return -1;
2931
0
  }
2932
2933
0
  switch (handlep->tp_version) {
2934
2935
0
  case TPACKET_V2:
2936
    /* Note that with large snapshot length (say 256K, which is
2937
     * the default for recent versions of tcpdump, Wireshark,
2938
     * TShark, dumpcap or 64K, the value that "-s 0" has given for
2939
     * a long time with tcpdump), if we use the snapshot
2940
     * length to calculate the frame length, only a few frames
2941
     * will be available in the ring even with pretty
2942
     * large ring size (and a lot of memory will be unused).
2943
     *
2944
     * Ideally, we should choose a frame length based on the
2945
     * minimum of the specified snapshot length and the maximum
2946
     * packet size.  That's not as easy as it sounds; consider,
2947
     * for example, an 802.11 interface in monitor mode, where
2948
     * the frame would include a radiotap header, where the
2949
     * maximum radiotap header length is device-dependent.
2950
     *
2951
     * So, for now, we just do this for Ethernet devices, where
2952
     * there's no metadata header, and the link-layer header is
2953
     * fixed length.  We can get the maximum packet size by
2954
     * adding 18, the Ethernet header length plus the CRC length
2955
     * (just in case we happen to get the CRC in the packet), to
2956
     * the MTU of the interface; we fetch the MTU in the hopes
2957
     * that it reflects support for jumbo frames.  (Even if the
2958
     * interface is just being used for passive snooping, the
2959
     * driver might set the size of buffers in the receive ring
2960
     * based on the MTU, so that the MTU limits the maximum size
2961
     * of packets that we can receive.)
2962
     *
2963
     * If segmentation/fragmentation or receive offload are
2964
     * enabled, we can get reassembled/aggregated packets larger
2965
     * than MTU, but bounded to 65535 plus the Ethernet overhead,
2966
     * due to kernel and protocol constraints */
2967
0
    frame_size = handle->snapshot;
2968
0
    if (handle->linktype == DLT_EN10MB) {
2969
0
      unsigned int max_frame_len;
2970
0
      int mtu;
2971
0
      int offload;
2972
2973
0
      mtu = iface_get_mtu(handle->fd, handle->opt.device,
2974
0
          handle->errbuf);
2975
0
      if (mtu == -1) {
2976
0
        *status = PCAP_ERROR;
2977
0
        return -1;
2978
0
      }
2979
0
      offload = iface_get_offload(handle);
2980
0
      if (offload == -1) {
2981
0
        *status = PCAP_ERROR;
2982
0
        return -1;
2983
0
      }
2984
0
      if (offload)
2985
0
        max_frame_len = MAX(mtu, 65535);
2986
0
      else
2987
0
        max_frame_len = mtu;
2988
0
      max_frame_len += 18;
2989
2990
0
      if (frame_size > max_frame_len)
2991
0
        frame_size = max_frame_len;
2992
0
    }
2993
2994
    /* NOTE: calculus matching those in tpacket_rcv()
2995
     * in linux-2.6/net/packet/af_packet.c
2996
     */
2997
0
    len = sizeof(sk_type);
2998
0
    if (getsockopt(handle->fd, SOL_SOCKET, SO_TYPE, &sk_type,
2999
0
        &len) < 0) {
3000
0
      pcap_fmt_errmsg_for_errno(handle->errbuf,
3001
0
          PCAP_ERRBUF_SIZE, errno, "getsockopt (SO_TYPE)");
3002
0
      *status = PCAP_ERROR;
3003
0
      return -1;
3004
0
    }
3005
0
    maclen = (sk_type == SOCK_DGRAM) ? 0 : MAX_LINKHEADER_SIZE;
3006
      /* XXX: in the kernel maclen is calculated from
3007
       * LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len
3008
       * in:  packet_snd()           in linux-2.6/net/packet/af_packet.c
3009
       * then packet_alloc_skb()     in linux-2.6/net/packet/af_packet.c
3010
       * then sock_alloc_send_pskb() in linux-2.6/net/core/sock.c
3011
       * but I see no way to get those sizes in userspace,
3012
       * like for instance with an ifreq ioctl();
3013
       * the best thing I've found so far is MAX_HEADER in
3014
       * the kernel part of linux-2.6/include/linux/netdevice.h
3015
       * which goes up to 128+48=176; since pcap-linux.c
3016
       * defines a MAX_LINKHEADER_SIZE of 256 which is
3017
       * greater than that, let's use it.. maybe is it even
3018
       * large enough to directly replace macoff..
3019
       */
3020
0
    tp_hdrlen = TPACKET_ALIGN(handlep->tp_hdrlen) + sizeof(struct sockaddr_ll) ;
3021
0
    netoff = TPACKET_ALIGN(tp_hdrlen + (maclen < 16 ? 16 : maclen)) + tp_reserve;
3022
      /* NOTE: AFAICS tp_reserve may break the TPACKET_ALIGN
3023
       * of netoff, which contradicts
3024
       * linux-2.6/Documentation/networking/packet_mmap.txt
3025
       * documenting that:
3026
       * "- Gap, chosen so that packet data (Start+tp_net)
3027
       * aligns to TPACKET_ALIGNMENT=16"
3028
       */
3029
      /* NOTE: in linux-2.6/include/linux/skbuff.h:
3030
       * "CPUs often take a performance hit
3031
       *  when accessing unaligned memory locations"
3032
       */
3033
0
    macoff = netoff - maclen;
3034
0
    req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size);
3035
    /*
3036
     * Round the buffer size up to a multiple of the
3037
     * frame size (rather than rounding down, which
3038
     * would give a buffer smaller than our caller asked
3039
     * for, and possibly give zero frames if the requested
3040
     * buffer size is too small for one frame).
3041
     */
3042
0
    req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size;
3043
0
    break;
3044
3045
0
#ifdef HAVE_TPACKET3
3046
0
  case TPACKET_V3:
3047
    /* The "frames" for this are actually buffers that
3048
     * contain multiple variable-sized frames.
3049
     *
3050
     * We pick a "frame" size of MAXIMUM_SNAPLEN to leave
3051
     * enough room for at least one reasonably-sized packet
3052
     * in the "frame". */
3053
0
    req.tp_frame_size = MAXIMUM_SNAPLEN;
3054
    /*
3055
     * Round the buffer size up to a multiple of the
3056
     * "frame" size (rather than rounding down, which
3057
     * would give a buffer smaller than our caller asked
3058
     * for, and possibly give zero "frames" if the requested
3059
     * buffer size is too small for one "frame").
3060
     */
3061
0
    req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size;
3062
0
    break;
3063
0
#endif
3064
0
  default:
3065
0
    snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3066
0
        "Internal error: unknown TPACKET_ value %u",
3067
0
        handlep->tp_version);
3068
0
    *status = PCAP_ERROR;
3069
0
    return -1;
3070
0
  }
3071
3072
  /* compute the minimum block size that will handle this frame.
3073
   * The block has to be page size aligned.
3074
   * The max block size allowed by the kernel is arch-dependent and
3075
   * it's not explicitly checked here. */
3076
0
  req.tp_block_size = getpagesize();
3077
0
  while (req.tp_block_size < req.tp_frame_size)
3078
0
    req.tp_block_size <<= 1;
3079
3080
0
  frames_per_block = req.tp_block_size/req.tp_frame_size;
3081
3082
  /*
3083
   * PACKET_TIMESTAMP was added after linux/net_tstamp.h was,
3084
   * so we check for PACKET_TIMESTAMP.  We check for
3085
   * linux/net_tstamp.h just in case a system somehow has
3086
   * PACKET_TIMESTAMP but not linux/net_tstamp.h; that might
3087
   * be unnecessary.
3088
   *
3089
   * SIOCSHWTSTAMP was introduced in the patch that introduced
3090
   * linux/net_tstamp.h, so we don't bother checking whether
3091
   * SIOCSHWTSTAMP is defined (if your Linux system has
3092
   * linux/net_tstamp.h but doesn't define SIOCSHWTSTAMP, your
3093
   * Linux system is badly broken).
3094
   */
3095
0
#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
3096
  /*
3097
   * If we were told to do so, ask the kernel and the driver
3098
   * to use hardware timestamps.
3099
   *
3100
   * Hardware timestamps are only supported with mmapped
3101
   * captures.
3102
   */
3103
0
  if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER ||
3104
0
      handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER_UNSYNCED) {
3105
0
    struct hwtstamp_config hwconfig;
3106
0
    struct ifreq ifr;
3107
0
    int timesource;
3108
3109
    /*
3110
     * Ask for hardware time stamps on all packets,
3111
     * including transmitted packets.
3112
     */
3113
0
    memset(&hwconfig, 0, sizeof(hwconfig));
3114
0
    hwconfig.tx_type = HWTSTAMP_TX_ON;
3115
0
    hwconfig.rx_filter = HWTSTAMP_FILTER_ALL;
3116
3117
0
    memset(&ifr, 0, sizeof(ifr));
3118
0
    pcap_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name));
3119
0
    ifr.ifr_data = (void *)&hwconfig;
3120
3121
    /*
3122
     * This may require CAP_NET_ADMIN.
3123
     */
3124
0
    if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) {
3125
0
      switch (errno) {
3126
3127
0
      case EPERM:
3128
        /*
3129
         * Treat this as an error, as the
3130
         * user should try to run this
3131
         * with the appropriate privileges -
3132
         * and, if they can't, shouldn't
3133
         * try requesting hardware time stamps.
3134
         */
3135
0
        *status = PCAP_ERROR_PERM_DENIED;
3136
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3137
0
            "Attempt to set hardware timestamp failed - CAP_NET_ADMIN may be required");
3138
0
        return -1;
3139
3140
0
      case EOPNOTSUPP:
3141
0
      case ERANGE:
3142
        /*
3143
         * Treat this as a warning, as the
3144
         * only way to fix the warning is to
3145
         * get an adapter that supports hardware
3146
         * time stamps for *all* packets.
3147
         * (ERANGE means "we support hardware
3148
         * time stamps, but for packets matching
3149
         * that particular filter", so it means
3150
         * "we don't support hardware time stamps
3151
         * for all incoming packets" here.)
3152
         *
3153
         * We'll just fall back on the standard
3154
         * host time stamps.
3155
         */
3156
0
        *status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP;
3157
0
        break;
3158
3159
0
      default:
3160
0
        pcap_fmt_errmsg_for_errno(handle->errbuf,
3161
0
            PCAP_ERRBUF_SIZE, errno,
3162
0
            "SIOCSHWTSTAMP failed");
3163
0
        *status = PCAP_ERROR;
3164
0
        return -1;
3165
0
      }
3166
0
    } else {
3167
      /*
3168
       * Well, that worked.  Now specify the type of
3169
       * hardware time stamp we want for this
3170
       * socket.
3171
       */
3172
0
      if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER) {
3173
        /*
3174
         * Hardware timestamp, synchronized
3175
         * with the system clock.
3176
         */
3177
0
        timesource = SOF_TIMESTAMPING_SYS_HARDWARE;
3178
0
      } else {
3179
        /*
3180
         * PCAP_TSTAMP_ADAPTER_UNSYNCED - hardware
3181
         * timestamp, not synchronized with the
3182
         * system clock.
3183
         */
3184
0
        timesource = SOF_TIMESTAMPING_RAW_HARDWARE;
3185
0
      }
3186
0
      if (setsockopt(handle->fd, SOL_PACKET, PACKET_TIMESTAMP,
3187
0
        (void *)&timesource, sizeof(timesource))) {
3188
0
        pcap_fmt_errmsg_for_errno(handle->errbuf,
3189
0
            PCAP_ERRBUF_SIZE, errno,
3190
0
            "can't set PACKET_TIMESTAMP");
3191
0
        *status = PCAP_ERROR;
3192
0
        return -1;
3193
0
      }
3194
0
    }
3195
0
  }
3196
0
#endif /* HAVE_LINUX_NET_TSTAMP_H && PACKET_TIMESTAMP */
3197
3198
  /* ask the kernel to create the ring */
3199
0
retry:
3200
0
  req.tp_block_nr = req.tp_frame_nr / frames_per_block;
3201
3202
  /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */
3203
0
  req.tp_frame_nr = req.tp_block_nr * frames_per_block;
3204
3205
0
#ifdef HAVE_TPACKET3
3206
  /* timeout value to retire block - use the configured buffering timeout, or default if <0. */
3207
0
  if (handlep->timeout > 0) {
3208
    /* Use the user specified timeout as the block timeout */
3209
0
    req.tp_retire_blk_tov = handlep->timeout;
3210
0
  } else if (handlep->timeout == 0) {
3211
    /*
3212
     * In pcap, this means "infinite timeout"; TPACKET_V3
3213
     * doesn't support that, so just set it to UINT_MAX
3214
     * milliseconds.  In the TPACKET_V3 loop, if the
3215
     * timeout is 0, and we haven't yet seen any packets,
3216
     * and we block and still don't have any packets, we
3217
     * keep blocking until we do.
3218
     */
3219
0
    req.tp_retire_blk_tov = UINT_MAX;
3220
0
  } else {
3221
    /*
3222
     * XXX - this is not valid; use 0, meaning "have the
3223
     * kernel pick a default", for now.
3224
     */
3225
0
    req.tp_retire_blk_tov = 0;
3226
0
  }
3227
  /* private data not used */
3228
0
  req.tp_sizeof_priv = 0;
3229
  /* Rx ring - feature request bits - none (rxhash will not be filled) */
3230
0
  req.tp_feature_req_word = 0;
3231
0
#endif
3232
3233
0
  if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,
3234
0
          (void *) &req, sizeof(req))) {
3235
0
    if ((errno == ENOMEM) && (req.tp_block_nr > 1)) {
3236
      /*
3237
       * Memory failure; try to reduce the requested ring
3238
       * size.
3239
       *
3240
       * We used to reduce this by half -- do 5% instead.
3241
       * That may result in more iterations and a longer
3242
       * startup, but the user will be much happier with
3243
       * the resulting buffer size.
3244
       */
3245
0
      if (req.tp_frame_nr < 20)
3246
0
        req.tp_frame_nr -= 1;
3247
0
      else
3248
0
        req.tp_frame_nr -= req.tp_frame_nr/20;
3249
0
      goto retry;
3250
0
    }
3251
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
3252
0
        errno, "can't create rx ring on packet socket");
3253
0
    *status = PCAP_ERROR;
3254
0
    return -1;
3255
0
  }
3256
3257
  /* memory map the rx ring */
3258
0
  handlep->mmapbuflen = req.tp_block_nr * req.tp_block_size;
3259
0
#ifdef MAP_32BIT
3260
0
  if (pcap_mmap_32bit) flags |= MAP_32BIT;
3261
0
#endif
3262
0
  handlep->mmapbuf = mmap(0, handlep->mmapbuflen, PROT_READ | PROT_WRITE, flags, handle->fd, 0);
3263
0
  if (handlep->mmapbuf == MAP_FAILED) {
3264
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
3265
0
        errno, "can't mmap rx ring");
3266
3267
    /* clear the allocated ring on error*/
3268
0
    destroy_ring(handle);
3269
0
    *status = PCAP_ERROR;
3270
0
    return -1;
3271
0
  }
3272
3273
  /* allocate a ring for each frame header pointer*/
3274
0
  handle->cc = req.tp_frame_nr;
3275
0
  handle->buffer = malloc(handle->cc * sizeof(union thdr *));
3276
0
  if (!handle->buffer) {
3277
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
3278
0
        errno, "can't allocate ring of frame headers");
3279
3280
0
    destroy_ring(handle);
3281
0
    *status = PCAP_ERROR;
3282
0
    return -1;
3283
0
  }
3284
3285
  /* fill the header ring with proper frame ptr*/
3286
0
  handle->offset = 0;
3287
0
  for (i=0; i<req.tp_block_nr; ++i) {
3288
0
    u_char *base = &handlep->mmapbuf[i*req.tp_block_size];
3289
0
    for (j=0; j<frames_per_block; ++j, ++handle->offset) {
3290
0
      RING_GET_CURRENT_FRAME(handle) = base;
3291
0
      base += req.tp_frame_size;
3292
0
    }
3293
0
  }
3294
3295
0
  handle->bufsize = req.tp_frame_size;
3296
0
  handle->offset = 0;
3297
0
  return 1;
3298
0
}
3299
3300
/* free all ring related resources*/
3301
static void
3302
destroy_ring(pcap_t *handle)
3303
0
{
3304
0
  struct pcap_linux *handlep = handle->priv;
3305
3306
  /*
3307
   * Tell the kernel to destroy the ring.
3308
   * We don't check for setsockopt failure, as 1) we can't recover
3309
   * from an error and 2) we might not yet have set it up in the
3310
   * first place.
3311
   */
3312
0
  struct tpacket_req req;
3313
0
  memset(&req, 0, sizeof(req));
3314
0
  (void)setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,
3315
0
        (void *) &req, sizeof(req));
3316
3317
  /* if ring is mapped, unmap it*/
3318
0
  if (handlep->mmapbuf) {
3319
    /* do not test for mmap failure, as we can't recover from any error */
3320
0
    (void)munmap(handlep->mmapbuf, handlep->mmapbuflen);
3321
0
    handlep->mmapbuf = NULL;
3322
0
  }
3323
0
}
3324
3325
/*
3326
 * Special one-shot callback, used for pcap_next() and pcap_next_ex(),
3327
 * for Linux mmapped capture.
3328
 *
3329
 * The problem is that pcap_next() and pcap_next_ex() expect the packet
3330
 * data handed to the callback to be valid after the callback returns,
3331
 * but pcap_read_linux_mmap() has to release that packet as soon as
3332
 * the callback returns (otherwise, the kernel thinks there's still
3333
 * at least one unprocessed packet available in the ring, so a select()
3334
 * will immediately return indicating that there's data to process), so,
3335
 * in the callback, we have to make a copy of the packet.
3336
 *
3337
 * Yes, this means that, if the capture is using the ring buffer, using
3338
 * pcap_next() or pcap_next_ex() requires more copies than using
3339
 * pcap_loop() or pcap_dispatch().  If that bothers you, don't use
3340
 * pcap_next() or pcap_next_ex().
3341
 */
3342
static void
3343
pcap_oneshot_linux(u_char *user, const struct pcap_pkthdr *h,
3344
    const u_char *bytes)
3345
0
{
3346
0
  struct oneshot_userdata *sp = (struct oneshot_userdata *)user;
3347
0
  pcap_t *handle = sp->pd;
3348
0
  struct pcap_linux *handlep = handle->priv;
3349
3350
0
  *sp->hdr = *h;
3351
0
  memcpy(handlep->oneshot_buffer, bytes, h->caplen);
3352
0
  *sp->pkt = handlep->oneshot_buffer;
3353
0
}
3354
3355
static int
3356
pcap_getnonblock_linux(pcap_t *handle)
3357
0
{
3358
0
  struct pcap_linux *handlep = handle->priv;
3359
3360
  /* use negative value of timeout to indicate non blocking ops */
3361
0
  return (handlep->timeout<0);
3362
0
}
3363
3364
static int
3365
pcap_setnonblock_linux(pcap_t *handle, int nonblock)
3366
0
{
3367
0
  struct pcap_linux *handlep = handle->priv;
3368
3369
  /*
3370
   * Set the file descriptor to non-blocking mode, as we use
3371
   * it for sending packets.
3372
   */
3373
0
  if (pcap_setnonblock_fd(handle, nonblock) == -1)
3374
0
    return -1;
3375
3376
  /*
3377
   * Map each value to their corresponding negation to
3378
   * preserve the timeout value provided with pcap_set_timeout.
3379
   */
3380
0
  if (nonblock) {
3381
0
    if (handlep->timeout >= 0) {
3382
      /*
3383
       * Indicate that we're switching to
3384
       * non-blocking mode.
3385
       */
3386
0
      handlep->timeout = ~handlep->timeout;
3387
0
    }
3388
0
    if (handlep->poll_breakloop_fd != -1) {
3389
      /* Close the eventfd; we do not need it in nonblock mode. */
3390
0
      close(handlep->poll_breakloop_fd);
3391
0
      handlep->poll_breakloop_fd = -1;
3392
0
    }
3393
0
  } else {
3394
0
    if (handlep->poll_breakloop_fd == -1) {
3395
      /* If we did not have an eventfd, open one now that we are blocking. */
3396
0
      if ( ( handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK) ) == -1 ) {
3397
0
        int save_errno = errno;
3398
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3399
0
            "Could not open eventfd: %s",
3400
0
            strerror(errno));
3401
0
        errno = save_errno;
3402
0
        return -1;
3403
0
      }
3404
0
    }
3405
0
    if (handlep->timeout < 0) {
3406
0
      handlep->timeout = ~handlep->timeout;
3407
0
    }
3408
0
  }
3409
  /* Update the timeout to use in poll(). */
3410
0
  set_poll_timeout(handlep);
3411
0
  return 0;
3412
0
}
3413
3414
/*
3415
 * Get the status field of the ring buffer frame at a specified offset.
3416
 */
3417
static inline u_int
3418
pcap_get_ring_frame_status(pcap_t *handle, int offset)
3419
0
{
3420
0
  struct pcap_linux *handlep = handle->priv;
3421
0
  union thdr h;
3422
3423
0
  h.raw = RING_GET_FRAME_AT(handle, offset);
3424
0
  switch (handlep->tp_version) {
3425
0
  case TPACKET_V2:
3426
0
    return __atomic_load_n(&h.h2->tp_status, __ATOMIC_ACQUIRE);
3427
0
    break;
3428
0
#ifdef HAVE_TPACKET3
3429
0
  case TPACKET_V3:
3430
0
    return __atomic_load_n(&h.h3->hdr.bh1.block_status, __ATOMIC_ACQUIRE);
3431
0
    break;
3432
0
#endif
3433
0
  }
3434
  /* This should not happen. */
3435
0
  return 0;
3436
0
}
3437
3438
/*
3439
 * Block waiting for frames to be available.
3440
 */
3441
static int pcap_wait_for_frames_mmap(pcap_t *handle)
3442
0
{
3443
0
  struct pcap_linux *handlep = handle->priv;
3444
0
  int timeout;
3445
0
  struct ifreq ifr;
3446
0
  int ret;
3447
0
  struct pollfd pollinfo[2];
3448
0
  int numpollinfo;
3449
0
  pollinfo[0].fd = handle->fd;
3450
0
  pollinfo[0].events = POLLIN;
3451
0
  if ( handlep->poll_breakloop_fd == -1 ) {
3452
0
    numpollinfo = 1;
3453
0
    pollinfo[1].revents = 0;
3454
    /*
3455
     * We set pollinfo[1].revents to zero, even though
3456
     * numpollinfo = 1 meaning that poll() doesn't see
3457
     * pollinfo[1], so that we do not have to add a
3458
     * conditional of numpollinfo > 1 below when we
3459
     * test pollinfo[1].revents.
3460
     */
3461
0
  } else {
3462
0
    pollinfo[1].fd = handlep->poll_breakloop_fd;
3463
0
    pollinfo[1].events = POLLIN;
3464
0
    numpollinfo = 2;
3465
0
  }
3466
3467
  /*
3468
   * Keep polling until we either get some packets to read, see
3469
   * that we got told to break out of the loop, get a fatal error,
3470
   * or discover that the device went away.
3471
   *
3472
   * In non-blocking mode, we must still do one poll() to catch
3473
   * any pending error indications, but the poll() has a timeout
3474
   * of 0, so that it doesn't block, and we quit after that one
3475
   * poll().
3476
   *
3477
   * If we've seen an ENETDOWN, it might be the first indication
3478
   * that the device went away, or it might just be that it was
3479
   * configured down.  Unfortunately, there's no guarantee that
3480
   * the device has actually been removed as an interface, because:
3481
   *
3482
   * 1) if, as appears to be the case at least some of the time,
3483
   * the PF_PACKET socket code first gets a NETDEV_DOWN indication
3484
   * for the device and then gets a NETDEV_UNREGISTER indication
3485
   * for it, the first indication will cause a wakeup with ENETDOWN
3486
   * but won't set the packet socket's field for the interface index
3487
   * to -1, and the second indication won't cause a wakeup (because
3488
   * the first indication also caused the protocol hook to be
3489
   * unregistered) but will set the packet socket's field for the
3490
   * interface index to -1;
3491
   *
3492
   * 2) even if just a NETDEV_UNREGISTER indication is registered,
3493
   * the packet socket's field for the interface index only gets
3494
   * set to -1 after the wakeup, so there's a small but non-zero
3495
   * risk that a thread blocked waiting for the wakeup will get
3496
   * to the "fetch the socket name" code before the interface index
3497
   * gets set to -1, so it'll get the old interface index.
3498
   *
3499
   * Therefore, if we got an ENETDOWN and haven't seen a packet
3500
   * since then, we assume that we might be waiting for the interface
3501
   * to disappear, and poll with a timeout to try again in a short
3502
   * period of time.  If we *do* see a packet, the interface has
3503
   * come back up again, and is *definitely* still there, so we
3504
   * don't need to poll.
3505
   */
3506
0
  for (;;) {
3507
    /*
3508
     * Yes, we do this even in non-blocking mode, as it's
3509
     * the only way to get error indications from a
3510
     * tpacket socket.
3511
     *
3512
     * The timeout is 0 in non-blocking mode, so poll()
3513
     * returns immediately.
3514
     */
3515
0
    timeout = handlep->poll_timeout;
3516
3517
    /*
3518
     * If we got an ENETDOWN and haven't gotten an indication
3519
     * that the device has gone away or that the device is up,
3520
     * we don't yet know for certain whether the device has
3521
     * gone away or not, do a poll() with a 1-millisecond timeout,
3522
     * as we have to poll indefinitely for "device went away"
3523
     * indications until we either get one or see that the
3524
     * device is up.
3525
     */
3526
0
    if (handlep->netdown) {
3527
0
      if (timeout != 0)
3528
0
        timeout = 1;
3529
0
    }
3530
0
    ret = poll(pollinfo, numpollinfo, timeout);
3531
0
    if (ret < 0) {
3532
      /*
3533
       * Error.  If it's not EINTR, report it.
3534
       */
3535
0
      if (errno != EINTR) {
3536
0
        pcap_fmt_errmsg_for_errno(handle->errbuf,
3537
0
            PCAP_ERRBUF_SIZE, errno,
3538
0
            "can't poll on packet socket");
3539
0
        return PCAP_ERROR;
3540
0
      }
3541
3542
      /*
3543
       * It's EINTR; if we were told to break out of
3544
       * the loop, do so.
3545
       */
3546
0
      if (handle->break_loop) {
3547
0
        handle->break_loop = 0;
3548
0
        return PCAP_ERROR_BREAK;
3549
0
      }
3550
0
    } else if (ret > 0) {
3551
      /*
3552
       * OK, some descriptor is ready.
3553
       * Check the socket descriptor first.
3554
       *
3555
       * As I read the Linux man page, pollinfo[0].revents
3556
       * will either be POLLIN, POLLERR, POLLHUP, or POLLNVAL.
3557
       */
3558
0
      if (pollinfo[0].revents == POLLIN) {
3559
        /*
3560
         * OK, we may have packets to
3561
         * read.
3562
         */
3563
0
        break;
3564
0
      }
3565
0
      if (pollinfo[0].revents != 0) {
3566
        /*
3567
         * There's some indication other than
3568
         * "you can read on this descriptor" on
3569
         * the descriptor.
3570
         */
3571
0
        if (pollinfo[0].revents & POLLNVAL) {
3572
0
          snprintf(handle->errbuf,
3573
0
              PCAP_ERRBUF_SIZE,
3574
0
              "Invalid polling request on packet socket");
3575
0
          return PCAP_ERROR;
3576
0
        }
3577
0
        if (pollinfo[0].revents & (POLLHUP | POLLRDHUP)) {
3578
0
          snprintf(handle->errbuf,
3579
0
              PCAP_ERRBUF_SIZE,
3580
0
              "Hangup on packet socket");
3581
0
          return PCAP_ERROR;
3582
0
        }
3583
0
        if (pollinfo[0].revents & POLLERR) {
3584
          /*
3585
           * Get the error.
3586
           */
3587
0
          int err;
3588
0
          socklen_t errlen;
3589
3590
0
          errlen = sizeof(err);
3591
0
          if (getsockopt(handle->fd, SOL_SOCKET,
3592
0
              SO_ERROR, &err, &errlen) == -1) {
3593
            /*
3594
             * The call *itself* returned
3595
             * an error; make *that*
3596
             * the error.
3597
             */
3598
0
            err = errno;
3599
0
          }
3600
3601
          /*
3602
           * OK, we have the error.
3603
           */
3604
0
          if (err == ENETDOWN) {
3605
            /*
3606
             * The device on which we're
3607
             * capturing went away or the
3608
             * interface was taken down.
3609
             *
3610
             * We don't know for certain
3611
             * which happened, and the
3612
             * next poll() may indicate
3613
             * that there are packets
3614
             * to be read, so just set
3615
             * a flag to get us to do
3616
             * checks later, and set
3617
             * the required select
3618
             * timeout to 1 millisecond
3619
             * so that event loops that
3620
             * check our socket descriptor
3621
             * also time out so that
3622
             * they can call us and we
3623
             * can do the checks.
3624
             */
3625
0
            handlep->netdown = 1;
3626
0
            handle->required_select_timeout = &netdown_timeout;
3627
0
          } else if (err == 0) {
3628
            /*
3629
             * This shouldn't happen, so
3630
             * report a special indication
3631
             * that it did.
3632
             */
3633
0
            snprintf(handle->errbuf,
3634
0
                PCAP_ERRBUF_SIZE,
3635
0
                "Error condition on packet socket: Reported error was 0");
3636
0
            return PCAP_ERROR;
3637
0
          } else {
3638
0
            pcap_fmt_errmsg_for_errno(handle->errbuf,
3639
0
                PCAP_ERRBUF_SIZE,
3640
0
                err,
3641
0
                "Error condition on packet socket");
3642
0
            return PCAP_ERROR;
3643
0
          }
3644
0
        }
3645
0
      }
3646
      /*
3647
       * Now check the event device.
3648
       */
3649
0
      if (pollinfo[1].revents & POLLIN) {
3650
0
        ssize_t nread;
3651
0
        uint64_t value;
3652
3653
        /*
3654
         * This should never fail, but, just
3655
         * in case....
3656
         */
3657
0
        nread = read(handlep->poll_breakloop_fd, &value,
3658
0
            sizeof(value));
3659
0
        if (nread == -1) {
3660
0
          pcap_fmt_errmsg_for_errno(handle->errbuf,
3661
0
              PCAP_ERRBUF_SIZE,
3662
0
              errno,
3663
0
              "Error reading from event FD");
3664
0
          return PCAP_ERROR;
3665
0
        }
3666
3667
        /*
3668
         * According to the Linux read(2) man
3669
         * page, read() will transfer at most
3670
         * 2^31-1 bytes, so the return value is
3671
         * either -1 or a value between 0
3672
         * and 2^31-1, so it's non-negative.
3673
         *
3674
         * Cast it to size_t to squelch
3675
         * warnings from the compiler; add this
3676
         * comment to squelch warnings from
3677
         * humans reading the code. :-)
3678
         *
3679
         * Don't treat an EOF as an error, but
3680
         * *do* treat a short read as an error;
3681
         * that "shouldn't happen", but....
3682
         */
3683
0
        if (nread != 0 &&
3684
0
            (size_t)nread < sizeof(value)) {
3685
0
          snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3686
0
              "Short read from event FD: expected %zu, got %zd",
3687
0
              sizeof(value), nread);
3688
0
          return PCAP_ERROR;
3689
0
        }
3690
3691
        /*
3692
         * This event gets signaled by a
3693
         * pcap_breakloop() call; if we were told
3694
         * to break out of the loop, do so.
3695
         */
3696
0
        if (handle->break_loop) {
3697
0
          handle->break_loop = 0;
3698
0
          return PCAP_ERROR_BREAK;
3699
0
        }
3700
0
      }
3701
0
    }
3702
3703
    /*
3704
     * Either:
3705
     *
3706
     *   1) we got neither an error from poll() nor any
3707
     *      readable descriptors, in which case there
3708
     *      are no packets waiting to read
3709
     *
3710
     * or
3711
     *
3712
     *   2) We got readable descriptors but the PF_PACKET
3713
     *      socket wasn't one of them, in which case there
3714
     *      are no packets waiting to read
3715
     *
3716
     * so, if we got an ENETDOWN, we've drained whatever
3717
     * packets were available to read at the point of the
3718
     * ENETDOWN.
3719
     *
3720
     * So, if we got an ENETDOWN and haven't gotten an indication
3721
     * that the device has gone away or that the device is up,
3722
     * we don't yet know for certain whether the device has
3723
     * gone away or not, check whether the device exists and is
3724
     * up.
3725
     */
3726
0
    if (handlep->netdown) {
3727
0
      if (!device_still_exists(handle)) {
3728
        /*
3729
         * The device doesn't exist any more;
3730
         * report that.
3731
         *
3732
         * XXX - we should really return an
3733
         * appropriate error for that, but
3734
         * pcap_dispatch() etc. aren't documented
3735
         * as having error returns other than
3736
         * PCAP_ERROR or PCAP_ERROR_BREAK.
3737
         */
3738
0
        snprintf(handle->errbuf,  PCAP_ERRBUF_SIZE,
3739
0
            "The interface disappeared");
3740
0
        return PCAP_ERROR;
3741
0
      }
3742
3743
      /*
3744
       * The device still exists; try to see if it's up.
3745
       */
3746
0
      memset(&ifr, 0, sizeof(ifr));
3747
0
      pcap_strlcpy(ifr.ifr_name, handlep->device,
3748
0
          sizeof(ifr.ifr_name));
3749
0
      if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
3750
0
        if (errno == ENXIO || errno == ENODEV) {
3751
          /*
3752
           * OK, *now* it's gone.
3753
           *
3754
           * XXX - see above comment.
3755
           */
3756
0
          snprintf(handle->errbuf,
3757
0
              PCAP_ERRBUF_SIZE,
3758
0
              "The interface disappeared");
3759
0
          return PCAP_ERROR;
3760
0
        } else {
3761
0
          pcap_fmt_errmsg_for_errno(handle->errbuf,
3762
0
              PCAP_ERRBUF_SIZE, errno,
3763
0
              "%s: Can't get flags",
3764
0
              handlep->device);
3765
0
          return PCAP_ERROR;
3766
0
        }
3767
0
      }
3768
0
      if (ifr.ifr_flags & IFF_UP) {
3769
        /*
3770
         * It's up, so it definitely still exists.
3771
         * Cancel the ENETDOWN indication - we
3772
         * presumably got it due to the interface
3773
         * going down rather than the device going
3774
         * away - and revert to "no required select
3775
         * timeout.
3776
         */
3777
0
        handlep->netdown = 0;
3778
0
        handle->required_select_timeout = NULL;
3779
0
      }
3780
0
    }
3781
3782
    /*
3783
     * If we're in non-blocking mode, just quit now, rather
3784
     * than spinning in a loop doing poll()s that immediately
3785
     * time out if there's no indication on any descriptor.
3786
     */
3787
0
    if (handlep->poll_timeout == 0)
3788
0
      break;
3789
0
  }
3790
0
  return 0;
3791
0
}
3792
3793
/* handle a single memory mapped packet */
3794
static int pcap_handle_packet_mmap(
3795
    pcap_t *handle,
3796
    pcap_handler callback,
3797
    u_char *user,
3798
    unsigned char *frame,
3799
    unsigned int tp_len,
3800
    unsigned int tp_mac,
3801
    unsigned int tp_snaplen,
3802
    unsigned int tp_sec,
3803
    unsigned int tp_usec,
3804
    int tp_vlan_tci_valid,
3805
    __u16 tp_vlan_tci,
3806
    __u16 tp_vlan_tpid)
3807
0
{
3808
0
  struct pcap_linux *handlep = handle->priv;
3809
0
  unsigned char *bp;
3810
0
  struct sockaddr_ll *sll;
3811
0
  struct pcap_pkthdr pcaphdr;
3812
0
  pcap_can_socketcan_hdr *canhdr;
3813
0
  unsigned int snaplen = tp_snaplen;
3814
0
  struct utsname utsname;
3815
3816
  /* perform sanity check on internal offset. */
3817
0
  if (tp_mac + tp_snaplen > handle->bufsize) {
3818
    /*
3819
     * Report some system information as a debugging aid.
3820
     */
3821
0
    if (uname(&utsname) != -1) {
3822
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3823
0
        "corrupted frame on kernel ring mac "
3824
0
        "offset %u + caplen %u > frame len %d "
3825
0
        "(kernel %.32s version %s, machine %.16s)",
3826
0
        tp_mac, tp_snaplen, handle->bufsize,
3827
0
        utsname.release, utsname.version,
3828
0
        utsname.machine);
3829
0
    } else {
3830
0
      snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3831
0
        "corrupted frame on kernel ring mac "
3832
0
        "offset %u + caplen %u > frame len %d",
3833
0
        tp_mac, tp_snaplen, handle->bufsize);
3834
0
    }
3835
0
    return -1;
3836
0
  }
3837
3838
  /* run filter on received packet
3839
   * If the kernel filtering is enabled we need to run the
3840
   * filter until all the frames present into the ring
3841
   * at filter creation time are processed.
3842
   * In this case, blocks_to_filter_in_userland is used
3843
   * as a counter for the packet we need to filter.
3844
   * Note: alternatively it could be possible to stop applying
3845
   * the filter when the ring became empty, but it can possibly
3846
   * happen a lot later... */
3847
0
  bp = frame + tp_mac;
3848
3849
  /* if required build in place the sll header*/
3850
0
  sll = (void *)(frame + TPACKET_ALIGN(handlep->tp_hdrlen));
3851
0
  if (handlep->cooked) {
3852
0
    if (handle->linktype == DLT_LINUX_SLL2) {
3853
0
      struct sll2_header *hdrp;
3854
3855
      /*
3856
       * The kernel should have left us with enough
3857
       * space for an sll header; back up the packet
3858
       * data pointer into that space, as that'll be
3859
       * the beginning of the packet we pass to the
3860
       * callback.
3861
       */
3862
0
      bp -= SLL2_HDR_LEN;
3863
3864
      /*
3865
       * Let's make sure that's past the end of
3866
       * the tpacket header, i.e. >=
3867
       * ((u_char *)thdr + TPACKET_HDRLEN), so we
3868
       * don't step on the header when we construct
3869
       * the sll header.
3870
       */
3871
0
      if (bp < (u_char *)frame +
3872
0
             TPACKET_ALIGN(handlep->tp_hdrlen) +
3873
0
             sizeof(struct sockaddr_ll)) {
3874
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3875
0
          "cooked-mode frame doesn't have room for sll header");
3876
0
        return -1;
3877
0
      }
3878
3879
      /*
3880
       * OK, that worked; construct the sll header.
3881
       */
3882
0
      hdrp = (struct sll2_header *)bp;
3883
0
      hdrp->sll2_protocol = sll->sll_protocol;
3884
0
      hdrp->sll2_reserved_mbz = 0;
3885
0
      hdrp->sll2_if_index = htonl(sll->sll_ifindex);
3886
0
      hdrp->sll2_hatype = htons(sll->sll_hatype);
3887
0
      hdrp->sll2_pkttype = sll->sll_pkttype;
3888
0
      hdrp->sll2_halen = sll->sll_halen;
3889
0
      memcpy(hdrp->sll2_addr, sll->sll_addr, SLL_ADDRLEN);
3890
3891
0
      snaplen += sizeof(struct sll2_header);
3892
0
    } else {
3893
0
      struct sll_header *hdrp;
3894
3895
      /*
3896
       * The kernel should have left us with enough
3897
       * space for an sll header; back up the packet
3898
       * data pointer into that space, as that'll be
3899
       * the beginning of the packet we pass to the
3900
       * callback.
3901
       */
3902
0
      bp -= SLL_HDR_LEN;
3903
3904
      /*
3905
       * Let's make sure that's past the end of
3906
       * the tpacket header, i.e. >=
3907
       * ((u_char *)thdr + TPACKET_HDRLEN), so we
3908
       * don't step on the header when we construct
3909
       * the sll header.
3910
       */
3911
0
      if (bp < (u_char *)frame +
3912
0
             TPACKET_ALIGN(handlep->tp_hdrlen) +
3913
0
             sizeof(struct sockaddr_ll)) {
3914
0
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3915
0
          "cooked-mode frame doesn't have room for sll header");
3916
0
        return -1;
3917
0
      }
3918
3919
      /*
3920
       * OK, that worked; construct the sll header.
3921
       */
3922
0
      hdrp = (struct sll_header *)bp;
3923
0
      hdrp->sll_pkttype = htons(sll->sll_pkttype);
3924
0
      hdrp->sll_hatype = htons(sll->sll_hatype);
3925
0
      hdrp->sll_halen = htons(sll->sll_halen);
3926
0
      memcpy(hdrp->sll_addr, sll->sll_addr, SLL_ADDRLEN);
3927
0
      hdrp->sll_protocol = sll->sll_protocol;
3928
3929
0
      snaplen += sizeof(struct sll_header);
3930
0
    }
3931
0
  } else {
3932
    /*
3933
     * If this is a packet from a CAN device, so that
3934
     * sll->sll_hatype is ARPHRD_CAN, then, as we're
3935
     * not capturing in cooked mode, its link-layer
3936
     * type is DLT_CAN_SOCKETCAN.  Fix up the header
3937
     * provided by the code below us to match what
3938
     * DLT_CAN_SOCKETCAN is expected to provide.
3939
     */
3940
0
    if (sll->sll_hatype == ARPHRD_CAN) {
3941
      /*
3942
       * DLT_CAN_SOCKETCAN is specified as having the
3943
       * CAN ID and flags in network byte order, but
3944
       * capturing on a CAN device provides it in host
3945
       * byte order.  Convert it to network byte order.
3946
       */
3947
0
      canhdr = (pcap_can_socketcan_hdr *)bp;
3948
0
      canhdr->can_id = htonl(canhdr->can_id);
3949
3950
      /*
3951
       * In addition, set the CANFD_FDF flag if
3952
       * the protocol is LINUX_SLL_P_CANFD, as
3953
       * the protocol field itself isn't in
3954
       * the packet to indicate that it's a
3955
       * CAN FD packet.
3956
       */
3957
0
      uint16_t protocol = ntohs(sll->sll_protocol);
3958
0
      if (protocol == LINUX_SLL_P_CANFD) {
3959
0
        canhdr->fd_flags |= CANFD_FDF;
3960
3961
        /*
3962
         * Zero out all the unknown bits in
3963
         * fd_flags and clear the reserved
3964
         * fields, so that a program reading
3965
         * this can assume that CANFD_FDF
3966
         * is set because we set it, not
3967
         * because some uninitialized crap
3968
         * was provided in the fd_flags
3969
         * field.
3970
         *
3971
         * (At least some LINKTYPE_CAN_SOCKETCAN
3972
         * files attached to Wireshark bugs
3973
         * had uninitialized junk there, so it
3974
         * does happen.)
3975
         *
3976
         * Update this if Linux adds more flag
3977
         * bits to the fd_flags field or uses
3978
         * either of the reserved fields for
3979
         * FD frames.
3980
         */
3981
0
        canhdr->fd_flags &= ~(CANFD_FDF|CANFD_ESI|CANFD_BRS);
3982
0
        canhdr->reserved1 = 0;
3983
0
        canhdr->reserved2 = 0;
3984
0
      } else {
3985
        /*
3986
         * Clear CANFD_FDF if it's set (probably
3987
         * again meaning that this field is
3988
         * uninitialized junk).
3989
         */
3990
0
        canhdr->fd_flags &= ~CANFD_FDF;
3991
0
      }
3992
0
    }
3993
0
  }
3994
3995
0
  if (handlep->filter_in_userland && handle->fcode.bf_insns) {
3996
0
    struct pcap_bpf_aux_data aux_data;
3997
3998
0
    aux_data.vlan_tag_present = tp_vlan_tci_valid;
3999
0
    aux_data.vlan_tag = tp_vlan_tci & 0x0fff;
4000
4001
0
    if (pcap_filter_with_aux_data(handle->fcode.bf_insns,
4002
0
                bp,
4003
0
                tp_len,
4004
0
                snaplen,
4005
0
                &aux_data) == 0)
4006
0
      return 0;
4007
0
  }
4008
4009
0
  if (!linux_check_direction(handle, sll))
4010
0
    return 0;
4011
4012
  /* get required packet info from ring header */
4013
0
  pcaphdr.ts.tv_sec = tp_sec;
4014
0
  pcaphdr.ts.tv_usec = tp_usec;
4015
0
  pcaphdr.caplen = tp_snaplen;
4016
0
  pcaphdr.len = tp_len;
4017
4018
  /* if required build in place the sll header*/
4019
0
  if (handlep->cooked) {
4020
    /* update packet len */
4021
0
    if (handle->linktype == DLT_LINUX_SLL2) {
4022
0
      pcaphdr.caplen += SLL2_HDR_LEN;
4023
0
      pcaphdr.len += SLL2_HDR_LEN;
4024
0
    } else {
4025
0
      pcaphdr.caplen += SLL_HDR_LEN;
4026
0
      pcaphdr.len += SLL_HDR_LEN;
4027
0
    }
4028
0
  }
4029
4030
0
  if (tp_vlan_tci_valid &&
4031
0
    handlep->vlan_offset != -1 &&
4032
0
    tp_snaplen >= (unsigned int) handlep->vlan_offset)
4033
0
  {
4034
0
    struct vlan_tag *tag;
4035
4036
    /*
4037
     * Move everything in the header, except the type field,
4038
     * down VLAN_TAG_LEN bytes, to allow us to insert the
4039
     * VLAN tag between that stuff and the type field.
4040
     */
4041
0
    bp -= VLAN_TAG_LEN;
4042
0
    memmove(bp, bp + VLAN_TAG_LEN, handlep->vlan_offset);
4043
4044
    /*
4045
     * Now insert the tag.
4046
     */
4047
0
    tag = (struct vlan_tag *)(bp + handlep->vlan_offset);
4048
0
    tag->vlan_tpid = htons(tp_vlan_tpid);
4049
0
    tag->vlan_tci = htons(tp_vlan_tci);
4050
4051
    /*
4052
     * Add the tag to the packet lengths.
4053
     */
4054
0
    pcaphdr.caplen += VLAN_TAG_LEN;
4055
0
    pcaphdr.len += VLAN_TAG_LEN;
4056
0
  }
4057
4058
  /*
4059
   * The only way to tell the kernel to cut off the
4060
   * packet at a snapshot length is with a filter program;
4061
   * if there's no filter program, the kernel won't cut
4062
   * the packet off.
4063
   *
4064
   * Trim the snapshot length to be no longer than the
4065
   * specified snapshot length.
4066
   *
4067
   * XXX - an alternative is to put a filter, consisting
4068
   * of a "ret <snaplen>" instruction, on the socket
4069
   * in the activate routine, so that the truncation is
4070
   * done in the kernel even if nobody specified a filter;
4071
   * that means that less buffer space is consumed in
4072
   * the memory-mapped buffer.
4073
   */
4074
0
  if (pcaphdr.caplen > (bpf_u_int32)handle->snapshot)
4075
0
    pcaphdr.caplen = handle->snapshot;
4076
4077
  /* pass the packet to the user */
4078
0
  callback(user, &pcaphdr, bp);
4079
4080
0
  return 1;
4081
0
}
4082
4083
static int
4084
pcap_read_linux_mmap_v2(pcap_t *handle, int max_packets, pcap_handler callback,
4085
    u_char *user)
4086
0
{
4087
0
  struct pcap_linux *handlep = handle->priv;
4088
0
  union thdr h;
4089
0
  int pkts = 0;
4090
0
  int ret;
4091
4092
  /* wait for frames availability.*/
4093
0
  h.raw = RING_GET_CURRENT_FRAME(handle);
4094
0
  if (!packet_mmap_acquire(h.h2)) {
4095
    /*
4096
     * The current frame is owned by the kernel; wait for
4097
     * a frame to be handed to us.
4098
     */
4099
0
    ret = pcap_wait_for_frames_mmap(handle);
4100
0
    if (ret) {
4101
0
      return ret;
4102
0
    }
4103
0
  }
4104
4105
  /*
4106
   * This can conceivably process more than INT_MAX packets,
4107
   * which would overflow the packet count, causing it either
4108
   * to look like a negative number, and thus cause us to
4109
   * return a value that looks like an error, or overflow
4110
   * back into positive territory, and thus cause us to
4111
   * return a too-low count.
4112
   *
4113
   * Therefore, if the packet count is unlimited, we clip
4114
   * it at INT_MAX; this routine is not expected to
4115
   * process packets indefinitely, so that's not an issue.
4116
   */
4117
0
  if (PACKET_COUNT_IS_UNLIMITED(max_packets))
4118
0
    max_packets = INT_MAX;
4119
4120
0
  while (pkts < max_packets) {
4121
    /*
4122
     * Get the current ring buffer frame, and break if
4123
     * it's still owned by the kernel.
4124
     */
4125
0
    h.raw = RING_GET_CURRENT_FRAME(handle);
4126
0
    if (!packet_mmap_acquire(h.h2))
4127
0
      break;
4128
4129
0
    ret = pcap_handle_packet_mmap(
4130
0
        handle,
4131
0
        callback,
4132
0
        user,
4133
0
        h.raw,
4134
0
        h.h2->tp_len,
4135
0
        h.h2->tp_mac,
4136
0
        h.h2->tp_snaplen,
4137
0
        h.h2->tp_sec,
4138
0
        handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? h.h2->tp_nsec : h.h2->tp_nsec / 1000,
4139
0
        VLAN_VALID(h.h2, h.h2),
4140
0
        h.h2->tp_vlan_tci,
4141
0
        VLAN_TPID(h.h2, h.h2));
4142
0
    if (ret == 1) {
4143
0
      pkts++;
4144
0
    } else if (ret < 0) {
4145
0
      return ret;
4146
0
    }
4147
4148
    /*
4149
     * Hand this block back to the kernel, and, if we're
4150
     * counting blocks that need to be filtered in userland
4151
     * after having been filtered by the kernel, count
4152
     * the one we've just processed.
4153
     */
4154
0
    packet_mmap_release(h.h2);
4155
0
    if (handlep->blocks_to_filter_in_userland > 0) {
4156
0
      handlep->blocks_to_filter_in_userland--;
4157
0
      if (handlep->blocks_to_filter_in_userland == 0) {
4158
        /*
4159
         * No more blocks need to be filtered
4160
         * in userland.
4161
         */
4162
0
        handlep->filter_in_userland = 0;
4163
0
      }
4164
0
    }
4165
4166
    /* next block */
4167
0
    if (++handle->offset >= handle->cc)
4168
0
      handle->offset = 0;
4169
4170
    /* check for break loop condition*/
4171
0
    if (handle->break_loop) {
4172
0
      handle->break_loop = 0;
4173
0
      return PCAP_ERROR_BREAK;
4174
0
    }
4175
0
  }
4176
0
  return pkts;
4177
0
}
4178
4179
#ifdef HAVE_TPACKET3
4180
static int
4181
pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback,
4182
    u_char *user)
4183
0
{
4184
0
  struct pcap_linux *handlep = handle->priv;
4185
0
  union thdr h;
4186
0
  int pkts = 0;
4187
0
  int ret;
4188
4189
0
again:
4190
0
  if (handlep->current_packet == NULL) {
4191
    /* wait for frames availability.*/
4192
0
    h.raw = RING_GET_CURRENT_FRAME(handle);
4193
0
    if (!packet_mmap_v3_acquire(h.h3)) {
4194
      /*
4195
       * The current frame is owned by the kernel; wait
4196
       * for a frame to be handed to us.
4197
       */
4198
0
      ret = pcap_wait_for_frames_mmap(handle);
4199
0
      if (ret) {
4200
0
        return ret;
4201
0
      }
4202
0
    }
4203
0
  }
4204
0
  h.raw = RING_GET_CURRENT_FRAME(handle);
4205
0
  if (!packet_mmap_v3_acquire(h.h3)) {
4206
0
    if (pkts == 0 && handlep->timeout == 0) {
4207
      /* Block until we see a packet. */
4208
0
      goto again;
4209
0
    }
4210
0
    return pkts;
4211
0
  }
4212
4213
  /*
4214
   * This can conceivably process more than INT_MAX packets,
4215
   * which would overflow the packet count, causing it either
4216
   * to look like a negative number, and thus cause us to
4217
   * return a value that looks like an error, or overflow
4218
   * back into positive territory, and thus cause us to
4219
   * return a too-low count.
4220
   *
4221
   * Therefore, if the packet count is unlimited, we clip
4222
   * it at INT_MAX; this routine is not expected to
4223
   * process packets indefinitely, so that's not an issue.
4224
   */
4225
0
  if (PACKET_COUNT_IS_UNLIMITED(max_packets))
4226
0
    max_packets = INT_MAX;
4227
4228
0
  while (pkts < max_packets) {
4229
0
    int packets_to_read;
4230
4231
0
    if (handlep->current_packet == NULL) {
4232
0
      h.raw = RING_GET_CURRENT_FRAME(handle);
4233
0
      if (!packet_mmap_v3_acquire(h.h3))
4234
0
        break;
4235
4236
0
      handlep->current_packet = h.raw + h.h3->hdr.bh1.offset_to_first_pkt;
4237
0
      handlep->packets_left = h.h3->hdr.bh1.num_pkts;
4238
0
    }
4239
0
    packets_to_read = handlep->packets_left;
4240
4241
0
    if (packets_to_read > (max_packets - pkts)) {
4242
      /*
4243
       * There are more packets in the buffer than
4244
       * the number of packets we have left to
4245
       * process to get up to the maximum number
4246
       * of packets to process.  Only process enough
4247
       * of them to get us up to that maximum.
4248
       */
4249
0
      packets_to_read = max_packets - pkts;
4250
0
    }
4251
4252
0
    while (packets_to_read-- && !handle->break_loop) {
4253
0
      struct tpacket3_hdr* tp3_hdr = (struct tpacket3_hdr*) handlep->current_packet;
4254
0
      ret = pcap_handle_packet_mmap(
4255
0
          handle,
4256
0
          callback,
4257
0
          user,
4258
0
          handlep->current_packet,
4259
0
          tp3_hdr->tp_len,
4260
0
          tp3_hdr->tp_mac,
4261
0
          tp3_hdr->tp_snaplen,
4262
0
          tp3_hdr->tp_sec,
4263
0
          handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? tp3_hdr->tp_nsec : tp3_hdr->tp_nsec / 1000,
4264
0
          VLAN_VALID(tp3_hdr, &tp3_hdr->hv1),
4265
0
          tp3_hdr->hv1.tp_vlan_tci,
4266
0
          VLAN_TPID(tp3_hdr, &tp3_hdr->hv1));
4267
0
      if (ret == 1) {
4268
0
        pkts++;
4269
0
      } else if (ret < 0) {
4270
0
        handlep->current_packet = NULL;
4271
0
        return ret;
4272
0
      }
4273
0
      handlep->current_packet += tp3_hdr->tp_next_offset;
4274
0
      handlep->packets_left--;
4275
0
    }
4276
4277
0
    if (handlep->packets_left <= 0) {
4278
      /*
4279
       * Hand this block back to the kernel, and, if
4280
       * we're counting blocks that need to be
4281
       * filtered in userland after having been
4282
       * filtered by the kernel, count the one we've
4283
       * just processed.
4284
       */
4285
0
      packet_mmap_v3_release(h.h3);
4286
0
      if (handlep->blocks_to_filter_in_userland > 0) {
4287
0
        handlep->blocks_to_filter_in_userland--;
4288
0
        if (handlep->blocks_to_filter_in_userland == 0) {
4289
          /*
4290
           * No more blocks need to be filtered
4291
           * in userland.
4292
           */
4293
0
          handlep->filter_in_userland = 0;
4294
0
        }
4295
0
      }
4296
4297
      /* next block */
4298
0
      if (++handle->offset >= handle->cc)
4299
0
        handle->offset = 0;
4300
4301
0
      handlep->current_packet = NULL;
4302
0
    }
4303
4304
    /* check for break loop condition*/
4305
0
    if (handle->break_loop) {
4306
0
      handle->break_loop = 0;
4307
0
      return PCAP_ERROR_BREAK;
4308
0
    }
4309
0
  }
4310
0
  if (pkts == 0 && handlep->timeout == 0) {
4311
    /* Block until we see a packet. */
4312
0
    goto again;
4313
0
  }
4314
0
  return pkts;
4315
0
}
4316
#endif /* HAVE_TPACKET3 */
4317
4318
/*
4319
 *  Attach the given BPF code to the packet capture device.
4320
 */
4321
static int
4322
pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter)
4323
0
{
4324
0
  struct pcap_linux *handlep;
4325
0
  struct sock_fprog fcode;
4326
0
  int     can_filter_in_kernel;
4327
0
  int     err = 0;
4328
0
  int     n, offset;
4329
4330
0
  if (!handle)
4331
0
    return -1;
4332
0
  if (!filter) {
4333
0
          pcap_strlcpy(handle->errbuf, "setfilter: No filter specified",
4334
0
      PCAP_ERRBUF_SIZE);
4335
0
    return -1;
4336
0
  }
4337
4338
0
  handlep = handle->priv;
4339
4340
  /* Make our private copy of the filter */
4341
4342
0
  if (install_bpf_program(handle, filter) < 0)
4343
    /* install_bpf_program() filled in errbuf */
4344
0
    return -1;
4345
4346
  /*
4347
   * Run user level packet filter by default. Will be overridden if
4348
   * installing a kernel filter succeeds.
4349
   */
4350
0
  handlep->filter_in_userland = 1;
4351
4352
  /* Install kernel level filter if possible */
4353
4354
0
#ifdef USHRT_MAX
4355
0
  if (handle->fcode.bf_len > USHRT_MAX) {
4356
    /*
4357
     * fcode.len is an unsigned short for current kernel.
4358
     * I have yet to see BPF-Code with that much
4359
     * instructions but still it is possible. So for the
4360
     * sake of correctness I added this check.
4361
     */
4362
0
    fprintf(stderr, "Warning: Filter too complex for kernel\n");
4363
0
    fcode.len = 0;
4364
0
    fcode.filter = NULL;
4365
0
    can_filter_in_kernel = 0;
4366
0
  } else
4367
0
#endif /* USHRT_MAX */
4368
0
  {
4369
    /*
4370
     * Oh joy, the Linux kernel uses struct sock_fprog instead
4371
     * of struct bpf_program and of course the length field is
4372
     * of different size. Pointed out by Sebastian
4373
     *
4374
     * Oh, and we also need to fix it up so that all "ret"
4375
     * instructions with non-zero operands have MAXIMUM_SNAPLEN
4376
     * as the operand if we're not capturing in memory-mapped
4377
     * mode, and so that, if we're in cooked mode, all memory-
4378
     * reference instructions use special magic offsets in
4379
     * references to the link-layer header and assume that the
4380
     * link-layer payload begins at 0; "fix_program()" will do
4381
     * that.
4382
     */
4383
0
    switch (fix_program(handle, &fcode)) {
4384
4385
0
    case -1:
4386
0
    default:
4387
      /*
4388
       * Fatal error; just quit.
4389
       * (The "default" case shouldn't happen; we
4390
       * return -1 for that reason.)
4391
       */
4392
0
      return -1;
4393
4394
0
    case 0:
4395
      /*
4396
       * The program performed checks that we can't make
4397
       * work in the kernel.
4398
       */
4399
0
      can_filter_in_kernel = 0;
4400
0
      break;
4401
4402
0
    case 1:
4403
      /*
4404
       * We have a filter that'll work in the kernel.
4405
       */
4406
0
      can_filter_in_kernel = 1;
4407
0
      break;
4408
0
    }
4409
0
  }
4410
4411
  /*
4412
   * NOTE: at this point, we've set both the "len" and "filter"
4413
   * fields of "fcode".  As of the 2.6.32.4 kernel, at least,
4414
   * those are the only members of the "sock_fprog" structure,
4415
   * so we initialize every member of that structure.
4416
   *
4417
   * If there is anything in "fcode" that is not initialized,
4418
   * it is either a field added in a later kernel, or it's
4419
   * padding.
4420
   *
4421
   * If a new field is added, this code needs to be updated
4422
   * to set it correctly.
4423
   *
4424
   * If there are no other fields, then:
4425
   *
4426
   *  if the Linux kernel looks at the padding, it's
4427
   *  buggy;
4428
   *
4429
   *  if the Linux kernel doesn't look at the padding,
4430
   *  then if some tool complains that we're passing
4431
   *  uninitialized data to the kernel, then the tool
4432
   *  is buggy and needs to understand that it's just
4433
   *  padding.
4434
   */
4435
0
  if (can_filter_in_kernel) {
4436
0
    if ((err = set_kernel_filter(handle, &fcode)) == 0)
4437
0
    {
4438
      /*
4439
       * Installation succeeded - using kernel filter,
4440
       * so userland filtering not needed.
4441
       */
4442
0
      handlep->filter_in_userland = 0;
4443
0
    }
4444
0
    else if (err == -1) /* Non-fatal error */
4445
0
    {
4446
      /*
4447
       * Print a warning if we weren't able to install
4448
       * the filter for a reason other than "this kernel
4449
       * isn't configured to support socket filters.
4450
       */
4451
0
      if (errno == ENOMEM) {
4452
        /*
4453
         * Either a kernel memory allocation
4454
         * failure occurred, or there's too
4455
         * much "other/option memory" allocated
4456
         * for this socket.  Suggest that they
4457
         * increase the "other/option memory"
4458
         * limit.
4459
         */
4460
0
        fprintf(stderr,
4461
0
            "Warning: Couldn't allocate kernel memory for filter: try increasing net.core.optmem_max with sysctl\n");
4462
0
      } else if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) {
4463
0
        fprintf(stderr,
4464
0
            "Warning: Kernel filter failed: %s\n",
4465
0
          pcap_strerror(errno));
4466
0
      }
4467
0
    }
4468
0
  }
4469
4470
  /*
4471
   * If we're not using the kernel filter, get rid of any kernel
4472
   * filter that might've been there before, e.g. because the
4473
   * previous filter could work in the kernel, or because some other
4474
   * code attached a filter to the socket by some means other than
4475
   * calling "pcap_setfilter()".  Otherwise, the kernel filter may
4476
   * filter out packets that would pass the new userland filter.
4477
   */
4478
0
  if (handlep->filter_in_userland) {
4479
0
    if (reset_kernel_filter(handle) == -1) {
4480
0
      pcap_fmt_errmsg_for_errno(handle->errbuf,
4481
0
          PCAP_ERRBUF_SIZE, errno,
4482
0
          "can't remove kernel filter");
4483
0
      err = -2; /* fatal error */
4484
0
    }
4485
0
  }
4486
4487
  /*
4488
   * Free up the copy of the filter that was made by "fix_program()".
4489
   */
4490
0
  if (fcode.filter != NULL)
4491
0
    free(fcode.filter);
4492
4493
0
  if (err == -2)
4494
    /* Fatal error */
4495
0
    return -1;
4496
4497
  /*
4498
   * If we're filtering in userland, there's nothing to do;
4499
   * the new filter will be used for the next packet.
4500
   */
4501
0
  if (handlep->filter_in_userland)
4502
0
    return 0;
4503
4504
  /*
4505
   * We're filtering in the kernel; the packets present in
4506
   * all blocks currently in the ring were already filtered
4507
   * by the old filter, and so will need to be filtered in
4508
   * userland by the new filter.
4509
   *
4510
   * Get an upper bound for the number of such blocks; first,
4511
   * walk the ring backward and count the free blocks.
4512
   */
4513
0
  offset = handle->offset;
4514
0
  if (--offset < 0)
4515
0
    offset = handle->cc - 1;
4516
0
  for (n=0; n < handle->cc; ++n) {
4517
0
    if (--offset < 0)
4518
0
      offset = handle->cc - 1;
4519
0
    if (pcap_get_ring_frame_status(handle, offset) != TP_STATUS_KERNEL)
4520
0
      break;
4521
0
  }
4522
4523
  /*
4524
   * If we found free blocks, decrement the count of free
4525
   * blocks by 1, just in case we lost a race with another
4526
   * thread of control that was adding a packet while
4527
   * we were counting and that had run the filter before
4528
   * we changed it.
4529
   *
4530
   * XXX - could there be more than one block added in
4531
   * this fashion?
4532
   *
4533
   * XXX - is there a way to avoid that race, e.g. somehow
4534
   * wait for all packets that passed the old filter to
4535
   * be added to the ring?
4536
   */
4537
0
  if (n != 0)
4538
0
    n--;
4539
4540
  /*
4541
   * Set the count of blocks worth of packets to filter
4542
   * in userland to the total number of blocks in the
4543
   * ring minus the number of free blocks we found, and
4544
   * turn on userland filtering.  (The count of blocks
4545
   * worth of packets to filter in userland is guaranteed
4546
   * not to be zero - n, above, couldn't be set to a
4547
   * value > handle->cc, and if it were equal to
4548
   * handle->cc, it wouldn't be zero, and thus would
4549
   * be decremented to handle->cc - 1.)
4550
   */
4551
0
  handlep->blocks_to_filter_in_userland = handle->cc - n;
4552
0
  handlep->filter_in_userland = 1;
4553
4554
0
  return 0;
4555
0
}
4556
4557
/*
4558
 *  Return the index of the given device name. Fill ebuf and return
4559
 *  -1 on failure.
4560
 */
4561
static int
4562
iface_get_id(int fd, const char *device, char *ebuf)
4563
0
{
4564
0
  struct ifreq  ifr;
4565
4566
0
  memset(&ifr, 0, sizeof(ifr));
4567
0
  pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
4568
4569
0
  if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
4570
0
    pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4571
0
        errno, "SIOCGIFINDEX");
4572
0
    return -1;
4573
0
  }
4574
4575
0
  return ifr.ifr_ifindex;
4576
0
}
4577
4578
/*
4579
 *  Bind the socket associated with FD to the given device.
4580
 *  Return 0 on success or a PCAP_ERROR_ value on a hard error.
4581
 */
4582
static int
4583
iface_bind(int fd, int ifindex, char *ebuf, int protocol)
4584
0
{
4585
0
  struct sockaddr_ll  sll;
4586
0
  int     ret, err;
4587
0
  socklen_t   errlen = sizeof(err);
4588
4589
0
  memset(&sll, 0, sizeof(sll));
4590
0
  sll.sll_family    = AF_PACKET;
4591
0
  sll.sll_ifindex   = ifindex < 0 ? 0 : ifindex;
4592
0
  sll.sll_protocol  = protocol;
4593
4594
0
  if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) {
4595
0
    if (errno == ENETDOWN) {
4596
      /*
4597
       * Return a "network down" indication, so that
4598
       * the application can report that rather than
4599
       * saying we had a mysterious failure and
4600
       * suggest that they report a problem to the
4601
       * libpcap developers.
4602
       */
4603
0
      return PCAP_ERROR_IFACE_NOT_UP;
4604
0
    }
4605
0
    if (errno == ENODEV) {
4606
      /*
4607
       * There's nothing more to say, so clear the
4608
       * error message.
4609
       */
4610
0
      ebuf[0] = '\0';
4611
0
      ret = PCAP_ERROR_NO_SUCH_DEVICE;
4612
0
    } else {
4613
0
      ret = PCAP_ERROR;
4614
0
      pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4615
0
          errno, "bind");
4616
0
    }
4617
0
    return ret;
4618
0
  }
4619
4620
  /* Any pending errors, e.g., network is down? */
4621
4622
0
  if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
4623
0
    pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4624
0
        errno, "getsockopt (SO_ERROR)");
4625
0
    return PCAP_ERROR;
4626
0
  }
4627
4628
0
  if (err == ENETDOWN) {
4629
    /*
4630
     * Return a "network down" indication, so that
4631
     * the application can report that rather than
4632
     * saying we had a mysterious failure and
4633
     * suggest that they report a problem to the
4634
     * libpcap developers.
4635
     */
4636
0
    return PCAP_ERROR_IFACE_NOT_UP;
4637
0
  } else if (err > 0) {
4638
0
    pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4639
0
        err, "bind");
4640
0
    return PCAP_ERROR;
4641
0
  }
4642
4643
0
  return 0;
4644
0
}
4645
4646
/*
4647
 * Try to enter monitor mode.
4648
 * If we have libnl, try to create a new monitor-mode device and
4649
 * capture on that; otherwise, just say "not supported".
4650
 */
4651
#ifdef HAVE_LIBNL
4652
static int
4653
enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device)
4654
{
4655
  struct pcap_linux *handlep = handle->priv;
4656
  int ret;
4657
  char phydev_path[PATH_MAX+1];
4658
  struct nl80211_state nlstate;
4659
  struct ifreq ifr;
4660
  u_int n;
4661
4662
  /*
4663
   * Is this a mac80211 device?
4664
   */
4665
  ret = get_mac80211_phydev(handle, device, phydev_path, PATH_MAX);
4666
  if (ret < 0)
4667
    return ret; /* error */
4668
  if (ret == 0)
4669
    return 0; /* no error, but not mac80211 device */
4670
4671
  /*
4672
   * XXX - is this already a monN device?
4673
   * If so, we're done.
4674
   */
4675
4676
  /*
4677
   * OK, it's apparently a mac80211 device.
4678
   * Try to find an unused monN device for it.
4679
   */
4680
  ret = nl80211_init(handle, &nlstate, device);
4681
  if (ret != 0)
4682
    return ret;
4683
  for (n = 0; n < UINT_MAX; n++) {
4684
    /*
4685
     * Try mon{n}.
4686
     */
4687
    char mondevice[3+10+1]; /* mon{UINT_MAX}\0 */
4688
4689
    snprintf(mondevice, sizeof mondevice, "mon%u", n);
4690
    ret = add_mon_if(handle, sock_fd, &nlstate, device, mondevice);
4691
    if (ret == 1) {
4692
      /*
4693
       * Success.  We don't clean up the libnl state
4694
       * yet, as we'll be using it later.
4695
       */
4696
      goto added;
4697
    }
4698
    if (ret < 0) {
4699
      /*
4700
       * Hard failure.  Just return ret; handle->errbuf
4701
       * has already been set.
4702
       */
4703
      nl80211_cleanup(&nlstate);
4704
      return ret;
4705
    }
4706
  }
4707
4708
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4709
      "%s: No free monN interfaces", device);
4710
  nl80211_cleanup(&nlstate);
4711
  return PCAP_ERROR;
4712
4713
added:
4714
4715
#if 0
4716
  /*
4717
   * Sleep for .1 seconds.
4718
   */
4719
  delay.tv_sec = 0;
4720
  delay.tv_nsec = 500000000;
4721
  nanosleep(&delay, NULL);
4722
#endif
4723
4724
  /*
4725
   * If we haven't already done so, arrange to have
4726
   * "pcap_close_all()" called when we exit.
4727
   */
4728
  if (!pcap_do_addexit(handle)) {
4729
    /*
4730
     * "atexit()" failed; don't put the interface
4731
     * in rfmon mode, just give up.
4732
     */
4733
    del_mon_if(handle, sock_fd, &nlstate, device,
4734
        handlep->mondevice);
4735
    nl80211_cleanup(&nlstate);
4736
    return PCAP_ERROR;
4737
  }
4738
4739
  /*
4740
   * Now configure the monitor interface up.
4741
   */
4742
  memset(&ifr, 0, sizeof(ifr));
4743
  pcap_strlcpy(ifr.ifr_name, handlep->mondevice, sizeof(ifr.ifr_name));
4744
  if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) {
4745
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
4746
        errno, "%s: Can't get flags for %s", device,
4747
        handlep->mondevice);
4748
    del_mon_if(handle, sock_fd, &nlstate, device,
4749
        handlep->mondevice);
4750
    nl80211_cleanup(&nlstate);
4751
    return PCAP_ERROR;
4752
  }
4753
  ifr.ifr_flags |= IFF_UP|IFF_RUNNING;
4754
  if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) {
4755
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
4756
        errno, "%s: Can't set flags for %s", device,
4757
        handlep->mondevice);
4758
    del_mon_if(handle, sock_fd, &nlstate, device,
4759
        handlep->mondevice);
4760
    nl80211_cleanup(&nlstate);
4761
    return PCAP_ERROR;
4762
  }
4763
4764
  /*
4765
   * Success.  Clean up the libnl state.
4766
   */
4767
  nl80211_cleanup(&nlstate);
4768
4769
  /*
4770
   * Note that we have to delete the monitor device when we close
4771
   * the handle.
4772
   */
4773
  handlep->must_do_on_close |= MUST_DELETE_MONIF;
4774
4775
  /*
4776
   * Add this to the list of pcaps to close when we exit.
4777
   */
4778
  pcap_add_to_pcaps_to_close(handle);
4779
4780
  return 1;
4781
}
4782
#else /* HAVE_LIBNL */
4783
static int
4784
enter_rfmon_mode(pcap_t *handle _U_, int sock_fd _U_, const char *device _U_)
4785
0
{
4786
  /*
4787
   * We don't have libnl, so we can't do monitor mode.
4788
   */
4789
0
  return 0;
4790
0
}
4791
#endif /* HAVE_LIBNL */
4792
4793
#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
4794
/*
4795
 * Map SOF_TIMESTAMPING_ values to PCAP_TSTAMP_ values.
4796
 */
4797
static const struct {
4798
  int soft_timestamping_val;
4799
  int pcap_tstamp_val;
4800
} sof_ts_type_map[3] = {
4801
  { SOF_TIMESTAMPING_SOFTWARE, PCAP_TSTAMP_HOST },
4802
  { SOF_TIMESTAMPING_SYS_HARDWARE, PCAP_TSTAMP_ADAPTER },
4803
  { SOF_TIMESTAMPING_RAW_HARDWARE, PCAP_TSTAMP_ADAPTER_UNSYNCED }
4804
};
4805
0
#define NUM_SOF_TIMESTAMPING_TYPES  (sizeof sof_ts_type_map / sizeof sof_ts_type_map[0])
4806
4807
/*
4808
 * Set the list of time stamping types to include all types.
4809
 */
4810
static int
4811
iface_set_all_ts_types(pcap_t *handle, char *ebuf)
4812
0
{
4813
0
  u_int i;
4814
4815
0
  handle->tstamp_type_list = malloc(NUM_SOF_TIMESTAMPING_TYPES * sizeof(u_int));
4816
0
  if (handle->tstamp_type_list == NULL) {
4817
0
    pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4818
0
        errno, "malloc");
4819
0
    return -1;
4820
0
  }
4821
0
  for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++)
4822
0
    handle->tstamp_type_list[i] = sof_ts_type_map[i].pcap_tstamp_val;
4823
0
  handle->tstamp_type_count = NUM_SOF_TIMESTAMPING_TYPES;
4824
0
  return 0;
4825
0
}
4826
4827
/*
4828
 * Get a list of time stamp types.
4829
 */
4830
#ifdef ETHTOOL_GET_TS_INFO
4831
static int
4832
iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf)
4833
0
{
4834
0
  int fd;
4835
0
  struct ifreq ifr;
4836
0
  struct ethtool_ts_info info;
4837
0
  int num_ts_types;
4838
0
  u_int i, j;
4839
4840
  /*
4841
   * This doesn't apply to the "any" device; you can't say "turn on
4842
   * hardware time stamping for all devices that exist now and arrange
4843
   * that it be turned on for any device that appears in the future",
4844
   * and not all devices even necessarily *support* hardware time
4845
   * stamping, so don't report any time stamp types.
4846
   */
4847
0
  if (strcmp(device, "any") == 0) {
4848
0
    handle->tstamp_type_list = NULL;
4849
0
    return 0;
4850
0
  }
4851
4852
  /*
4853
   * Create a socket from which to fetch time stamping capabilities.
4854
   */
4855
0
  fd = get_if_ioctl_socket();
4856
0
  if (fd < 0) {
4857
0
    pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4858
0
        errno, "socket for SIOCETHTOOL(ETHTOOL_GET_TS_INFO)");
4859
0
    return -1;
4860
0
  }
4861
4862
0
  memset(&ifr, 0, sizeof(ifr));
4863
0
  pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
4864
0
  memset(&info, 0, sizeof(info));
4865
0
  info.cmd = ETHTOOL_GET_TS_INFO;
4866
0
  ifr.ifr_data = (caddr_t)&info;
4867
0
  if (ioctl(fd, SIOCETHTOOL, &ifr) == -1) {
4868
0
    int save_errno = errno;
4869
4870
0
    close(fd);
4871
0
    switch (save_errno) {
4872
4873
0
    case EOPNOTSUPP:
4874
0
    case EINVAL:
4875
      /*
4876
       * OK, this OS version or driver doesn't support
4877
       * asking for the time stamping types, so let's
4878
       * just return all the possible types.
4879
       */
4880
0
      if (iface_set_all_ts_types(handle, ebuf) == -1)
4881
0
        return -1;
4882
0
      return 0;
4883
4884
0
    case ENODEV:
4885
      /*
4886
       * OK, no such device.
4887
       * The user will find that out when they try to
4888
       * activate the device; just return an empty
4889
       * list of time stamp types.
4890
       */
4891
0
      handle->tstamp_type_list = NULL;
4892
0
      return 0;
4893
4894
0
    default:
4895
      /*
4896
       * Other error.
4897
       */
4898
0
      pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4899
0
          save_errno,
4900
0
          "%s: SIOCETHTOOL(ETHTOOL_GET_TS_INFO) ioctl failed",
4901
0
          device);
4902
0
      return -1;
4903
0
    }
4904
0
  }
4905
0
  close(fd);
4906
4907
  /*
4908
   * Do we support hardware time stamping of *all* packets?
4909
   */
4910
0
  if (!(info.rx_filters & (1 << HWTSTAMP_FILTER_ALL))) {
4911
    /*
4912
     * No, so don't report any time stamp types.
4913
     *
4914
     * XXX - some devices either don't report
4915
     * HWTSTAMP_FILTER_ALL when they do support it, or
4916
     * report HWTSTAMP_FILTER_ALL but map it to only
4917
     * time stamping a few PTP packets.  See
4918
     * http://marc.info/?l=linux-netdev&m=146318183529571&w=2
4919
     *
4920
     * Maybe that got fixed later.
4921
     */
4922
0
    handle->tstamp_type_list = NULL;
4923
0
    return 0;
4924
0
  }
4925
4926
0
  num_ts_types = 0;
4927
0
  for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) {
4928
0
    if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val)
4929
0
      num_ts_types++;
4930
0
  }
4931
0
  if (num_ts_types != 0) {
4932
0
    handle->tstamp_type_list = malloc(num_ts_types * sizeof(u_int));
4933
0
    if (handle->tstamp_type_list == NULL) {
4934
0
      pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
4935
0
          errno, "malloc");
4936
0
      return -1;
4937
0
    }
4938
0
    for (i = 0, j = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) {
4939
0
      if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) {
4940
0
        handle->tstamp_type_list[j] = sof_ts_type_map[i].pcap_tstamp_val;
4941
0
        j++;
4942
0
      }
4943
0
    }
4944
0
    handle->tstamp_type_count = num_ts_types;
4945
0
  } else
4946
0
    handle->tstamp_type_list = NULL;
4947
4948
0
  return 0;
4949
0
}
4950
#else /* ETHTOOL_GET_TS_INFO */
4951
static int
4952
iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf)
4953
{
4954
  /*
4955
   * This doesn't apply to the "any" device; you can't say "turn on
4956
   * hardware time stamping for all devices that exist now and arrange
4957
   * that it be turned on for any device that appears in the future",
4958
   * and not all devices even necessarily *support* hardware time
4959
   * stamping, so don't report any time stamp types.
4960
   */
4961
  if (strcmp(device, "any") == 0) {
4962
    handle->tstamp_type_list = NULL;
4963
    return 0;
4964
  }
4965
4966
  /*
4967
   * We don't have an ioctl to use to ask what's supported,
4968
   * so say we support everything.
4969
   */
4970
  if (iface_set_all_ts_types(handle, ebuf) == -1)
4971
    return -1;
4972
  return 0;
4973
}
4974
#endif /* ETHTOOL_GET_TS_INFO */
4975
#else  /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */
4976
static int
4977
iface_get_ts_types(const char *device _U_, pcap_t *p _U_, char *ebuf _U_)
4978
{
4979
  /*
4980
   * Nothing to fetch, so it always "succeeds".
4981
   */
4982
  return 0;
4983
}
4984
#endif /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */
4985
4986
/*
4987
 * Find out if we have any form of fragmentation/reassembly offloading.
4988
 *
4989
 * We do so using SIOCETHTOOL checking for various types of offloading;
4990
 * if SIOCETHTOOL isn't defined, or we don't have any #defines for any
4991
 * of the types of offloading, there's nothing we can do to check, so
4992
 * we just say "no, we don't".
4993
 *
4994
 * We treat EOPNOTSUPP, EINVAL and, if eperm_ok is true, EPERM as
4995
 * indications that the operation isn't supported.  We do EPERM
4996
 * weirdly because the SIOCETHTOOL code in later kernels 1) doesn't
4997
 * support ETHTOOL_GUFO, 2) also doesn't include it in the list
4998
 * of ethtool operations that don't require CAP_NET_ADMIN privileges,
4999
 * and 3) does the "is this permitted" check before doing the "is
5000
 * this even supported" check, so it fails with "this is not permitted"
5001
 * rather than "this is not even supported".  To work around this
5002
 * annoyance, we only treat EPERM as an error for the first feature,
5003
 * and assume that they all do the same permission checks, so if the
5004
 * first one is allowed all the others are allowed if supported.
5005
 */
5006
#if defined(SIOCETHTOOL) && (defined(ETHTOOL_GTSO) || defined(ETHTOOL_GUFO) || defined(ETHTOOL_GGSO) || defined(ETHTOOL_GFLAGS) || defined(ETHTOOL_GGRO))
5007
static int
5008
iface_ethtool_flag_ioctl(pcap_t *handle, int cmd, const char *cmdname,
5009
    int eperm_ok)
5010
0
{
5011
0
  struct ifreq  ifr;
5012
0
  struct ethtool_value eval;
5013
5014
0
  memset(&ifr, 0, sizeof(ifr));
5015
0
  pcap_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name));
5016
0
  eval.cmd = cmd;
5017
0
  eval.data = 0;
5018
0
  ifr.ifr_data = (caddr_t)&eval;
5019
0
  if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) {
5020
0
    if (errno == EOPNOTSUPP || errno == EINVAL ||
5021
0
        (errno == EPERM && eperm_ok)) {
5022
      /*
5023
       * OK, let's just return 0, which, in our
5024
       * case, either means "no, what we're asking
5025
       * about is not enabled" or "all the flags
5026
       * are clear (i.e., nothing is enabled)".
5027
       */
5028
0
      return 0;
5029
0
    }
5030
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5031
0
        errno, "%s: SIOCETHTOOL(%s) ioctl failed",
5032
0
        handle->opt.device, cmdname);
5033
0
    return -1;
5034
0
  }
5035
0
  return eval.data;
5036
0
}
5037
5038
/*
5039
 * XXX - it's annoying that we have to check for offloading at all, but,
5040
 * given that we have to, it's still annoying that we have to check for
5041
 * particular types of offloading, especially that shiny new types of
5042
 * offloading may be added - and, worse, may not be checkable with
5043
 * a particular ETHTOOL_ operation; ETHTOOL_GFEATURES would, in
5044
 * theory, give those to you, but the actual flags being used are
5045
 * opaque (defined in a non-uapi header), and there doesn't seem to
5046
 * be any obvious way to ask the kernel what all the offloading flags
5047
 * are - at best, you can ask for a set of strings(!) to get *names*
5048
 * for various flags.  (That whole mechanism appears to have been
5049
 * designed for the sole purpose of letting ethtool report flags
5050
 * by name and set flags by name, with the names having no semantics
5051
 * ethtool understands.)
5052
 */
5053
static int
5054
iface_get_offload(pcap_t *handle)
5055
0
{
5056
0
  int ret;
5057
5058
0
#ifdef ETHTOOL_GTSO
5059
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO", 0);
5060
0
  if (ret == -1)
5061
0
    return -1;
5062
0
  if (ret)
5063
0
    return 1; /* TCP segmentation offloading on */
5064
0
#endif
5065
5066
0
#ifdef ETHTOOL_GGSO
5067
  /*
5068
   * XXX - will this cause large unsegmented packets to be
5069
   * handed to PF_PACKET sockets on transmission?  If not,
5070
   * this need not be checked.
5071
   */
5072
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO", 0);
5073
0
  if (ret == -1)
5074
0
    return -1;
5075
0
  if (ret)
5076
0
    return 1; /* generic segmentation offloading on */
5077
0
#endif
5078
5079
0
#ifdef ETHTOOL_GFLAGS
5080
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS", 0);
5081
0
  if (ret == -1)
5082
0
    return -1;
5083
0
  if (ret & ETH_FLAG_LRO)
5084
0
    return 1; /* large receive offloading on */
5085
0
#endif
5086
5087
0
#ifdef ETHTOOL_GGRO
5088
  /*
5089
   * XXX - will this cause large reassembled packets to be
5090
   * handed to PF_PACKET sockets on receipt?  If not,
5091
   * this need not be checked.
5092
   */
5093
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO", 0);
5094
0
  if (ret == -1)
5095
0
    return -1;
5096
0
  if (ret)
5097
0
    return 1; /* generic (large) receive offloading on */
5098
0
#endif
5099
5100
0
#ifdef ETHTOOL_GUFO
5101
  /*
5102
   * Do this one last, as support for it was removed in later
5103
   * kernels, and it fails with EPERM on those kernels rather
5104
   * than with EOPNOTSUPP (see explanation in comment for
5105
   * iface_ethtool_flag_ioctl()).
5106
   */
5107
0
  ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO", 1);
5108
0
  if (ret == -1)
5109
0
    return -1;
5110
0
  if (ret)
5111
0
    return 1; /* UDP fragmentation offloading on */
5112
0
#endif
5113
5114
0
  return 0;
5115
0
}
5116
#else /* SIOCETHTOOL */
5117
static int
5118
iface_get_offload(pcap_t *handle _U_)
5119
{
5120
  /*
5121
   * XXX - do we need to get this information if we don't
5122
   * have the ethtool ioctls?  If so, how do we do that?
5123
   */
5124
  return 0;
5125
}
5126
#endif /* SIOCETHTOOL */
5127
5128
static struct dsa_proto {
5129
  const char *name;
5130
  bpf_u_int32 linktype;
5131
} dsa_protos[] = {
5132
  /*
5133
   * None is special and indicates that the interface does not have
5134
   * any tagging protocol configured, and is therefore a standard
5135
   * Ethernet interface.
5136
   */
5137
  { "none", DLT_EN10MB },
5138
  { "brcm", DLT_DSA_TAG_BRCM },
5139
  { "brcm-prepend", DLT_DSA_TAG_BRCM_PREPEND },
5140
  { "dsa", DLT_DSA_TAG_DSA },
5141
  { "edsa", DLT_DSA_TAG_EDSA },
5142
};
5143
5144
static int
5145
iface_dsa_get_proto_info(const char *device, pcap_t *handle)
5146
0
{
5147
0
  char *pathstr;
5148
0
  unsigned int i;
5149
  /*
5150
   * Make this significantly smaller than PCAP_ERRBUF_SIZE;
5151
   * the tag *shouldn't* have some huge long name, and making
5152
   * it smaller keeps newer versions of GCC from whining that
5153
   * the error message if we don't support the tag could
5154
   * overflow the error message buffer.
5155
   */
5156
0
  char buf[128];
5157
0
  ssize_t r;
5158
0
  int fd;
5159
5160
0
  fd = asprintf(&pathstr, "/sys/class/net/%s/dsa/tagging", device);
5161
0
  if (fd < 0) {
5162
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5163
0
            fd, "asprintf");
5164
0
    return PCAP_ERROR;
5165
0
  }
5166
5167
0
  fd = open(pathstr, O_RDONLY);
5168
0
  free(pathstr);
5169
  /*
5170
   * This is not fatal, kernel >= 4.20 *might* expose this attribute
5171
   */
5172
0
  if (fd < 0)
5173
0
    return 0;
5174
5175
0
  r = read(fd, buf, sizeof(buf) - 1);
5176
0
  if (r <= 0) {
5177
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5178
0
            errno, "read");
5179
0
    close(fd);
5180
0
    return PCAP_ERROR;
5181
0
  }
5182
0
  close(fd);
5183
5184
  /*
5185
   * Buffer should be LF terminated.
5186
   */
5187
0
  if (buf[r - 1] == '\n')
5188
0
    r--;
5189
0
  buf[r] = '\0';
5190
5191
0
  for (i = 0; i < sizeof(dsa_protos) / sizeof(dsa_protos[0]); i++) {
5192
0
    if (strlen(dsa_protos[i].name) == (size_t)r &&
5193
0
        strcmp(buf, dsa_protos[i].name) == 0) {
5194
0
      handle->linktype = dsa_protos[i].linktype;
5195
0
      switch (dsa_protos[i].linktype) {
5196
0
      case DLT_EN10MB:
5197
0
        return 0;
5198
0
      default:
5199
0
        return 1;
5200
0
      }
5201
0
    }
5202
0
  }
5203
5204
0
  snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
5205
0
          "unsupported DSA tag: %s", buf);
5206
5207
0
  return PCAP_ERROR;
5208
0
}
5209
5210
/*
5211
 *  Query the kernel for the MTU of the given interface.
5212
 */
5213
static int
5214
iface_get_mtu(int fd, const char *device, char *ebuf)
5215
0
{
5216
0
  struct ifreq  ifr;
5217
5218
0
  if (!device)
5219
0
    return BIGGER_THAN_ALL_MTUS;
5220
5221
0
  memset(&ifr, 0, sizeof(ifr));
5222
0
  pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
5223
5224
0
  if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) {
5225
0
    pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5226
0
        errno, "SIOCGIFMTU");
5227
0
    return -1;
5228
0
  }
5229
5230
0
  return ifr.ifr_mtu;
5231
0
}
5232
5233
/*
5234
 *  Get the hardware type of the given interface as ARPHRD_xxx constant.
5235
 */
5236
static int
5237
iface_get_arptype(int fd, const char *device, char *ebuf)
5238
0
{
5239
0
  struct ifreq  ifr;
5240
0
  int   ret;
5241
5242
0
  memset(&ifr, 0, sizeof(ifr));
5243
0
  pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
5244
5245
0
  if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
5246
0
    if (errno == ENODEV) {
5247
      /*
5248
       * No such device.
5249
       *
5250
       * There's nothing more to say, so clear
5251
       * the error message.
5252
       */
5253
0
      ret = PCAP_ERROR_NO_SUCH_DEVICE;
5254
0
      ebuf[0] = '\0';
5255
0
    } else {
5256
0
      ret = PCAP_ERROR;
5257
0
      pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
5258
0
          errno, "SIOCGIFHWADDR");
5259
0
    }
5260
0
    return ret;
5261
0
  }
5262
5263
0
  return ifr.ifr_hwaddr.sa_family;
5264
0
}
5265
5266
static int
5267
fix_program(pcap_t *handle, struct sock_fprog *fcode)
5268
0
{
5269
0
  struct pcap_linux *handlep = handle->priv;
5270
0
  size_t prog_size;
5271
0
  register int i;
5272
0
  register struct bpf_insn *p;
5273
0
  struct bpf_insn *f;
5274
0
  int len;
5275
5276
  /*
5277
   * Make a copy of the filter, and modify that copy if
5278
   * necessary.
5279
   */
5280
0
  prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len;
5281
0
  len = handle->fcode.bf_len;
5282
0
  f = (struct bpf_insn *)malloc(prog_size);
5283
0
  if (f == NULL) {
5284
0
    pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
5285
0
        errno, "malloc");
5286
0
    return -1;
5287
0
  }
5288
0
  memcpy(f, handle->fcode.bf_insns, prog_size);
5289
0
  fcode->len = len;
5290
0
  fcode->filter = (struct sock_filter *) f;
5291
5292
0
  for (i = 0; i < len; ++i) {
5293
0
    p = &f[i];
5294
    /*
5295
     * What type of instruction is this?
5296
     */
5297
0
    switch (BPF_CLASS(p->code)) {
5298
5299
0
    case BPF_LD:
5300
0
    case BPF_LDX:
5301
      /*
5302
       * It's a load instruction; is it loading
5303
       * from the packet?
5304
       */
5305
0
      switch (BPF_MODE(p->code)) {
5306
5307
0
      case BPF_ABS:
5308
0
      case BPF_IND:
5309
0
      case BPF_MSH:
5310
        /*
5311
         * Yes; are we in cooked mode?
5312
         */
5313
0
        if (handlep->cooked) {
5314
          /*
5315
           * Yes, so we need to fix this
5316
           * instruction.
5317
           */
5318
0
          if (fix_offset(handle, p) < 0) {
5319
            /*
5320
             * We failed to do so.
5321
             * Return 0, so our caller
5322
             * knows to punt to userland.
5323
             */
5324
0
            return 0;
5325
0
          }
5326
0
        }
5327
0
        break;
5328
0
      }
5329
0
      break;
5330
0
    }
5331
0
  }
5332
0
  return 1; /* we succeeded */
5333
0
}
5334
5335
static int
5336
fix_offset(pcap_t *handle, struct bpf_insn *p)
5337
0
{
5338
  /*
5339
   * Existing references to auxiliary data shouldn't be adjusted.
5340
   *
5341
   * Note that SKF_AD_OFF is negative, but p->k is unsigned, so
5342
   * we use >= and cast SKF_AD_OFF to unsigned.
5343
   */
5344
0
  if (p->k >= (bpf_u_int32)SKF_AD_OFF)
5345
0
    return 0;
5346
0
  if (handle->linktype == DLT_LINUX_SLL2) {
5347
    /*
5348
     * What's the offset?
5349
     */
5350
0
    if (p->k >= SLL2_HDR_LEN) {
5351
      /*
5352
       * It's within the link-layer payload; that starts
5353
       * at an offset of 0, as far as the kernel packet
5354
       * filter is concerned, so subtract the length of
5355
       * the link-layer header.
5356
       */
5357
0
      p->k -= SLL2_HDR_LEN;
5358
0
    } else if (p->k == 0) {
5359
      /*
5360
       * It's the protocol field; map it to the
5361
       * special magic kernel offset for that field.
5362
       */
5363
0
      p->k = SKF_AD_OFF + SKF_AD_PROTOCOL;
5364
0
    } else if (p->k == 4) {
5365
      /*
5366
       * It's the ifindex field; map it to the
5367
       * special magic kernel offset for that field.
5368
       */
5369
0
      p->k = SKF_AD_OFF + SKF_AD_IFINDEX;
5370
0
    } else if (p->k == 10) {
5371
      /*
5372
       * It's the packet type field; map it to the
5373
       * special magic kernel offset for that field.
5374
       */
5375
0
      p->k = SKF_AD_OFF + SKF_AD_PKTTYPE;
5376
0
    } else if ((bpf_int32)(p->k) > 0) {
5377
      /*
5378
       * It's within the header, but it's not one of
5379
       * those fields; we can't do that in the kernel,
5380
       * so punt to userland.
5381
       */
5382
0
      return -1;
5383
0
    }
5384
0
  } else {
5385
    /*
5386
     * What's the offset?
5387
     */
5388
0
    if (p->k >= SLL_HDR_LEN) {
5389
      /*
5390
       * It's within the link-layer payload; that starts
5391
       * at an offset of 0, as far as the kernel packet
5392
       * filter is concerned, so subtract the length of
5393
       * the link-layer header.
5394
       */
5395
0
      p->k -= SLL_HDR_LEN;
5396
0
    } else if (p->k == 0) {
5397
      /*
5398
       * It's the packet type field; map it to the
5399
       * special magic kernel offset for that field.
5400
       */
5401
0
      p->k = SKF_AD_OFF + SKF_AD_PKTTYPE;
5402
0
    } else if (p->k == 14) {
5403
      /*
5404
       * It's the protocol field; map it to the
5405
       * special magic kernel offset for that field.
5406
       */
5407
0
      p->k = SKF_AD_OFF + SKF_AD_PROTOCOL;
5408
0
    } else if ((bpf_int32)(p->k) > 0) {
5409
      /*
5410
       * It's within the header, but it's not one of
5411
       * those fields; we can't do that in the kernel,
5412
       * so punt to userland.
5413
       */
5414
0
      return -1;
5415
0
    }
5416
0
  }
5417
0
  return 0;
5418
0
}
5419
5420
static int
5421
set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode)
5422
0
{
5423
0
  int total_filter_on = 0;
5424
0
  int save_mode;
5425
0
  int ret;
5426
0
  int save_errno;
5427
5428
  /*
5429
   * The socket filter code doesn't discard all packets queued
5430
   * up on the socket when the filter is changed; this means
5431
   * that packets that don't match the new filter may show up
5432
   * after the new filter is put onto the socket, if those
5433
   * packets haven't yet been read.
5434
   *
5435
   * This means, for example, that if you do a tcpdump capture
5436
   * with a filter, the first few packets in the capture might
5437
   * be packets that wouldn't have passed the filter.
5438
   *
5439
   * We therefore discard all packets queued up on the socket
5440
   * when setting a kernel filter.  (This isn't an issue for
5441
   * userland filters, as the userland filtering is done after
5442
   * packets are queued up.)
5443
   *
5444
   * To flush those packets, we put the socket in read-only mode,
5445
   * and read packets from the socket until there are no more to
5446
   * read.
5447
   *
5448
   * In order to keep that from being an infinite loop - i.e.,
5449
   * to keep more packets from arriving while we're draining
5450
   * the queue - we put the "total filter", which is a filter
5451
   * that rejects all packets, onto the socket before draining
5452
   * the queue.
5453
   *
5454
   * This code deliberately ignores any errors, so that you may
5455
   * get bogus packets if an error occurs, rather than having
5456
   * the filtering done in userland even if it could have been
5457
   * done in the kernel.
5458
   */
5459
0
  if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
5460
0
           &total_fcode, sizeof(total_fcode)) == 0) {
5461
0
    char drain[1];
5462
5463
    /*
5464
     * Note that we've put the total filter onto the socket.
5465
     */
5466
0
    total_filter_on = 1;
5467
5468
    /*
5469
     * Save the socket's current mode, and put it in
5470
     * non-blocking mode; we drain it by reading packets
5471
     * until we get an error (which is normally a
5472
     * "nothing more to be read" error).
5473
     */
5474
0
    save_mode = fcntl(handle->fd, F_GETFL, 0);
5475
0
    if (save_mode == -1) {
5476
0
      pcap_fmt_errmsg_for_errno(handle->errbuf,
5477
0
          PCAP_ERRBUF_SIZE, errno,
5478
0
          "can't get FD flags when changing filter");
5479
0
      return -2;
5480
0
    }
5481
0
    if (fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) < 0) {
5482
0
      pcap_fmt_errmsg_for_errno(handle->errbuf,
5483
0
          PCAP_ERRBUF_SIZE, errno,
5484
0
          "can't set nonblocking mode when changing filter");
5485
0
      return -2;
5486
0
    }
5487
0
    while (recv(handle->fd, &drain, sizeof drain, MSG_TRUNC) >= 0)
5488
0
      ;
5489
0
    save_errno = errno;
5490
0
    if (save_errno != EAGAIN) {
5491
      /*
5492
       * Fatal error.
5493
       *
5494
       * If we can't restore the mode or reset the
5495
       * kernel filter, there's nothing we can do.
5496
       */
5497
0
      (void)fcntl(handle->fd, F_SETFL, save_mode);
5498
0
      (void)reset_kernel_filter(handle);
5499
0
      pcap_fmt_errmsg_for_errno(handle->errbuf,
5500
0
          PCAP_ERRBUF_SIZE, save_errno,
5501
0
          "recv failed when changing filter");
5502
0
      return -2;
5503
0
    }
5504
0
    if (fcntl(handle->fd, F_SETFL, save_mode) == -1) {
5505
0
      pcap_fmt_errmsg_for_errno(handle->errbuf,
5506
0
          PCAP_ERRBUF_SIZE, errno,
5507
0
          "can't restore FD flags when changing filter");
5508
0
      return -2;
5509
0
    }
5510
0
  }
5511
5512
  /*
5513
   * Now attach the new filter.
5514
   */
5515
0
  ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
5516
0
       fcode, sizeof(*fcode));
5517
0
  if (ret == -1 && total_filter_on) {
5518
    /*
5519
     * Well, we couldn't set that filter on the socket,
5520
     * but we could set the total filter on the socket.
5521
     *
5522
     * This could, for example, mean that the filter was
5523
     * too big to put into the kernel, so we'll have to
5524
     * filter in userland; in any case, we'll be doing
5525
     * filtering in userland, so we need to remove the
5526
     * total filter so we see packets.
5527
     */
5528
0
    save_errno = errno;
5529
5530
    /*
5531
     * If this fails, we're really screwed; we have the
5532
     * total filter on the socket, and it won't come off.
5533
     * Report it as a fatal error.
5534
     */
5535
0
    if (reset_kernel_filter(handle) == -1) {
5536
0
      pcap_fmt_errmsg_for_errno(handle->errbuf,
5537
0
          PCAP_ERRBUF_SIZE, errno,
5538
0
          "can't remove kernel total filter");
5539
0
      return -2;  /* fatal error */
5540
0
    }
5541
5542
0
    errno = save_errno;
5543
0
  }
5544
0
  return ret;
5545
0
}
5546
5547
static int
5548
reset_kernel_filter(pcap_t *handle)
5549
0
{
5550
0
  int ret;
5551
  /*
5552
   * setsockopt() barfs unless it get a dummy parameter.
5553
   * valgrind whines unless the value is initialized,
5554
   * as it has no idea that setsockopt() ignores its
5555
   * parameter.
5556
   */
5557
0
  int dummy = 0;
5558
5559
0
  ret = setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER,
5560
0
           &dummy, sizeof(dummy));
5561
  /*
5562
   * Ignore ENOENT - it means "we don't have a filter", so there
5563
   * was no filter to remove, and there's still no filter.
5564
   *
5565
   * Also ignore ENONET, as a lot of kernel versions had a
5566
   * typo where ENONET, rather than ENOENT, was returned.
5567
   */
5568
0
  if (ret == -1 && errno != ENOENT && errno != ENONET)
5569
0
    return -1;
5570
0
  return 0;
5571
0
}
5572
5573
int
5574
pcap_set_protocol_linux(pcap_t *p, int protocol)
5575
0
{
5576
0
  if (pcap_check_activated(p))
5577
0
    return (PCAP_ERROR_ACTIVATED);
5578
0
  p->opt.protocol = protocol;
5579
0
  return (0);
5580
0
}
5581
5582
/*
5583
 * Libpcap version string.
5584
 */
5585
const char *
5586
pcap_lib_version(void)
5587
0
{
5588
0
#if defined(HAVE_TPACKET3)
5589
0
  return (PCAP_VERSION_STRING " (with TPACKET_V3)");
5590
#else
5591
  return (PCAP_VERSION_STRING " (with TPACKET_V2)");
5592
#endif
5593
0
}