Coverage Report

Created: 2026-04-29 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/SockFuzzer/third_party/xnu/bsd/net/dlil.c
Line
Count
Source
1
/*
2
 * Copyright (c) 1999-2021 Apple Inc. All rights reserved.
3
 *
4
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5
 *
6
 * This file contains Original Code and/or Modifications of Original Code
7
 * as defined in and that are subject to the Apple Public Source License
8
 * Version 2.0 (the 'License'). You may not use this file except in
9
 * compliance with the License. The rights granted to you under the License
10
 * may not be used to create, or enable the creation or redistribution of,
11
 * unlawful or unlicensed copies of an Apple operating system, or to
12
 * circumvent, violate, or enable the circumvention or violation of, any
13
 * terms of an Apple operating system software license agreement.
14
 *
15
 * Please obtain a copy of the License at
16
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17
 *
18
 * The Original Code and all software distributed under the License are
19
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23
 * Please see the License for the specific language governing rights and
24
 * limitations under the License.
25
 *
26
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27
 */
28
/*
29
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30
 * support for mandatory and extensible security protections.  This notice
31
 * is included in support of clause 2.2 (b) of the Apple Public License,
32
 * Version 2.0.
33
 */
34
#include <stddef.h>
35
#include <ptrauth.h>
36
37
#include <sys/param.h>
38
#include <sys/systm.h>
39
#include <sys/kernel.h>
40
#include <sys/malloc.h>
41
#include <sys/mbuf.h>
42
#include <sys/socket.h>
43
#include <sys/domain.h>
44
#include <sys/user.h>
45
#include <sys/random.h>
46
#include <sys/socketvar.h>
47
#include <net/if_dl.h>
48
#include <net/if.h>
49
#include <net/route.h>
50
#include <net/if_var.h>
51
#include <net/dlil.h>
52
#include <net/if_arp.h>
53
#include <net/iptap.h>
54
#include <net/pktap.h>
55
#include <sys/kern_event.h>
56
#include <sys/kdebug.h>
57
#include <sys/mcache.h>
58
#include <sys/syslog.h>
59
#include <sys/protosw.h>
60
#include <sys/priv.h>
61
62
#include <kern/assert.h>
63
#include <kern/task.h>
64
#include <kern/thread.h>
65
#include <kern/sched_prim.h>
66
#include <kern/locks.h>
67
#include <kern/zalloc.h>
68
69
#include <net/kpi_protocol.h>
70
#include <net/if_types.h>
71
#include <net/if_ipsec.h>
72
#include <net/if_llreach.h>
73
#include <net/if_utun.h>
74
#include <net/kpi_interfacefilter.h>
75
#include <net/classq/classq.h>
76
#include <net/classq/classq_sfb.h>
77
#include <net/flowhash.h>
78
#include <net/ntstat.h>
79
#include <net/if_llatbl.h>
80
#include <net/net_api_stats.h>
81
#include <net/if_ports_used.h>
82
#include <net/if_vlan_var.h>
83
#include <netinet/in.h>
84
#if INET
85
#include <netinet/in_var.h>
86
#include <netinet/igmp_var.h>
87
#include <netinet/ip_var.h>
88
#include <netinet/tcp.h>
89
#include <netinet/tcp_var.h>
90
#include <netinet/udp.h>
91
#include <netinet/udp_var.h>
92
#include <netinet/if_ether.h>
93
#include <netinet/in_pcb.h>
94
#include <netinet/in_tclass.h>
95
#include <netinet/ip.h>
96
#include <netinet/ip_icmp.h>
97
#include <netinet/icmp_var.h>
98
#endif /* INET */
99
100
#include <net/nat464_utils.h>
101
#include <netinet6/in6_var.h>
102
#include <netinet6/nd6.h>
103
#include <netinet6/mld6_var.h>
104
#include <netinet6/scope6_var.h>
105
#include <netinet/ip6.h>
106
#include <netinet/icmp6.h>
107
#include <net/pf_pbuf.h>
108
#include <libkern/OSAtomic.h>
109
#include <libkern/tree.h>
110
111
#include <dev/random/randomdev.h>
112
#include <machine/machine_routines.h>
113
114
#include <mach/thread_act.h>
115
#include <mach/sdt.h>
116
117
#if CONFIG_MACF
118
#include <sys/kauth.h>
119
#include <security/mac_framework.h>
120
#include <net/ethernet.h>
121
#include <net/if_vlan_var.h>
122
#include <net/firewire.h>
123
#endif
124
125
#if PF
126
#include <net/pfvar.h>
127
#endif /* PF */
128
#include <net/pktsched/pktsched.h>
129
#include <net/pktsched/pktsched_netem.h>
130
131
#if NECP
132
#include <net/necp.h>
133
#endif /* NECP */
134
135
136
#include <os/log.h>
137
138
#define DBG_LAYER_BEG           DLILDBG_CODE(DBG_DLIL_STATIC, 0)
139
#define DBG_LAYER_END           DLILDBG_CODE(DBG_DLIL_STATIC, 2)
140
#define DBG_FNC_DLIL_INPUT      DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
141
#define DBG_FNC_DLIL_OUTPUT     DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
142
#define DBG_FNC_DLIL_IFOUT      DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
143
144
#define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
145
#define MAX_LINKADDR        4 /* LONGWORDS */
146
#define M_NKE M_IFADDR
147
148
#if 1
149
8
#define DLIL_PRINTF     printf
150
#else
151
#define DLIL_PRINTF     kprintf
152
#endif
153
154
#define IF_DATA_REQUIRE_ALIGNED_64(f)   \
155
15
  _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
156
157
#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)     \
158
15
  _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
159
160
enum {
161
  kProtoKPI_v1    = 1,
162
  kProtoKPI_v2    = 2
163
};
164
165
/*
166
 * List of if_proto structures in if_proto_hash[] is protected by
167
 * the ifnet lock.  The rest of the fields are initialized at protocol
168
 * attach time and never change, thus no lock required as long as
169
 * a reference to it is valid, via if_proto_ref().
170
 */
171
struct if_proto {
172
  SLIST_ENTRY(if_proto)       next_hash;
173
  u_int32_t                   refcount;
174
  u_int32_t                   detached;
175
  struct ifnet                *ifp;
176
  protocol_family_t           protocol_family;
177
  int                         proto_kpi;
178
  union {
179
    struct {
180
      proto_media_input               input;
181
      proto_media_preout              pre_output;
182
      proto_media_event               event;
183
      proto_media_ioctl               ioctl;
184
      proto_media_detached            detached;
185
      proto_media_resolve_multi       resolve_multi;
186
      proto_media_send_arp            send_arp;
187
    } v1;
188
    struct {
189
      proto_media_input_v2            input;
190
      proto_media_preout              pre_output;
191
      proto_media_event               event;
192
      proto_media_ioctl               ioctl;
193
      proto_media_detached            detached;
194
      proto_media_resolve_multi       resolve_multi;
195
      proto_media_send_arp            send_arp;
196
    } v2;
197
  } kpi;
198
};
199
200
SLIST_HEAD(proto_hash_entry, if_proto);
201
202
#define DLIL_SDLDATALEN \
203
  (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
204
205
struct dlil_ifnet {
206
  struct ifnet    dl_if;                  /* public ifnet */
207
  /*
208
   * DLIL private fields, protected by dl_if_lock
209
   */
210
  decl_lck_mtx_data(, dl_if_lock);
211
  TAILQ_ENTRY(dlil_ifnet) dl_if_link;     /* dlil_ifnet link */
212
  u_int32_t dl_if_flags;                  /* flags (below) */
213
  u_int32_t dl_if_refcnt;                 /* refcnt */
214
  void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
215
  void    *dl_if_uniqueid;                /* unique interface id */
216
  size_t  dl_if_uniqueid_len;             /* length of the unique id */
217
  char    dl_if_namestorage[IFNAMSIZ];    /* interface name storage */
218
  char    dl_if_xnamestorage[IFXNAMSIZ];  /* external name storage */
219
  struct {
220
    struct ifaddr   ifa;            /* lladdr ifa */
221
    u_int8_t        asdl[DLIL_SDLMAXLEN]; /* addr storage */
222
    u_int8_t        msdl[DLIL_SDLMAXLEN]; /* mask storage */
223
  } dl_if_lladdr;
224
  u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
225
  u_int8_t dl_if_permanent_ether[ETHER_ADDR_LEN]; /* permanent address */
226
  u_int8_t dl_if_permanent_ether_is_set;
227
  u_int8_t dl_if_unused;
228
  struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
229
  ctrace_t        dl_if_attach;           /* attach PC stacktrace */
230
  ctrace_t        dl_if_detach;           /* detach PC stacktrace */
231
};
232
233
/* Values for dl_if_flags (private to DLIL) */
234
4
#define DLIF_INUSE      0x1     /* DLIL ifnet recycler, ifnet in use */
235
14
#define DLIF_REUSE      0x2     /* DLIL ifnet recycles, ifnet is not new */
236
0
#define DLIF_DEBUG      0x4     /* has debugging info */
237
238
0
#define IF_REF_TRACE_HIST_SIZE  8       /* size of ref trace history */
239
240
/* For gdb */
241
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
242
243
struct dlil_ifnet_dbg {
244
  struct dlil_ifnet       dldbg_dlif;             /* dlil_ifnet */
245
  u_int16_t               dldbg_if_refhold_cnt;   /* # ifnet references */
246
  u_int16_t               dldbg_if_refrele_cnt;   /* # ifnet releases */
247
  /*
248
   * Circular lists of ifnet_{reference,release} callers.
249
   */
250
  ctrace_t                dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
251
  ctrace_t                dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
252
};
253
254
#define DLIL_TO_IFP(s)  (&s->dl_if)
255
#define IFP_TO_DLIL(s)  ((struct dlil_ifnet *)s)
256
257
struct ifnet_filter {
258
  TAILQ_ENTRY(ifnet_filter)       filt_next;
259
  u_int32_t                       filt_skip;
260
  u_int32_t                       filt_flags;
261
  ifnet_t                         filt_ifp;
262
  const char                      *filt_name;
263
  void                            *filt_cookie;
264
  protocol_family_t               filt_protocol;
265
  iff_input_func                  filt_input;
266
  iff_output_func                 filt_output;
267
  iff_event_func                  filt_event;
268
  iff_ioctl_func                  filt_ioctl;
269
  iff_detached_func               filt_detached;
270
};
271
272
struct proto_input_entry;
273
274
static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
275
static lck_grp_t *dlil_lock_group;
276
lck_grp_t *ifnet_lock_group;
277
static lck_grp_t *ifnet_head_lock_group;
278
static lck_grp_t *ifnet_snd_lock_group;
279
static lck_grp_t *ifnet_rcv_lock_group;
280
lck_attr_t *ifnet_lock_attr;
281
decl_lck_rw_data(static, ifnet_head_lock);
282
decl_lck_mtx_data(static, dlil_ifnet_lock);
283
u_int32_t dlil_filter_disable_tso_count = 0;
284
285
#if DEBUG
286
static unsigned int ifnet_debug = 1;    /* debugging (enabled) */
287
#else
288
static unsigned int ifnet_debug;        /* debugging (disabled) */
289
#endif /* !DEBUG */
290
static unsigned int dlif_size;          /* size of dlil_ifnet to allocate */
291
static unsigned int dlif_bufsize;       /* size of dlif_size + headroom */
292
static struct zone *dlif_zone;          /* zone for dlil_ifnet */
293
1
#define DLIF_ZONE_NAME          "ifnet"         /* zone name */
294
295
static ZONE_DECLARE(dlif_filt_zone, "ifnet_filter",
296
    sizeof(struct ifnet_filter), ZC_ZFREE_CLEARMEM);
297
298
static ZONE_DECLARE(dlif_phash_zone, "ifnet_proto_hash",
299
    sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS, ZC_ZFREE_CLEARMEM);
300
301
static ZONE_DECLARE(dlif_proto_zone, "ifnet_proto",
302
    sizeof(struct if_proto), ZC_ZFREE_CLEARMEM);
303
304
static unsigned int dlif_tcpstat_size;  /* size of tcpstat_local to allocate */
305
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
306
static struct zone *dlif_tcpstat_zone;          /* zone for tcpstat_local */
307
1
#define DLIF_TCPSTAT_ZONE_NAME  "ifnet_tcpstat" /* zone name */
308
309
static unsigned int dlif_udpstat_size;  /* size of udpstat_local to allocate */
310
static unsigned int dlif_udpstat_bufsize;       /* size of dlif_udpstat_size + headroom */
311
static struct zone *dlif_udpstat_zone;          /* zone for udpstat_local */
312
1
#define DLIF_UDPSTAT_ZONE_NAME  "ifnet_udpstat" /* zone name */
313
314
static u_int32_t net_rtref;
315
316
static struct dlil_main_threading_info dlil_main_input_thread_info;
317
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
318
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
319
320
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
321
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
322
static void dlil_if_trace(struct dlil_ifnet *, int);
323
static void if_proto_ref(struct if_proto *);
324
static void if_proto_free(struct if_proto *);
325
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
326
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
327
    u_int32_t list_count);
328
static void if_flt_monitor_busy(struct ifnet *);
329
static void if_flt_monitor_unbusy(struct ifnet *);
330
static void if_flt_monitor_enter(struct ifnet *);
331
static void if_flt_monitor_leave(struct ifnet *);
332
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
333
    char **, protocol_family_t);
334
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
335
    protocol_family_t);
336
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
337
    const struct sockaddr_dl *);
338
static int ifnet_lookup(struct ifnet *);
339
static void if_purgeaddrs(struct ifnet *);
340
341
static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
342
    struct mbuf *, char *);
343
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
344
    struct mbuf *);
345
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
346
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
347
static void ifproto_media_event(struct ifnet *, protocol_family_t,
348
    const struct kev_msg *);
349
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
350
    unsigned long, void *);
351
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
352
    struct sockaddr_dl *, size_t);
353
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
354
    const struct sockaddr_dl *, const struct sockaddr *,
355
    const struct sockaddr_dl *, const struct sockaddr *);
356
357
static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
358
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
359
    boolean_t poll, struct thread *tp);
360
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
361
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
362
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
363
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
364
    protocol_family_t *);
365
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
366
    const struct ifnet_demux_desc *, u_int32_t);
367
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
368
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
369
#if !XNU_TARGET_OS_OSX
370
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
371
    const struct sockaddr *, const char *, const char *,
372
    u_int32_t *, u_int32_t *);
373
#else /* XNU_TARGET_OS_OSX */
374
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
375
    const struct sockaddr *, const char *, const char *);
376
#endif /* XNU_TARGET_OS_OSX */
377
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
378
    const struct sockaddr *, const char *, const char *,
379
    u_int32_t *, u_int32_t *);
380
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
381
static void ifp_if_free(struct ifnet *);
382
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
383
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
384
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
385
386
static errno_t dlil_input_async(struct dlil_threading_info *, struct ifnet *,
387
    struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
388
    boolean_t, struct thread *);
389
static errno_t dlil_input_sync(struct dlil_threading_info *, struct ifnet *,
390
    struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
391
    boolean_t, struct thread *);
392
393
static void dlil_main_input_thread_func(void *, wait_result_t);
394
static void dlil_main_input_thread_cont(void *, wait_result_t);
395
396
static void dlil_input_thread_func(void *, wait_result_t);
397
static void dlil_input_thread_cont(void *, wait_result_t);
398
399
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
400
static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);
401
402
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *,
403
    thread_continue_t *);
404
static void dlil_terminate_input_thread(struct dlil_threading_info *);
405
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
406
    struct dlil_threading_info *, struct ifnet *, boolean_t);
407
static boolean_t dlil_input_stats_sync(struct ifnet *,
408
    struct dlil_threading_info *);
409
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
410
    u_int32_t, ifnet_model_t, boolean_t);
411
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
412
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
413
static int dlil_is_clat_needed(protocol_family_t, mbuf_t );
414
static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
415
static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
416
#if DEBUG || DEVELOPMENT
417
static void dlil_verify_sum16(void);
418
#endif /* DEBUG || DEVELOPMENT */
419
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
420
    protocol_family_t);
421
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
422
    protocol_family_t);
423
424
static void dlil_incr_pending_thread_count(void);
425
static void dlil_decr_pending_thread_count(void);
426
427
static void ifnet_detacher_thread_func(void *, wait_result_t);
428
static void ifnet_detacher_thread_cont(void *, wait_result_t);
429
static void ifnet_detach_final(struct ifnet *);
430
static void ifnet_detaching_enqueue(struct ifnet *);
431
static struct ifnet *ifnet_detaching_dequeue(void);
432
433
static void ifnet_start_thread_func(void *, wait_result_t);
434
static void ifnet_start_thread_cont(void *, wait_result_t);
435
436
static void ifnet_poll_thread_func(void *, wait_result_t);
437
static void ifnet_poll_thread_cont(void *, wait_result_t);
438
439
static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
440
    boolean_t, boolean_t *);
441
442
static void ifp_src_route_copyout(struct ifnet *, struct route *);
443
static void ifp_src_route_copyin(struct ifnet *, struct route *);
444
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
445
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
446
447
static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
448
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
449
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
450
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
451
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
452
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
453
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
454
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
455
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
456
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
457
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
458
459
struct chain_len_stats tx_chain_len_stats;
460
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
461
462
#if TEST_INPUT_THREAD_TERMINATION
463
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
464
#endif /* TEST_INPUT_THREAD_TERMINATION */
465
466
/* The following are protected by dlil_ifnet_lock */
467
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
468
static u_int32_t ifnet_detaching_cnt;
469
static boolean_t ifnet_detaching_embryonic;
470
static void *ifnet_delayed_run; /* wait channel for detaching thread */
471
472
decl_lck_mtx_data(static, ifnet_fc_lock);
473
474
static uint32_t ifnet_flowhash_seed;
475
476
struct ifnet_flowhash_key {
477
  char            ifk_name[IFNAMSIZ];
478
  uint32_t        ifk_unit;
479
  uint32_t        ifk_flags;
480
  uint32_t        ifk_eflags;
481
  uint32_t        ifk_capabilities;
482
  uint32_t        ifk_capenable;
483
  uint32_t        ifk_output_sched_model;
484
  uint32_t        ifk_rand1;
485
  uint32_t        ifk_rand2;
486
};
487
488
/* Flow control entry per interface */
489
struct ifnet_fc_entry {
490
  RB_ENTRY(ifnet_fc_entry) ifce_entry;
491
  u_int32_t       ifce_flowhash;
492
  struct ifnet    *ifce_ifp;
493
};
494
495
static uint32_t ifnet_calc_flowhash(struct ifnet *);
496
static int ifce_cmp(const struct ifnet_fc_entry *,
497
    const struct ifnet_fc_entry *);
498
static int ifnet_fc_add(struct ifnet *);
499
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
500
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
501
502
/* protected by ifnet_fc_lock */
503
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
504
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
505
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
506
507
static ZONE_DECLARE(ifnet_fc_zone, "ifnet_fc_zone",
508
    sizeof(struct ifnet_fc_entry), ZC_ZFREE_CLEARMEM);
509
510
extern void bpfdetach(struct ifnet *);
511
extern void proto_input_run(void);
512
513
extern uint32_t udp_count_opportunistic(unsigned int ifindex,
514
    u_int32_t flags);
515
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
516
    u_int32_t flags);
517
518
__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
519
520
#if CONFIG_MACF
521
#if !XNU_TARGET_OS_OSX
522
int dlil_lladdr_ckreq = 1;
523
#else /* XNU_TARGET_OS_OSX */
524
int dlil_lladdr_ckreq = 0;
525
#endif /* XNU_TARGET_OS_OSX */
526
#endif /* CONFIG_MACF */
527
528
#if DEBUG
529
int dlil_verbose = 1;
530
#else
531
int dlil_verbose = 0;
532
#endif /* DEBUG */
533
#if IFNET_INPUT_SANITY_CHK
534
/* sanity checking of input packet lists received */
535
static u_int32_t dlil_input_sanity_check = 0;
536
#endif /* IFNET_INPUT_SANITY_CHK */
537
/* rate limit debug messages */
538
struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
539
540
SYSCTL_DECL(_net_link_generic_system);
541
542
SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
543
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
544
545
0
#define IF_SNDQ_MINLEN  32
546
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
547
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
548
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
549
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");
550
551
0
#define IF_RCVQ_MINLEN  32
552
#define IF_RCVQ_MAXLEN  256
553
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
554
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
555
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
556
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
557
558
#define IF_RXPOLL_DECAY         2       /* ilog2 of EWMA decay rate (4) */
559
u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
560
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
561
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
562
    "ilog2 of EWMA decay rate of avg inbound packets");
563
564
0
#define IF_RXPOLL_MODE_HOLDTIME_MIN     (10ULL * 1000 * 1000)   /* 10 ms */
565
#define IF_RXPOLL_MODE_HOLDTIME         (1000ULL * 1000 * 1000) /* 1 sec */
566
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
567
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
568
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
569
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
570
    "Q", "input poll mode freeze time");
571
572
0
#define IF_RXPOLL_SAMPLETIME_MIN        (1ULL * 1000 * 1000)    /* 1 ms */
573
#define IF_RXPOLL_SAMPLETIME            (10ULL * 1000 * 1000)   /* 10 ms */
574
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
575
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
576
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
577
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
578
    "Q", "input poll sampling time");
579
580
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
581
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
582
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
583
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
584
    "Q", "input poll interval (time)");
585
586
#define IF_RXPOLL_INTERVAL_PKTS 0       /* 0 (disabled) */
587
u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
588
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
589
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
590
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
591
592
#define IF_RXPOLL_WLOWAT        10
593
static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
594
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
595
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
596
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
597
    "I", "input poll wakeup low watermark");
598
599
#define IF_RXPOLL_WHIWAT        100
600
static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
601
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
602
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
603
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
604
    "I", "input poll wakeup high watermark");
605
606
static u_int32_t if_rxpoll_max = 0;                     /* 0 (automatic) */
607
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
608
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
609
    "max packets per poll call");
610
611
u_int32_t if_rxpoll = 1;
612
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
613
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
614
    sysctl_rxpoll, "I", "enable opportunistic input polling");
615
616
#if TEST_INPUT_THREAD_TERMINATION
617
static u_int32_t if_input_thread_termination_spin = 0;
618
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
619
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
620
    &if_input_thread_termination_spin, 0,
621
    sysctl_input_thread_termination_spin,
622
    "I", "input thread termination spin limit");
623
#endif /* TEST_INPUT_THREAD_TERMINATION */
624
625
static u_int32_t cur_dlil_input_threads = 0;
626
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
627
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
628
    "Current number of DLIL input threads");
629
630
#if IFNET_INPUT_SANITY_CHK
631
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
632
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
633
    "Turn on sanity checking in DLIL input");
634
#endif /* IFNET_INPUT_SANITY_CHK */
635
636
static u_int32_t if_flowadv = 1;
637
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
638
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
639
    "enable flow-advisory mechanism");
640
641
static u_int32_t if_delaybased_queue = 1;
642
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
643
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
644
    "enable delay based dynamic queue sizing");
645
646
static uint64_t hwcksum_in_invalidated = 0;
647
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
648
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
649
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
650
651
uint32_t hwcksum_dbg = 0;
652
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
653
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
654
    "enable hardware cksum debugging");
655
656
u_int32_t ifnet_start_delayed = 0;
657
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
658
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
659
    "number of times start was delayed");
660
661
u_int32_t ifnet_delay_start_disabled = 0;
662
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
663
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
664
    "number of times start was delayed");
665
666
static inline void
667
ifnet_delay_start_disabled_increment(void)
668
0
{
669
0
  OSIncrementAtomic(&ifnet_delay_start_disabled);
670
0
}
671
672
0
#define HWCKSUM_DBG_PARTIAL_FORCED      0x1     /* forced partial checksum */
673
0
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ   0x2     /* adjust start offset */
674
0
#define HWCKSUM_DBG_FINALIZE_FORCED     0x10    /* forced finalize */
675
#define HWCKSUM_DBG_MASK \
676
0
  (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |   \
677
0
  HWCKSUM_DBG_FINALIZE_FORCED)
678
679
static uint32_t hwcksum_dbg_mode = 0;
680
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
681
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
682
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
683
684
static uint64_t hwcksum_dbg_partial_forced = 0;
685
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
686
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
687
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
688
689
static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
690
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
691
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
692
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
693
694
static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
695
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
696
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
697
    &hwcksum_dbg_partial_rxoff_forced, 0,
698
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
699
    "forced partial cksum rx offset");
700
701
static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
702
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
703
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
704
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
705
    "adjusted partial cksum rx offset");
706
707
static uint64_t hwcksum_dbg_verified = 0;
708
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
709
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
710
    &hwcksum_dbg_verified, "packets verified for having good checksum");
711
712
static uint64_t hwcksum_dbg_bad_cksum = 0;
713
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
714
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
715
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
716
717
static uint64_t hwcksum_dbg_bad_rxoff = 0;
718
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
719
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
720
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
721
722
static uint64_t hwcksum_dbg_adjusted = 0;
723
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
724
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
725
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
726
727
static uint64_t hwcksum_dbg_finalized_hdr = 0;
728
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
729
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
730
    &hwcksum_dbg_finalized_hdr, "finalized headers");
731
732
static uint64_t hwcksum_dbg_finalized_data = 0;
733
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
734
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
735
    &hwcksum_dbg_finalized_data, "finalized payloads");
736
737
uint32_t hwcksum_tx = 1;
738
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
739
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
740
    "enable transmit hardware checksum offload");
741
742
uint32_t hwcksum_rx = 1;
743
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
744
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
745
    "enable receive hardware checksum offload");
746
747
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
748
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
749
    sysctl_tx_chain_len_stats, "S", "");
750
751
uint32_t tx_chain_len_count = 0;
752
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
753
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
754
755
static uint32_t threshold_notify = 1;           /* enable/disable */
756
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
757
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
758
759
static uint32_t threshold_interval = 2;         /* in seconds */
760
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
761
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
762
763
#if (DEVELOPMENT || DEBUG)
764
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
765
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
766
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
767
#endif /* DEVELOPMENT || DEBUG */
768
769
struct net_api_stats net_api_stats;
770
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
771
    &net_api_stats, net_api_stats, "");
772
773
unsigned int net_rxpoll = 1;
774
unsigned int net_affinity = 1;
775
unsigned int net_async = 1;     /* 0: synchronous, 1: asynchronous */
776
777
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
778
779
extern u_int32_t        inject_buckets;
780
781
static  lck_grp_attr_t  *dlil_grp_attributes = NULL;
782
static  lck_attr_t      *dlil_lck_attributes = NULL;
783
784
/* DLIL data threshold thread call */
785
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
786
787
void
788
ifnet_filter_update_tso(boolean_t filter_enable)
789
0
{
790
  /*
791
   * update filter count and route_generation ID to let TCP
792
   * know it should reevalute doing TSO or not
793
   */
794
0
  OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
795
0
  routegenid_update();
796
0
}
797
798
799
0
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
800
0
  struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);                    \
801
0
  if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
802
0
      !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
803
0
          panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
804
0
  /* NOTREACHED */                                        \
805
0
  }                                                               \
806
0
}
807
808
0
#define DLIL_EWMA(old, new, decay) do {                                 \
809
0
  u_int32_t _avg;                                                 \
810
0
  if ((_avg = (old)) > 0)                                         \
811
0
          _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
812
0
  else                                                            \
813
0
          _avg = (new);                                           \
814
0
  (old) = _avg;                                                   \
815
0
} while (0)
816
817
#define MBPS    (1ULL * 1000 * 1000)
818
#define GBPS    (MBPS * 1000)
819
820
struct rxpoll_time_tbl {
821
  u_int64_t       speed;          /* downlink speed */
822
  u_int32_t       plowat;         /* packets low watermark */
823
  u_int32_t       phiwat;         /* packets high watermark */
824
  u_int32_t       blowat;         /* bytes low watermark */
825
  u_int32_t       bhiwat;         /* bytes high watermark */
826
};
827
828
static struct rxpoll_time_tbl rxpoll_tbl[] = {
829
  { .speed =  10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024)    },
830
  { .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
831
  { .speed =   1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
832
  { .speed =  10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
833
  { .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
834
  { .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
835
};
836
837
decl_lck_mtx_data(static, dlil_thread_sync_lock);
838
static uint32_t dlil_pending_thread_cnt = 0;
839
840
static void
841
dlil_incr_pending_thread_count(void)
842
2
{
843
2
  LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
844
2
  lck_mtx_lock(&dlil_thread_sync_lock);
845
2
  dlil_pending_thread_cnt++;
846
2
  lck_mtx_unlock(&dlil_thread_sync_lock);
847
2
}
848
849
static void
850
dlil_decr_pending_thread_count(void)
851
0
{
852
0
  LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
853
0
  lck_mtx_lock(&dlil_thread_sync_lock);
854
0
  VERIFY(dlil_pending_thread_cnt > 0);
855
0
  dlil_pending_thread_cnt--;
856
0
  if (dlil_pending_thread_cnt == 0) {
857
0
    wakeup(&dlil_pending_thread_cnt);
858
0
  }
859
0
  lck_mtx_unlock(&dlil_thread_sync_lock);
860
0
}
861
862
int
863
proto_hash_value(u_int32_t protocol_family)
864
75.4k
{
865
  /*
866
   * dlil_proto_unplumb_all() depends on the mapping between
867
   * the hash bucket index and the protocol family defined
868
   * here; future changes must be applied there as well.
869
   */
870
75.4k
  switch (protocol_family) {
871
6.02k
  case PF_INET:
872
6.02k
    return 0;
873
56.6k
  case PF_INET6:
874
56.6k
    return 1;
875
0
  case PF_VLAN:
876
0
    return 2;
877
0
  case PF_802154:
878
0
    return 3;
879
0
  case PF_UNSPEC:
880
12.8k
  default:
881
12.8k
    return 4;
882
75.4k
  }
883
75.4k
}
884
885
/*
886
 * Caller must already be holding ifnet lock.
887
 */
888
static struct if_proto *
889
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
890
70.3k
{
891
70.3k
  struct if_proto *proto = NULL;
892
70.3k
  u_int32_t i = proto_hash_value(protocol_family);
893
894
70.3k
  ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
895
896
70.3k
  if (ifp->if_proto_hash != NULL) {
897
70.3k
    proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
898
70.3k
  }
899
900
70.3k
  while (proto != NULL && proto->protocol_family != protocol_family) {
901
0
    proto = SLIST_NEXT(proto, next_hash);
902
0
  }
903
904
70.3k
  if (proto != NULL) {
905
57.5k
    if_proto_ref(proto);
906
57.5k
  }
907
908
70.3k
  return proto;
909
70.3k
}
910
911
static void
912
if_proto_ref(struct if_proto *proto)
913
57.5k
{
914
57.5k
  atomic_add_32(&proto->refcount, 1);
915
57.5k
}
916
917
extern void if_rtproto_del(struct ifnet *ifp, int protocol);
918
919
static void
920
if_proto_free(struct if_proto *proto)
921
57.5k
{
922
57.5k
  u_int32_t oldval;
923
57.5k
  struct ifnet *ifp = proto->ifp;
924
57.5k
  u_int32_t proto_family = proto->protocol_family;
925
57.5k
  struct kev_dl_proto_data ev_pr_data;
926
927
57.5k
  oldval = atomic_add_32_ov(&proto->refcount, -1);
928
57.5k
  if (oldval > 1) {
929
57.5k
    return;
930
57.5k
  }
931
932
0
  if (proto->proto_kpi == kProtoKPI_v1) {
933
0
    if (proto->kpi.v1.detached) {
934
0
      proto->kpi.v1.detached(ifp, proto->protocol_family);
935
0
    }
936
0
  }
937
0
  if (proto->proto_kpi == kProtoKPI_v2) {
938
0
    if (proto->kpi.v2.detached) {
939
0
      proto->kpi.v2.detached(ifp, proto->protocol_family);
940
0
    }
941
0
  }
942
943
  /*
944
   * Cleanup routes that may still be in the routing table for that
945
   * interface/protocol pair.
946
   */
947
0
  if_rtproto_del(ifp, proto_family);
948
949
0
  ifnet_lock_shared(ifp);
950
951
  /* No more reference on this, protocol must have been detached */
952
0
  VERIFY(proto->detached);
953
954
  /*
955
   * The reserved field carries the number of protocol still attached
956
   * (subject to change)
957
   */
958
0
  ev_pr_data.proto_family = proto_family;
959
0
  ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
960
961
0
  ifnet_lock_done(ifp);
962
963
0
  dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
964
0
      (struct net_event_data *)&ev_pr_data,
965
0
      sizeof(struct kev_dl_proto_data));
966
967
0
  if (ev_pr_data.proto_remaining_count == 0) {
968
    /*
969
     * The protocol count has gone to zero, mark the interface down.
970
     * This used to be done by configd.KernelEventMonitor, but that
971
     * is inherently prone to races (rdar://problem/30810208).
972
     */
973
0
    (void) ifnet_set_flags(ifp, 0, IFF_UP);
974
0
    (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
975
0
    dlil_post_sifflags_msg(ifp);
976
0
  }
977
978
0
  zfree(dlif_proto_zone, proto);
979
0
}
980
981
__private_extern__ void
982
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
983
70.3k
{
984
#if !MACH_ASSERT
985
#pragma unused(ifp)
986
#endif
987
70.3k
  unsigned int type = 0;
988
70.3k
  int ass = 1;
989
990
70.3k
  switch (what) {
991
23
  case IFNET_LCK_ASSERT_EXCLUSIVE:
992
23
    type = LCK_RW_ASSERT_EXCLUSIVE;
993
23
    break;
994
995
1
  case IFNET_LCK_ASSERT_SHARED:
996
1
    type = LCK_RW_ASSERT_SHARED;
997
1
    break;
998
999
70.3k
  case IFNET_LCK_ASSERT_OWNED:
1000
70.3k
    type = LCK_RW_ASSERT_HELD;
1001
70.3k
    break;
1002
1003
0
  case IFNET_LCK_ASSERT_NOTOWNED:
1004
    /* nothing to do here for RW lock; bypass assert */
1005
0
    ass = 0;
1006
0
    break;
1007
1008
0
  default:
1009
0
    panic("bad ifnet assert type: %d", what);
1010
    /* NOTREACHED */
1011
70.3k
  }
1012
70.3k
  if (ass) {
1013
70.3k
    LCK_RW_ASSERT(&ifp->if_lock, type);
1014
70.3k
  }
1015
70.3k
}
1016
1017
__private_extern__ void
1018
ifnet_lock_shared(struct ifnet *ifp)
1019
206k
{
1020
206k
  lck_rw_lock_shared(&ifp->if_lock);
1021
206k
}
1022
1023
__private_extern__ void
1024
ifnet_lock_exclusive(struct ifnet *ifp)
1025
67.1k
{
1026
67.1k
  lck_rw_lock_exclusive(&ifp->if_lock);
1027
67.1k
}
1028
1029
__private_extern__ void
1030
ifnet_lock_done(struct ifnet *ifp)
1031
274k
{
1032
274k
  lck_rw_done(&ifp->if_lock);
1033
274k
}
1034
1035
#if INET
1036
__private_extern__ void
1037
if_inetdata_lock_shared(struct ifnet *ifp)
1038
0
{
1039
0
  lck_rw_lock_shared(&ifp->if_inetdata_lock);
1040
0
}
1041
1042
__private_extern__ void
1043
if_inetdata_lock_exclusive(struct ifnet *ifp)
1044
0
{
1045
0
  lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
1046
0
}
1047
1048
__private_extern__ void
1049
if_inetdata_lock_done(struct ifnet *ifp)
1050
0
{
1051
0
  lck_rw_done(&ifp->if_inetdata_lock);
1052
0
}
1053
#endif
1054
1055
__private_extern__ void
1056
if_inet6data_lock_shared(struct ifnet *ifp)
1057
180k
{
1058
180k
  lck_rw_lock_shared(&ifp->if_inet6data_lock);
1059
180k
}
1060
1061
__private_extern__ void
1062
if_inet6data_lock_exclusive(struct ifnet *ifp)
1063
4
{
1064
4
  lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1065
4
}
1066
1067
__private_extern__ void
1068
if_inet6data_lock_done(struct ifnet *ifp)
1069
180k
{
1070
180k
  lck_rw_done(&ifp->if_inet6data_lock);
1071
180k
}
1072
1073
__private_extern__ void
1074
ifnet_head_lock_shared(void)
1075
1.09M
{
1076
1.09M
  lck_rw_lock_shared(&ifnet_head_lock);
1077
1.09M
}
1078
1079
__private_extern__ void
1080
ifnet_head_lock_exclusive(void)
1081
641
{
1082
641
  lck_rw_lock_exclusive(&ifnet_head_lock);
1083
641
}
1084
1085
__private_extern__ void
1086
ifnet_head_done(void)
1087
1.09M
{
1088
1.09M
  lck_rw_done(&ifnet_head_lock);
1089
1.09M
}
1090
1091
__private_extern__ void
1092
ifnet_head_assert_exclusive(void)
1093
0
{
1094
0
  LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1095
0
}
1096
1097
/*
1098
 * dlil_ifp_protolist
1099
 * - get the list of protocols attached to the interface, or just the number
1100
 *   of attached protocols
1101
 * - if the number returned is greater than 'list_count', truncation occurred
1102
 *
1103
 * Note:
1104
 * - caller must already be holding ifnet lock.
1105
 */
1106
static u_int32_t
1107
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1108
    u_int32_t list_count)
1109
8
{
1110
8
  u_int32_t       count = 0;
1111
8
  int             i;
1112
1113
8
  ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1114
1115
8
  if (ifp->if_proto_hash == NULL) {
1116
0
    goto done;
1117
0
  }
1118
1119
48
  for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1120
40
    struct if_proto *proto;
1121
40
    SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1122
6
      if (list != NULL && count < list_count) {
1123
0
        list[count] = proto->protocol_family;
1124
0
      }
1125
6
      count++;
1126
6
    }
1127
40
  }
1128
8
done:
1129
8
  return count;
1130
8
}
1131
1132
__private_extern__ u_int32_t
1133
if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1134
0
{
1135
0
  ifnet_lock_shared(ifp);
1136
0
  count = dlil_ifp_protolist(ifp, protolist, count);
1137
0
  ifnet_lock_done(ifp);
1138
0
  return count;
1139
0
}
1140
1141
__private_extern__ void
1142
if_free_protolist(u_int32_t *list)
1143
0
{
1144
0
  _FREE(list, M_TEMP);
1145
0
}
1146
1147
__private_extern__ int
1148
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1149
    u_int32_t event_code, struct net_event_data *event_data,
1150
    u_int32_t event_data_len)
1151
6
{
1152
6
  struct net_event_data ev_data;
1153
6
  struct kev_msg ev_msg;
1154
1155
6
  bzero(&ev_msg, sizeof(ev_msg));
1156
6
  bzero(&ev_data, sizeof(ev_data));
1157
  /*
1158
   * a net event always starts with a net_event_data structure
1159
   * but the caller can generate a simple net event or
1160
   * provide a longer event structure to post
1161
   */
1162
6
  ev_msg.vendor_code      = KEV_VENDOR_APPLE;
1163
6
  ev_msg.kev_class        = KEV_NETWORK_CLASS;
1164
6
  ev_msg.kev_subclass     = event_subclass;
1165
6
  ev_msg.event_code       = event_code;
1166
1167
6
  if (event_data == NULL) {
1168
4
    event_data = &ev_data;
1169
4
    event_data_len = sizeof(struct net_event_data);
1170
4
  }
1171
1172
6
  strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1173
6
  event_data->if_family = ifp->if_family;
1174
6
  event_data->if_unit   = (u_int32_t)ifp->if_unit;
1175
1176
6
  ev_msg.dv[0].data_length = event_data_len;
1177
6
  ev_msg.dv[0].data_ptr    = event_data;
1178
6
  ev_msg.dv[1].data_length = 0;
1179
1180
6
  bool update_generation = true;
1181
6
  if (event_subclass == KEV_DL_SUBCLASS) {
1182
    /* Don't update interface generation for frequent link quality and state changes  */
1183
6
    switch (event_code) {
1184
0
    case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1185
0
    case KEV_DL_RRC_STATE_CHANGED:
1186
0
    case KEV_DL_NODE_PRESENCE:
1187
0
    case KEV_DL_NODE_ABSENCE:
1188
0
    case KEV_DL_MASTER_ELECTED:
1189
0
      update_generation = false;
1190
0
      break;
1191
6
    default:
1192
6
      break;
1193
6
    }
1194
6
  }
1195
1196
6
  return dlil_event_internal(ifp, &ev_msg, update_generation);
1197
6
}
1198
1199
__private_extern__ int
1200
dlil_alloc_local_stats(struct ifnet *ifp)
1201
4
{
1202
4
  int ret = EINVAL;
1203
4
  void *buf, *base, **pbuf;
1204
1205
4
  if (ifp == NULL) {
1206
0
    goto end;
1207
0
  }
1208
1209
4
  if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1210
    /* allocate tcpstat_local structure */
1211
4
    buf = zalloc_flags(dlif_tcpstat_zone, Z_WAITOK | Z_ZERO);
1212
4
    if (buf == NULL) {
1213
0
      ret = ENOMEM;
1214
0
      goto end;
1215
0
    }
1216
1217
    /* Get the 64-bit aligned base address for this object */
1218
4
    base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1219
4
        sizeof(u_int64_t));
1220
4
    VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1221
4
        ((intptr_t)buf + dlif_tcpstat_bufsize));
1222
1223
    /*
1224
     * Wind back a pointer size from the aligned base and
1225
     * save the original address so we can free it later.
1226
     */
1227
0
    pbuf = (void **)((intptr_t)base - sizeof(void *));
1228
4
    *pbuf = buf;
1229
4
    ifp->if_tcp_stat = base;
1230
1231
    /* allocate udpstat_local structure */
1232
4
    buf = zalloc_flags(dlif_udpstat_zone, Z_WAITOK | Z_ZERO);
1233
4
    if (buf == NULL) {
1234
0
      ret = ENOMEM;
1235
0
      goto end;
1236
0
    }
1237
1238
    /* Get the 64-bit aligned base address for this object */
1239
4
    base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1240
4
        sizeof(u_int64_t));
1241
4
    VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1242
4
        ((intptr_t)buf + dlif_udpstat_bufsize));
1243
1244
    /*
1245
     * Wind back a pointer size from the aligned base and
1246
     * save the original address so we can free it later.
1247
     */
1248
0
    pbuf = (void **)((intptr_t)base - sizeof(void *));
1249
4
    *pbuf = buf;
1250
4
    ifp->if_udp_stat = base;
1251
1252
4
    VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
1253
4
        IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));
1254
1255
0
    ret = 0;
1256
4
  }
1257
1258
4
  if (ifp->if_ipv4_stat == NULL) {
1259
4
    MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1260
4
        sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1261
4
    if (ifp->if_ipv4_stat == NULL) {
1262
0
      ret = ENOMEM;
1263
0
      goto end;
1264
0
    }
1265
4
  }
1266
1267
4
  if (ifp->if_ipv6_stat == NULL) {
1268
4
    MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1269
4
        sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1270
4
    if (ifp->if_ipv6_stat == NULL) {
1271
0
      ret = ENOMEM;
1272
0
      goto end;
1273
0
    }
1274
4
  }
1275
4
end:
1276
4
  if (ifp != NULL && ret != 0) {
1277
0
    if (ifp->if_tcp_stat != NULL) {
1278
0
      pbuf = (void **)
1279
0
          ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
1280
0
      zfree(dlif_tcpstat_zone, *pbuf);
1281
0
      ifp->if_tcp_stat = NULL;
1282
0
    }
1283
0
    if (ifp->if_udp_stat != NULL) {
1284
0
      pbuf = (void **)
1285
0
          ((intptr_t)ifp->if_udp_stat - sizeof(void *));
1286
0
      zfree(dlif_udpstat_zone, *pbuf);
1287
0
      ifp->if_udp_stat = NULL;
1288
0
    }
1289
0
    if (ifp->if_ipv4_stat != NULL) {
1290
0
      FREE(ifp->if_ipv4_stat, M_TEMP);
1291
0
      ifp->if_ipv4_stat = NULL;
1292
0
    }
1293
0
    if (ifp->if_ipv6_stat != NULL) {
1294
0
      FREE(ifp->if_ipv6_stat, M_TEMP);
1295
0
      ifp->if_ipv6_stat = NULL;
1296
0
    }
1297
0
  }
1298
1299
4
  return ret;
1300
4
}
1301
1302
static void
1303
dlil_reset_rxpoll_params(ifnet_t ifp)
1304
4
{
1305
4
  ASSERT(ifp != NULL);
1306
4
  ifnet_set_poll_cycle(ifp, NULL);
1307
4
  ifp->if_poll_update = 0;
1308
4
  ifp->if_poll_flags = 0;
1309
4
  ifp->if_poll_req = 0;
1310
4
  ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
1311
4
  bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
1312
4
  bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
1313
4
  bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
1314
4
  net_timerclear(&ifp->if_poll_mode_holdtime);
1315
4
  net_timerclear(&ifp->if_poll_mode_lasttime);
1316
4
  net_timerclear(&ifp->if_poll_sample_holdtime);
1317
4
  net_timerclear(&ifp->if_poll_sample_lasttime);
1318
4
  net_timerclear(&ifp->if_poll_dbg_lasttime);
1319
4
}
1320
1321
static int
1322
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp,
1323
    thread_continue_t *thfunc)
1324
4
{
1325
4
  boolean_t dlil_rxpoll_input;
1326
4
  thread_continue_t func = NULL;
1327
4
  u_int32_t limit;
1328
4
  int error = 0;
1329
1330
4
  dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
1331
0
      (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));
1332
1333
  /* default strategy utilizes the DLIL worker thread */
1334
4
  inp->dlth_strategy = dlil_input_async;
1335
1336
  /* NULL ifp indicates the main input thread, called at dlil_init time */
1337
4
  if (ifp == NULL) {
1338
    /*
1339
     * Main input thread only.
1340
     */
1341
1
    func = dlil_main_input_thread_func;
1342
1
    VERIFY(inp == dlil_main_input_thread);
1343
0
    (void) strlcat(inp->dlth_name,
1344
1
        "main_input", DLIL_THREADNAME_LEN);
1345
3
  } else if (dlil_rxpoll_input) {
1346
    /*
1347
     * Legacy (non-netif) hybrid polling.
1348
     */
1349
0
    func = dlil_rxpoll_input_thread_func;
1350
0
    VERIFY(inp != dlil_main_input_thread);
1351
0
    (void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
1352
0
        "%s_input_poll", if_name(ifp));
1353
3
  } else if (net_async || (ifp->if_xflags & IFXF_LEGACY)) {
1354
    /*
1355
     * Asynchronous strategy.
1356
     */
1357
3
    func = dlil_input_thread_func;
1358
3
    VERIFY(inp != dlil_main_input_thread);
1359
3
    (void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
1360
3
        "%s_input", if_name(ifp));
1361
3
  } else {
1362
    /*
1363
     * Synchronous strategy if there's a netif below and
1364
     * the device isn't capable of hybrid polling.
1365
     */
1366
0
    ASSERT(func == NULL);
1367
0
    ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
1368
0
    VERIFY(inp != dlil_main_input_thread);
1369
0
    ASSERT(!inp->dlth_affinity);
1370
0
    inp->dlth_strategy = dlil_input_sync;
1371
0
  }
1372
4
  VERIFY(inp->dlth_thread == THREAD_NULL);
1373
1374
  /* let caller know */
1375
4
  if (thfunc != NULL) {
1376
3
    *thfunc = func;
1377
3
  }
1378
1379
4
  inp->dlth_lock_grp = lck_grp_alloc_init(inp->dlth_name,
1380
4
      dlil_grp_attributes);
1381
4
  lck_mtx_init(&inp->dlth_lock, inp->dlth_lock_grp, dlil_lck_attributes);
1382
1383
4
  inp->dlth_ifp = ifp; /* NULL for main input thread */
1384
  /*
1385
   * For interfaces that support opportunistic polling, set the
1386
   * low and high watermarks for outstanding inbound packets/bytes.
1387
   * Also define freeze times for transitioning between modes
1388
   * and updating the average.
1389
   */
1390
4
  if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1391
0
    limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1392
0
    if (ifp->if_xflags & IFXF_LEGACY) {
1393
0
      (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1394
0
    }
1395
4
  } else {
1396
4
    limit = (u_int32_t)-1;
1397
4
  }
1398
1399
4
  _qinit(&inp->dlth_pkts, Q_DROPTAIL, limit, QP_MBUF);
1400
4
  if (inp == dlil_main_input_thread) {
1401
1
    struct dlil_main_threading_info *inpm =
1402
1
        (struct dlil_main_threading_info *)inp;
1403
1
    _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1404
1
  }
1405
1406
4
  if (func == NULL) {
1407
0
    ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
1408
0
    ASSERT(error == 0);
1409
0
    error = ENODEV;
1410
0
    goto done;
1411
0
  }
1412
1413
4
  error = kernel_thread_start(func, inp, &inp->dlth_thread);
1414
4
  if (error == KERN_SUCCESS) {
1415
4
    thread_precedence_policy_data_t info;
1416
4
    __unused kern_return_t kret;
1417
1418
4
    bzero(&info, sizeof(info));
1419
4
    info.importance = 0;
1420
4
    kret = thread_policy_set(inp->dlth_thread,
1421
4
        THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
1422
4
        THREAD_PRECEDENCE_POLICY_COUNT);
1423
4
    ASSERT(kret == KERN_SUCCESS);
1424
    /*
1425
     * We create an affinity set so that the matching workloop
1426
     * thread or the starter thread (for loopback) can be
1427
     * scheduled on the same processor set as the input thread.
1428
     */
1429
4
    if (net_affinity) {
1430
0
      struct thread *tp = inp->dlth_thread;
1431
0
      u_int32_t tag;
1432
      /*
1433
       * Randomize to reduce the probability
1434
       * of affinity tag namespace collision.
1435
       */
1436
0
      read_frandom(&tag, sizeof(tag));
1437
0
      if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1438
0
        thread_reference(tp);
1439
0
        inp->dlth_affinity_tag = tag;
1440
0
        inp->dlth_affinity = TRUE;
1441
0
      }
1442
0
    }
1443
4
  } else if (inp == dlil_main_input_thread) {
1444
0
    panic_plain("%s: couldn't create main input thread", __func__);
1445
    /* NOTREACHED */
1446
0
  } else {
1447
0
    panic_plain("%s: couldn't create %s input thread", __func__,
1448
0
        if_name(ifp));
1449
    /* NOTREACHED */
1450
0
  }
1451
4
  OSAddAtomic(1, &cur_dlil_input_threads);
1452
1453
4
done:
1454
4
  return error;
1455
4
}
1456
1457
#if TEST_INPUT_THREAD_TERMINATION
1458
static int
1459
sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1460
{
1461
#pragma unused(arg1, arg2)
1462
  uint32_t i;
1463
  int err;
1464
1465
  i = if_input_thread_termination_spin;
1466
1467
  err = sysctl_handle_int(oidp, &i, 0, req);
1468
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
1469
    return err;
1470
  }
1471
1472
  if (net_rxpoll == 0) {
1473
    return ENXIO;
1474
  }
1475
1476
  if_input_thread_termination_spin = i;
1477
  return err;
1478
}
1479
#endif /* TEST_INPUT_THREAD_TERMINATION */
1480
1481
static void
1482
dlil_clean_threading_info(struct dlil_threading_info *inp)
1483
0
{
1484
0
  lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
1485
0
  lck_grp_free(inp->dlth_lock_grp);
1486
0
  inp->dlth_lock_grp = NULL;
1487
1488
0
  inp->dlth_flags = 0;
1489
0
  inp->dlth_wtot = 0;
1490
0
  bzero(inp->dlth_name, sizeof(inp->dlth_name));
1491
0
  inp->dlth_ifp = NULL;
1492
0
  VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts));
1493
0
  qlimit(&inp->dlth_pkts) = 0;
1494
0
  bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));
1495
1496
0
  VERIFY(!inp->dlth_affinity);
1497
0
  inp->dlth_thread = THREAD_NULL;
1498
0
  inp->dlth_strategy = NULL;
1499
0
  VERIFY(inp->dlth_driver_thread == THREAD_NULL);
1500
0
  VERIFY(inp->dlth_poller_thread == THREAD_NULL);
1501
0
  VERIFY(inp->dlth_affinity_tag == 0);
1502
0
#if IFNET_INPUT_SANITY_CHK
1503
0
  inp->dlth_pkts_cnt = 0;
1504
0
#endif /* IFNET_INPUT_SANITY_CHK */
1505
0
}
1506
1507
static void
1508
dlil_terminate_input_thread(struct dlil_threading_info *inp)
1509
0
{
1510
0
  struct ifnet *ifp = inp->dlth_ifp;
1511
0
  classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
1512
1513
0
  VERIFY(current_thread() == inp->dlth_thread);
1514
0
  VERIFY(inp != dlil_main_input_thread);
1515
1516
0
  OSAddAtomic(-1, &cur_dlil_input_threads);
1517
1518
#if TEST_INPUT_THREAD_TERMINATION
1519
  { /* do something useless that won't get optimized away */
1520
    uint32_t        v = 1;
1521
    for (uint32_t i = 0;
1522
        i < if_input_thread_termination_spin;
1523
        i++) {
1524
      v = (i + 1) * v;
1525
    }
1526
    DLIL_PRINTF("the value is %d\n", v);
1527
  }
1528
#endif /* TEST_INPUT_THREAD_TERMINATION */
1529
1530
0
  lck_mtx_lock_spin(&inp->dlth_lock);
1531
0
  _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
1532
0
  VERIFY((inp->dlth_flags & DLIL_INPUT_TERMINATE) != 0);
1533
0
  inp->dlth_flags |= DLIL_INPUT_TERMINATE_COMPLETE;
1534
0
  wakeup_one((caddr_t)&inp->dlth_flags);
1535
0
  lck_mtx_unlock(&inp->dlth_lock);
1536
1537
  /* free up pending packets */
1538
0
  if (pkt.cp_mbuf != NULL) {
1539
0
    mbuf_freem_list(pkt.cp_mbuf);
1540
0
  }
1541
1542
  /* for the extra refcnt from kernel_thread_start() */
1543
0
  thread_deallocate(current_thread());
1544
1545
0
  if (dlil_verbose) {
1546
0
    DLIL_PRINTF("%s: input thread terminated\n",
1547
0
        if_name(ifp));
1548
0
  }
1549
1550
  /* this is the end */
1551
0
  thread_terminate(current_thread());
1552
  /* NOTREACHED */
1553
0
}
1554
1555
static kern_return_t
1556
dlil_affinity_set(struct thread *tp, u_int32_t tag)
1557
0
{
1558
0
  thread_affinity_policy_data_t policy;
1559
1560
0
  bzero(&policy, sizeof(policy));
1561
0
  policy.affinity_tag = tag;
1562
0
  return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1563
0
             (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
1564
0
}
1565
1566
void
1567
dlil_init(void)
1568
1
{
1569
1
  thread_t thread = THREAD_NULL;
1570
1571
  /*
1572
   * The following fields must be 64-bit aligned for atomic operations.
1573
   */
1574
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1575
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1576
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1577
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1578
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1579
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1580
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1581
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1582
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1583
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1584
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1585
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1586
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1587
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1588
1
  IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1589
1590
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1591
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1592
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1593
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1594
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1595
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1596
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1597
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1598
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1599
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1600
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1601
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1602
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1603
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1604
1
  IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1605
1606
  /*
1607
   * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1608
   */
1609
1
  _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1610
1
  _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1611
1
  _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1612
1
  _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1613
1
  _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1614
1
  _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1615
1
  _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1616
1
  _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1617
1
  _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1618
1
  _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1619
1
  _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1620
1
  _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1621
1
  _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1622
1
  _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1623
1624
  /*
1625
   * ... as well as the mbuf checksum flags counterparts.
1626
   */
1627
1
  _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1628
1
  _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1629
1
  _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1630
1
  _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1631
1
  _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1632
1
  _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1633
1
  _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1634
1
  _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1635
1
  _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1636
1
  _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1637
1
  _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1638
1639
  /*
1640
   * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1641
   */
1642
1
  _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1643
1
  _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1644
1645
1
  _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1646
1
  _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1647
1
  _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1648
1
  _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1649
1650
1
  _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1651
1
  _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1652
1
  _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1653
1654
1
  _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1655
1
  _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1656
1
  _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1657
1
  _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1658
1
  _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1659
1
  _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1660
1
  _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1661
1
  _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1662
1
  _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1663
1
  _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1664
1
  _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1665
1
  _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1666
1
  _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1667
1
  _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1668
1
  _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1669
1
  _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1670
1
  _CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
1671
1
  _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
1672
1
  _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);
1673
1674
1
  _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1675
1
  _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1676
1
  _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1677
1
  _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1678
1
  _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1679
1
  _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1680
1
  _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1681
1
  _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
1682
1
  _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);
1683
1684
1
  _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1685
1
  _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1686
1687
1
  PE_parse_boot_argn("net_affinity", &net_affinity,
1688
1
      sizeof(net_affinity));
1689
1690
1
  PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
1691
1692
1
  PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
1693
1694
1
  PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));
1695
1696
1
  PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
1697
1698
1
  VERIFY(dlil_pending_thread_cnt == 0);
1699
1
  dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
1700
1
      sizeof(struct dlil_ifnet_dbg);
1701
  /* Enforce 64-bit alignment for dlil_ifnet structure */
1702
1
  dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
1703
1
  dlif_bufsize = (uint32_t)P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
1704
1
  dlif_zone = zone_create(DLIF_ZONE_NAME, dlif_bufsize, ZC_ZFREE_CLEARMEM);
1705
1706
1
  dlif_tcpstat_size = sizeof(struct tcpstat_local);
1707
  /* Enforce 64-bit alignment for tcpstat_local structure */
1708
1
  dlif_tcpstat_bufsize =
1709
1
      dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
1710
1
  dlif_tcpstat_bufsize = (uint32_t)
1711
1
      P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
1712
1
  dlif_tcpstat_zone = zone_create(DLIF_TCPSTAT_ZONE_NAME,
1713
1
      dlif_tcpstat_bufsize, ZC_ZFREE_CLEARMEM);
1714
1715
1
  dlif_udpstat_size = sizeof(struct udpstat_local);
1716
  /* Enforce 64-bit alignment for udpstat_local structure */
1717
1
  dlif_udpstat_bufsize =
1718
1
      dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
1719
1
  dlif_udpstat_bufsize = (uint32_t)
1720
1
      P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
1721
1
  dlif_udpstat_zone = zone_create(DLIF_UDPSTAT_ZONE_NAME,
1722
1
      dlif_udpstat_bufsize, ZC_ZFREE_CLEARMEM);
1723
1724
1
  eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1725
1726
1
  TAILQ_INIT(&dlil_ifnet_head);
1727
1
  TAILQ_INIT(&ifnet_head);
1728
1
  TAILQ_INIT(&ifnet_detaching_head);
1729
1
  TAILQ_INIT(&ifnet_ordered_head);
1730
1731
  /* Setup the lock groups we will use */
1732
1
  dlil_grp_attributes = lck_grp_attr_alloc_init();
1733
1734
1
  dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1735
1
      dlil_grp_attributes);
1736
1
  ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1737
1
      dlil_grp_attributes);
1738
1
  ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1739
1
      dlil_grp_attributes);
1740
1
  ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1741
1
      dlil_grp_attributes);
1742
1
  ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1743
1
      dlil_grp_attributes);
1744
1745
  /* Setup the lock attributes we will use */
1746
1
  dlil_lck_attributes = lck_attr_alloc_init();
1747
1748
1
  ifnet_lock_attr = lck_attr_alloc_init();
1749
1750
1
  lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1751
1
      dlil_lck_attributes);
1752
1
  lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1753
1
  lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes);
1754
1755
  /* Setup interface flow control related items */
1756
1
  lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1757
1758
  /* Initialize interface address subsystem */
1759
1
  ifa_init();
1760
1761
1
#if PF
1762
  /* Initialize the packet filter */
1763
1
  pfinit();
1764
1
#endif /* PF */
1765
1766
  /* Initialize queue algorithms */
1767
1
  classq_init();
1768
1769
  /* Initialize packet schedulers */
1770
1
  pktsched_init();
1771
1772
  /* Initialize flow advisory subsystem */
1773
1
  flowadv_init();
1774
1775
  /* Initialize the pktap virtual interface */
1776
1
  pktap_init();
1777
1778
  /* Initialize the service class to dscp map */
1779
1
  net_qos_map_init();
1780
1781
  /* Initialize the interface port list */
1782
1
  if_ports_used_init();
1783
1784
  /* Initialize the interface low power mode event handler */
1785
1
  if_low_power_evhdlr_init();
1786
1787
1
#if DEBUG || DEVELOPMENT
1788
  /* Run self-tests */
1789
1
  dlil_verify_sum16();
1790
1
#endif /* DEBUG || DEVELOPMENT */
1791
1792
  /* Initialize link layer table */
1793
1
  lltable_glbl_init();
1794
1795
  /*
1796
   * Create and start up the main DLIL input thread and the interface
1797
   * detacher threads once everything is initialized.
1798
   */
1799
1
  dlil_incr_pending_thread_count();
1800
1
  (void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);
1801
1802
  /*
1803
   * Create ifnet detacher thread.
1804
   * When an interface gets detached, part of the detach processing
1805
   * is delayed. The interface is added to delayed detach list
1806
   * and this thread is woken up to call ifnet_detach_final
1807
   * on these interfaces.
1808
   */
1809
1
  dlil_incr_pending_thread_count();
1810
1
  if (kernel_thread_start(ifnet_detacher_thread_func,
1811
1
      NULL, &thread) != KERN_SUCCESS) {
1812
0
    panic_plain("%s: couldn't create detacher thread", __func__);
1813
    /* NOTREACHED */
1814
0
  }
1815
1
  thread_deallocate(thread);
1816
1817
  /*
1818
   * Wait for the created kernel threads for dlil to get
1819
   * scheduled and run at least once before we proceed
1820
   */
1821
1
  lck_mtx_lock(&dlil_thread_sync_lock);
1822
  // while (dlil_pending_thread_cnt != 0) {
1823
  //  DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
1824
  //      "threads to get scheduled at least once.\n", __func__);
1825
  //  (void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
1826
  //      (PZERO - 1), __func__, NULL);
1827
  //  LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
1828
  // }
1829
1
  lck_mtx_unlock(&dlil_thread_sync_lock);
1830
1
  DLIL_PRINTF("%s: All the created dlil kernel threads have been "
1831
1
      "scheduled at least once. Proceeding.\n", __func__);
1832
1
}
1833
1834
static void
1835
if_flt_monitor_busy(struct ifnet *ifp)
1836
65.2k
{
1837
65.2k
  LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1838
1839
65.2k
  ++ifp->if_flt_busy;
1840
65.2k
  VERIFY(ifp->if_flt_busy != 0);
1841
65.2k
}
1842
1843
static void
1844
if_flt_monitor_unbusy(struct ifnet *ifp)
1845
65.2k
{
1846
65.2k
  if_flt_monitor_leave(ifp);
1847
65.2k
}
1848
1849
static void
1850
if_flt_monitor_enter(struct ifnet *ifp)
1851
0
{
1852
0
  LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1853
1854
0
  while (ifp->if_flt_busy) {
1855
0
    ++ifp->if_flt_waiters;
1856
0
    (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1857
0
        (PZERO - 1), "if_flt_monitor", NULL);
1858
0
  }
1859
0
  if_flt_monitor_busy(ifp);
1860
0
}
1861
1862
static void
1863
if_flt_monitor_leave(struct ifnet *ifp)
1864
65.2k
{
1865
65.2k
  LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1866
1867
65.2k
  VERIFY(ifp->if_flt_busy != 0);
1868
0
  --ifp->if_flt_busy;
1869
1870
65.2k
  if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1871
0
    ifp->if_flt_waiters = 0;
1872
0
    wakeup(&ifp->if_flt_head);
1873
0
  }
1874
65.2k
}
1875
1876
__private_extern__ int
1877
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1878
    interface_filter_t *filter_ref, u_int32_t flags)
1879
0
{
1880
0
  int retval = 0;
1881
0
  struct ifnet_filter *filter = NULL;
1882
1883
0
  ifnet_head_lock_shared();
1884
  /* Check that the interface is in the global list */
1885
0
  if (!ifnet_lookup(ifp)) {
1886
0
    retval = ENXIO;
1887
0
    goto done;
1888
0
  }
1889
1890
0
  filter = zalloc_flags(dlif_filt_zone, Z_WAITOK | Z_ZERO);
1891
0
  if (filter == NULL) {
1892
0
    retval = ENOMEM;
1893
0
    goto done;
1894
0
  }
1895
1896
  /* refcnt held above during lookup */
1897
0
  filter->filt_flags = flags;
1898
0
  filter->filt_ifp = ifp;
1899
0
  filter->filt_cookie = if_filter->iff_cookie;
1900
0
  filter->filt_name = if_filter->iff_name;
1901
0
  filter->filt_protocol = if_filter->iff_protocol;
1902
  /*
1903
   * Do not install filter callbacks for internal coproc interface
1904
   */
1905
0
  if (!IFNET_IS_INTCOPROC(ifp)) {
1906
0
    filter->filt_input = if_filter->iff_input;
1907
0
    filter->filt_output = if_filter->iff_output;
1908
0
    filter->filt_event = if_filter->iff_event;
1909
0
    filter->filt_ioctl = if_filter->iff_ioctl;
1910
0
  }
1911
0
  filter->filt_detached = if_filter->iff_detached;
1912
1913
0
  lck_mtx_lock(&ifp->if_flt_lock);
1914
0
  if_flt_monitor_enter(ifp);
1915
1916
0
  LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1917
0
  TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1918
1919
0
  if_flt_monitor_leave(ifp);
1920
0
  lck_mtx_unlock(&ifp->if_flt_lock);
1921
1922
0
  *filter_ref = filter;
1923
1924
  /*
1925
   * Bump filter count and route_generation ID to let TCP
1926
   * know it shouldn't do TSO on this connection
1927
   */
1928
0
  if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1929
0
    ifnet_filter_update_tso(TRUE);
1930
0
  }
1931
0
  OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1932
0
  INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1933
0
  if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1934
0
    INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1935
0
  }
1936
0
  if (dlil_verbose) {
1937
0
    DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
1938
0
        if_filter->iff_name);
1939
0
  }
1940
0
done:
1941
0
  ifnet_head_done();
1942
0
  if (retval != 0 && ifp != NULL) {
1943
0
    DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1944
0
        if_name(ifp), if_filter->iff_name, retval);
1945
0
  }
1946
0
  if (retval != 0 && filter != NULL) {
1947
0
    zfree(dlif_filt_zone, filter);
1948
0
  }
1949
1950
0
  return retval;
1951
0
}
1952
1953
static int
1954
dlil_detach_filter_internal(interface_filter_t  filter, int detached)
1955
0
{
1956
0
  int retval = 0;
1957
1958
0
  if (detached == 0) {
1959
0
    ifnet_t ifp = NULL;
1960
1961
0
    ifnet_head_lock_shared();
1962
0
    TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1963
0
      interface_filter_t entry = NULL;
1964
1965
0
      lck_mtx_lock(&ifp->if_flt_lock);
1966
0
      TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1967
0
        if (entry != filter || entry->filt_skip) {
1968
0
          continue;
1969
0
        }
1970
        /*
1971
         * We've found a match; since it's possible
1972
         * that the thread gets blocked in the monitor,
1973
         * we do the lock dance.  Interface should
1974
         * not be detached since we still have a use
1975
         * count held during filter attach.
1976
         */
1977
0
        entry->filt_skip = 1;   /* skip input/output */
1978
0
        lck_mtx_unlock(&ifp->if_flt_lock);
1979
0
        ifnet_head_done();
1980
1981
0
        lck_mtx_lock(&ifp->if_flt_lock);
1982
0
        if_flt_monitor_enter(ifp);
1983
0
        LCK_MTX_ASSERT(&ifp->if_flt_lock,
1984
0
            LCK_MTX_ASSERT_OWNED);
1985
1986
        /* Remove the filter from the list */
1987
0
        TAILQ_REMOVE(&ifp->if_flt_head, filter,
1988
0
            filt_next);
1989
1990
0
        if_flt_monitor_leave(ifp);
1991
0
        lck_mtx_unlock(&ifp->if_flt_lock);
1992
0
        if (dlil_verbose) {
1993
0
          DLIL_PRINTF("%s: %s filter detached\n",
1994
0
              if_name(ifp), filter->filt_name);
1995
0
        }
1996
0
        goto destroy;
1997
0
      }
1998
0
      lck_mtx_unlock(&ifp->if_flt_lock);
1999
0
    }
2000
0
    ifnet_head_done();
2001
2002
    /* filter parameter is not a valid filter ref */
2003
0
    retval = EINVAL;
2004
0
    goto done;
2005
0
  }
2006
2007
0
  if (dlil_verbose) {
2008
0
    DLIL_PRINTF("%s filter detached\n", filter->filt_name);
2009
0
  }
2010
2011
0
destroy:
2012
2013
  /* Call the detached function if there is one */
2014
0
  if (filter->filt_detached) {
2015
0
    filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
2016
0
  }
2017
2018
  /*
2019
   * Decrease filter count and route_generation ID to let TCP
2020
   * know it should reevalute doing TSO or not
2021
   */
2022
0
  if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
2023
0
    ifnet_filter_update_tso(FALSE);
2024
0
  }
2025
2026
0
  VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
2027
2028
  /* Free the filter */
2029
0
  zfree(dlif_filt_zone, filter);
2030
0
  filter = NULL;
2031
0
done:
2032
0
  if (retval != 0 && filter != NULL) {
2033
0
    DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
2034
0
        filter->filt_name, retval);
2035
0
  }
2036
2037
0
  return retval;
2038
0
}
2039
2040
__private_extern__ void
2041
dlil_detach_filter(interface_filter_t filter)
2042
0
{
2043
0
  if (filter == NULL) {
2044
0
    return;
2045
0
  }
2046
0
  dlil_detach_filter_internal(filter, 0);
2047
0
}
2048
2049
__private_extern__ boolean_t
2050
dlil_has_ip_filter(void)
2051
0
{
2052
0
  boolean_t has_filter = (net_api_stats.nas_ipf_add_count > 0);
2053
0
  DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
2054
0
  return has_filter;
2055
0
}
2056
2057
__private_extern__ boolean_t
2058
dlil_has_if_filter(struct ifnet *ifp)
2059
0
{
2060
0
  boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
2061
0
  DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
2062
0
  return has_filter;
2063
0
}
2064
2065
static inline void
2066
dlil_input_wakeup(struct dlil_threading_info *inp)
2067
19.4k
{
2068
19.4k
  LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
2069
2070
19.4k
  inp->dlth_flags |= DLIL_INPUT_WAITING;
2071
19.4k
  if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
2072
19.4k
    inp->dlth_wtot++;
2073
19.4k
    wakeup_one((caddr_t)&inp->dlth_flags);
2074
19.4k
  }
2075
19.4k
}
2076
2077
__attribute__((noreturn))
2078
static void
2079
dlil_main_input_thread_func(void *v, wait_result_t w)
2080
0
{
2081
0
#pragma unused(w)
2082
0
  struct dlil_threading_info *inp = v;
2083
2084
0
  VERIFY(inp == dlil_main_input_thread);
2085
0
  VERIFY(inp->dlth_ifp == NULL);
2086
0
  VERIFY(current_thread() == inp->dlth_thread);
2087
2088
0
  lck_mtx_lock(&inp->dlth_lock);
2089
0
  VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2090
0
  (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2091
0
  inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2092
  /* wake up once to get out of embryonic state */
2093
0
  dlil_input_wakeup(inp);
2094
0
  lck_mtx_unlock(&inp->dlth_lock);
2095
0
  (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2096
  /* NOTREACHED */
2097
0
  __builtin_unreachable();
2098
0
}
2099
2100
/*
2101
 * Main input thread:
2102
 *
2103
 *   a) handles all inbound packets for lo0
2104
 *   b) handles all inbound packets for interfaces with no dedicated
2105
 *  input thread (e.g. anything but Ethernet/PDP or those that support
2106
 *  opportunistic polling.)
2107
 *   c) protocol registrations
2108
 *   d) packet injections
2109
 */
2110
__attribute__((noreturn))
2111
static void
2112
dlil_main_input_thread_cont(void *v, wait_result_t wres)
2113
0
{
2114
0
  struct dlil_main_threading_info *inpm = v;
2115
0
  struct dlil_threading_info *inp = v;
2116
2117
  /* main input thread is uninterruptible */
2118
0
  VERIFY(wres != THREAD_INTERRUPTED);
2119
0
  lck_mtx_lock_spin(&inp->dlth_lock);
2120
0
  VERIFY(!(inp->dlth_flags & (DLIL_INPUT_TERMINATE |
2121
0
      DLIL_INPUT_RUNNING)));
2122
0
  inp->dlth_flags |= DLIL_INPUT_RUNNING;
2123
2124
0
  while (1) {
2125
0
    struct mbuf *m = NULL, *m_loop = NULL;
2126
0
    u_int32_t m_cnt, m_cnt_loop;
2127
0
    classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2128
0
    boolean_t proto_req;
2129
0
    boolean_t embryonic;
2130
2131
0
    inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2132
2133
0
    if (__improbable(embryonic =
2134
0
        (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2135
0
      inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2136
0
    }
2137
2138
0
    proto_req = (inp->dlth_flags &
2139
0
        (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
2140
2141
    /* Packets for non-dedicated interfaces other than lo0 */
2142
0
    m_cnt = qlen(&inp->dlth_pkts);
2143
0
    _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2144
0
    m = pkt.cp_mbuf;
2145
2146
    /* Packets exclusive to lo0 */
2147
0
    m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
2148
0
    _getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
2149
0
    m_loop = pkt.cp_mbuf;
2150
2151
0
    inp->dlth_wtot = 0;
2152
2153
0
    lck_mtx_unlock(&inp->dlth_lock);
2154
2155
0
    if (__improbable(embryonic)) {
2156
0
      dlil_decr_pending_thread_count();
2157
0
    }
2158
2159
    /*
2160
     * NOTE warning %%% attention !!!!
2161
     * We should think about putting some thread starvation
2162
     * safeguards if we deal with long chains of packets.
2163
     */
2164
0
    if (__probable(m_loop != NULL)) {
2165
0
      dlil_input_packet_list_extended(lo_ifp, m_loop,
2166
0
          m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
2167
0
    }
2168
2169
0
    if (__probable(m != NULL)) {
2170
0
      dlil_input_packet_list_extended(NULL, m,
2171
0
          m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
2172
0
    }
2173
2174
0
    if (__improbable(proto_req)) {
2175
0
      proto_input_run();
2176
0
    }
2177
2178
0
    lck_mtx_lock_spin(&inp->dlth_lock);
2179
0
    VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2180
    /* main input thread cannot be terminated */
2181
0
    VERIFY(!(inp->dlth_flags & DLIL_INPUT_TERMINATE));
2182
0
    if (!(inp->dlth_flags & ~DLIL_INPUT_RUNNING)) {
2183
0
      break;
2184
0
    }
2185
0
  }
2186
2187
0
  inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2188
0
  (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2189
0
  lck_mtx_unlock(&inp->dlth_lock);
2190
0
  (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2191
2192
0
  VERIFY(0);      /* we should never get here */
2193
  /* NOTREACHED */
2194
0
  __builtin_unreachable();
2195
0
}
2196
2197
/*
2198
 * Input thread for interfaces with legacy input model.
2199
 */
2200
__attribute__((noreturn))
2201
static void
2202
dlil_input_thread_func(void *v, wait_result_t w)
2203
0
{
2204
0
#pragma unused(w)
2205
0
  char thread_name[MAXTHREADNAMESIZE];
2206
0
  struct dlil_threading_info *inp = v;
2207
0
  struct ifnet *ifp = inp->dlth_ifp;
2208
2209
0
  VERIFY(inp != dlil_main_input_thread);
2210
0
  VERIFY(ifp != NULL);
2211
0
  VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
2212
0
      !(ifp->if_xflags & IFXF_LEGACY));
2213
0
  VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
2214
0
      !(ifp->if_xflags & IFXF_LEGACY));
2215
0
  VERIFY(current_thread() == inp->dlth_thread);
2216
2217
  /* construct the name for this thread, and then apply it */
2218
0
  bzero(thread_name, sizeof(thread_name));
2219
0
  (void) snprintf(thread_name, sizeof(thread_name),
2220
0
      "dlil_input_%s", ifp->if_xname);
2221
0
  thread_set_thread_name(inp->dlth_thread, thread_name);
2222
2223
0
  lck_mtx_lock(&inp->dlth_lock);
2224
0
  VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2225
0
  (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2226
0
  inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2227
  /* wake up once to get out of embryonic state */
2228
0
  dlil_input_wakeup(inp);
2229
0
  lck_mtx_unlock(&inp->dlth_lock);
2230
0
  (void) thread_block_parameter(dlil_input_thread_cont, inp);
2231
  /* NOTREACHED */
2232
0
  __builtin_unreachable();
2233
0
}
2234
2235
__attribute__((noreturn))
2236
static void
2237
dlil_input_thread_cont(void *v, wait_result_t wres)
2238
0
{
2239
0
  struct dlil_threading_info *inp = v;
2240
0
  struct ifnet *ifp = inp->dlth_ifp;
2241
2242
0
  lck_mtx_lock_spin(&inp->dlth_lock);
2243
0
  if (__improbable(wres == THREAD_INTERRUPTED ||
2244
0
      (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
2245
0
    goto terminate;
2246
0
  }
2247
2248
0
  VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
2249
0
  inp->dlth_flags |= DLIL_INPUT_RUNNING;
2250
2251
0
  while (1) {
2252
0
    struct mbuf *m = NULL;
2253
0
    classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2254
0
    boolean_t notify = FALSE;
2255
0
    boolean_t embryonic;
2256
0
    u_int32_t m_cnt;
2257
2258
0
    inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2259
2260
0
    if (__improbable(embryonic =
2261
0
        (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2262
0
      inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2263
0
    }
2264
2265
    /*
2266
     * Protocol registration and injection must always use
2267
     * the main input thread; in theory the latter can utilize
2268
     * the corresponding input thread where the packet arrived
2269
     * on, but that requires our knowing the interface in advance
2270
     * (and the benefits might not worth the trouble.)
2271
     */
2272
0
    VERIFY(!(inp->dlth_flags &
2273
0
        (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2274
2275
    /* Packets for this interface */
2276
0
    m_cnt = qlen(&inp->dlth_pkts);
2277
0
    _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2278
0
    m = pkt.cp_mbuf;
2279
2280
0
    inp->dlth_wtot = 0;
2281
2282
0
    notify = dlil_input_stats_sync(ifp, inp);
2283
2284
0
    lck_mtx_unlock(&inp->dlth_lock);
2285
2286
0
    if (__improbable(embryonic)) {
2287
0
      ifnet_decr_pending_thread_count(ifp);
2288
0
    }
2289
2290
0
    if (__improbable(notify)) {
2291
0
      ifnet_notify_data_threshold(ifp);
2292
0
    }
2293
2294
    /*
2295
     * NOTE warning %%% attention !!!!
2296
     * We should think about putting some thread starvation
2297
     * safeguards if we deal with long chains of packets.
2298
     */
2299
0
    if (__probable(m != NULL)) {
2300
0
      dlil_input_packet_list_extended(NULL, m,
2301
0
          m_cnt, ifp->if_poll_mode);
2302
0
    }
2303
2304
0
    lck_mtx_lock_spin(&inp->dlth_lock);
2305
0
    VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2306
0
    if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
2307
0
        DLIL_INPUT_TERMINATE))) {
2308
0
      break;
2309
0
    }
2310
0
  }
2311
2312
0
  inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2313
2314
0
  if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
2315
0
terminate:
2316
0
    lck_mtx_unlock(&inp->dlth_lock);
2317
0
    dlil_terminate_input_thread(inp);
2318
    /* NOTREACHED */
2319
0
  } else {
2320
0
    (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2321
0
    lck_mtx_unlock(&inp->dlth_lock);
2322
0
    (void) thread_block_parameter(dlil_input_thread_cont, inp);
2323
    /* NOTREACHED */
2324
0
  }
2325
2326
0
  VERIFY(0);      /* we should never get here */
2327
  /* NOTREACHED */
2328
0
  __builtin_unreachable();
2329
0
}
2330
2331
/*
2332
 * Input thread for interfaces with opportunistic polling input model.
2333
 */
2334
__attribute__((noreturn))
2335
static void
2336
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2337
0
{
2338
0
#pragma unused(w)
2339
0
  char thread_name[MAXTHREADNAMESIZE];
2340
0
  struct dlil_threading_info *inp = v;
2341
0
  struct ifnet *ifp = inp->dlth_ifp;
2342
2343
0
  VERIFY(inp != dlil_main_input_thread);
2344
0
  VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
2345
0
      (ifp->if_xflags & IFXF_LEGACY));
2346
0
  VERIFY(current_thread() == inp->dlth_thread);
2347
2348
  /* construct the name for this thread, and then apply it */
2349
0
  bzero(thread_name, sizeof(thread_name));
2350
0
  (void) snprintf(thread_name, sizeof(thread_name),
2351
0
      "dlil_input_poll_%s", ifp->if_xname);
2352
0
  thread_set_thread_name(inp->dlth_thread, thread_name);
2353
2354
0
  lck_mtx_lock(&inp->dlth_lock);
2355
0
  VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2356
0
  (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2357
0
  inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2358
  /* wake up once to get out of embryonic state */
2359
0
  dlil_input_wakeup(inp);
2360
0
  lck_mtx_unlock(&inp->dlth_lock);
2361
0
  (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
2362
  /* NOTREACHED */
2363
0
  __builtin_unreachable();
2364
0
}
2365
2366
__attribute__((noreturn))
2367
static void
2368
dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
2369
0
{
2370
0
  struct dlil_threading_info *inp = v;
2371
0
  struct ifnet *ifp = inp->dlth_ifp;
2372
0
  struct timespec ts;
2373
2374
0
  lck_mtx_lock_spin(&inp->dlth_lock);
2375
0
  if (__improbable(wres == THREAD_INTERRUPTED ||
2376
0
      (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
2377
0
    goto terminate;
2378
0
  }
2379
2380
0
  VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
2381
0
  inp->dlth_flags |= DLIL_INPUT_RUNNING;
2382
2383
0
  while (1) {
2384
0
    struct mbuf *m = NULL;
2385
0
    uint32_t m_cnt, poll_req = 0;
2386
0
    uint64_t m_size = 0;
2387
0
    ifnet_model_t mode;
2388
0
    struct timespec now, delta;
2389
0
    classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2390
0
    boolean_t notify;
2391
0
    boolean_t embryonic;
2392
0
    uint64_t ival;
2393
2394
0
    inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2395
2396
0
    if (__improbable(embryonic =
2397
0
        (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2398
0
      inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2399
0
      goto skip;
2400
0
    }
2401
2402
0
    if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
2403
0
      ival = IF_RXPOLL_INTERVALTIME_MIN;
2404
0
    }
2405
2406
    /* Link parameters changed? */
2407
0
    if (ifp->if_poll_update != 0) {
2408
0
      ifp->if_poll_update = 0;
2409
0
      (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2410
0
    }
2411
2412
    /* Current operating mode */
2413
0
    mode = ifp->if_poll_mode;
2414
2415
    /*
2416
     * Protocol registration and injection must always use
2417
     * the main input thread; in theory the latter can utilize
2418
     * the corresponding input thread where the packet arrived
2419
     * on, but that requires our knowing the interface in advance
2420
     * (and the benefits might not worth the trouble.)
2421
     */
2422
0
    VERIFY(!(inp->dlth_flags &
2423
0
        (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2424
2425
    /* Total count of all packets */
2426
0
    m_cnt = qlen(&inp->dlth_pkts);
2427
2428
    /* Total bytes of all packets */
2429
0
    m_size = qsize(&inp->dlth_pkts);
2430
2431
    /* Packets for this interface */
2432
0
    _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2433
0
    m = pkt.cp_mbuf;
2434
0
    VERIFY(m != NULL || m_cnt == 0);
2435
2436
0
    nanouptime(&now);
2437
0
    if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
2438
0
      *(&ifp->if_poll_sample_lasttime) = *(&now);
2439
0
    }
2440
2441
0
    net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
2442
0
    if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
2443
0
      u_int32_t ptot, btot;
2444
2445
      /* Accumulate statistics for current sampling */
2446
0
      PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);
2447
2448
0
      if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
2449
0
        goto skip;
2450
0
      }
2451
2452
0
      *(&ifp->if_poll_sample_lasttime) = *(&now);
2453
2454
      /* Calculate min/max of inbound bytes */
2455
0
      btot = (u_int32_t)ifp->if_poll_sstats.bytes;
2456
0
      if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
2457
0
        ifp->if_rxpoll_bmin = btot;
2458
0
      }
2459
0
      if (btot > ifp->if_rxpoll_bmax) {
2460
0
        ifp->if_rxpoll_bmax = btot;
2461
0
      }
2462
2463
      /* Calculate EWMA of inbound bytes */
2464
0
      DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);
2465
2466
      /* Calculate min/max of inbound packets */
2467
0
      ptot = (u_int32_t)ifp->if_poll_sstats.packets;
2468
0
      if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
2469
0
        ifp->if_rxpoll_pmin = ptot;
2470
0
      }
2471
0
      if (ptot > ifp->if_rxpoll_pmax) {
2472
0
        ifp->if_rxpoll_pmax = ptot;
2473
0
      }
2474
2475
      /* Calculate EWMA of inbound packets */
2476
0
      DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);
2477
2478
      /* Reset sampling statistics */
2479
0
      PKTCNTR_CLEAR(&ifp->if_poll_sstats);
2480
2481
      /* Calculate EWMA of wakeup requests */
2482
0
      DLIL_EWMA(ifp->if_rxpoll_wavg, inp->dlth_wtot,
2483
0
          if_rxpoll_decay);
2484
0
      inp->dlth_wtot = 0;
2485
2486
0
      if (dlil_verbose) {
2487
0
        if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
2488
0
          *(&ifp->if_poll_dbg_lasttime) = *(&now);
2489
0
        }
2490
0
        net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
2491
0
        if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2492
0
          *(&ifp->if_poll_dbg_lasttime) = *(&now);
2493
0
          DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
2494
0
              "limits [%d/%d], wreq avg %d "
2495
0
              "limits [%d/%d], bytes avg %d "
2496
0
              "limits [%d/%d]\n", if_name(ifp),
2497
0
              (ifp->if_poll_mode ==
2498
0
              IFNET_MODEL_INPUT_POLL_ON) ?
2499
0
              "ON" : "OFF", ifp->if_rxpoll_pavg,
2500
0
              ifp->if_rxpoll_pmax,
2501
0
              ifp->if_rxpoll_plowat,
2502
0
              ifp->if_rxpoll_phiwat,
2503
0
              ifp->if_rxpoll_wavg,
2504
0
              ifp->if_rxpoll_wlowat,
2505
0
              ifp->if_rxpoll_whiwat,
2506
0
              ifp->if_rxpoll_bavg,
2507
0
              ifp->if_rxpoll_blowat,
2508
0
              ifp->if_rxpoll_bhiwat);
2509
0
        }
2510
0
      }
2511
2512
      /* Perform mode transition, if necessary */
2513
0
      if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
2514
0
        *(&ifp->if_poll_mode_lasttime) = *(&now);
2515
0
      }
2516
2517
0
      net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
2518
0
      if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
2519
0
        goto skip;
2520
0
      }
2521
2522
0
      if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
2523
0
          ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
2524
0
          ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
2525
0
        mode = IFNET_MODEL_INPUT_POLL_OFF;
2526
0
      } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
2527
0
          (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
2528
0
          ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
2529
0
          ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
2530
0
        mode = IFNET_MODEL_INPUT_POLL_ON;
2531
0
      }
2532
2533
0
      if (mode != ifp->if_poll_mode) {
2534
0
        ifp->if_poll_mode = mode;
2535
0
        *(&ifp->if_poll_mode_lasttime) = *(&now);
2536
0
        poll_req++;
2537
0
      }
2538
0
    }
2539
0
skip:
2540
0
    notify = dlil_input_stats_sync(ifp, inp);
2541
2542
0
    lck_mtx_unlock(&inp->dlth_lock);
2543
2544
0
    if (__improbable(embryonic)) {
2545
0
      ifnet_decr_pending_thread_count(ifp);
2546
0
    }
2547
2548
0
    if (__improbable(notify)) {
2549
0
      ifnet_notify_data_threshold(ifp);
2550
0
    }
2551
2552
    /*
2553
     * If there's a mode change and interface is still attached,
2554
     * perform a downcall to the driver for the new mode.  Also
2555
     * hold an IO refcnt on the interface to prevent it from
2556
     * being detached (will be release below.)
2557
     */
2558
0
    if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2559
0
      struct ifnet_model_params p = {
2560
0
        .model = mode, .reserved = { 0 }
2561
0
      };
2562
0
      errno_t err;
2563
2564
0
      if (dlil_verbose) {
2565
0
        DLIL_PRINTF("%s: polling is now %s, "
2566
0
            "pkts avg %d max %d limits [%d/%d], "
2567
0
            "wreq avg %d limits [%d/%d], "
2568
0
            "bytes avg %d limits [%d/%d]\n",
2569
0
            if_name(ifp),
2570
0
            (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2571
0
            "ON" : "OFF", ifp->if_rxpoll_pavg,
2572
0
            ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
2573
0
            ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
2574
0
            ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
2575
0
            ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
2576
0
            ifp->if_rxpoll_bhiwat);
2577
0
      }
2578
2579
0
      if ((err = ((*ifp->if_input_ctl)(ifp,
2580
0
          IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
2581
0
        DLIL_PRINTF("%s: error setting polling mode "
2582
0
            "to %s (%d)\n", if_name(ifp),
2583
0
            (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2584
0
            "ON" : "OFF", err);
2585
0
      }
2586
2587
0
      switch (mode) {
2588
0
      case IFNET_MODEL_INPUT_POLL_OFF:
2589
0
        ifnet_set_poll_cycle(ifp, NULL);
2590
0
        ifp->if_rxpoll_offreq++;
2591
0
        if (err != 0) {
2592
0
          ifp->if_rxpoll_offerr++;
2593
0
        }
2594
0
        break;
2595
2596
0
      case IFNET_MODEL_INPUT_POLL_ON:
2597
0
        net_nsectimer(&ival, &ts);
2598
0
        ifnet_set_poll_cycle(ifp, &ts);
2599
0
        ifnet_poll(ifp);
2600
0
        ifp->if_rxpoll_onreq++;
2601
0
        if (err != 0) {
2602
0
          ifp->if_rxpoll_onerr++;
2603
0
        }
2604
0
        break;
2605
2606
0
      default:
2607
0
        VERIFY(0);
2608
        /* NOTREACHED */
2609
0
      }
2610
2611
      /* Release the IO refcnt */
2612
0
      ifnet_decr_iorefcnt(ifp);
2613
0
    }
2614
2615
    /*
2616
     * NOTE warning %%% attention !!!!
2617
     * We should think about putting some thread starvation
2618
     * safeguards if we deal with long chains of packets.
2619
     */
2620
0
    if (__probable(m != NULL)) {
2621
0
      dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2622
0
    }
2623
2624
0
    lck_mtx_lock_spin(&inp->dlth_lock);
2625
0
    VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2626
0
    if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
2627
0
        DLIL_INPUT_TERMINATE))) {
2628
0
      break;
2629
0
    }
2630
0
  }
2631
2632
0
  inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2633
2634
0
  if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
2635
0
terminate:
2636
0
    lck_mtx_unlock(&inp->dlth_lock);
2637
0
    dlil_terminate_input_thread(inp);
2638
    /* NOTREACHED */
2639
0
  } else {
2640
0
    (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2641
0
    lck_mtx_unlock(&inp->dlth_lock);
2642
0
    (void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
2643
0
        inp);
2644
    /* NOTREACHED */
2645
0
  }
2646
2647
0
  VERIFY(0);      /* we should never get here */
2648
  /* NOTREACHED */
2649
0
  __builtin_unreachable();
2650
0
}
2651
2652
errno_t
2653
dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
2654
0
{
2655
0
  if (p != NULL) {
2656
0
    if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2657
0
        (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
2658
0
      return EINVAL;
2659
0
    }
2660
0
    if (p->packets_lowat != 0 &&    /* hiwat must be non-zero */
2661
0
        p->packets_lowat >= p->packets_hiwat) {
2662
0
      return EINVAL;
2663
0
    }
2664
0
    if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2665
0
        (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
2666
0
      return EINVAL;
2667
0
    }
2668
0
    if (p->bytes_lowat != 0 &&      /* hiwat must be non-zero */
2669
0
        p->bytes_lowat >= p->bytes_hiwat) {
2670
0
      return EINVAL;
2671
0
    }
2672
0
    if (p->interval_time != 0 &&
2673
0
        p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
2674
0
      p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2675
0
    }
2676
0
  }
2677
0
  return 0;
2678
0
}
2679
2680
void
2681
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2682
0
{
2683
0
  u_int64_t sample_holdtime, inbw;
2684
2685
0
  if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2686
0
    sample_holdtime = 0;    /* polling is disabled */
2687
0
    ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
2688
0
        ifp->if_rxpoll_blowat = 0;
2689
0
    ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
2690
0
        ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
2691
0
    ifp->if_rxpoll_plim = 0;
2692
0
    ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2693
0
  } else {
2694
0
    u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2695
0
    u_int64_t ival;
2696
0
    unsigned int n, i;
2697
2698
0
    for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2699
0
      if (inbw < rxpoll_tbl[i].speed) {
2700
0
        break;
2701
0
      }
2702
0
      n = i;
2703
0
    }
2704
    /* auto-tune if caller didn't specify a value */
2705
0
    plowat = ((p == NULL || p->packets_lowat == 0) ?
2706
0
        rxpoll_tbl[n].plowat : p->packets_lowat);
2707
0
    phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2708
0
        rxpoll_tbl[n].phiwat : p->packets_hiwat);
2709
0
    blowat = ((p == NULL || p->bytes_lowat == 0) ?
2710
0
        rxpoll_tbl[n].blowat : p->bytes_lowat);
2711
0
    bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2712
0
        rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2713
0
    plim = ((p == NULL || p->packets_limit == 0) ?
2714
0
        if_rxpoll_max : p->packets_limit);
2715
0
    ival = ((p == NULL || p->interval_time == 0) ?
2716
0
        if_rxpoll_interval_time : p->interval_time);
2717
2718
0
    VERIFY(plowat != 0 && phiwat != 0);
2719
0
    VERIFY(blowat != 0 && bhiwat != 0);
2720
0
    VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2721
2722
0
    sample_holdtime = if_rxpoll_sample_holdtime;
2723
0
    ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
2724
0
    ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
2725
0
    ifp->if_rxpoll_plowat = plowat;
2726
0
    ifp->if_rxpoll_phiwat = phiwat;
2727
0
    ifp->if_rxpoll_blowat = blowat;
2728
0
    ifp->if_rxpoll_bhiwat = bhiwat;
2729
0
    ifp->if_rxpoll_plim = plim;
2730
0
    ifp->if_rxpoll_ival = ival;
2731
0
  }
2732
2733
0
  net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
2734
0
  net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);
2735
2736
0
  if (dlil_verbose) {
2737
0
    DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
2738
0
        "poll interval %llu nsec, pkts per poll %u, "
2739
0
        "pkt limits [%u/%u], wreq limits [%u/%u], "
2740
0
        "bytes limits [%u/%u]\n", if_name(ifp),
2741
0
        inbw, sample_holdtime, ifp->if_rxpoll_ival,
2742
0
        ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
2743
0
        ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
2744
0
        ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
2745
0
        ifp->if_rxpoll_bhiwat);
2746
0
  }
2747
0
}
2748
2749
/*
2750
 * Must be called on an attached ifnet (caller is expected to check.)
2751
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2752
 */
2753
errno_t
2754
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2755
    boolean_t locked)
2756
0
{
2757
0
  errno_t err;
2758
0
  struct dlil_threading_info *inp;
2759
2760
0
  VERIFY(ifp != NULL);
2761
0
  if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2762
0
    return ENXIO;
2763
0
  }
2764
0
  err = dlil_rxpoll_validate_params(p);
2765
0
  if (err != 0) {
2766
0
    return err;
2767
0
  }
2768
2769
0
  if (!locked) {
2770
0
    lck_mtx_lock(&inp->dlth_lock);
2771
0
  }
2772
0
  LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
2773
  /*
2774
   * Normally, we'd reset the parameters to the auto-tuned values
2775
   * if the the input thread detects a change in link rate.  If the
2776
   * driver provides its own parameters right after a link rate
2777
   * changes, but before the input thread gets to run, we want to
2778
   * make sure to keep the driver's values.  Clearing if_poll_update
2779
   * will achieve that.
2780
   */
2781
0
  if (p != NULL && !locked && ifp->if_poll_update != 0) {
2782
0
    ifp->if_poll_update = 0;
2783
0
  }
2784
0
  dlil_rxpoll_update_params(ifp, p);
2785
0
  if (!locked) {
2786
0
    lck_mtx_unlock(&inp->dlth_lock);
2787
0
  }
2788
0
  return 0;
2789
0
}
2790
2791
/*
2792
 * Must be called on an attached ifnet (caller is expected to check.)
2793
 */
2794
errno_t
2795
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2796
0
{
2797
0
  struct dlil_threading_info *inp;
2798
2799
0
  VERIFY(ifp != NULL && p != NULL);
2800
0
  if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2801
0
    return ENXIO;
2802
0
  }
2803
2804
0
  bzero(p, sizeof(*p));
2805
2806
0
  lck_mtx_lock(&inp->dlth_lock);
2807
0
  p->packets_limit = ifp->if_rxpoll_plim;
2808
0
  p->packets_lowat = ifp->if_rxpoll_plowat;
2809
0
  p->packets_hiwat = ifp->if_rxpoll_phiwat;
2810
0
  p->bytes_lowat = ifp->if_rxpoll_blowat;
2811
0
  p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2812
0
  p->interval_time = ifp->if_rxpoll_ival;
2813
0
  lck_mtx_unlock(&inp->dlth_lock);
2814
2815
0
  return 0;
2816
0
}
2817
2818
errno_t
2819
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2820
    const struct ifnet_stat_increment_param *s)
2821
0
{
2822
0
  return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
2823
0
}
2824
2825
errno_t
2826
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2827
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2828
19.4k
{
2829
19.4k
  return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
2830
19.4k
}
2831
2832
errno_t
2833
ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
2834
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2835
0
{
2836
0
  return ifnet_input_common(ifp, m_head, m_tail, s,
2837
0
             (m_head != NULL), TRUE);
2838
0
}
2839
2840
static errno_t
2841
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2842
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2843
19.4k
{
2844
19.4k
  dlil_input_func input_func;
2845
19.4k
  struct ifnet_stat_increment_param _s;
2846
19.4k
  u_int32_t m_cnt = 0, m_size = 0;
2847
19.4k
  struct mbuf *last;
2848
19.4k
  errno_t err = 0;
2849
2850
19.4k
  if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2851
0
    if (m_head != NULL) {
2852
0
      mbuf_freem_list(m_head);
2853
0
    }
2854
0
    return EINVAL;
2855
0
  }
2856
2857
19.4k
  VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2858
19.4k
  VERIFY(m_tail == NULL || ext);
2859
19.4k
  VERIFY(s != NULL || !ext);
2860
2861
  /*
2862
   * Drop the packet(s) if the parameters are invalid, or if the
2863
   * interface is no longer attached; else hold an IO refcnt to
2864
   * prevent it from being detached (will be released below.)
2865
   */
2866
19.4k
  if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
2867
0
    if (m_head != NULL) {
2868
0
      mbuf_freem_list(m_head);
2869
0
    }
2870
0
    return EINVAL;
2871
0
  }
2872
2873
19.4k
  input_func = ifp->if_input_dlil;
2874
19.4k
  VERIFY(input_func != NULL);
2875
2876
19.4k
  if (m_tail == NULL) {
2877
0
    last = m_head;
2878
0
    while (m_head != NULL) {
2879
0
#if IFNET_INPUT_SANITY_CHK
2880
0
      if (__improbable(dlil_input_sanity_check != 0)) {
2881
0
        DLIL_INPUT_CHECK(last, ifp);
2882
0
      }
2883
0
#endif /* IFNET_INPUT_SANITY_CHK */
2884
0
      m_cnt++;
2885
0
      m_size += m_length(last);
2886
0
      if (mbuf_nextpkt(last) == NULL) {
2887
0
        break;
2888
0
      }
2889
0
      last = mbuf_nextpkt(last);
2890
0
    }
2891
0
    m_tail = last;
2892
19.4k
  } else {
2893
19.4k
#if IFNET_INPUT_SANITY_CHK
2894
19.4k
    if (__improbable(dlil_input_sanity_check != 0)) {
2895
0
      last = m_head;
2896
0
      while (1) {
2897
0
        DLIL_INPUT_CHECK(last, ifp);
2898
0
        m_cnt++;
2899
0
        m_size += m_length(last);
2900
0
        if (mbuf_nextpkt(last) == NULL) {
2901
0
          break;
2902
0
        }
2903
0
        last = mbuf_nextpkt(last);
2904
0
      }
2905
19.4k
    } else {
2906
19.4k
      m_cnt = s->packets_in;
2907
19.4k
      m_size = s->bytes_in;
2908
19.4k
      last = m_tail;
2909
19.4k
    }
2910
#else
2911
    m_cnt = s->packets_in;
2912
    m_size = s->bytes_in;
2913
    last = m_tail;
2914
#endif /* IFNET_INPUT_SANITY_CHK */
2915
19.4k
  }
2916
2917
19.4k
  if (last != m_tail) {
2918
0
    panic_plain("%s: invalid input packet chain for %s, "
2919
0
        "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2920
0
        m_tail, last);
2921
0
  }
2922
2923
  /*
2924
   * Assert packet count only for the extended variant, for backwards
2925
   * compatibility, since this came directly from the device driver.
2926
   * Relax this assertion for input bytes, as the driver may have
2927
   * included the link-layer headers in the computation; hence
2928
   * m_size is just an approximation.
2929
   */
2930
19.4k
  if (ext && s->packets_in != m_cnt) {
2931
0
    panic_plain("%s: input packet count mismatch for %s, "
2932
0
        "%d instead of %d\n", __func__, if_name(ifp),
2933
0
        s->packets_in, m_cnt);
2934
0
  }
2935
2936
19.4k
  if (s == NULL) {
2937
0
    bzero(&_s, sizeof(_s));
2938
0
    s = &_s;
2939
19.4k
  } else {
2940
19.4k
    _s = *s;
2941
19.4k
  }
2942
19.4k
  _s.packets_in = m_cnt;
2943
19.4k
  _s.bytes_in = m_size;
2944
2945
19.4k
  err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2946
2947
19.4k
  if (ifp != lo_ifp) {
2948
    /* Release the IO refcnt */
2949
0
    ifnet_datamov_end(ifp);
2950
0
  }
2951
2952
19.4k
  return err;
2953
19.4k
}
2954
2955
2956
errno_t
2957
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2958
19.4k
{
2959
19.4k
  return ifp->if_output(ifp, m);
2960
19.4k
}
2961
2962
errno_t
2963
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2964
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2965
    boolean_t poll, struct thread *tp)
2966
19.4k
{
2967
19.4k
  struct dlil_threading_info *inp = ifp->if_inp;
2968
2969
19.4k
  if (__improbable(inp == NULL)) {
2970
19.4k
    inp = dlil_main_input_thread;
2971
19.4k
  }
2972
2973
19.4k
  return inp->dlth_strategy(inp, ifp, m_head, m_tail, s, poll, tp);
2974
19.4k
}
2975
2976
static errno_t
2977
dlil_input_async(struct dlil_threading_info *inp,
2978
    struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2979
    const struct ifnet_stat_increment_param *s, boolean_t poll,
2980
    struct thread *tp)
2981
19.4k
{
2982
19.4k
  u_int32_t m_cnt = s->packets_in;
2983
19.4k
  u_int32_t m_size = s->bytes_in;
2984
19.4k
  boolean_t notify = FALSE;
2985
2986
  /*
2987
   * If there is a matching DLIL input thread associated with an
2988
   * affinity set, associate this thread with the same set.  We
2989
   * will only do this once.
2990
   */
2991
19.4k
  lck_mtx_lock_spin(&inp->dlth_lock);
2992
19.4k
  if (inp != dlil_main_input_thread && inp->dlth_affinity && tp != NULL &&
2993
0
      ((!poll && inp->dlth_driver_thread == THREAD_NULL) ||
2994
0
      (poll && inp->dlth_poller_thread == THREAD_NULL))) {
2995
0
    u_int32_t tag = inp->dlth_affinity_tag;
2996
2997
0
    if (poll) {
2998
0
      VERIFY(inp->dlth_poller_thread == THREAD_NULL);
2999
0
      inp->dlth_poller_thread = tp;
3000
0
    } else {
3001
0
      VERIFY(inp->dlth_driver_thread == THREAD_NULL);
3002
0
      inp->dlth_driver_thread = tp;
3003
0
    }
3004
0
    lck_mtx_unlock(&inp->dlth_lock);
3005
3006
    /* Associate the current thread with the new affinity tag */
3007
0
    (void) dlil_affinity_set(tp, tag);
3008
3009
    /*
3010
     * Take a reference on the current thread; during detach,
3011
     * we will need to refer to it in order to tear down its
3012
     * affinity.
3013
     */
3014
0
    thread_reference(tp);
3015
0
    lck_mtx_lock_spin(&inp->dlth_lock);
3016
0
  }
3017
3018
19.4k
  VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
3019
3020
  /*
3021
   * Because of loopbacked multicast we cannot stuff the ifp in
3022
   * the rcvif of the packet header: loopback (lo0) packets use a
3023
   * dedicated list so that we can later associate them with lo_ifp
3024
   * on their way up the stack.  Packets for other interfaces without
3025
   * dedicated input threads go to the regular list.
3026
   */
3027
19.4k
  if (m_head != NULL) {
3028
19.4k
    classq_pkt_t head, tail;
3029
19.4k
    CLASSQ_PKT_INIT_MBUF(&head, m_head);
3030
19.4k
    CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3031
19.4k
    if (inp == dlil_main_input_thread && ifp == lo_ifp) {
3032
19.4k
      struct dlil_main_threading_info *inpm =
3033
19.4k
          (struct dlil_main_threading_info *)inp;
3034
19.4k
      _addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
3035
19.4k
          m_cnt, m_size);
3036
19.4k
    } else {
3037
0
      _addq_multi(&inp->dlth_pkts, &head, &tail,
3038
0
          m_cnt, m_size);
3039
0
    }
3040
19.4k
  }
3041
3042
19.4k
#if IFNET_INPUT_SANITY_CHK
3043
19.4k
  if (__improbable(dlil_input_sanity_check != 0)) {
3044
0
    u_int32_t count = 0, size = 0;
3045
0
    struct mbuf *m0;
3046
3047
0
    for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
3048
0
      size += m_length(m0);
3049
0
      count++;
3050
0
    }
3051
3052
0
    if (count != m_cnt) {
3053
0
      panic_plain("%s: invalid total packet count %u "
3054
0
          "(expected %u)\n", if_name(ifp), count, m_cnt);
3055
      /* NOTREACHED */
3056
0
      __builtin_unreachable();
3057
0
    } else if (size != m_size) {
3058
0
      panic_plain("%s: invalid total packet size %u "
3059
0
          "(expected %u)\n", if_name(ifp), size, m_size);
3060
      /* NOTREACHED */
3061
0
      __builtin_unreachable();
3062
0
    }
3063
3064
0
    inp->dlth_pkts_cnt += m_cnt;
3065
0
  }
3066
19.4k
#endif /* IFNET_INPUT_SANITY_CHK */
3067
3068
19.4k
  dlil_input_stats_add(s, inp, ifp, poll);
3069
  /*
3070
   * If we're using the main input thread, synchronize the
3071
   * stats now since we have the interface context.  All
3072
   * other cases involving dedicated input threads will
3073
   * have their stats synchronized there.
3074
   */
3075
19.4k
  if (inp == dlil_main_input_thread) {
3076
19.4k
    notify = dlil_input_stats_sync(ifp, inp);
3077
19.4k
  }
3078
3079
19.4k
  dlil_input_wakeup(inp);
3080
19.4k
  lck_mtx_unlock(&inp->dlth_lock);
3081
3082
19.4k
  if (notify) {
3083
0
    ifnet_notify_data_threshold(ifp);
3084
0
  }
3085
3086
19.4k
  return 0;
3087
19.4k
}
3088
3089
static errno_t
3090
dlil_input_sync(struct dlil_threading_info *inp,
3091
    struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
3092
    const struct ifnet_stat_increment_param *s, boolean_t poll,
3093
    struct thread *tp)
3094
0
{
3095
0
#pragma unused(tp)
3096
0
  u_int32_t m_cnt = s->packets_in;
3097
0
  u_int32_t m_size = s->bytes_in;
3098
0
  boolean_t notify = FALSE;
3099
0
  classq_pkt_t head, tail;
3100
3101
0
  ASSERT(inp != dlil_main_input_thread);
3102
3103
  /* XXX: should we just assert instead? */
3104
0
  if (__improbable(m_head == NULL)) {
3105
0
    return 0;
3106
0
  }
3107
3108
0
  CLASSQ_PKT_INIT_MBUF(&head, m_head);
3109
0
  CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3110
3111
0
  lck_mtx_lock_spin(&inp->dlth_lock);
3112
0
  _addq_multi(&inp->dlth_pkts, &head, &tail, m_cnt, m_size);
3113
3114
0
#if IFNET_INPUT_SANITY_CHK
3115
0
  if (__improbable(dlil_input_sanity_check != 0)) {
3116
0
    u_int32_t count = 0, size = 0;
3117
0
    struct mbuf *m0;
3118
3119
0
    for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
3120
0
      size += m_length(m0);
3121
0
      count++;
3122
0
    }
3123
3124
0
    if (count != m_cnt) {
3125
0
      panic_plain("%s: invalid total packet count %u "
3126
0
          "(expected %u)\n", if_name(ifp), count, m_cnt);
3127
      /* NOTREACHED */
3128
0
      __builtin_unreachable();
3129
0
    } else if (size != m_size) {
3130
0
      panic_plain("%s: invalid total packet size %u "
3131
0
          "(expected %u)\n", if_name(ifp), size, m_size);
3132
      /* NOTREACHED */
3133
0
      __builtin_unreachable();
3134
0
    }
3135
3136
0
    inp->dlth_pkts_cnt += m_cnt;
3137
0
  }
3138
0
#endif /* IFNET_INPUT_SANITY_CHK */
3139
3140
0
  dlil_input_stats_add(s, inp, ifp, poll);
3141
3142
0
  m_cnt = qlen(&inp->dlth_pkts);
3143
0
  _getq_all(&inp->dlth_pkts, &head, NULL, NULL, NULL);
3144
3145
0
  notify = dlil_input_stats_sync(ifp, inp);
3146
3147
0
  lck_mtx_unlock(&inp->dlth_lock);
3148
3149
0
  if (notify) {
3150
0
    ifnet_notify_data_threshold(ifp);
3151
0
  }
3152
3153
  /*
3154
   * NOTE warning %%% attention !!!!
3155
   * We should think about putting some thread starvation
3156
   * safeguards if we deal with long chains of packets.
3157
   */
3158
0
  if (head.cp_mbuf != NULL) {
3159
0
    dlil_input_packet_list_extended(NULL, head.cp_mbuf,
3160
0
        m_cnt, ifp->if_poll_mode);
3161
0
  }
3162
3163
0
  return 0;
3164
0
}
3165
3166
3167
static void
3168
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
3169
0
{
3170
0
  if (!(ifp->if_eflags & IFEF_TXSTART)) {
3171
0
    return;
3172
0
  }
3173
  /*
3174
   * If the starter thread is inactive, signal it to do work,
3175
   * unless the interface is being flow controlled from below,
3176
   * e.g. a virtual interface being flow controlled by a real
3177
   * network interface beneath it, or it's been disabled via
3178
   * a call to ifnet_disable_output().
3179
   */
3180
0
  lck_mtx_lock_spin(&ifp->if_start_lock);
3181
0
  if (resetfc) {
3182
0
    ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
3183
0
  } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
3184
0
    lck_mtx_unlock(&ifp->if_start_lock);
3185
0
    return;
3186
0
  }
3187
0
  ifp->if_start_req++;
3188
0
  if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
3189
0
      (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
3190
0
      IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
3191
0
      ifp->if_start_delayed == 0)) {
3192
0
    (void) wakeup_one((caddr_t)&ifp->if_start_thread);
3193
0
  }
3194
0
  lck_mtx_unlock(&ifp->if_start_lock);
3195
0
}
3196
3197
void
3198
ifnet_start(struct ifnet *ifp)
3199
0
{
3200
0
  ifnet_start_common(ifp, FALSE);
3201
0
}
3202
3203
__attribute__((noreturn))
3204
static void
3205
ifnet_start_thread_func(void *v, wait_result_t w)
3206
0
{
3207
0
#pragma unused(w)
3208
0
  struct ifnet *ifp = v;
3209
0
  char thread_name[MAXTHREADNAMESIZE];
3210
3211
  /* Construct the name for this thread, and then apply it. */
3212
0
  bzero(thread_name, sizeof(thread_name));
3213
0
  (void) snprintf(thread_name, sizeof(thread_name),
3214
0
      "ifnet_start_%s", ifp->if_xname);
3215
0
  ASSERT(ifp->if_start_thread == current_thread());
3216
0
  thread_set_thread_name(current_thread(), thread_name);
3217
3218
  /*
3219
   * Treat the dedicated starter thread for lo0 as equivalent to
3220
   * the driver workloop thread; if net_affinity is enabled for
3221
   * the main input thread, associate this starter thread to it
3222
   * by binding them with the same affinity tag.  This is done
3223
   * only once (as we only have one lo_ifp which never goes away.)
3224
   */
3225
0
  if (ifp == lo_ifp) {
3226
0
    struct dlil_threading_info *inp = dlil_main_input_thread;
3227
0
    struct thread *tp = current_thread();
3228
3229
0
    lck_mtx_lock(&inp->dlth_lock);
3230
0
    if (inp->dlth_affinity) {
3231
0
      u_int32_t tag = inp->dlth_affinity_tag;
3232
3233
0
      VERIFY(inp->dlth_driver_thread == THREAD_NULL);
3234
0
      VERIFY(inp->dlth_poller_thread == THREAD_NULL);
3235
0
      inp->dlth_driver_thread = tp;
3236
0
      lck_mtx_unlock(&inp->dlth_lock);
3237
3238
      /* Associate this thread with the affinity tag */
3239
0
      (void) dlil_affinity_set(tp, tag);
3240
0
    } else {
3241
0
      lck_mtx_unlock(&inp->dlth_lock);
3242
0
    }
3243
0
  }
3244
3245
0
  lck_mtx_lock(&ifp->if_start_lock);
3246
0
  VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
3247
0
  (void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
3248
0
  ifp->if_start_embryonic = 1;
3249
  /* wake up once to get out of embryonic state */
3250
0
  ifp->if_start_req++;
3251
0
  (void) wakeup_one((caddr_t)&ifp->if_start_thread);
3252
0
  lck_mtx_unlock(&ifp->if_start_lock);
3253
0
  (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3254
  /* NOTREACHED */
3255
0
  __builtin_unreachable();
3256
0
}
3257
3258
__attribute__((noreturn))
3259
static void
3260
ifnet_start_thread_cont(void *v, wait_result_t wres)
3261
0
{
3262
0
  struct ifnet *ifp = v;
3263
0
  struct ifclassq *ifq = &ifp->if_snd;
3264
3265
0
  lck_mtx_lock_spin(&ifp->if_start_lock);
3266
0
  if (__improbable(wres == THREAD_INTERRUPTED ||
3267
0
      ifp->if_start_thread == THREAD_NULL)) {
3268
0
    goto terminate;
3269
0
  }
3270
3271
0
  if (__improbable(ifp->if_start_embryonic)) {
3272
0
    ifp->if_start_embryonic = 0;
3273
0
    lck_mtx_unlock(&ifp->if_start_lock);
3274
0
    ifnet_decr_pending_thread_count(ifp);
3275
0
    lck_mtx_lock_spin(&ifp->if_start_lock);
3276
0
    goto skip;
3277
0
  }
3278
3279
0
  ifp->if_start_active = 1;
3280
3281
  /*
3282
   * Keep on servicing until no more request.
3283
   */
3284
0
  for (;;) {
3285
0
    u_int32_t req = ifp->if_start_req;
3286
0
    if (!IFCQ_IS_EMPTY(ifq) &&
3287
0
        (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3288
0
        ifp->if_start_delayed == 0 &&
3289
0
        IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
3290
0
        (ifp->if_eflags & IFEF_DELAY_START)) {
3291
0
      ifp->if_start_delayed = 1;
3292
0
      ifnet_start_delayed++;
3293
0
      break;
3294
0
    } else {
3295
0
      ifp->if_start_delayed = 0;
3296
0
    }
3297
0
    lck_mtx_unlock(&ifp->if_start_lock);
3298
3299
    /*
3300
     * If no longer attached, don't call start because ifp
3301
     * is being destroyed; else hold an IO refcnt to
3302
     * prevent the interface from being detached (will be
3303
     * released below.)
3304
     */
3305
0
    if (!ifnet_datamov_begin(ifp)) {
3306
0
      lck_mtx_lock_spin(&ifp->if_start_lock);
3307
0
      break;
3308
0
    }
3309
3310
    /* invoke the driver's start routine */
3311
0
    ((*ifp->if_start)(ifp));
3312
3313
    /*
3314
     * Release the io ref count taken above.
3315
     */
3316
0
    ifnet_datamov_end(ifp);
3317
3318
0
    lck_mtx_lock_spin(&ifp->if_start_lock);
3319
3320
    /*
3321
     * If there's no pending request or if the
3322
     * interface has been disabled, we're done.
3323
     */
3324
0
    if (req == ifp->if_start_req ||
3325
0
        (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
3326
0
      break;
3327
0
    }
3328
0
  }
3329
0
skip:
3330
0
  ifp->if_start_req = 0;
3331
0
  ifp->if_start_active = 0;
3332
3333
3334
0
  if (__probable(ifp->if_start_thread != THREAD_NULL)) {
3335
0
    uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3336
0
    struct timespec delay_start_ts;
3337
0
    struct timespec *ts;
3338
3339
    /*
3340
     * Wakeup N ns from now if rate-controlled by TBR, and if
3341
     * there are still packets in the send queue which haven't
3342
     * been dequeued so far; else sleep indefinitely (ts = NULL)
3343
     * until ifnet_start() is called again.
3344
     */
3345
0
    ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
3346
0
        &ifp->if_start_cycle : NULL);
3347
3348
0
    if (ts == NULL && ifp->if_start_delayed == 1) {
3349
0
      delay_start_ts.tv_sec = 0;
3350
0
      delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
3351
0
      ts = &delay_start_ts;
3352
0
    }
3353
3354
0
    if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
3355
0
      ts = NULL;
3356
0
    }
3357
3358
0
    if (__improbable(ts != NULL)) {
3359
0
      clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
3360
0
          (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3361
0
    }
3362
3363
0
    (void) assert_wait_deadline(&ifp->if_start_thread,
3364
0
        THREAD_UNINT, deadline);
3365
0
    lck_mtx_unlock(&ifp->if_start_lock);
3366
0
    (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3367
    /* NOTREACHED */
3368
0
  } else {
3369
0
terminate:
3370
    /* interface is detached? */
3371
0
    ifnet_set_start_cycle(ifp, NULL);
3372
0
    lck_mtx_unlock(&ifp->if_start_lock);
3373
0
    ifnet_purge(ifp);
3374
3375
0
    if (dlil_verbose) {
3376
0
      DLIL_PRINTF("%s: starter thread terminated\n",
3377
0
          if_name(ifp));
3378
0
    }
3379
3380
    /* for the extra refcnt from kernel_thread_start() */
3381
0
    thread_deallocate(current_thread());
3382
    /* this is the end */
3383
0
    thread_terminate(current_thread());
3384
    /* NOTREACHED */
3385
0
  }
3386
3387
  /* must never get here */
3388
0
  VERIFY(0);
3389
  /* NOTREACHED */
3390
0
  __builtin_unreachable();
3391
0
}
3392
3393
void
3394
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
3395
0
{
3396
0
  if (ts == NULL) {
3397
0
    bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
3398
0
  } else {
3399
0
    *(&ifp->if_start_cycle) = *ts;
3400
0
  }
3401
3402
0
  if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3403
0
    DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
3404
0
        if_name(ifp), ts->tv_nsec);
3405
0
  }
3406
0
}
3407
3408
static inline void
3409
ifnet_poll_wakeup(struct ifnet *ifp)
3410
0
{
3411
0
  LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
3412
3413
0
  ifp->if_poll_req++;
3414
0
  if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
3415
0
      ifp->if_poll_thread != THREAD_NULL) {
3416
0
    wakeup_one((caddr_t)&ifp->if_poll_thread);
3417
0
  }
3418
0
}
3419
3420
void
3421
ifnet_poll(struct ifnet *ifp)
3422
0
{
3423
  /*
3424
   * If the poller thread is inactive, signal it to do work.
3425
   */
3426
0
  lck_mtx_lock_spin(&ifp->if_poll_lock);
3427
0
  ifnet_poll_wakeup(ifp);
3428
0
  lck_mtx_unlock(&ifp->if_poll_lock);
3429
0
}
3430
3431
__attribute__((noreturn))
3432
static void
3433
ifnet_poll_thread_func(void *v, wait_result_t w)
3434
0
{
3435
0
#pragma unused(w)
3436
0
  char thread_name[MAXTHREADNAMESIZE];
3437
0
  struct ifnet *ifp = v;
3438
3439
0
  VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3440
0
  VERIFY(current_thread() == ifp->if_poll_thread);
3441
3442
  /* construct the name for this thread, and then apply it */
3443
0
  bzero(thread_name, sizeof(thread_name));
3444
0
  (void) snprintf(thread_name, sizeof(thread_name),
3445
0
      "ifnet_poller_%s", ifp->if_xname);
3446
0
  thread_set_thread_name(ifp->if_poll_thread, thread_name);
3447
3448
0
  lck_mtx_lock(&ifp->if_poll_lock);
3449
0
  VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
3450
0
  (void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
3451
0
  ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
3452
  /* wake up once to get out of embryonic state */
3453
0
  ifnet_poll_wakeup(ifp);
3454
0
  lck_mtx_unlock(&ifp->if_poll_lock);
3455
0
  (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3456
  /* NOTREACHED */
3457
0
  __builtin_unreachable();
3458
0
}
3459
3460
__attribute__((noreturn))
3461
static void
3462
ifnet_poll_thread_cont(void *v, wait_result_t wres)
3463
0
{
3464
0
  struct dlil_threading_info *inp;
3465
0
  struct ifnet *ifp = v;
3466
0
  struct ifnet_stat_increment_param s;
3467
0
  struct timespec start_time;
3468
3469
0
  VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3470
3471
0
  bzero(&s, sizeof(s));
3472
0
  net_timerclear(&start_time);
3473
3474
0
  lck_mtx_lock_spin(&ifp->if_poll_lock);
3475
0
  if (__improbable(wres == THREAD_INTERRUPTED ||
3476
0
      ifp->if_poll_thread == THREAD_NULL)) {
3477
0
    goto terminate;
3478
0
  }
3479
3480
0
  inp = ifp->if_inp;
3481
0
  VERIFY(inp != NULL);
3482
3483
0
  if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
3484
0
    ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
3485
0
    lck_mtx_unlock(&ifp->if_poll_lock);
3486
0
    ifnet_decr_pending_thread_count(ifp);
3487
0
    lck_mtx_lock_spin(&ifp->if_poll_lock);
3488
0
    goto skip;
3489
0
  }
3490
3491
0
  ifp->if_poll_flags |= IF_POLLF_RUNNING;
3492
3493
  /*
3494
   * Keep on servicing until no more request.
3495
   */
3496
0
  for (;;) {
3497
0
    struct mbuf *m_head, *m_tail;
3498
0
    u_int32_t m_lim, m_cnt, m_totlen;
3499
0
    u_int16_t req = ifp->if_poll_req;
3500
3501
0
    m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
3502
0
        MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
3503
0
    lck_mtx_unlock(&ifp->if_poll_lock);
3504
3505
    /*
3506
     * If no longer attached, there's nothing to do;
3507
     * else hold an IO refcnt to prevent the interface
3508
     * from being detached (will be released below.)
3509
     */
3510
0
    if (!ifnet_is_attached(ifp, 1)) {
3511
0
      lck_mtx_lock_spin(&ifp->if_poll_lock);
3512
0
      break;
3513
0
    }
3514
3515
0
    if (dlil_verbose > 1) {
3516
0
      DLIL_PRINTF("%s: polling up to %d pkts, "
3517
0
          "pkts avg %d max %d, wreq avg %d, "
3518
0
          "bytes avg %d\n",
3519
0
          if_name(ifp), m_lim,
3520
0
          ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3521
0
          ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
3522
0
    }
3523
3524
    /* invoke the driver's input poll routine */
3525
0
    ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3526
0
    &m_cnt, &m_totlen));
3527
3528
0
    if (m_head != NULL) {
3529
0
      VERIFY(m_tail != NULL && m_cnt > 0);
3530
3531
0
      if (dlil_verbose > 1) {
3532
0
        DLIL_PRINTF("%s: polled %d pkts, "
3533
0
            "pkts avg %d max %d, wreq avg %d, "
3534
0
            "bytes avg %d\n",
3535
0
            if_name(ifp), m_cnt,
3536
0
            ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3537
0
            ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
3538
0
      }
3539
3540
      /* stats are required for extended variant */
3541
0
      s.packets_in = m_cnt;
3542
0
      s.bytes_in = m_totlen;
3543
3544
0
      (void) ifnet_input_common(ifp, m_head, m_tail,
3545
0
          &s, TRUE, TRUE);
3546
0
    } else {
3547
0
      if (dlil_verbose > 1) {
3548
0
        DLIL_PRINTF("%s: no packets, "
3549
0
            "pkts avg %d max %d, wreq avg %d, "
3550
0
            "bytes avg %d\n",
3551
0
            if_name(ifp), ifp->if_rxpoll_pavg,
3552
0
            ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
3553
0
            ifp->if_rxpoll_bavg);
3554
0
      }
3555
3556
0
      (void) ifnet_input_common(ifp, NULL, NULL,
3557
0
          NULL, FALSE, TRUE);
3558
0
    }
3559
3560
    /* Release the io ref count */
3561
0
    ifnet_decr_iorefcnt(ifp);
3562
3563
0
    lck_mtx_lock_spin(&ifp->if_poll_lock);
3564
3565
    /* if there's no pending request, we're done */
3566
0
    if (req == ifp->if_poll_req ||
3567
0
        ifp->if_poll_thread == THREAD_NULL) {
3568
0
      break;
3569
0
    }
3570
0
  }
3571
0
skip:
3572
0
  ifp->if_poll_req = 0;
3573
0
  ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
3574
3575
0
  if (ifp->if_poll_thread != THREAD_NULL) {
3576
0
    uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3577
0
    struct timespec *ts;
3578
3579
    /*
3580
     * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3581
     * until ifnet_poll() is called again.
3582
     */
3583
0
    ts = &ifp->if_poll_cycle;
3584
0
    if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
3585
0
      ts = NULL;
3586
0
    }
3587
3588
0
    if (ts != NULL) {
3589
0
      clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
3590
0
          (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3591
0
    }
3592
3593
0
    (void) assert_wait_deadline(&ifp->if_poll_thread,
3594
0
        THREAD_UNINT, deadline);
3595
0
    lck_mtx_unlock(&ifp->if_poll_lock);
3596
0
    (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3597
    /* NOTREACHED */
3598
0
  } else {
3599
0
terminate:
3600
    /* interface is detached (maybe while asleep)? */
3601
0
    ifnet_set_poll_cycle(ifp, NULL);
3602
0
    lck_mtx_unlock(&ifp->if_poll_lock);
3603
3604
0
    if (dlil_verbose) {
3605
0
      DLIL_PRINTF("%s: poller thread terminated\n",
3606
0
          if_name(ifp));
3607
0
    }
3608
3609
    /* for the extra refcnt from kernel_thread_start() */
3610
0
    thread_deallocate(current_thread());
3611
    /* this is the end */
3612
0
    thread_terminate(current_thread());
3613
    /* NOTREACHED */
3614
0
  }
3615
3616
  /* must never get here */
3617
0
  VERIFY(0);
3618
  /* NOTREACHED */
3619
0
  __builtin_unreachable();
3620
0
}
3621
3622
void
3623
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3624
8
{
3625
8
  if (ts == NULL) {
3626
8
    bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
3627
8
  } else {
3628
0
    *(&ifp->if_poll_cycle) = *ts;
3629
0
  }
3630
3631
8
  if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3632
0
    DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
3633
0
        if_name(ifp), ts->tv_nsec);
3634
0
  }
3635
8
}
3636
3637
void
3638
ifnet_purge(struct ifnet *ifp)
3639
0
{
3640
0
  if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
3641
0
    if_qflush(ifp, 0);
3642
0
  }
3643
0
}
3644
3645
void
3646
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3647
17.7k
{
3648
17.7k
  IFCQ_LOCK_ASSERT_HELD(ifq);
3649
3650
17.7k
  if (!(IFCQ_IS_READY(ifq))) {
3651
17.7k
    return;
3652
17.7k
  }
3653
3654
0
  if (IFCQ_TBR_IS_ENABLED(ifq)) {
3655
0
    struct tb_profile tb = {
3656
0
      .rate = ifq->ifcq_tbr.tbr_rate_raw,
3657
0
      .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
3658
0
    };
3659
0
    (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3660
0
  }
3661
3662
0
  ifclassq_update(ifq, ev);
3663
0
}
3664
3665
void
3666
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3667
0
{
3668
0
  switch (ev) {
3669
0
  case CLASSQ_EV_LINK_BANDWIDTH:
3670
0
    if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
3671
0
      ifp->if_poll_update++;
3672
0
    }
3673
0
    break;
3674
3675
0
  default:
3676
0
    break;
3677
0
  }
3678
0
}
3679
3680
errno_t
3681
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3682
0
{
3683
0
  struct ifclassq *ifq;
3684
0
  u_int32_t omodel;
3685
0
  errno_t err;
3686
3687
0
  if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
3688
0
    return EINVAL;
3689
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3690
0
    return ENXIO;
3691
0
  }
3692
3693
0
  ifq = &ifp->if_snd;
3694
0
  IFCQ_LOCK(ifq);
3695
0
  omodel = ifp->if_output_sched_model;
3696
0
  ifp->if_output_sched_model = model;
3697
0
  if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
3698
0
    ifp->if_output_sched_model = omodel;
3699
0
  }
3700
0
  IFCQ_UNLOCK(ifq);
3701
3702
0
  return err;
3703
0
}
3704
3705
errno_t
3706
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3707
0
{
3708
0
  if (ifp == NULL) {
3709
0
    return EINVAL;
3710
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3711
0
    return ENXIO;
3712
0
  }
3713
3714
0
  ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3715
3716
0
  return 0;
3717
0
}
3718
3719
errno_t
3720
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3721
0
{
3722
0
  if (ifp == NULL || maxqlen == NULL) {
3723
0
    return EINVAL;
3724
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3725
0
    return ENXIO;
3726
0
  }
3727
3728
0
  *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3729
3730
0
  return 0;
3731
0
}
3732
3733
errno_t
3734
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3735
0
{
3736
0
  errno_t err;
3737
3738
0
  if (ifp == NULL || pkts == NULL) {
3739
0
    err = EINVAL;
3740
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3741
0
    err = ENXIO;
3742
0
  } else {
3743
0
    err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3744
0
        pkts, NULL);
3745
0
  }
3746
3747
0
  return err;
3748
0
}
3749
3750
errno_t
3751
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3752
    u_int32_t *pkts, u_int32_t *bytes)
3753
0
{
3754
0
  errno_t err;
3755
3756
0
  if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3757
0
      (pkts == NULL && bytes == NULL)) {
3758
0
    err = EINVAL;
3759
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3760
0
    err = ENXIO;
3761
0
  } else {
3762
0
    err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3763
0
  }
3764
3765
0
  return err;
3766
0
}
3767
3768
errno_t
3769
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3770
0
{
3771
0
  struct dlil_threading_info *inp;
3772
3773
0
  if (ifp == NULL) {
3774
0
    return EINVAL;
3775
0
  } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3776
0
    return ENXIO;
3777
0
  }
3778
3779
0
  if (maxqlen == 0) {
3780
0
    maxqlen = if_rcvq_maxlen;
3781
0
  } else if (maxqlen < IF_RCVQ_MINLEN) {
3782
0
    maxqlen = IF_RCVQ_MINLEN;
3783
0
  }
3784
3785
0
  inp = ifp->if_inp;
3786
0
  lck_mtx_lock(&inp->dlth_lock);
3787
0
  qlimit(&inp->dlth_pkts) = maxqlen;
3788
0
  lck_mtx_unlock(&inp->dlth_lock);
3789
3790
0
  return 0;
3791
0
}
3792
3793
errno_t
3794
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3795
0
{
3796
0
  struct dlil_threading_info *inp;
3797
3798
0
  if (ifp == NULL || maxqlen == NULL) {
3799
0
    return EINVAL;
3800
0
  } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3801
0
    return ENXIO;
3802
0
  }
3803
3804
0
  inp = ifp->if_inp;
3805
0
  lck_mtx_lock(&inp->dlth_lock);
3806
0
  *maxqlen = qlimit(&inp->dlth_pkts);
3807
0
  lck_mtx_unlock(&inp->dlth_lock);
3808
0
  return 0;
3809
0
}
3810
3811
void
3812
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3813
    uint16_t delay_timeout)
3814
4
{
3815
4
  if (delay_qlen > 0 && delay_timeout > 0) {
3816
0
    if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
3817
0
    ifp->if_start_delay_qlen = MIN(100, delay_qlen);
3818
0
    ifp->if_start_delay_timeout = min(20000, delay_timeout);
3819
    /* convert timeout to nanoseconds */
3820
0
    ifp->if_start_delay_timeout *= 1000;
3821
0
    kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3822
0
        ifp->if_xname, (uint32_t)delay_qlen,
3823
0
        (uint32_t)delay_timeout);
3824
4
  } else {
3825
4
    if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
3826
4
  }
3827
4
}
3828
3829
/*
3830
 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
3831
 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
3832
 * buf holds the full header.
3833
 */
3834
static __attribute__((noinline)) void
3835
ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
3836
0
{
3837
0
  struct ip *ip;
3838
0
  struct ip6_hdr *ip6;
3839
0
  uint8_t lbuf[64] __attribute__((aligned(8)));
3840
0
  uint8_t *p = buf;
3841
3842
0
  if (ip_ver == IPVERSION) {
3843
0
    uint8_t old_tos;
3844
0
    uint32_t sum;
3845
3846
0
    if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3847
0
      DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
3848
0
      bcopy(buf, lbuf, sizeof(struct ip));
3849
0
      p = lbuf;
3850
0
    }
3851
0
    ip = (struct ip *)(void *)p;
3852
0
    if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
3853
0
      return;
3854
0
    }
3855
3856
0
    DTRACE_IP1(clear__v4, struct ip *, ip);
3857
0
    old_tos = ip->ip_tos;
3858
0
    ip->ip_tos &= IPTOS_ECN_MASK;
3859
0
    sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
3860
0
    sum = (sum >> 16) + (sum & 0xffff);
3861
0
    ip->ip_sum = (uint16_t)(sum & 0xffff);
3862
3863
0
    if (__improbable(p == lbuf)) {
3864
0
      bcopy(lbuf, buf, sizeof(struct ip));
3865
0
    }
3866
0
  } else {
3867
0
    uint32_t flow;
3868
0
    ASSERT(ip_ver == IPV6_VERSION);
3869
3870
0
    if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3871
0
      DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
3872
0
      bcopy(buf, lbuf, sizeof(struct ip6_hdr));
3873
0
      p = lbuf;
3874
0
    }
3875
0
    ip6 = (struct ip6_hdr *)(void *)p;
3876
0
    flow = ntohl(ip6->ip6_flow);
3877
0
    if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
3878
0
      return;
3879
0
    }
3880
3881
0
    DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
3882
0
    ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);
3883
3884
0
    if (__improbable(p == lbuf)) {
3885
0
      bcopy(lbuf, buf, sizeof(struct ip6_hdr));
3886
0
    }
3887
0
  }
3888
0
}
3889
3890
static inline errno_t
3891
ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush,
3892
    boolean_t *pdrop)
3893
0
{
3894
0
  volatile uint64_t *fg_ts = NULL;
3895
0
  volatile uint64_t *rt_ts = NULL;
3896
0
  struct timespec now;
3897
0
  u_int64_t now_nsec = 0;
3898
0
  int error = 0;
3899
0
  uint8_t *mcast_buf = NULL;
3900
0
  uint8_t ip_ver;
3901
0
  uint32_t pktlen;
3902
3903
0
  ASSERT(ifp->if_eflags & IFEF_TXSTART);
3904
3905
  /*
3906
   * If packet already carries a timestamp, either from dlil_output()
3907
   * or from flowswitch, use it here.  Otherwise, record timestamp.
3908
   * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3909
   * the timestamp value is used internally there.
3910
   */
3911
0
  switch (p->cp_ptype) {
3912
0
  case QP_MBUF:
3913
0
    ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
3914
0
    ASSERT(p->cp_mbuf->m_nextpkt == NULL);
3915
3916
0
    if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3917
0
        p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
3918
0
      nanouptime(&now);
3919
0
      net_timernsec(&now, &now_nsec);
3920
0
      p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
3921
0
    }
3922
0
    p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3923
    /*
3924
     * If the packet service class is not background,
3925
     * update the timestamp to indicate recent activity
3926
     * on a foreground socket.
3927
     */
3928
0
    if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3929
0
        p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3930
0
      if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
3931
0
          PKTF_SO_BACKGROUND)) {
3932
0
        ifp->if_fg_sendts = (uint32_t)_net_uptime;
3933
0
        if (fg_ts != NULL) {
3934
0
          *fg_ts = (uint32_t)_net_uptime;
3935
0
        }
3936
0
      }
3937
0
      if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3938
0
        ifp->if_rt_sendts = (uint32_t)_net_uptime;
3939
0
        if (rt_ts != NULL) {
3940
0
          *rt_ts = (uint32_t)_net_uptime;
3941
0
        }
3942
0
      }
3943
0
    }
3944
0
    pktlen = m_pktlen(p->cp_mbuf);
3945
3946
    /*
3947
     * Some Wi-Fi AP implementations do not correctly handle
3948
     * multicast IP packets with DSCP bits set (radr://9331522).
3949
     * As a workaround we clear the DSCP bits but keep service
3950
     * class (rdar://51507725).
3951
     */
3952
0
    if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
3953
0
        IFNET_IS_WIFI_INFRA(ifp)) {
3954
0
      size_t len = mbuf_len(p->cp_mbuf), hlen;
3955
0
      struct ether_header *eh;
3956
0
      boolean_t pullup = FALSE;
3957
0
      uint16_t etype;
3958
3959
0
      if (__improbable(len < sizeof(struct ether_header))) {
3960
0
        DTRACE_IP1(small__ether, size_t, len);
3961
0
        if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
3962
0
            sizeof(struct ether_header))) == NULL) {
3963
0
          return ENOMEM;
3964
0
        }
3965
0
      }
3966
0
      eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
3967
0
      etype = ntohs(eh->ether_type);
3968
0
      if (etype == ETHERTYPE_IP) {
3969
0
        hlen = sizeof(struct ether_header) +
3970
0
            sizeof(struct ip);
3971
0
        if (len < hlen) {
3972
0
          DTRACE_IP1(small__v4, size_t, len);
3973
0
          pullup = TRUE;
3974
0
        }
3975
0
        ip_ver = IPVERSION;
3976
0
      } else if (etype == ETHERTYPE_IPV6) {
3977
0
        hlen = sizeof(struct ether_header) +
3978
0
            sizeof(struct ip6_hdr);
3979
0
        if (len < hlen) {
3980
0
          DTRACE_IP1(small__v6, size_t, len);
3981
0
          pullup = TRUE;
3982
0
        }
3983
0
        ip_ver = IPV6_VERSION;
3984
0
      } else {
3985
0
        DTRACE_IP1(invalid__etype, uint16_t, etype);
3986
0
        break;
3987
0
      }
3988
0
      if (pullup) {
3989
0
        if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
3990
0
            NULL) {
3991
0
          return ENOMEM;
3992
0
        }
3993
3994
0
        eh = (struct ether_header *)mbuf_data(
3995
0
          p->cp_mbuf);
3996
0
      }
3997
0
      mcast_buf = (uint8_t *)(eh + 1);
3998
      /*
3999
       * ifnet_mcast_clear_dscp() will finish the work below.
4000
       * Note that the pullups above ensure that mcast_buf
4001
       * points to a full IP header.
4002
       */
4003
0
    }
4004
0
    break;
4005
4006
4007
0
  default:
4008
0
    VERIFY(0);
4009
    /* NOTREACHED */
4010
0
    __builtin_unreachable();
4011
0
  }
4012
4013
0
  if (mcast_buf != NULL) {
4014
0
    ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
4015
0
  }
4016
4017
0
  if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
4018
0
    if (now_nsec == 0) {
4019
0
      nanouptime(&now);
4020
0
      net_timernsec(&now, &now_nsec);
4021
0
    }
4022
    /*
4023
     * If the driver chose to delay start callback for
4024
     * coalescing multiple packets, Then use the following
4025
     * heuristics to make sure that start callback will
4026
     * be delayed only when bulk data transfer is detected.
4027
     * 1. number of packets enqueued in (delay_win * 2) is
4028
     * greater than or equal to the delay qlen.
4029
     * 2. If delay_start is enabled it will stay enabled for
4030
     * another 10 idle windows. This is to take into account
4031
     * variable RTT and burst traffic.
4032
     * 3. If the time elapsed since last enqueue is more
4033
     * than 200ms we disable delaying start callback. This is
4034
     * is to take idle time into account.
4035
     */
4036
0
    u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
4037
0
    if (ifp->if_start_delay_swin > 0) {
4038
0
      if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
4039
0
        ifp->if_start_delay_cnt++;
4040
0
      } else if ((now_nsec - ifp->if_start_delay_swin)
4041
0
          >= (200 * 1000 * 1000)) {
4042
0
        ifp->if_start_delay_swin = now_nsec;
4043
0
        ifp->if_start_delay_cnt = 1;
4044
0
        ifp->if_start_delay_idle = 0;
4045
0
        if (ifp->if_eflags & IFEF_DELAY_START) {
4046
0
          if_clear_eflags(ifp, IFEF_DELAY_START);
4047
0
          ifnet_delay_start_disabled_increment();
4048
0
        }
4049
0
      } else {
4050
0
        if (ifp->if_start_delay_cnt >=
4051
0
            ifp->if_start_delay_qlen) {
4052
0
          if_set_eflags(ifp, IFEF_DELAY_START);
4053
0
          ifp->if_start_delay_idle = 0;
4054
0
        } else {
4055
0
          if (ifp->if_start_delay_idle >= 10) {
4056
0
            if_clear_eflags(ifp,
4057
0
                IFEF_DELAY_START);
4058
0
            ifnet_delay_start_disabled_increment();
4059
0
          } else {
4060
0
            ifp->if_start_delay_idle++;
4061
0
          }
4062
0
        }
4063
0
        ifp->if_start_delay_swin = now_nsec;
4064
0
        ifp->if_start_delay_cnt = 1;
4065
0
      }
4066
0
    } else {
4067
0
      ifp->if_start_delay_swin = now_nsec;
4068
0
      ifp->if_start_delay_cnt = 1;
4069
0
      ifp->if_start_delay_idle = 0;
4070
0
      if_clear_eflags(ifp, IFEF_DELAY_START);
4071
0
    }
4072
0
  } else {
4073
0
    if_clear_eflags(ifp, IFEF_DELAY_START);
4074
0
  }
4075
4076
  /* enqueue the packet (caller consumes object) */
4077
0
  error = ifclassq_enqueue(&ifp->if_snd, p, p, 1, pktlen, pdrop);
4078
4079
  /*
4080
   * Tell the driver to start dequeueing; do this even when the queue
4081
   * for the packet is suspended (EQSUSPENDED), as the driver could still
4082
   * be dequeueing from other unsuspended queues.
4083
   */
4084
0
  if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
4085
0
      ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
4086
0
    ifnet_start(ifp);
4087
0
  }
4088
4089
0
  return error;
4090
0
}
4091
4092
static inline errno_t
4093
ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, classq_pkt_t *head,
4094
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
4095
    boolean_t *pdrop)
4096
0
{
4097
0
  int error;
4098
4099
  /* enqueue the packet (caller consumes object) */
4100
0
  error = ifclassq_enqueue(&ifp->if_snd, head, tail, cnt, bytes, pdrop);
4101
4102
  /*
4103
   * Tell the driver to start dequeueing; do this even when the queue
4104
   * for the packet is suspended (EQSUSPENDED), as the driver could still
4105
   * be dequeueing from other unsuspended queues.
4106
   */
4107
0
  if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
4108
0
    ifnet_start(ifp);
4109
0
  }
4110
0
  return error;
4111
0
}
4112
4113
int
4114
ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
4115
0
{
4116
0
  struct ifnet *ifp = handle;
4117
0
  boolean_t pdrop;        /* dummy */
4118
0
  uint32_t i;
4119
4120
0
  ASSERT(n_pkts >= 1);
4121
0
  for (i = 0; i < n_pkts - 1; i++) {
4122
0
    (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,
4123
0
        FALSE, &pdrop);
4124
0
  }
4125
  /* flush with the last packet */
4126
0
  (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop);
4127
4128
0
  return 0;
4129
0
}
4130
4131
static inline errno_t
4132
ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,
4133
    boolean_t *pdrop)
4134
0
{
4135
0
  if (ifp->if_output_netem != NULL) {
4136
0
    return netem_enqueue(ifp->if_output_netem, pkt, pdrop);
4137
0
  } else {
4138
0
    return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);
4139
0
  }
4140
0
}
4141
4142
errno_t
4143
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
4144
0
{
4145
0
  boolean_t pdrop;
4146
0
  return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
4147
0
}
4148
4149
errno_t
4150
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
4151
    boolean_t *pdrop)
4152
0
{
4153
0
  classq_pkt_t pkt;
4154
4155
0
  if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
4156
0
      m->m_nextpkt != NULL) {
4157
0
    if (m != NULL) {
4158
0
      m_freem_list(m);
4159
0
      *pdrop = TRUE;
4160
0
    }
4161
0
    return EINVAL;
4162
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4163
0
      !IF_FULLY_ATTACHED(ifp)) {
4164
    /* flag tested without lock for performance */
4165
0
    m_freem(m);
4166
0
    *pdrop = TRUE;
4167
0
    return ENXIO;
4168
0
  } else if (!(ifp->if_flags & IFF_UP)) {
4169
0
    m_freem(m);
4170
0
    *pdrop = TRUE;
4171
0
    return ENETDOWN;
4172
0
  }
4173
4174
0
  CLASSQ_PKT_INIT_MBUF(&pkt, m);
4175
0
  return ifnet_enqueue_common(ifp, &pkt, flush, pdrop);
4176
0
}
4177
4178
errno_t
4179
ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
4180
    struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
4181
    boolean_t *pdrop)
4182
0
{
4183
0
  classq_pkt_t head, tail;
4184
4185
0
  ASSERT(m_head != NULL);
4186
0
  ASSERT((m_head->m_flags & M_PKTHDR) != 0);
4187
0
  ASSERT(m_tail != NULL);
4188
0
  ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
4189
0
  ASSERT(ifp != NULL);
4190
0
  ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
4191
4192
0
  if (!IF_FULLY_ATTACHED(ifp)) {
4193
    /* flag tested without lock for performance */
4194
0
    m_freem_list(m_head);
4195
0
    *pdrop = TRUE;
4196
0
    return ENXIO;
4197
0
  } else if (!(ifp->if_flags & IFF_UP)) {
4198
0
    m_freem_list(m_head);
4199
0
    *pdrop = TRUE;
4200
0
    return ENETDOWN;
4201
0
  }
4202
4203
0
  CLASSQ_PKT_INIT_MBUF(&head, m_head);
4204
0
  CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
4205
0
  return ifnet_enqueue_ifclassq_chain(ifp, &head, &tail, cnt, bytes,
4206
0
             flush, pdrop);
4207
0
}
4208
4209
4210
errno_t
4211
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
4212
0
{
4213
0
  errno_t rc;
4214
0
  classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4215
4216
0
  if (ifp == NULL || mp == NULL) {
4217
0
    return EINVAL;
4218
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4219
0
      ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4220
0
    return ENXIO;
4221
0
  }
4222
0
  if (!ifnet_is_attached(ifp, 1)) {
4223
0
    return ENXIO;
4224
0
  }
4225
4226
0
  rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
4227
0
      &pkt, NULL, NULL, NULL);
4228
0
  VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
4229
0
  ifnet_decr_iorefcnt(ifp);
4230
0
  *mp = pkt.cp_mbuf;
4231
0
  return rc;
4232
0
}
4233
4234
errno_t
4235
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
4236
    struct mbuf **mp)
4237
0
{
4238
0
  errno_t rc;
4239
0
  classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4240
4241
0
  if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
4242
0
    return EINVAL;
4243
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4244
0
      ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4245
0
    return ENXIO;
4246
0
  }
4247
0
  if (!ifnet_is_attached(ifp, 1)) {
4248
0
    return ENXIO;
4249
0
  }
4250
4251
0
  rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
4252
0
      CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
4253
0
  VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
4254
0
  ifnet_decr_iorefcnt(ifp);
4255
0
  *mp = pkt.cp_mbuf;
4256
0
  return rc;
4257
0
}
4258
4259
errno_t
4260
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
4261
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4262
0
{
4263
0
  errno_t rc;
4264
0
  classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4265
0
  classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4266
4267
0
  if (ifp == NULL || head == NULL || pkt_limit < 1) {
4268
0
    return EINVAL;
4269
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4270
0
      ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4271
0
    return ENXIO;
4272
0
  }
4273
0
  if (!ifnet_is_attached(ifp, 1)) {
4274
0
    return ENXIO;
4275
0
  }
4276
4277
0
  rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
4278
0
      CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
4279
0
  VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4280
0
  ifnet_decr_iorefcnt(ifp);
4281
0
  *head = pkt_head.cp_mbuf;
4282
0
  if (tail != NULL) {
4283
0
    *tail = pkt_tail.cp_mbuf;
4284
0
  }
4285
0
  return rc;
4286
0
}
4287
4288
errno_t
4289
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
4290
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4291
0
{
4292
0
  errno_t rc;
4293
0
  classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4294
0
  classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4295
4296
0
  if (ifp == NULL || head == NULL || byte_limit < 1) {
4297
0
    return EINVAL;
4298
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4299
0
      ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4300
0
    return ENXIO;
4301
0
  }
4302
0
  if (!ifnet_is_attached(ifp, 1)) {
4303
0
    return ENXIO;
4304
0
  }
4305
4306
0
  rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
4307
0
      byte_limit, &pkt_head, &pkt_tail, cnt, len);
4308
0
  VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4309
0
  ifnet_decr_iorefcnt(ifp);
4310
0
  *head = pkt_head.cp_mbuf;
4311
0
  if (tail != NULL) {
4312
0
    *tail = pkt_tail.cp_mbuf;
4313
0
  }
4314
0
  return rc;
4315
0
}
4316
4317
errno_t
4318
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
4319
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
4320
    u_int32_t *len)
4321
0
{
4322
0
  errno_t rc;
4323
0
  classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4324
0
  classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4325
4326
0
  if (ifp == NULL || head == NULL || pkt_limit < 1 ||
4327
0
      !MBUF_VALID_SC(sc)) {
4328
0
    return EINVAL;
4329
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4330
0
      ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4331
0
    return ENXIO;
4332
0
  }
4333
0
  if (!ifnet_is_attached(ifp, 1)) {
4334
0
    return ENXIO;
4335
0
  }
4336
4337
0
  rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
4338
0
      CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
4339
0
      cnt, len);
4340
0
  VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4341
0
  ifnet_decr_iorefcnt(ifp);
4342
0
  *head = pkt_head.cp_mbuf;
4343
0
  if (tail != NULL) {
4344
0
    *tail = pkt_tail.cp_mbuf;
4345
0
  }
4346
0
  return rc;
4347
0
}
4348
4349
#if XNU_TARGET_OS_OSX
4350
errno_t
4351
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
4352
    const struct sockaddr *dest, const char *dest_linkaddr,
4353
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
4354
0
{
4355
0
  if (pre != NULL) {
4356
0
    *pre = 0;
4357
0
  }
4358
0
  if (post != NULL) {
4359
0
    *post = 0;
4360
0
  }
4361
4362
0
  return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
4363
0
}
4364
#endif /* XNU_TARGET_OS_OSX */
4365
4366
static boolean_t
4367
packet_has_vlan_tag(struct mbuf * m)
4368
19.4k
{
4369
19.4k
  u_int   tag = 0;
4370
4371
19.4k
  if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
4372
0
    tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
4373
0
    if (tag == 0) {
4374
      /* the packet is just priority-tagged, clear the bit */
4375
0
      m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
4376
0
    }
4377
0
  }
4378
19.4k
  return tag != 0;
4379
19.4k
}
4380
4381
static int
4382
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
4383
    char **frame_header_p, protocol_family_t protocol_family)
4384
0
{
4385
0
  boolean_t               is_vlan_packet = FALSE;
4386
0
  struct ifnet_filter     *filter;
4387
0
  struct mbuf             *m = *m_p;
4388
4389
0
  is_vlan_packet = packet_has_vlan_tag(m);
4390
4391
0
  if (TAILQ_EMPTY(&ifp->if_flt_head)) {
4392
0
    return 0;
4393
0
  }
4394
4395
  /*
4396
   * Pass the inbound packet to the interface filters
4397
   */
4398
0
  lck_mtx_lock_spin(&ifp->if_flt_lock);
4399
  /* prevent filter list from changing in case we drop the lock */
4400
0
  if_flt_monitor_busy(ifp);
4401
0
  TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4402
0
    int result;
4403
4404
    /* exclude VLAN packets from external filters PR-3586856 */
4405
0
    if (is_vlan_packet &&
4406
0
        (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4407
0
      continue;
4408
0
    }
4409
4410
0
    if (!filter->filt_skip && filter->filt_input != NULL &&
4411
0
        (filter->filt_protocol == 0 ||
4412
0
        filter->filt_protocol == protocol_family)) {
4413
0
      lck_mtx_unlock(&ifp->if_flt_lock);
4414
4415
0
      result = (*filter->filt_input)(filter->filt_cookie,
4416
0
          ifp, protocol_family, m_p, frame_header_p);
4417
4418
0
      lck_mtx_lock_spin(&ifp->if_flt_lock);
4419
0
      if (result != 0) {
4420
        /* we're done with the filter list */
4421
0
        if_flt_monitor_unbusy(ifp);
4422
0
        lck_mtx_unlock(&ifp->if_flt_lock);
4423
0
        return result;
4424
0
      }
4425
0
    }
4426
0
  }
4427
  /* we're done with the filter list */
4428
0
  if_flt_monitor_unbusy(ifp);
4429
0
  lck_mtx_unlock(&ifp->if_flt_lock);
4430
4431
  /*
4432
   * Strip away M_PROTO1 bit prior to sending packet up the stack as
4433
   * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
4434
   */
4435
0
  if (*m_p != NULL) {
4436
0
    (*m_p)->m_flags &= ~M_PROTO1;
4437
0
  }
4438
4439
0
  return 0;
4440
0
}
4441
4442
static int
4443
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
4444
    protocol_family_t protocol_family)
4445
19.4k
{
4446
19.4k
  boolean_t               is_vlan_packet;
4447
19.4k
  struct ifnet_filter     *filter;
4448
19.4k
  struct mbuf             *m = *m_p;
4449
4450
19.4k
  is_vlan_packet = packet_has_vlan_tag(m);
4451
4452
  /*
4453
   * Pass the outbound packet to the interface filters
4454
   */
4455
19.4k
  lck_mtx_lock_spin(&ifp->if_flt_lock);
4456
  /* prevent filter list from changing in case we drop the lock */
4457
19.4k
  if_flt_monitor_busy(ifp);
4458
19.4k
  TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4459
0
    int result;
4460
4461
    /* exclude VLAN packets from external filters PR-3586856 */
4462
0
    if (is_vlan_packet &&
4463
0
        (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4464
0
      continue;
4465
0
    }
4466
4467
0
    if (!filter->filt_skip && filter->filt_output != NULL &&
4468
0
        (filter->filt_protocol == 0 ||
4469
0
        filter->filt_protocol == protocol_family)) {
4470
0
      lck_mtx_unlock(&ifp->if_flt_lock);
4471
4472
0
      result = filter->filt_output(filter->filt_cookie, ifp,
4473
0
          protocol_family, m_p);
4474
4475
0
      lck_mtx_lock_spin(&ifp->if_flt_lock);
4476
0
      if (result != 0) {
4477
        /* we're done with the filter list */
4478
0
        if_flt_monitor_unbusy(ifp);
4479
0
        lck_mtx_unlock(&ifp->if_flt_lock);
4480
0
        return result;
4481
0
      }
4482
0
    }
4483
0
  }
4484
  /* we're done with the filter list */
4485
19.4k
  if_flt_monitor_unbusy(ifp);
4486
19.4k
  lck_mtx_unlock(&ifp->if_flt_lock);
4487
4488
19.4k
  return 0;
4489
19.4k
}
4490
4491
static void
4492
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
4493
0
{
4494
0
  int error;
4495
4496
0
  if (ifproto->proto_kpi == kProtoKPI_v1) {
4497
    /* Version 1 protocols get one packet at a time */
4498
0
    while (m != NULL) {
4499
0
      char *  frame_header;
4500
0
      mbuf_t  next_packet;
4501
4502
0
      next_packet = m->m_nextpkt;
4503
0
      m->m_nextpkt = NULL;
4504
0
      frame_header = m->m_pkthdr.pkt_hdr;
4505
0
      m->m_pkthdr.pkt_hdr = NULL;
4506
0
      error = (*ifproto->kpi.v1.input)(ifproto->ifp,
4507
0
          ifproto->protocol_family, m, frame_header);
4508
0
      if (error != 0 && error != EJUSTRETURN) {
4509
0
        m_freem(m);
4510
0
      }
4511
0
      m = next_packet;
4512
0
    }
4513
0
  } else if (ifproto->proto_kpi == kProtoKPI_v2) {
4514
    /* Version 2 protocols support packet lists */
4515
0
    error = (*ifproto->kpi.v2.input)(ifproto->ifp,
4516
0
        ifproto->protocol_family, m);
4517
0
    if (error != 0 && error != EJUSTRETURN) {
4518
0
      m_freem_list(m);
4519
0
    }
4520
0
  }
4521
0
}
4522
4523
static void
4524
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
4525
    struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
4526
19.4k
{
4527
19.4k
  struct ifnet_stat_increment_param *d = &inp->dlth_stats;
4528
4529
19.4k
  if (s->packets_in != 0) {
4530
19.4k
    d->packets_in += s->packets_in;
4531
19.4k
  }
4532
19.4k
  if (s->bytes_in != 0) {
4533
19.4k
    d->bytes_in += s->bytes_in;
4534
19.4k
  }
4535
19.4k
  if (s->errors_in != 0) {
4536
0
    d->errors_in += s->errors_in;
4537
0
  }
4538
4539
19.4k
  if (s->packets_out != 0) {
4540
19.4k
    d->packets_out += s->packets_out;
4541
19.4k
  }
4542
19.4k
  if (s->bytes_out != 0) {
4543
19.4k
    d->bytes_out += s->bytes_out;
4544
19.4k
  }
4545
19.4k
  if (s->errors_out != 0) {
4546
0
    d->errors_out += s->errors_out;
4547
0
  }
4548
4549
19.4k
  if (s->collisions != 0) {
4550
0
    d->collisions += s->collisions;
4551
0
  }
4552
19.4k
  if (s->dropped != 0) {
4553
0
    d->dropped += s->dropped;
4554
0
  }
4555
4556
19.4k
  if (poll) {
4557
0
    PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
4558
0
  }
4559
19.4k
}
4560
4561
static boolean_t
4562
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
4563
19.4k
{
4564
19.4k
  struct ifnet_stat_increment_param *s = &inp->dlth_stats;
4565
4566
  /*
4567
   * Use of atomic operations is unavoidable here because
4568
   * these stats may also be incremented elsewhere via KPIs.
4569
   */
4570
19.4k
  if (s->packets_in != 0) {
4571
19.4k
    atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
4572
19.4k
    s->packets_in = 0;
4573
19.4k
  }
4574
19.4k
  if (s->bytes_in != 0) {
4575
19.4k
    atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
4576
19.4k
    s->bytes_in = 0;
4577
19.4k
  }
4578
19.4k
  if (s->errors_in != 0) {
4579
0
    atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
4580
0
    s->errors_in = 0;
4581
0
  }
4582
4583
19.4k
  if (s->packets_out != 0) {
4584
19.4k
    atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
4585
19.4k
    s->packets_out = 0;
4586
19.4k
  }
4587
19.4k
  if (s->bytes_out != 0) {
4588
19.4k
    atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
4589
19.4k
    s->bytes_out = 0;
4590
19.4k
  }
4591
19.4k
  if (s->errors_out != 0) {
4592
0
    atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
4593
0
    s->errors_out = 0;
4594
0
  }
4595
4596
19.4k
  if (s->collisions != 0) {
4597
0
    atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
4598
0
    s->collisions = 0;
4599
0
  }
4600
19.4k
  if (s->dropped != 0) {
4601
0
    atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
4602
0
    s->dropped = 0;
4603
0
  }
4604
4605
  /*
4606
   * No need for atomic operations as they are modified here
4607
   * only from within the DLIL input thread context.
4608
   */
4609
19.4k
  if (ifp->if_poll_tstats.packets != 0) {
4610
0
    ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
4611
0
    ifp->if_poll_tstats.packets = 0;
4612
0
  }
4613
19.4k
  if (ifp->if_poll_tstats.bytes != 0) {
4614
0
    ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
4615
0
    ifp->if_poll_tstats.bytes = 0;
4616
0
  }
4617
4618
19.4k
  return ifp->if_data_threshold != 0;
4619
19.4k
}
4620
4621
__private_extern__ void
4622
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
4623
0
{
4624
0
  return dlil_input_packet_list_common(ifp, m, 0,
4625
0
             IFNET_MODEL_INPUT_POLL_OFF, FALSE);
4626
0
}
4627
4628
__private_extern__ void
4629
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
4630
    u_int32_t cnt, ifnet_model_t mode)
4631
0
{
4632
0
  return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
4633
0
}
4634
4635
static void
4636
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
4637
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
4638
0
{
4639
0
  int error = 0;
4640
0
  protocol_family_t protocol_family;
4641
0
  mbuf_t next_packet;
4642
0
  ifnet_t ifp = ifp_param;
4643
0
  char *frame_header = NULL;
4644
0
  struct if_proto *last_ifproto = NULL;
4645
0
  mbuf_t pkt_first = NULL;
4646
0
  mbuf_t *pkt_next = NULL;
4647
0
  u_int32_t poll_thresh = 0, poll_ival = 0;
4648
0
  int iorefcnt = 0;
4649
4650
0
  KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4651
4652
0
  if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
4653
0
      (poll_ival = if_rxpoll_interval_pkts) > 0) {
4654
0
    poll_thresh = cnt;
4655
0
  }
4656
4657
0
  while (m != NULL) {
4658
0
    struct if_proto *ifproto = NULL;
4659
0
    uint32_t pktf_mask;     /* pkt flags to preserve */
4660
4661
0
    if (ifp_param == NULL) {
4662
0
      ifp = m->m_pkthdr.rcvif;
4663
0
    }
4664
4665
0
    if ((ifp->if_eflags & IFEF_RXPOLL) &&
4666
0
        (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
4667
0
        poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
4668
0
      ifnet_poll(ifp);
4669
0
    }
4670
4671
    /* Check if this mbuf looks valid */
4672
0
    MBUF_INPUT_CHECK(m, ifp);
4673
4674
0
    next_packet = m->m_nextpkt;
4675
0
    m->m_nextpkt = NULL;
4676
0
    frame_header = m->m_pkthdr.pkt_hdr;
4677
0
    m->m_pkthdr.pkt_hdr = NULL;
4678
4679
    /*
4680
     * Get an IO reference count if the interface is not
4681
     * loopback (lo0) and it is attached; lo0 never goes
4682
     * away, so optimize for that.
4683
     */
4684
0
    if (ifp != lo_ifp) {
4685
      /* iorefcnt is 0 if it hasn't been taken yet */
4686
0
      if (iorefcnt == 0) {
4687
0
        if (!ifnet_datamov_begin(ifp)) {
4688
0
          m_freem(m);
4689
0
          goto next;
4690
0
        }
4691
0
      }
4692
0
      iorefcnt = 1;
4693
      /*
4694
       * Preserve the time stamp and skip pktap flags.
4695
       */
4696
0
      pktf_mask = PKTF_TS_VALID | PKTF_SKIP_PKTAP;
4697
0
    } else {
4698
      /*
4699
       * If this arrived on lo0, preserve interface addr
4700
       * info to allow for connectivity between loopback
4701
       * and local interface addresses.
4702
       */
4703
0
      pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
4704
0
    }
4705
4706
    /* make sure packet comes in clean */
4707
0
    m_classifier_init(m, pktf_mask);
4708
4709
0
    ifp_inc_traffic_class_in(ifp, m);
4710
4711
    /* find which protocol family this packet is for */
4712
0
    ifnet_lock_shared(ifp);
4713
0
    error = (*ifp->if_demux)(ifp, m, frame_header,
4714
0
        &protocol_family);
4715
0
    ifnet_lock_done(ifp);
4716
0
    if (error != 0) {
4717
0
      if (error == EJUSTRETURN) {
4718
0
        goto next;
4719
0
      }
4720
0
      protocol_family = 0;
4721
0
    }
4722
4723
0
    pktap_input(ifp, protocol_family, m, frame_header);
4724
4725
    /* Drop v4 packets received on CLAT46 enabled interface */
4726
0
    if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
4727
0
      m_freem(m);
4728
0
      ip6stat.ip6s_clat464_in_v4_drop++;
4729
0
      goto next;
4730
0
    }
4731
4732
    /* Translate the packet if it is received on CLAT interface */
4733
0
    if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
4734
0
        && dlil_is_clat_needed(protocol_family, m)) {
4735
0
      char *data = NULL;
4736
0
      struct ether_header eh;
4737
0
      struct ether_header *ehp = NULL;
4738
4739
0
      if (ifp->if_type == IFT_ETHER) {
4740
0
        ehp = (struct ether_header *)(void *)frame_header;
4741
        /* Skip RX Ethernet packets if they are not IPV6 */
4742
0
        if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
4743
0
          goto skip_clat;
4744
0
        }
4745
4746
        /* Keep a copy of frame_header for Ethernet packets */
4747
0
        bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
4748
0
      }
4749
0
      error = dlil_clat64(ifp, &protocol_family, &m);
4750
0
      data = (char *) mbuf_data(m);
4751
0
      if (error != 0) {
4752
0
        m_freem(m);
4753
0
        ip6stat.ip6s_clat464_in_drop++;
4754
0
        goto next;
4755
0
      }
4756
      /* Native v6 should be No-op */
4757
0
      if (protocol_family != PF_INET) {
4758
0
        goto skip_clat;
4759
0
      }
4760
4761
      /* Do this only for translated v4 packets. */
4762
0
      switch (ifp->if_type) {
4763
0
      case IFT_CELLULAR:
4764
0
        frame_header = data;
4765
0
        break;
4766
0
      case IFT_ETHER:
4767
        /*
4768
         * Drop if the mbuf doesn't have enough
4769
         * space for Ethernet header
4770
         */
4771
0
        if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
4772
0
          m_free(m);
4773
0
          ip6stat.ip6s_clat464_in_drop++;
4774
0
          goto next;
4775
0
        }
4776
        /*
4777
         * Set the frame_header ETHER_HDR_LEN bytes
4778
         * preceeding the data pointer. Change
4779
         * the ether_type too.
4780
         */
4781
0
        frame_header = data - ETHER_HDR_LEN;
4782
0
        eh.ether_type = htons(ETHERTYPE_IP);
4783
0
        bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
4784
0
        break;
4785
0
      }
4786
0
    }
4787
0
skip_clat:
4788
0
    if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
4789
0
        !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
4790
0
      dlil_input_cksum_dbg(ifp, m, frame_header,
4791
0
          protocol_family);
4792
0
    }
4793
    /*
4794
     * For partial checksum offload, we expect the driver to
4795
     * set the start offset indicating the start of the span
4796
     * that is covered by the hardware-computed checksum;
4797
     * adjust this start offset accordingly because the data
4798
     * pointer has been advanced beyond the link-layer header.
4799
     *
4800
     * Virtual lan types (bridge, vlan, bond) can call
4801
     * dlil_input_packet_list() with the same packet with the
4802
     * checksum flags set. Set a flag indicating that the
4803
     * adjustment has already been done.
4804
     */
4805
0
    if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
4806
      /* adjustment has already been done */
4807
0
    } else if ((m->m_pkthdr.csum_flags &
4808
0
        (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4809
0
        (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4810
0
      int adj;
4811
0
      if (frame_header == NULL ||
4812
0
          frame_header < (char *)mbuf_datastart(m) ||
4813
0
          frame_header > (char *)m->m_data ||
4814
0
          (adj = (int)(m->m_data - frame_header)) >
4815
0
          m->m_pkthdr.csum_rx_start) {
4816
0
        m->m_pkthdr.csum_data = 0;
4817
0
        m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
4818
0
        hwcksum_in_invalidated++;
4819
0
      } else {
4820
0
        m->m_pkthdr.csum_rx_start -= adj;
4821
0
      }
4822
      /* make sure we don't adjust more than once */
4823
0
      m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
4824
0
    }
4825
0
    if (clat_debug) {
4826
0
      pktap_input(ifp, protocol_family, m, frame_header);
4827
0
    }
4828
4829
0
    if (m->m_flags & (M_BCAST | M_MCAST)) {
4830
0
      atomic_add_64(&ifp->if_imcasts, 1);
4831
0
    }
4832
4833
    /* run interface filters */
4834
0
    error = dlil_interface_filters_input(ifp, &m,
4835
0
        &frame_header, protocol_family);
4836
0
    if (error != 0) {
4837
0
      if (error != EJUSTRETURN) {
4838
0
        m_freem(m);
4839
0
      }
4840
0
      goto next;
4841
0
    }
4842
    /*
4843
     * A VLAN interface receives VLAN-tagged packets by attaching
4844
     * its PF_VLAN protocol to a parent interface. When a VLAN
4845
     * interface is a member of a bridge, the parent interface
4846
     * receives VLAN-tagged M_PROMISC packets. A VLAN-tagged
4847
     * M_PROMISC packet must be processed by the VLAN protocol
4848
     * so that it can be sent up the stack via
4849
     * dlil_input_packet_list(). That allows the bridge interface's
4850
     * input filter, attached to the VLAN interface, to process
4851
     * the packet.
4852
     */
4853
0
    if (protocol_family != PF_VLAN &&
4854
0
        (m->m_flags & M_PROMISC) != 0) {
4855
0
      m_freem(m);
4856
0
      goto next;
4857
0
    }
4858
4859
    /* Lookup the protocol attachment to this interface */
4860
0
    if (protocol_family == 0) {
4861
0
      ifproto = NULL;
4862
0
    } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
4863
0
        (last_ifproto->protocol_family == protocol_family)) {
4864
0
      VERIFY(ifproto == NULL);
4865
0
      ifproto = last_ifproto;
4866
0
      if_proto_ref(last_ifproto);
4867
0
    } else {
4868
0
      VERIFY(ifproto == NULL);
4869
0
      ifnet_lock_shared(ifp);
4870
      /* callee holds a proto refcnt upon success */
4871
0
      ifproto = find_attached_proto(ifp, protocol_family);
4872
0
      ifnet_lock_done(ifp);
4873
0
    }
4874
0
    if (ifproto == NULL) {
4875
      /* no protocol for this packet, discard */
4876
0
      m_freem(m);
4877
0
      goto next;
4878
0
    }
4879
0
    if (ifproto != last_ifproto) {
4880
0
      if (last_ifproto != NULL) {
4881
        /* pass up the list for the previous protocol */
4882
0
        dlil_ifproto_input(last_ifproto, pkt_first);
4883
0
        pkt_first = NULL;
4884
0
        if_proto_free(last_ifproto);
4885
0
      }
4886
0
      last_ifproto = ifproto;
4887
0
      if_proto_ref(ifproto);
4888
0
    }
4889
    /* extend the list */
4890
0
    m->m_pkthdr.pkt_hdr = frame_header;
4891
0
    if (pkt_first == NULL) {
4892
0
      pkt_first = m;
4893
0
    } else {
4894
0
      *pkt_next = m;
4895
0
    }
4896
0
    pkt_next = &m->m_nextpkt;
4897
4898
0
next:
4899
0
    if (next_packet == NULL && last_ifproto != NULL) {
4900
      /* pass up the last list of packets */
4901
0
      dlil_ifproto_input(last_ifproto, pkt_first);
4902
0
      if_proto_free(last_ifproto);
4903
0
      last_ifproto = NULL;
4904
0
    }
4905
0
    if (ifproto != NULL) {
4906
0
      if_proto_free(ifproto);
4907
0
      ifproto = NULL;
4908
0
    }
4909
4910
0
    m = next_packet;
4911
4912
    /* update the driver's multicast filter, if needed */
4913
0
    if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
4914
0
      ifp->if_updatemcasts = 0;
4915
0
    }
4916
0
    if (iorefcnt == 1) {
4917
      /* If the next mbuf is on a different interface, unlock data-mov */
4918
0
      if (!m || (ifp != ifp_param && ifp != m->m_pkthdr.rcvif)) {
4919
0
        ifnet_datamov_end(ifp);
4920
0
        iorefcnt = 0;
4921
0
      }
4922
0
    }
4923
0
  }
4924
4925
0
  KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4926
0
}
4927
4928
errno_t
4929
if_mcasts_update(struct ifnet *ifp)
4930
0
{
4931
0
  errno_t err;
4932
4933
0
  err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
4934
0
  if (err == EAFNOSUPPORT) {
4935
0
    err = 0;
4936
0
  }
4937
0
  DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
4938
0
      "(err=%d)\n", if_name(ifp),
4939
0
      (err == 0 ? "successfully restored" : "failed to restore"),
4940
0
      ifp->if_updatemcasts, err);
4941
4942
  /* just return success */
4943
0
  return 0;
4944
0
}
4945
4946
/* If ifp is set, we will increment the generation for the interface */
4947
int
4948
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4949
44.1k
{
4950
44.1k
  if (ifp != NULL) {
4951
44.1k
    ifnet_increment_generation(ifp);
4952
44.1k
  }
4953
4954
44.1k
#if NECP
4955
44.1k
  necp_update_all_clients();
4956
44.1k
#endif /* NECP */
4957
4958
44.1k
  return kev_post_msg(event);
4959
44.1k
}
4960
4961
__private_extern__ void
4962
dlil_post_sifflags_msg(struct ifnet * ifp)
4963
44.1k
{
4964
44.1k
  struct kev_msg ev_msg;
4965
44.1k
  struct net_event_data ev_data;
4966
4967
44.1k
  bzero(&ev_data, sizeof(ev_data));
4968
44.1k
  bzero(&ev_msg, sizeof(ev_msg));
4969
44.1k
  ev_msg.vendor_code = KEV_VENDOR_APPLE;
4970
44.1k
  ev_msg.kev_class = KEV_NETWORK_CLASS;
4971
44.1k
  ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4972
44.1k
  ev_msg.event_code = KEV_DL_SIFFLAGS;
4973
44.1k
  strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4974
44.1k
  ev_data.if_family = ifp->if_family;
4975
44.1k
  ev_data.if_unit = (u_int32_t) ifp->if_unit;
4976
44.1k
  ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4977
44.1k
  ev_msg.dv[0].data_ptr = &ev_data;
4978
44.1k
  ev_msg.dv[1].data_length = 0;
4979
44.1k
  dlil_post_complete_msg(ifp, &ev_msg);
4980
44.1k
}
4981
4982
2
#define TMP_IF_PROTO_ARR_SIZE   10
4983
static int
4984
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4985
6
{
4986
6
  struct ifnet_filter *filter = NULL;
4987
6
  struct if_proto *proto = NULL;
4988
6
  int if_proto_count = 0;
4989
6
  struct if_proto **tmp_ifproto_arr = NULL;
4990
6
  struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4991
6
  int tmp_ifproto_arr_idx = 0;
4992
6
  bool tmp_malloc = false;
4993
4994
  /*
4995
   * Pass the event to the interface filters
4996
   */
4997
6
  lck_mtx_lock_spin(&ifp->if_flt_lock);
4998
  /* prevent filter list from changing in case we drop the lock */
4999
6
  if_flt_monitor_busy(ifp);
5000
6
  TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5001
0
    if (filter->filt_event != NULL) {
5002
0
      lck_mtx_unlock(&ifp->if_flt_lock);
5003
5004
0
      filter->filt_event(filter->filt_cookie, ifp,
5005
0
          filter->filt_protocol, event);
5006
5007
0
      lck_mtx_lock_spin(&ifp->if_flt_lock);
5008
0
    }
5009
0
  }
5010
  /* we're done with the filter list */
5011
6
  if_flt_monitor_unbusy(ifp);
5012
6
  lck_mtx_unlock(&ifp->if_flt_lock);
5013
5014
  /* Get an io ref count if the interface is attached */
5015
6
  if (!ifnet_is_attached(ifp, 1)) {
5016
0
    goto done;
5017
0
  }
5018
5019
  /*
5020
   * An embedded tmp_list_entry in if_proto may still get
5021
   * over-written by another thread after giving up ifnet lock,
5022
   * therefore we are avoiding embedded pointers here.
5023
   */
5024
6
  ifnet_lock_shared(ifp);
5025
6
  if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
5026
6
  if (if_proto_count) {
5027
2
    int i;
5028
2
    VERIFY(ifp->if_proto_hash != NULL);
5029
2
    if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
5030
2
      tmp_ifproto_arr = tmp_ifproto_stack_arr;
5031
2
    } else {
5032
0
      MALLOC(tmp_ifproto_arr, struct if_proto **,
5033
0
          sizeof(*tmp_ifproto_arr) * if_proto_count,
5034
0
          M_TEMP, M_ZERO);
5035
0
      if (tmp_ifproto_arr == NULL) {
5036
0
        ifnet_lock_done(ifp);
5037
0
        goto cleanup;
5038
0
      }
5039
0
      tmp_malloc = true;
5040
0
    }
5041
5042
12
    for (i = 0; i < PROTO_HASH_SLOTS; i++) {
5043
10
      SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
5044
10
          next_hash) {
5045
3
        if_proto_ref(proto);
5046
3
        tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
5047
3
        tmp_ifproto_arr_idx++;
5048
3
      }
5049
10
    }
5050
2
    VERIFY(if_proto_count == tmp_ifproto_arr_idx);
5051
2
  }
5052
6
  ifnet_lock_done(ifp);
5053
5054
9
  for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
5055
6
      tmp_ifproto_arr_idx++) {
5056
3
    proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
5057
3
    VERIFY(proto != NULL);
5058
0
    proto_media_event eventp =
5059
3
        (proto->proto_kpi == kProtoKPI_v1 ?
5060
0
        proto->kpi.v1.event :
5061
3
        proto->kpi.v2.event);
5062
5063
3
    if (eventp != NULL) {
5064
0
      eventp(ifp, proto->protocol_family,
5065
0
          event);
5066
0
    }
5067
3
    if_proto_free(proto);
5068
3
  }
5069
5070
6
cleanup:
5071
6
  if (tmp_malloc) {
5072
0
    FREE(tmp_ifproto_arr, M_TEMP);
5073
0
  }
5074
5075
  /* Pass the event to the interface */
5076
6
  if (ifp->if_event != NULL) {
5077
0
    ifp->if_event(ifp, event);
5078
0
  }
5079
5080
  /* Release the io ref count */
5081
6
  ifnet_decr_iorefcnt(ifp);
5082
6
done:
5083
6
  return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
5084
6
}
5085
5086
errno_t
5087
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
5088
0
{
5089
0
  struct kev_msg kev_msg;
5090
0
  int result = 0;
5091
5092
0
  if (ifp == NULL || event == NULL) {
5093
0
    return EINVAL;
5094
0
  }
5095
5096
0
  bzero(&kev_msg, sizeof(kev_msg));
5097
0
  kev_msg.vendor_code = event->vendor_code;
5098
0
  kev_msg.kev_class = event->kev_class;
5099
0
  kev_msg.kev_subclass = event->kev_subclass;
5100
0
  kev_msg.event_code = event->event_code;
5101
0
  kev_msg.dv[0].data_ptr = &event->event_data[0];
5102
0
  kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
5103
0
  kev_msg.dv[1].data_length = 0;
5104
5105
0
  result = dlil_event_internal(ifp, &kev_msg, TRUE);
5106
5107
0
  return result;
5108
0
}
5109
5110
static void
5111
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
5112
0
{
5113
0
  mbuf_t  n = m;
5114
0
  int chainlen = 0;
5115
5116
0
  while (n != NULL) {
5117
0
    chainlen++;
5118
0
    n = n->m_next;
5119
0
  }
5120
0
  switch (chainlen) {
5121
0
  case 0:
5122
0
    break;
5123
0
  case 1:
5124
0
    atomic_add_64(&cls->cls_one, 1);
5125
0
    break;
5126
0
  case 2:
5127
0
    atomic_add_64(&cls->cls_two, 1);
5128
0
    break;
5129
0
  case 3:
5130
0
    atomic_add_64(&cls->cls_three, 1);
5131
0
    break;
5132
0
  case 4:
5133
0
    atomic_add_64(&cls->cls_four, 1);
5134
0
    break;
5135
0
  case 5:
5136
0
  default:
5137
0
    atomic_add_64(&cls->cls_five_or_more, 1);
5138
0
    break;
5139
0
  }
5140
0
}
5141
5142
/*
5143
 * dlil_output
5144
 *
5145
 * Caller should have a lock on the protocol domain if the protocol
5146
 * doesn't support finer grained locking. In most cases, the lock
5147
 * will be held from the socket layer and won't be released until
5148
 * we return back to the socket layer.
5149
 *
5150
 * This does mean that we must take a protocol lock before we take
5151
 * an interface lock if we're going to take both. This makes sense
5152
 * because a protocol is likely to interact with an ifp while it
5153
 * is under the protocol lock.
5154
 *
5155
 * An advisory code will be returned if adv is not null. This
5156
 * can be used to provide feedback about interface queues to the
5157
 * application.
5158
 */
5159
errno_t
5160
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
5161
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
5162
19.4k
{
5163
19.4k
  char *frame_type = NULL;
5164
19.4k
  char *dst_linkaddr = NULL;
5165
19.4k
  int retval = 0;
5166
19.4k
  char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
5167
19.4k
  char dst_linkaddr_buffer[MAX_LINKADDR * 4];
5168
19.4k
  struct if_proto *proto = NULL;
5169
19.4k
  mbuf_t  m = NULL;
5170
19.4k
  mbuf_t  send_head = NULL;
5171
19.4k
  mbuf_t  *send_tail = &send_head;
5172
19.4k
  int iorefcnt = 0;
5173
19.4k
  u_int32_t pre = 0, post = 0;
5174
19.4k
  u_int32_t fpkts = 0, fbytes = 0;
5175
19.4k
  int32_t flen = 0;
5176
19.4k
  struct timespec now;
5177
19.4k
  u_int64_t now_nsec;
5178
19.4k
  boolean_t did_clat46 = FALSE;
5179
19.4k
  protocol_family_t old_proto_family = proto_family;
5180
19.4k
  struct sockaddr_in6 dest6;
5181
19.4k
  struct rtentry *rt = NULL;
5182
19.4k
  u_int32_t m_loop_set = 0;
5183
5184
19.4k
  KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
5185
5186
  /*
5187
   * Get an io refcnt if the interface is attached to prevent ifnet_detach
5188
   * from happening while this operation is in progress
5189
   */
5190
19.4k
  if (!ifnet_datamov_begin(ifp)) {
5191
0
    retval = ENXIO;
5192
0
    goto cleanup;
5193
0
  }
5194
19.4k
  iorefcnt = 1;
5195
5196
19.4k
  VERIFY(ifp->if_output_dlil != NULL);
5197
5198
  /* update the driver's multicast filter, if needed */
5199
19.4k
  if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
5200
0
    ifp->if_updatemcasts = 0;
5201
0
  }
5202
5203
19.4k
  frame_type = frame_type_buffer;
5204
19.4k
  dst_linkaddr = dst_linkaddr_buffer;
5205
5206
19.4k
  if (raw == 0) {
5207
19.4k
    ifnet_lock_shared(ifp);
5208
    /* callee holds a proto refcnt upon success */
5209
19.4k
    proto = find_attached_proto(ifp, proto_family);
5210
19.4k
    if (proto == NULL) {
5211
11
      ifnet_lock_done(ifp);
5212
11
      retval = ENXIO;
5213
11
      goto cleanup;
5214
11
    }
5215
19.4k
    ifnet_lock_done(ifp);
5216
19.4k
  }
5217
5218
19.4k
preout_again:
5219
19.4k
  if (packetlist == NULL) {
5220
0
    goto cleanup;
5221
0
  }
5222
5223
19.4k
  m = packetlist;
5224
19.4k
  packetlist = packetlist->m_nextpkt;
5225
19.4k
  m->m_nextpkt = NULL;
5226
5227
  /*
5228
   * Perform address family translation for the first
5229
   * packet outside the loop in order to perform address
5230
   * lookup for the translated proto family.
5231
   */
5232
19.4k
  if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5233
0
      (ifp->if_type == IFT_CELLULAR ||
5234
0
      dlil_is_clat_needed(proto_family, m))) {
5235
0
    retval = dlil_clat46(ifp, &proto_family, &m);
5236
    /*
5237
     * Go to the next packet if translation fails
5238
     */
5239
0
    if (retval != 0) {
5240
0
      m_freem(m);
5241
0
      m = NULL;
5242
0
      ip6stat.ip6s_clat464_out_drop++;
5243
      /* Make sure that the proto family is PF_INET */
5244
0
      ASSERT(proto_family == PF_INET);
5245
0
      goto preout_again;
5246
0
    }
5247
    /*
5248
     * Free the old one and make it point to the IPv6 proto structure.
5249
     *
5250
     * Change proto for the first time we have successfully
5251
     * performed address family translation.
5252
     */
5253
0
    if (!did_clat46 && proto_family == PF_INET6) {
5254
0
      did_clat46 = TRUE;
5255
5256
0
      if (proto != NULL) {
5257
0
        if_proto_free(proto);
5258
0
      }
5259
0
      ifnet_lock_shared(ifp);
5260
      /* callee holds a proto refcnt upon success */
5261
0
      proto = find_attached_proto(ifp, proto_family);
5262
0
      if (proto == NULL) {
5263
0
        ifnet_lock_done(ifp);
5264
0
        retval = ENXIO;
5265
0
        m_freem(m);
5266
0
        m = NULL;
5267
0
        goto cleanup;
5268
0
      }
5269
0
      ifnet_lock_done(ifp);
5270
0
      if (ifp->if_type == IFT_ETHER) {
5271
        /* Update the dest to translated v6 address */
5272
0
        dest6.sin6_len = sizeof(struct sockaddr_in6);
5273
0
        dest6.sin6_family = AF_INET6;
5274
0
        dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
5275
0
        dest = (const struct sockaddr *)&dest6;
5276
5277
        /*
5278
         * Lookup route to the translated destination
5279
         * Free this route ref during cleanup
5280
         */
5281
0
        rt = rtalloc1_scoped((struct sockaddr *)&dest6,
5282
0
            0, 0, ifp->if_index);
5283
5284
0
        route = rt;
5285
0
      }
5286
0
    }
5287
0
  }
5288
5289
  /*
5290
   * This path gets packet chain going to the same destination.
5291
   * The pre output routine is used to either trigger resolution of
5292
   * the next hop or retreive the next hop's link layer addressing.
5293
   * For ex: ether_inet(6)_pre_output routine.
5294
   *
5295
   * If the routine returns EJUSTRETURN, it implies that packet has
5296
   * been queued, and therefore we have to call preout_again for the
5297
   * following packet in the chain.
5298
   *
5299
   * For errors other than EJUSTRETURN, the current packet is freed
5300
   * and the rest of the chain (pointed by packetlist is freed as
5301
   * part of clean up.
5302
   *
5303
   * Else if there is no error the retrieved information is used for
5304
   * all the packets in the chain.
5305
   */
5306
19.4k
  if (raw == 0) {
5307
19.4k
    proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
5308
19.4k
        proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
5309
19.4k
    retval = 0;
5310
19.4k
    if (preoutp != NULL) {
5311
19.4k
      retval = preoutp(ifp, proto_family, &m, dest, route,
5312
19.4k
          frame_type, dst_linkaddr);
5313
5314
19.4k
      if (retval != 0) {
5315
0
        if (retval == EJUSTRETURN) {
5316
0
          goto preout_again;
5317
0
        }
5318
0
        m_freem(m);
5319
0
        m = NULL;
5320
0
        goto cleanup;
5321
0
      }
5322
19.4k
    }
5323
19.4k
  }
5324
5325
19.4k
  do {
5326
    /*
5327
     * pkt_hdr is set here to point to m_data prior to
5328
     * calling into the framer. This value of pkt_hdr is
5329
     * used by the netif gso logic to retrieve the ip header
5330
     * for the TCP packets, offloaded for TSO processing.
5331
     */
5332
19.4k
    if ((raw != 0) && (ifp->if_family == IFNET_FAMILY_ETHERNET)) {
5333
0
      uint8_t vlan_encap_len = 0;
5334
5335
0
      if ((m->m_pkthdr.csum_flags & CSUM_VLAN_ENCAP_PRESENT) != 0) {
5336
0
        vlan_encap_len = ETHER_VLAN_ENCAP_LEN;
5337
0
      }
5338
0
      m->m_pkthdr.pkt_hdr = mtod(m, char *) + ETHER_HDR_LEN + vlan_encap_len;
5339
19.4k
    } else {
5340
19.4k
      m->m_pkthdr.pkt_hdr = mtod(m, void *);
5341
19.4k
    }
5342
5343
    /*
5344
     * Perform address family translation if needed.
5345
     * For now we only support stateless 4 to 6 translation
5346
     * on the out path.
5347
     *
5348
     * The routine below translates IP header, updates protocol
5349
     * checksum and also translates ICMP.
5350
     *
5351
     * We skip the first packet as it is already translated and
5352
     * the proto family is set to PF_INET6.
5353
     */
5354
19.4k
    if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5355
0
        (ifp->if_type == IFT_CELLULAR ||
5356
0
        dlil_is_clat_needed(proto_family, m))) {
5357
0
      retval = dlil_clat46(ifp, &proto_family, &m);
5358
      /* Goto the next packet if the translation fails */
5359
0
      if (retval != 0) {
5360
0
        m_freem(m);
5361
0
        m = NULL;
5362
0
        ip6stat.ip6s_clat464_out_drop++;
5363
0
        goto next;
5364
0
      }
5365
0
    }
5366
5367
#if CONFIG_DTRACE
5368
    if (!raw && proto_family == PF_INET) {
5369
      struct ip *ip = mtod(m, struct ip *);
5370
      DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5371
          struct ip *, ip, struct ifnet *, ifp,
5372
          struct ip *, ip, struct ip6_hdr *, NULL);
5373
    } else if (!raw && proto_family == PF_INET6) {
5374
      struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
5375
      DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5376
          struct ip6_hdr *, ip6, struct ifnet *, ifp,
5377
          struct ip *, NULL, struct ip6_hdr *, ip6);
5378
    }
5379
#endif /* CONFIG_DTRACE */
5380
5381
19.4k
    if (raw == 0 && ifp->if_framer != NULL) {
5382
19.4k
      int rcvif_set = 0;
5383
5384
      /*
5385
       * If this is a broadcast packet that needs to be
5386
       * looped back into the system, set the inbound ifp
5387
       * to that of the outbound ifp.  This will allow
5388
       * us to determine that it is a legitimate packet
5389
       * for the system.  Only set the ifp if it's not
5390
       * already set, just to be safe.
5391
       */
5392
19.4k
      if ((m->m_flags & (M_BCAST | M_LOOP)) &&
5393
19.4k
          m->m_pkthdr.rcvif == NULL) {
5394
1.17k
        m->m_pkthdr.rcvif = ifp;
5395
1.17k
        rcvif_set = 1;
5396
1.17k
      }
5397
19.4k
      m_loop_set = m->m_flags & M_LOOP;
5398
19.4k
      retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
5399
19.4k
          frame_type, &pre, &post);
5400
19.4k
      if (retval != 0) {
5401
0
        if (retval != EJUSTRETURN) {
5402
0
          m_freem(m);
5403
0
        }
5404
0
        goto next;
5405
0
      }
5406
5407
      /*
5408
       * For partial checksum offload, adjust the start
5409
       * and stuff offsets based on the prepended header.
5410
       */
5411
19.4k
      if ((m->m_pkthdr.csum_flags &
5412
19.4k
          (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5413
19.4k
          (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5414
0
        m->m_pkthdr.csum_tx_stuff += pre;
5415
0
        m->m_pkthdr.csum_tx_start += pre;
5416
0
      }
5417
5418
19.4k
      if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
5419
0
        dlil_output_cksum_dbg(ifp, m, pre,
5420
0
            proto_family);
5421
0
      }
5422
5423
      /*
5424
       * Clear the ifp if it was set above, and to be
5425
       * safe, only if it is still the same as the
5426
       * outbound ifp we have in context.  If it was
5427
       * looped back, then a copy of it was sent to the
5428
       * loopback interface with the rcvif set, and we
5429
       * are clearing the one that will go down to the
5430
       * layer below.
5431
       */
5432
19.4k
      if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
5433
1.17k
        m->m_pkthdr.rcvif = NULL;
5434
1.17k
      }
5435
19.4k
    }
5436
5437
    /*
5438
     * Let interface filters (if any) do their thing ...
5439
     */
5440
19.4k
    retval = dlil_interface_filters_output(ifp, &m, proto_family);
5441
19.4k
    if (retval != 0) {
5442
0
      if (retval != EJUSTRETURN) {
5443
0
        m_freem(m);
5444
0
      }
5445
0
      goto next;
5446
0
    }
5447
    /*
5448
     * Strip away M_PROTO1 bit prior to sending packet
5449
     * to the driver as this field may be used by the driver
5450
     */
5451
19.4k
    m->m_flags &= ~M_PROTO1;
5452
5453
    /*
5454
     * If the underlying interface is not capable of handling a
5455
     * packet whose data portion spans across physically disjoint
5456
     * pages, we need to "normalize" the packet so that we pass
5457
     * down a chain of mbufs where each mbuf points to a span that
5458
     * resides in the system page boundary.  If the packet does
5459
     * not cross page(s), the following is a no-op.
5460
     */
5461
19.4k
    if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
5462
0
      if ((m = m_normalize(m)) == NULL) {
5463
0
        goto next;
5464
0
      }
5465
0
    }
5466
5467
    /*
5468
     * If this is a TSO packet, make sure the interface still
5469
     * advertise TSO capability.
5470
     */
5471
19.4k
    if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
5472
0
      retval = EMSGSIZE;
5473
0
      m_freem(m);
5474
0
      goto cleanup;
5475
0
    }
5476
5477
19.4k
    ifp_inc_traffic_class_out(ifp, m);
5478
5479
19.4k
    pktap_output(ifp, proto_family, m, pre, post);
5480
5481
    /*
5482
     * Count the number of elements in the mbuf chain
5483
     */
5484
19.4k
    if (tx_chain_len_count) {
5485
0
      dlil_count_chain_len(m, &tx_chain_len_stats);
5486
0
    }
5487
5488
    /*
5489
     * Record timestamp; ifnet_enqueue() will use this info
5490
     * rather than redoing the work.  An optimization could
5491
     * involve doing this just once at the top, if there are
5492
     * no interface filters attached, but that's probably
5493
     * not a big deal.
5494
     */
5495
19.4k
    nanouptime(&now);
5496
19.4k
    net_timernsec(&now, &now_nsec);
5497
19.4k
    (void) mbuf_set_timestamp(m, now_nsec, TRUE);
5498
5499
    /*
5500
     * Discard partial sum information if this packet originated
5501
     * from another interface; the packet would already have the
5502
     * final checksum and we shouldn't recompute it.
5503
     */
5504
19.4k
    if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
5505
0
        (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5506
0
        (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5507
0
      m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5508
0
      m->m_pkthdr.csum_data = 0;
5509
0
    }
5510
5511
    /*
5512
     * Finally, call the driver.
5513
     */
5514
19.4k
    if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
5515
19.4k
      if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5516
0
        flen += (m_pktlen(m) - (pre + post));
5517
0
        m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5518
0
      }
5519
19.4k
      *send_tail = m;
5520
19.4k
      send_tail = &m->m_nextpkt;
5521
19.4k
    } else {
5522
0
      if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5523
0
        flen = (m_pktlen(m) - (pre + post));
5524
0
        m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5525
0
      } else {
5526
0
        flen = 0;
5527
0
      }
5528
0
      KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5529
0
          0, 0, 0, 0, 0);
5530
0
      retval = (*ifp->if_output_dlil)(ifp, m);
5531
0
      if (retval == EQFULL || retval == EQSUSPENDED) {
5532
0
        if (adv != NULL && adv->code == FADV_SUCCESS) {
5533
0
          adv->code = (retval == EQFULL ?
5534
0
              FADV_FLOW_CONTROLLED :
5535
0
              FADV_SUSPENDED);
5536
0
        }
5537
0
        retval = 0;
5538
0
      }
5539
0
      if (retval == 0 && flen > 0) {
5540
0
        fbytes += flen;
5541
0
        fpkts++;
5542
0
      }
5543
0
      if (retval != 0 && dlil_verbose) {
5544
0
        DLIL_PRINTF("%s: output error on %s retval = %d\n",
5545
0
            __func__, if_name(ifp),
5546
0
            retval);
5547
0
      }
5548
0
      KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
5549
0
          0, 0, 0, 0, 0);
5550
0
    }
5551
19.4k
    KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5552
5553
19.4k
next:
5554
19.4k
    m = packetlist;
5555
19.4k
    if (m != NULL) {
5556
0
      m->m_flags |= m_loop_set;
5557
0
      packetlist = packetlist->m_nextpkt;
5558
0
      m->m_nextpkt = NULL;
5559
0
    }
5560
    /* Reset the proto family to old proto family for CLAT */
5561
19.4k
    if (did_clat46) {
5562
0
      proto_family = old_proto_family;
5563
0
    }
5564
19.4k
  } while (m != NULL);
5565
5566
19.4k
  if (send_head != NULL) {
5567
19.4k
    KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5568
19.4k
        0, 0, 0, 0, 0);
5569
19.4k
    if (ifp->if_eflags & IFEF_SENDLIST) {
5570
19.4k
      retval = (*ifp->if_output_dlil)(ifp, send_head);
5571
19.4k
      if (retval == EQFULL || retval == EQSUSPENDED) {
5572
0
        if (adv != NULL) {
5573
0
          adv->code = (retval == EQFULL ?
5574
0
              FADV_FLOW_CONTROLLED :
5575
0
              FADV_SUSPENDED);
5576
0
        }
5577
0
        retval = 0;
5578
0
      }
5579
19.4k
      if (retval == 0 && flen > 0) {
5580
0
        fbytes += flen;
5581
0
        fpkts++;
5582
0
      }
5583
19.4k
      if (retval != 0 && dlil_verbose) {
5584
0
        DLIL_PRINTF("%s: output error on %s retval = %d\n",
5585
0
            __func__, if_name(ifp), retval);
5586
0
      }
5587
19.4k
    } else {
5588
0
      struct mbuf *send_m;
5589
0
      int enq_cnt = 0;
5590
0
      VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
5591
0
      while (send_head != NULL) {
5592
0
        send_m = send_head;
5593
0
        send_head = send_m->m_nextpkt;
5594
0
        send_m->m_nextpkt = NULL;
5595
0
        retval = (*ifp->if_output_dlil)(ifp, send_m);
5596
0
        if (retval == EQFULL || retval == EQSUSPENDED) {
5597
0
          if (adv != NULL) {
5598
0
            adv->code = (retval == EQFULL ?
5599
0
                FADV_FLOW_CONTROLLED :
5600
0
                FADV_SUSPENDED);
5601
0
          }
5602
0
          retval = 0;
5603
0
        }
5604
0
        if (retval == 0) {
5605
0
          enq_cnt++;
5606
0
          if (flen > 0) {
5607
0
            fpkts++;
5608
0
          }
5609
0
        }
5610
0
        if (retval != 0 && dlil_verbose) {
5611
0
          DLIL_PRINTF("%s: output error on %s "
5612
0
              "retval = %d\n",
5613
0
              __func__, if_name(ifp), retval);
5614
0
        }
5615
0
      }
5616
0
      if (enq_cnt > 0) {
5617
0
        fbytes += flen;
5618
0
        ifnet_start(ifp);
5619
0
      }
5620
0
    }
5621
19.4k
    KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5622
19.4k
  }
5623
5624
19.4k
  KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5625
5626
19.4k
cleanup:
5627
19.4k
  if (fbytes > 0) {
5628
0
    ifp->if_fbytes += fbytes;
5629
0
  }
5630
19.4k
  if (fpkts > 0) {
5631
0
    ifp->if_fpackets += fpkts;
5632
0
  }
5633
19.4k
  if (proto != NULL) {
5634
19.4k
    if_proto_free(proto);
5635
19.4k
  }
5636
19.4k
  if (packetlist) { /* if any packets are left, clean up */
5637
11
    mbuf_freem_list(packetlist);
5638
11
  }
5639
19.4k
  if (retval == EJUSTRETURN) {
5640
0
    retval = 0;
5641
0
  }
5642
19.4k
  if (iorefcnt == 1) {
5643
19.4k
    ifnet_datamov_end(ifp);
5644
19.4k
  }
5645
19.4k
  if (rt != NULL) {
5646
0
    rtfree(rt);
5647
0
    rt = NULL;
5648
0
  }
5649
5650
19.4k
  return retval;
5651
19.4k
}
5652
5653
/*
5654
 * This routine checks if the destination address is not a loopback, link-local,
5655
 * multicast or broadcast address.
5656
 */
5657
static int
5658
dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
5659
0
{
5660
0
  int ret = 0;
5661
0
  switch (proto_family) {
5662
0
  case PF_INET: {
5663
0
    struct ip *iph = mtod(m, struct ip *);
5664
0
    if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
5665
0
      ret = 1;
5666
0
    }
5667
0
    break;
5668
0
  }
5669
0
  case PF_INET6: {
5670
0
    struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
5671
0
    if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
5672
0
        CLAT64_NEEDED(&ip6h->ip6_dst)) {
5673
0
      ret = 1;
5674
0
    }
5675
0
    break;
5676
0
  }
5677
0
  }
5678
5679
0
  return ret;
5680
0
}
5681
/*
5682
 * @brief This routine translates IPv4 packet to IPv6 packet,
5683
 *     updates protocol checksum and also translates ICMP for code
5684
 *     along with inner header translation.
5685
 *
5686
 * @param ifp Pointer to the interface
5687
 * @param proto_family pointer to protocol family. It is updated if function
5688
 *     performs the translation successfully.
5689
 * @param m Pointer to the pointer pointing to the packet. Needed because this
5690
 *     routine can end up changing the mbuf to a different one.
5691
 *
5692
 * @return 0 on success or else a negative value.
5693
 */
5694
static errno_t
5695
dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5696
0
{
5697
0
  VERIFY(*proto_family == PF_INET);
5698
0
  VERIFY(IS_INTF_CLAT46(ifp));
5699
5700
0
  pbuf_t pbuf_store, *pbuf = NULL;
5701
0
  struct ip *iph = NULL;
5702
0
  struct in_addr osrc, odst;
5703
0
  uint8_t proto = 0;
5704
0
  struct in6_ifaddr *ia6_clat_src = NULL;
5705
0
  struct in6_addr *src = NULL;
5706
0
  struct in6_addr dst;
5707
0
  int error = 0;
5708
0
  uint16_t off = 0;
5709
0
  uint16_t tot_len = 0;
5710
0
  uint16_t ip_id_val = 0;
5711
0
  uint16_t ip_frag_off = 0;
5712
5713
0
  boolean_t is_frag = FALSE;
5714
0
  boolean_t is_first_frag = TRUE;
5715
0
  boolean_t is_last_frag = TRUE;
5716
5717
0
  pbuf_init_mbuf(&pbuf_store, *m, ifp);
5718
0
  pbuf = &pbuf_store;
5719
0
  iph = pbuf->pb_data;
5720
5721
0
  osrc = iph->ip_src;
5722
0
  odst = iph->ip_dst;
5723
0
  proto = iph->ip_p;
5724
0
  off = (uint16_t)(iph->ip_hl << 2);
5725
0
  ip_id_val = iph->ip_id;
5726
0
  ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
5727
5728
0
  tot_len = ntohs(iph->ip_len);
5729
5730
  /*
5731
   * For packets that are not first frags
5732
   * we only need to adjust CSUM.
5733
   * For 4 to 6, Fragmentation header gets appended
5734
   * after proto translation.
5735
   */
5736
0
  if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
5737
0
    is_frag = TRUE;
5738
5739
    /* If the offset is not zero, it is not first frag */
5740
0
    if (ip_frag_off != 0) {
5741
0
      is_first_frag = FALSE;
5742
0
    }
5743
5744
    /* If IP_MF is set, then it is not last frag */
5745
0
    if (ntohs(iph->ip_off) & IP_MF) {
5746
0
      is_last_frag = FALSE;
5747
0
    }
5748
0
  }
5749
5750
  /*
5751
   * Retrive the local IPv6 CLAT46 address reserved for stateless
5752
   * translation.
5753
   */
5754
0
  ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5755
0
  if (ia6_clat_src == NULL) {
5756
0
    ip6stat.ip6s_clat464_out_nov6addr_drop++;
5757
0
    error = -1;
5758
0
    goto cleanup;
5759
0
  }
5760
5761
0
  src = &ia6_clat_src->ia_addr.sin6_addr;
5762
5763
  /*
5764
   * Translate IPv4 destination to IPv6 destination by using the
5765
   * prefixes learned through prior PLAT discovery.
5766
   */
5767
0
  if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
5768
0
    ip6stat.ip6s_clat464_out_v6synthfail_drop++;
5769
0
    goto cleanup;
5770
0
  }
5771
5772
  /* Translate the IP header part first */
5773
0
  error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
5774
0
      iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
5775
5776
0
  iph = NULL;     /* Invalidate iph as pbuf has been modified */
5777
5778
0
  if (error != 0) {
5779
0
    ip6stat.ip6s_clat464_out_46transfail_drop++;
5780
0
    goto cleanup;
5781
0
  }
5782
5783
  /*
5784
   * Translate protocol header, update checksum, checksum flags
5785
   * and related fields.
5786
   */
5787
0
  error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
5788
0
      proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
5789
5790
0
  if (error != 0) {
5791
0
    ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
5792
0
    goto cleanup;
5793
0
  }
5794
5795
  /* Now insert the IPv6 fragment header */
5796
0
  if (is_frag) {
5797
0
    error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
5798
5799
0
    if (error != 0) {
5800
0
      ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
5801
0
      goto cleanup;
5802
0
    }
5803
0
  }
5804
5805
0
cleanup:
5806
0
  if (ia6_clat_src != NULL) {
5807
0
    IFA_REMREF(&ia6_clat_src->ia_ifa);
5808
0
  }
5809
5810
0
  if (pbuf_is_valid(pbuf)) {
5811
0
    *m = pbuf->pb_mbuf;
5812
0
    pbuf->pb_mbuf = NULL;
5813
0
    pbuf_destroy(pbuf);
5814
0
  } else {
5815
0
    error = -1;
5816
0
    ip6stat.ip6s_clat464_out_invalpbuf_drop++;
5817
0
  }
5818
5819
0
  if (error == 0) {
5820
0
    *proto_family = PF_INET6;
5821
0
    ip6stat.ip6s_clat464_out_success++;
5822
0
  }
5823
5824
0
  return error;
5825
0
}
5826
5827
/*
5828
 * @brief This routine translates incoming IPv6 to IPv4 packet,
5829
 *     updates protocol checksum and also translates ICMPv6 outer
5830
 *     and inner headers
5831
 *
5832
 * @return 0 on success or else a negative value.
5833
 */
5834
static errno_t
5835
dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5836
0
{
5837
0
  VERIFY(*proto_family == PF_INET6);
5838
0
  VERIFY(IS_INTF_CLAT46(ifp));
5839
5840
0
  struct ip6_hdr *ip6h = NULL;
5841
0
  struct in6_addr osrc, odst;
5842
0
  uint8_t proto = 0;
5843
0
  struct in6_ifaddr *ia6_clat_dst = NULL;
5844
0
  struct in_ifaddr *ia4_clat_dst = NULL;
5845
0
  struct in_addr *dst = NULL;
5846
0
  struct in_addr src;
5847
0
  int error = 0;
5848
0
  uint32_t off = 0;
5849
0
  u_int64_t tot_len = 0;
5850
0
  uint8_t tos = 0;
5851
0
  boolean_t is_first_frag = TRUE;
5852
5853
  /* Incoming mbuf does not contain valid IP6 header */
5854
0
  if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
5855
0
      ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
5856
0
      (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
5857
0
    ip6stat.ip6s_clat464_in_tooshort_drop++;
5858
0
    return -1;
5859
0
  }
5860
5861
0
  ip6h = mtod(*m, struct ip6_hdr *);
5862
  /* Validate that mbuf contains IP payload equal to ip6_plen  */
5863
0
  if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
5864
0
    ip6stat.ip6s_clat464_in_tooshort_drop++;
5865
0
    return -1;
5866
0
  }
5867
5868
0
  osrc = ip6h->ip6_src;
5869
0
  odst = ip6h->ip6_dst;
5870
5871
  /*
5872
   * Retrieve the local CLAT46 reserved IPv6 address.
5873
   * Let the packet pass if we don't find one, as the flag
5874
   * may get set before IPv6 configuration has taken place.
5875
   */
5876
0
  ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5877
0
  if (ia6_clat_dst == NULL) {
5878
0
    goto done;
5879
0
  }
5880
5881
  /*
5882
   * Check if the original dest in the packet is same as the reserved
5883
   * CLAT46 IPv6 address
5884
   */
5885
0
  if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
5886
0
    pbuf_t pbuf_store, *pbuf = NULL;
5887
0
    pbuf_init_mbuf(&pbuf_store, *m, ifp);
5888
0
    pbuf = &pbuf_store;
5889
5890
    /*
5891
     * Retrive the local CLAT46 IPv4 address reserved for stateless
5892
     * translation.
5893
     */
5894
0
    ia4_clat_dst = inifa_ifpclatv4(ifp);
5895
0
    if (ia4_clat_dst == NULL) {
5896
0
      IFA_REMREF(&ia6_clat_dst->ia_ifa);
5897
0
      ip6stat.ip6s_clat464_in_nov4addr_drop++;
5898
0
      error = -1;
5899
0
      goto cleanup;
5900
0
    }
5901
0
    IFA_REMREF(&ia6_clat_dst->ia_ifa);
5902
5903
    /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5904
0
    dst = &ia4_clat_dst->ia_addr.sin_addr;
5905
0
    if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
5906
0
      ip6stat.ip6s_clat464_in_v4synthfail_drop++;
5907
0
      error = -1;
5908
0
      goto cleanup;
5909
0
    }
5910
5911
0
    ip6h = pbuf->pb_data;
5912
0
    off = sizeof(struct ip6_hdr);
5913
0
    proto = ip6h->ip6_nxt;
5914
0
    tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
5915
0
    tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
5916
5917
    /*
5918
     * Translate the IP header and update the fragmentation
5919
     * header if needed
5920
     */
5921
0
    error = (nat464_translate_64(pbuf, off, tos, &proto,
5922
0
        ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
5923
0
        0 : -1;
5924
5925
0
    ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
5926
5927
0
    if (error != 0) {
5928
0
      ip6stat.ip6s_clat464_in_64transfail_drop++;
5929
0
      goto cleanup;
5930
0
    }
5931
5932
    /*
5933
     * Translate protocol header, update checksum, checksum flags
5934
     * and related fields.
5935
     */
5936
0
    error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
5937
0
        (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
5938
0
        NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
5939
5940
0
    if (error != 0) {
5941
0
      ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
5942
0
      goto cleanup;
5943
0
    }
5944
5945
0
cleanup:
5946
0
    if (ia4_clat_dst != NULL) {
5947
0
      IFA_REMREF(&ia4_clat_dst->ia_ifa);
5948
0
    }
5949
5950
0
    if (pbuf_is_valid(pbuf)) {
5951
0
      *m = pbuf->pb_mbuf;
5952
0
      pbuf->pb_mbuf = NULL;
5953
0
      pbuf_destroy(pbuf);
5954
0
    } else {
5955
0
      error = -1;
5956
0
      ip6stat.ip6s_clat464_in_invalpbuf_drop++;
5957
0
    }
5958
5959
0
    if (error == 0) {
5960
0
      *proto_family = PF_INET;
5961
0
      ip6stat.ip6s_clat464_in_success++;
5962
0
    }
5963
0
  } /* CLAT traffic */
5964
5965
0
done:
5966
0
  return error;
5967
0
}
5968
5969
errno_t
5970
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
5971
    void *ioctl_arg)
5972
45.7k
{
5973
45.7k
  struct ifnet_filter *filter;
5974
45.7k
  int retval = EOPNOTSUPP;
5975
45.7k
  int result = 0;
5976
5977
45.7k
  if (ifp == NULL || ioctl_code == 0) {
5978
0
    return EINVAL;
5979
0
  }
5980
5981
  /* Get an io ref count if the interface is attached */
5982
45.7k
  if (!ifnet_is_attached(ifp, 1)) {
5983
0
    return EOPNOTSUPP;
5984
0
  }
5985
5986
  /*
5987
   * Run the interface filters first.
5988
   * We want to run all filters before calling the protocol,
5989
   * interface family, or interface.
5990
   */
5991
45.7k
  lck_mtx_lock_spin(&ifp->if_flt_lock);
5992
  /* prevent filter list from changing in case we drop the lock */
5993
45.7k
  if_flt_monitor_busy(ifp);
5994
45.7k
  TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5995
0
    if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
5996
0
        filter->filt_protocol == proto_fam)) {
5997
0
      lck_mtx_unlock(&ifp->if_flt_lock);
5998
5999
0
      result = filter->filt_ioctl(filter->filt_cookie, ifp,
6000
0
          proto_fam, ioctl_code, ioctl_arg);
6001
6002
0
      lck_mtx_lock_spin(&ifp->if_flt_lock);
6003
6004
      /* Only update retval if no one has handled the ioctl */
6005
0
      if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
6006
0
        if (result == ENOTSUP) {
6007
0
          result = EOPNOTSUPP;
6008
0
        }
6009
0
        retval = result;
6010
0
        if (retval != 0 && retval != EOPNOTSUPP) {
6011
          /* we're done with the filter list */
6012
0
          if_flt_monitor_unbusy(ifp);
6013
0
          lck_mtx_unlock(&ifp->if_flt_lock);
6014
0
          goto cleanup;
6015
0
        }
6016
0
      }
6017
0
    }
6018
0
  }
6019
  /* we're done with the filter list */
6020
45.7k
  if_flt_monitor_unbusy(ifp);
6021
45.7k
  lck_mtx_unlock(&ifp->if_flt_lock);
6022
6023
  /* Allow the protocol to handle the ioctl */
6024
45.7k
  if (proto_fam != 0) {
6025
45.7k
    struct if_proto *proto;
6026
6027
    /* callee holds a proto refcnt upon success */
6028
45.7k
    ifnet_lock_shared(ifp);
6029
45.7k
    proto = find_attached_proto(ifp, proto_fam);
6030
45.7k
    ifnet_lock_done(ifp);
6031
45.7k
    if (proto != NULL) {
6032
32.9k
      proto_media_ioctl ioctlp =
6033
32.9k
          (proto->proto_kpi == kProtoKPI_v1 ?
6034
32.9k
          proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
6035
32.9k
      result = EOPNOTSUPP;
6036
32.9k
      if (ioctlp != NULL) {
6037
0
        result = ioctlp(ifp, proto_fam, ioctl_code,
6038
0
            ioctl_arg);
6039
0
      }
6040
32.9k
      if_proto_free(proto);
6041
6042
      /* Only update retval if no one has handled the ioctl */
6043
32.9k
      if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
6044
32.9k
        if (result == ENOTSUP) {
6045
0
          result = EOPNOTSUPP;
6046
0
        }
6047
32.9k
        retval = result;
6048
32.9k
        if (retval && retval != EOPNOTSUPP) {
6049
0
          goto cleanup;
6050
0
        }
6051
32.9k
      }
6052
32.9k
    }
6053
45.7k
  }
6054
6055
  /* retval is either 0 or EOPNOTSUPP */
6056
6057
  /*
6058
   * Let the interface handle this ioctl.
6059
   * If it returns EOPNOTSUPP, ignore that, we may have
6060
   * already handled this in the protocol or family.
6061
   */
6062
45.7k
  if (ifp->if_ioctl) {
6063
45.7k
    result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
6064
45.7k
  }
6065
6066
  /* Only update retval if no one has handled the ioctl */
6067
45.7k
  if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
6068
45.7k
    if (result == ENOTSUP) {
6069
0
      result = EOPNOTSUPP;
6070
0
    }
6071
45.7k
    retval = result;
6072
45.7k
    if (retval && retval != EOPNOTSUPP) {
6073
1
      goto cleanup;
6074
1
    }
6075
45.7k
  }
6076
6077
45.7k
cleanup:
6078
45.7k
  if (retval == EJUSTRETURN) {
6079
0
    retval = 0;
6080
0
  }
6081
6082
45.7k
  ifnet_decr_iorefcnt(ifp);
6083
6084
45.7k
  return retval;
6085
45.7k
}
6086
6087
__private_extern__ errno_t
6088
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
6089
0
{
6090
0
  errno_t error = 0;
6091
6092
6093
0
  if (ifp->if_set_bpf_tap) {
6094
    /* Get an io reference on the interface if it is attached */
6095
0
    if (!ifnet_is_attached(ifp, 1)) {
6096
0
      return ENXIO;
6097
0
    }
6098
0
    error = ifp->if_set_bpf_tap(ifp, mode, callback);
6099
0
    ifnet_decr_iorefcnt(ifp);
6100
0
  }
6101
0
  return error;
6102
0
}
6103
6104
errno_t
6105
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
6106
    struct sockaddr *ll_addr, size_t ll_len)
6107
7
{
6108
7
  errno_t result = EOPNOTSUPP;
6109
7
  struct if_proto *proto;
6110
7
  const struct sockaddr *verify;
6111
7
  proto_media_resolve_multi resolvep;
6112
6113
7
  if (!ifnet_is_attached(ifp, 1)) {
6114
0
    return result;
6115
0
  }
6116
6117
7
  bzero(ll_addr, ll_len);
6118
6119
  /* Call the protocol first; callee holds a proto refcnt upon success */
6120
7
  ifnet_lock_shared(ifp);
6121
7
  proto = find_attached_proto(ifp, proto_addr->sa_family);
6122
7
  ifnet_lock_done(ifp);
6123
7
  if (proto != NULL) {
6124
2
    resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
6125
2
        proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
6126
2
    if (resolvep != NULL) {
6127
0
      result = resolvep(ifp, proto_addr,
6128
0
          (struct sockaddr_dl *)(void *)ll_addr, ll_len);
6129
0
    }
6130
2
    if_proto_free(proto);
6131
2
  }
6132
6133
  /* Let the interface verify the multicast address */
6134
7
  if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
6135
0
    if (result == 0) {
6136
0
      verify = ll_addr;
6137
0
    } else {
6138
0
      verify = proto_addr;
6139
0
    }
6140
0
    result = ifp->if_check_multi(ifp, verify);
6141
0
  }
6142
6143
7
  ifnet_decr_iorefcnt(ifp);
6144
7
  return result;
6145
7
}
6146
6147
__private_extern__ errno_t
6148
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
6149
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6150
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6151
0
{
6152
0
  struct if_proto *proto;
6153
0
  errno_t result = 0;
6154
6155
  /* callee holds a proto refcnt upon success */
6156
0
  ifnet_lock_shared(ifp);
6157
0
  proto = find_attached_proto(ifp, target_proto->sa_family);
6158
0
  ifnet_lock_done(ifp);
6159
0
  if (proto == NULL) {
6160
0
    result = ENOTSUP;
6161
0
  } else {
6162
0
    proto_media_send_arp    arpp;
6163
0
    arpp = (proto->proto_kpi == kProtoKPI_v1 ?
6164
0
        proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
6165
0
    if (arpp == NULL) {
6166
0
      result = ENOTSUP;
6167
0
    } else {
6168
0
      switch (arpop) {
6169
0
      case ARPOP_REQUEST:
6170
0
        arpstat.txrequests++;
6171
0
        if (target_hw != NULL) {
6172
0
          arpstat.txurequests++;
6173
0
        }
6174
0
        break;
6175
0
      case ARPOP_REPLY:
6176
0
        arpstat.txreplies++;
6177
0
        break;
6178
0
      }
6179
0
      result = arpp(ifp, arpop, sender_hw, sender_proto,
6180
0
          target_hw, target_proto);
6181
0
    }
6182
0
    if_proto_free(proto);
6183
0
  }
6184
6185
0
  return result;
6186
0
}
6187
6188
struct net_thread_marks { };
6189
static const struct net_thread_marks net_thread_marks_base = { };
6190
6191
__private_extern__ const net_thread_marks_t net_thread_marks_none =
6192
    &net_thread_marks_base;
6193
6194
__private_extern__ net_thread_marks_t
6195
net_thread_marks_push(u_int32_t push)
6196
3.95M
{
6197
3.95M
  static const char *const base = (const void*)&net_thread_marks_base;
6198
3.95M
  u_int32_t pop = 0;
6199
6200
3.95M
  if (push != 0) {
6201
3.95M
    struct uthread *uth = get_bsdthread_info(current_thread());
6202
6203
3.95M
    pop = push & ~uth->uu_network_marks;
6204
3.95M
    if (pop != 0) {
6205
3.95M
      uth->uu_network_marks |= pop;
6206
3.95M
    }
6207
3.95M
  }
6208
6209
3.95M
  return (net_thread_marks_t)&base[pop];
6210
3.95M
}
6211
6212
__private_extern__ net_thread_marks_t
6213
net_thread_unmarks_push(u_int32_t unpush)
6214
2
{
6215
2
  static const char *const base = (const void*)&net_thread_marks_base;
6216
2
  u_int32_t unpop = 0;
6217
6218
2
  if (unpush != 0) {
6219
2
    struct uthread *uth = get_bsdthread_info(current_thread());
6220
6221
2
    unpop = unpush & uth->uu_network_marks;
6222
2
    if (unpop != 0) {
6223
2
      uth->uu_network_marks &= ~unpop;
6224
2
    }
6225
2
  }
6226
6227
2
  return (net_thread_marks_t)&base[unpop];
6228
2
}
6229
6230
__private_extern__ void
6231
net_thread_marks_pop(net_thread_marks_t popx)
6232
3.95M
{
6233
3.95M
  static const char *const base = (const void*)&net_thread_marks_base;
6234
3.95M
  const ptrdiff_t pop = (const char *)popx - (const char *)base;
6235
6236
3.95M
  if (pop != 0) {
6237
3.95M
    static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6238
3.95M
    struct uthread *uth = get_bsdthread_info(current_thread());
6239
6240
3.95M
    VERIFY((pop & ones) == pop);
6241
3.95M
    VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
6242
0
    uth->uu_network_marks &= ~pop;
6243
3.95M
  }
6244
3.95M
}
6245
6246
__private_extern__ void
6247
net_thread_unmarks_pop(net_thread_marks_t unpopx)
6248
2
{
6249
2
  static const char *const base = (const void*)&net_thread_marks_base;
6250
2
  ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
6251
6252
2
  if (unpop != 0) {
6253
2
    static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6254
2
    struct uthread *uth = get_bsdthread_info(current_thread());
6255
6256
2
    VERIFY((unpop & ones) == unpop);
6257
2
    VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
6258
0
    uth->uu_network_marks |= unpop;
6259
2
  }
6260
2
}
6261
6262
__private_extern__ u_int32_t
6263
net_thread_is_marked(u_int32_t check)
6264
0
{
6265
0
  if (check != 0) {
6266
0
    struct uthread *uth = get_bsdthread_info(current_thread());
6267
0
    return uth->uu_network_marks & check;
6268
0
  } else {
6269
0
    return 0;
6270
0
  }
6271
0
}
6272
6273
__private_extern__ u_int32_t
6274
net_thread_is_unmarked(u_int32_t check)
6275
0
{
6276
0
  if (check != 0) {
6277
0
    struct uthread *uth = get_bsdthread_info(current_thread());
6278
0
    return ~uth->uu_network_marks & check;
6279
0
  } else {
6280
0
    return 0;
6281
0
  }
6282
0
}
6283
6284
static __inline__ int
6285
_is_announcement(const struct sockaddr_in * sender_sin,
6286
    const struct sockaddr_in * target_sin)
6287
0
{
6288
0
  if (target_sin == NULL || sender_sin == NULL) {
6289
0
    return FALSE;
6290
0
  }
6291
6292
0
  return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
6293
0
}
6294
6295
__private_extern__ errno_t
6296
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
6297
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
6298
    const struct sockaddr *target_proto0, u_int32_t rtflags)
6299
0
{
6300
0
  errno_t result = 0;
6301
0
  const struct sockaddr_in * sender_sin;
6302
0
  const struct sockaddr_in * target_sin;
6303
0
  struct sockaddr_inarp target_proto_sinarp;
6304
0
  struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
6305
6306
0
  if (target_proto == NULL || sender_proto == NULL) {
6307
0
    return EINVAL;
6308
0
  }
6309
6310
0
  if (sender_proto->sa_family != target_proto->sa_family) {
6311
0
    return EINVAL;
6312
0
  }
6313
6314
  /*
6315
   * If the target is a (default) router, provide that
6316
   * information to the send_arp callback routine.
6317
   */
6318
0
  if (rtflags & RTF_ROUTER) {
6319
0
    bcopy(target_proto, &target_proto_sinarp,
6320
0
        sizeof(struct sockaddr_in));
6321
0
    target_proto_sinarp.sin_other |= SIN_ROUTER;
6322
0
    target_proto = (struct sockaddr *)&target_proto_sinarp;
6323
0
  }
6324
6325
  /*
6326
   * If this is an ARP request and the target IP is IPv4LL,
6327
   * send the request on all interfaces.  The exception is
6328
   * an announcement, which must only appear on the specific
6329
   * interface.
6330
   */
6331
0
  sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
6332
0
  target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
6333
0
  if (target_proto->sa_family == AF_INET &&
6334
0
      IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
6335
0
      ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
6336
0
      !_is_announcement(sender_sin, target_sin)) {
6337
0
    ifnet_t         *ifp_list;
6338
0
    u_int32_t       count;
6339
0
    u_int32_t       ifp_on;
6340
6341
0
    result = ENOTSUP;
6342
6343
0
    if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
6344
0
      for (ifp_on = 0; ifp_on < count; ifp_on++) {
6345
0
        errno_t new_result;
6346
0
        ifaddr_t source_hw = NULL;
6347
0
        ifaddr_t source_ip = NULL;
6348
0
        struct sockaddr_in source_ip_copy;
6349
0
        struct ifnet *cur_ifp = ifp_list[ifp_on];
6350
6351
        /*
6352
         * Only arp on interfaces marked for IPv4LL
6353
         * ARPing.  This may mean that we don't ARP on
6354
         * the interface the subnet route points to.
6355
         */
6356
0
        if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
6357
0
          continue;
6358
0
        }
6359
6360
        /* Find the source IP address */
6361
0
        ifnet_lock_shared(cur_ifp);
6362
0
        source_hw = cur_ifp->if_lladdr;
6363
0
        TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
6364
0
            ifa_link) {
6365
0
          IFA_LOCK(source_ip);
6366
0
          if (source_ip->ifa_addr != NULL &&
6367
0
              source_ip->ifa_addr->sa_family ==
6368
0
              AF_INET) {
6369
            /* Copy the source IP address */
6370
0
            source_ip_copy =
6371
0
                *(struct sockaddr_in *)
6372
0
                (void *)source_ip->ifa_addr;
6373
0
            IFA_UNLOCK(source_ip);
6374
0
            break;
6375
0
          }
6376
0
          IFA_UNLOCK(source_ip);
6377
0
        }
6378
6379
        /* No IP Source, don't arp */
6380
0
        if (source_ip == NULL) {
6381
0
          ifnet_lock_done(cur_ifp);
6382
0
          continue;
6383
0
        }
6384
6385
0
        IFA_ADDREF(source_hw);
6386
0
        ifnet_lock_done(cur_ifp);
6387
6388
        /* Send the ARP */
6389
0
        new_result = dlil_send_arp_internal(cur_ifp,
6390
0
            arpop, (struct sockaddr_dl *)(void *)
6391
0
            source_hw->ifa_addr,
6392
0
            (struct sockaddr *)&source_ip_copy, NULL,
6393
0
            target_proto);
6394
6395
0
        IFA_REMREF(source_hw);
6396
0
        if (result == ENOTSUP) {
6397
0
          result = new_result;
6398
0
        }
6399
0
      }
6400
0
      ifnet_list_free(ifp_list);
6401
0
    }
6402
0
  } else {
6403
0
    result = dlil_send_arp_internal(ifp, arpop, sender_hw,
6404
0
        sender_proto, target_hw, target_proto);
6405
0
  }
6406
6407
0
  return result;
6408
0
}
6409
6410
/*
6411
 * Caller must hold ifnet head lock.
6412
 */
6413
static int
6414
ifnet_lookup(struct ifnet *ifp)
6415
5.13k
{
6416
5.13k
  struct ifnet *_ifp;
6417
6418
5.13k
  LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
6419
5.13k
  TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
6420
5.13k
    if (_ifp == ifp) {
6421
5.13k
      break;
6422
5.13k
    }
6423
5.13k
  }
6424
5.13k
  return _ifp != NULL;
6425
5.13k
}
6426
6427
/*
6428
 * Caller has to pass a non-zero refio argument to get a
6429
 * IO reference count. This will prevent ifnet_detach from
6430
 * being called when there are outstanding io reference counts.
6431
 */
6432
int
6433
ifnet_is_attached(struct ifnet *ifp, int refio)
6434
53.4k
{
6435
53.4k
  int ret;
6436
6437
53.4k
  lck_mtx_lock_spin(&ifp->if_ref_lock);
6438
53.4k
  if ((ret = IF_FULLY_ATTACHED(ifp))) {
6439
53.4k
    if (refio > 0) {
6440
52.4k
      ifp->if_refio++;
6441
52.4k
    }
6442
53.4k
  }
6443
53.4k
  lck_mtx_unlock(&ifp->if_ref_lock);
6444
6445
53.4k
  return ret;
6446
53.4k
}
6447
6448
void
6449
ifnet_incr_pending_thread_count(struct ifnet *ifp)
6450
3
{
6451
3
  lck_mtx_lock_spin(&ifp->if_ref_lock);
6452
3
  ifp->if_threads_pending++;
6453
3
  lck_mtx_unlock(&ifp->if_ref_lock);
6454
3
}
6455
6456
void
6457
ifnet_decr_pending_thread_count(struct ifnet *ifp)
6458
0
{
6459
0
  lck_mtx_lock_spin(&ifp->if_ref_lock);
6460
0
  VERIFY(ifp->if_threads_pending > 0);
6461
0
  ifp->if_threads_pending--;
6462
0
  if (ifp->if_threads_pending == 0) {
6463
0
    wakeup(&ifp->if_threads_pending);
6464
0
  }
6465
0
  lck_mtx_unlock(&ifp->if_ref_lock);
6466
0
}
6467
6468
/*
6469
 * Caller must ensure the interface is attached; the assumption is that
6470
 * there is at least an outstanding IO reference count held already.
6471
 * Most callers would call ifnet_is_{attached,data_ready}() instead.
6472
 */
6473
void
6474
ifnet_incr_iorefcnt(struct ifnet *ifp)
6475
0
{
6476
0
  lck_mtx_lock_spin(&ifp->if_ref_lock);
6477
0
  VERIFY(IF_FULLY_ATTACHED(ifp));
6478
0
  VERIFY(ifp->if_refio > 0);
6479
0
  ifp->if_refio++;
6480
0
  lck_mtx_unlock(&ifp->if_ref_lock);
6481
0
}
6482
6483
__attribute__((always_inline))
6484
static void
6485
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
6486
71.9k
{
6487
71.9k
  LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
6488
6489
71.9k
  VERIFY(ifp->if_refio > 0);
6490
71.9k
  VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6491
6492
0
  ifp->if_refio--;
6493
71.9k
  VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);
6494
6495
  /*
6496
   * if there are no more outstanding io references, wakeup the
6497
   * ifnet_detach thread if detaching flag is set.
6498
   */
6499
71.9k
  if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
6500
0
    wakeup(&(ifp->if_refio));
6501
0
  }
6502
71.9k
}
6503
6504
void
6505
ifnet_decr_iorefcnt(struct ifnet *ifp)
6506
52.4k
{
6507
52.4k
  lck_mtx_lock_spin(&ifp->if_ref_lock);
6508
52.4k
  ifnet_decr_iorefcnt_locked(ifp);
6509
52.4k
  lck_mtx_unlock(&ifp->if_ref_lock);
6510
52.4k
}
6511
6512
boolean_t
6513
ifnet_datamov_begin(struct ifnet *ifp)
6514
19.4k
{
6515
19.4k
  boolean_t ret;
6516
6517
19.4k
  lck_mtx_lock_spin(&ifp->if_ref_lock);
6518
19.4k
  if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
6519
19.4k
    ifp->if_refio++;
6520
19.4k
    ifp->if_datamov++;
6521
19.4k
  }
6522
19.4k
  lck_mtx_unlock(&ifp->if_ref_lock);
6523
6524
19.4k
  return ret;
6525
19.4k
}
6526
6527
void
6528
ifnet_datamov_end(struct ifnet *ifp)
6529
19.4k
{
6530
19.4k
  lck_mtx_lock_spin(&ifp->if_ref_lock);
6531
19.4k
  VERIFY(ifp->if_datamov > 0);
6532
  /*
6533
   * if there's no more thread moving data, wakeup any
6534
   * drainers that's blocked waiting for this.
6535
   */
6536
19.4k
  if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
6537
0
    wakeup(&(ifp->if_datamov));
6538
0
  }
6539
19.4k
  ifnet_decr_iorefcnt_locked(ifp);
6540
19.4k
  lck_mtx_unlock(&ifp->if_ref_lock);
6541
19.4k
}
6542
6543
void
6544
ifnet_datamov_suspend(struct ifnet *ifp)
6545
0
{
6546
0
  lck_mtx_lock_spin(&ifp->if_ref_lock);
6547
0
  VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6548
0
  ifp->if_refio++;
6549
0
  if (ifp->if_suspend++ == 0) {
6550
0
    VERIFY(ifp->if_refflags & IFRF_READY);
6551
0
    ifp->if_refflags &= ~IFRF_READY;
6552
0
  }
6553
0
  lck_mtx_unlock(&ifp->if_ref_lock);
6554
0
}
6555
6556
void
6557
ifnet_datamov_drain(struct ifnet *ifp)
6558
0
{
6559
0
  lck_mtx_lock(&ifp->if_ref_lock);
6560
0
  VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6561
  /* data movement must already be suspended */
6562
0
  VERIFY(ifp->if_suspend > 0);
6563
0
  VERIFY(!(ifp->if_refflags & IFRF_READY));
6564
0
  ifp->if_drainers++;
6565
0
  while (ifp->if_datamov != 0) {
6566
0
    (void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
6567
0
        (PZERO - 1), __func__, NULL);
6568
0
  }
6569
0
  VERIFY(!(ifp->if_refflags & IFRF_READY));
6570
0
  VERIFY(ifp->if_drainers > 0);
6571
0
  ifp->if_drainers--;
6572
0
  lck_mtx_unlock(&ifp->if_ref_lock);
6573
6574
  /* purge the interface queues */
6575
0
  if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
6576
0
    if_qflush(ifp, 0);
6577
0
  }
6578
0
}
6579
6580
void
6581
ifnet_datamov_resume(struct ifnet *ifp)
6582
0
{
6583
0
  lck_mtx_lock(&ifp->if_ref_lock);
6584
  /* data movement must already be suspended */
6585
0
  VERIFY(ifp->if_suspend > 0);
6586
0
  if (--ifp->if_suspend == 0) {
6587
0
    VERIFY(!(ifp->if_refflags & IFRF_READY));
6588
0
    ifp->if_refflags |= IFRF_READY;
6589
0
  }
6590
0
  ifnet_decr_iorefcnt_locked(ifp);
6591
0
  lck_mtx_unlock(&ifp->if_ref_lock);
6592
0
}
6593
6594
static void
6595
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
6596
0
{
6597
0
  struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
6598
0
  ctrace_t *tr;
6599
0
  u_int32_t idx;
6600
0
  u_int16_t *cnt;
6601
6602
0
  if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
6603
0
    panic("%s: dl_if %p has no debug structure", __func__, dl_if);
6604
    /* NOTREACHED */
6605
0
  }
6606
6607
0
  if (refhold) {
6608
0
    cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
6609
0
    tr = dl_if_dbg->dldbg_if_refhold;
6610
0
  } else {
6611
0
    cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
6612
0
    tr = dl_if_dbg->dldbg_if_refrele;
6613
0
  }
6614
6615
0
  idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
6616
0
  ctrace_record(&tr[idx]);
6617
0
}
6618
6619
errno_t
6620
dlil_if_ref(struct ifnet *ifp)
6621
113k
{
6622
113k
  struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6623
6624
113k
  if (dl_if == NULL) {
6625
0
    return EINVAL;
6626
0
  }
6627
6628
113k
  lck_mtx_lock_spin(&dl_if->dl_if_lock);
6629
113k
  ++dl_if->dl_if_refcnt;
6630
113k
  if (dl_if->dl_if_refcnt == 0) {
6631
0
    panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
6632
    /* NOTREACHED */
6633
0
  }
6634
113k
  if (dl_if->dl_if_trace != NULL) {
6635
0
    (*dl_if->dl_if_trace)(dl_if, TRUE);
6636
0
  }
6637
113k
  lck_mtx_unlock(&dl_if->dl_if_lock);
6638
6639
113k
  return 0;
6640
113k
}
6641
6642
errno_t
6643
dlil_if_free(struct ifnet *ifp)
6644
113k
{
6645
113k
  struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6646
113k
  bool need_release = FALSE;
6647
6648
113k
  if (dl_if == NULL) {
6649
0
    return EINVAL;
6650
0
  }
6651
6652
113k
  lck_mtx_lock_spin(&dl_if->dl_if_lock);
6653
113k
  switch (dl_if->dl_if_refcnt) {
6654
0
  case 0:
6655
0
    panic("%s: negative refcnt for ifp=%p", __func__, ifp);
6656
    /* NOTREACHED */
6657
0
    break;
6658
0
  case 1:
6659
0
    if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
6660
0
      need_release = TRUE;
6661
0
    }
6662
0
    break;
6663
113k
  default:
6664
113k
    break;
6665
113k
  }
6666
113k
  --dl_if->dl_if_refcnt;
6667
113k
  if (dl_if->dl_if_trace != NULL) {
6668
0
    (*dl_if->dl_if_trace)(dl_if, FALSE);
6669
0
  }
6670
113k
  lck_mtx_unlock(&dl_if->dl_if_lock);
6671
113k
  if (need_release) {
6672
0
    dlil_if_release(ifp);
6673
0
  }
6674
113k
  return 0;
6675
113k
}
6676
6677
static errno_t
6678
dlil_attach_protocol_internal(struct if_proto *proto,
6679
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
6680
    uint32_t * proto_count)
6681
5.13k
{
6682
5.13k
  struct kev_dl_proto_data ev_pr_data;
6683
5.13k
  struct ifnet *ifp = proto->ifp;
6684
5.13k
  int retval = 0;
6685
5.13k
  u_int32_t hash_value = proto_hash_value(proto->protocol_family);
6686
5.13k
  struct if_proto *prev_proto;
6687
5.13k
  struct if_proto *_proto;
6688
6689
  /* callee holds a proto refcnt upon success */
6690
5.13k
  ifnet_lock_exclusive(ifp);
6691
5.13k
  _proto = find_attached_proto(ifp, proto->protocol_family);
6692
5.13k
  if (_proto != NULL) {
6693
5.13k
    ifnet_lock_done(ifp);
6694
5.13k
    if_proto_free(_proto);
6695
5.13k
    return EEXIST;
6696
5.13k
  }
6697
6698
  /*
6699
   * Call family module add_proto routine so it can refine the
6700
   * demux descriptors as it wishes.
6701
   */
6702
2
  retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
6703
2
      demux_count);
6704
2
  if (retval) {
6705
0
    ifnet_lock_done(ifp);
6706
0
    return retval;
6707
0
  }
6708
6709
  /*
6710
   * Insert the protocol in the hash
6711
   */
6712
2
  prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
6713
2
  while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
6714
0
    prev_proto = SLIST_NEXT(prev_proto, next_hash);
6715
0
  }
6716
2
  if (prev_proto) {
6717
0
    SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
6718
2
  } else {
6719
2
    SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
6720
2
        proto, next_hash);
6721
2
  }
6722
6723
  /* hold a proto refcnt for attach */
6724
2
  if_proto_ref(proto);
6725
6726
  /*
6727
   * The reserved field carries the number of protocol still attached
6728
   * (subject to change)
6729
   */
6730
2
  ev_pr_data.proto_family = proto->protocol_family;
6731
2
  ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
6732
6733
2
  ifnet_lock_done(ifp);
6734
6735
2
  dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
6736
2
      (struct net_event_data *)&ev_pr_data,
6737
2
      sizeof(struct kev_dl_proto_data));
6738
2
  if (proto_count != NULL) {
6739
2
    *proto_count = ev_pr_data.proto_remaining_count;
6740
2
  }
6741
2
  return retval;
6742
2
}
6743
6744
errno_t
6745
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
6746
    const struct ifnet_attach_proto_param *proto_details)
6747
0
{
6748
0
  int retval = 0;
6749
0
  struct if_proto  *ifproto = NULL;
6750
0
  uint32_t proto_count = 0;
6751
6752
0
  ifnet_head_lock_shared();
6753
0
  if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6754
0
    retval = EINVAL;
6755
0
    goto end;
6756
0
  }
6757
  /* Check that the interface is in the global list */
6758
0
  if (!ifnet_lookup(ifp)) {
6759
0
    retval = ENXIO;
6760
0
    goto end;
6761
0
  }
6762
6763
0
  ifproto = zalloc_flags(dlif_proto_zone, Z_WAITOK | Z_ZERO);
6764
0
  if (ifproto == NULL) {
6765
0
    retval = ENOMEM;
6766
0
    goto end;
6767
0
  }
6768
6769
  /* refcnt held above during lookup */
6770
0
  ifproto->ifp = ifp;
6771
0
  ifproto->protocol_family = protocol;
6772
0
  ifproto->proto_kpi = kProtoKPI_v1;
6773
0
  ifproto->kpi.v1.input = proto_details->input;
6774
0
  ifproto->kpi.v1.pre_output = proto_details->pre_output;
6775
0
  ifproto->kpi.v1.event = proto_details->event;
6776
0
  ifproto->kpi.v1.ioctl = proto_details->ioctl;
6777
0
  ifproto->kpi.v1.detached = proto_details->detached;
6778
0
  ifproto->kpi.v1.resolve_multi = proto_details->resolve;
6779
0
  ifproto->kpi.v1.send_arp = proto_details->send_arp;
6780
6781
0
  retval = dlil_attach_protocol_internal(ifproto,
6782
0
      proto_details->demux_list, proto_details->demux_count,
6783
0
      &proto_count);
6784
6785
0
end:
6786
0
  if (retval != 0 && retval != EEXIST) {
6787
0
    DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
6788
0
        ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
6789
0
  } else {
6790
0
    if (dlil_verbose) {
6791
0
      DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
6792
0
          ifp != NULL ? if_name(ifp) : "N/A",
6793
0
          protocol, proto_count);
6794
0
    }
6795
0
  }
6796
0
  ifnet_head_done();
6797
0
  if (retval == 0) {
6798
    /*
6799
     * A protocol has been attached, mark the interface up.
6800
     * This used to be done by configd.KernelEventMonitor, but that
6801
     * is inherently prone to races (rdar://problem/30810208).
6802
     */
6803
0
    (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6804
0
    (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6805
0
    dlil_post_sifflags_msg(ifp);
6806
0
  } else if (ifproto != NULL) {
6807
0
    zfree(dlif_proto_zone, ifproto);
6808
0
  }
6809
0
  return retval;
6810
0
}
6811
6812
errno_t
6813
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
6814
    const struct ifnet_attach_proto_param_v2 *proto_details)
6815
5.13k
{
6816
5.13k
  int retval = 0;
6817
5.13k
  struct if_proto  *ifproto = NULL;
6818
5.13k
  uint32_t proto_count = 0;
6819
6820
5.13k
  ifnet_head_lock_shared();
6821
5.13k
  if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6822
0
    retval = EINVAL;
6823
0
    goto end;
6824
0
  }
6825
  /* Check that the interface is in the global list */
6826
5.13k
  if (!ifnet_lookup(ifp)) {
6827
0
    retval = ENXIO;
6828
0
    goto end;
6829
0
  }
6830
6831
5.13k
  ifproto = zalloc(dlif_proto_zone);
6832
5.13k
  if (ifproto == NULL) {
6833
0
    retval = ENOMEM;
6834
0
    goto end;
6835
0
  }
6836
5.13k
  bzero(ifproto, sizeof(*ifproto));
6837
6838
  /* refcnt held above during lookup */
6839
5.13k
  ifproto->ifp = ifp;
6840
5.13k
  ifproto->protocol_family = protocol;
6841
5.13k
  ifproto->proto_kpi = kProtoKPI_v2;
6842
5.13k
  ifproto->kpi.v2.input = proto_details->input;
6843
5.13k
  ifproto->kpi.v2.pre_output = proto_details->pre_output;
6844
5.13k
  ifproto->kpi.v2.event = proto_details->event;
6845
5.13k
  ifproto->kpi.v2.ioctl = proto_details->ioctl;
6846
5.13k
  ifproto->kpi.v2.detached = proto_details->detached;
6847
5.13k
  ifproto->kpi.v2.resolve_multi = proto_details->resolve;
6848
5.13k
  ifproto->kpi.v2.send_arp = proto_details->send_arp;
6849
6850
5.13k
  retval = dlil_attach_protocol_internal(ifproto,
6851
5.13k
      proto_details->demux_list, proto_details->demux_count,
6852
5.13k
      &proto_count);
6853
6854
5.13k
end:
6855
5.13k
  if (retval != 0 && retval != EEXIST) {
6856
0
    DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
6857
0
        ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
6858
5.13k
  } else {
6859
5.13k
    if (dlil_verbose) {
6860
1
      DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
6861
1
          ifp != NULL ? if_name(ifp) : "N/A",
6862
1
          protocol, proto_count);
6863
1
    }
6864
5.13k
  }
6865
5.13k
  ifnet_head_done();
6866
5.13k
  if (retval == 0) {
6867
    /*
6868
     * A protocol has been attached, mark the interface up.
6869
     * This used to be done by configd.KernelEventMonitor, but that
6870
     * is inherently prone to races (rdar://problem/30810208).
6871
     */
6872
2
    (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6873
2
    (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6874
2
    dlil_post_sifflags_msg(ifp);
6875
5.13k
  } else if (ifproto != NULL) {
6876
5.13k
    zfree(dlif_proto_zone, ifproto);
6877
5.13k
  }
6878
5.13k
  return retval;
6879
5.13k
}
6880
6881
errno_t
6882
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
6883
0
{
6884
0
  struct if_proto *proto = NULL;
6885
0
  int     retval = 0;
6886
6887
0
  if (ifp == NULL || proto_family == 0) {
6888
0
    retval = EINVAL;
6889
0
    goto end;
6890
0
  }
6891
6892
0
  ifnet_lock_exclusive(ifp);
6893
  /* callee holds a proto refcnt upon success */
6894
0
  proto = find_attached_proto(ifp, proto_family);
6895
0
  if (proto == NULL) {
6896
0
    retval = ENXIO;
6897
0
    ifnet_lock_done(ifp);
6898
0
    goto end;
6899
0
  }
6900
6901
  /* call family module del_proto */
6902
0
  if (ifp->if_del_proto) {
6903
0
    ifp->if_del_proto(ifp, proto->protocol_family);
6904
0
  }
6905
6906
0
  SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
6907
0
      proto, if_proto, next_hash);
6908
6909
0
  if (proto->proto_kpi == kProtoKPI_v1) {
6910
0
    proto->kpi.v1.input = ifproto_media_input_v1;
6911
0
    proto->kpi.v1.pre_output = ifproto_media_preout;
6912
0
    proto->kpi.v1.event = ifproto_media_event;
6913
0
    proto->kpi.v1.ioctl = ifproto_media_ioctl;
6914
0
    proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
6915
0
    proto->kpi.v1.send_arp = ifproto_media_send_arp;
6916
0
  } else {
6917
0
    proto->kpi.v2.input = ifproto_media_input_v2;
6918
0
    proto->kpi.v2.pre_output = ifproto_media_preout;
6919
0
    proto->kpi.v2.event = ifproto_media_event;
6920
0
    proto->kpi.v2.ioctl = ifproto_media_ioctl;
6921
0
    proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
6922
0
    proto->kpi.v2.send_arp = ifproto_media_send_arp;
6923
0
  }
6924
0
  proto->detached = 1;
6925
0
  ifnet_lock_done(ifp);
6926
6927
0
  if (dlil_verbose) {
6928
0
    DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
6929
0
        (proto->proto_kpi == kProtoKPI_v1) ?
6930
0
        "v1" : "v2", proto_family);
6931
0
  }
6932
6933
  /* release proto refcnt held during protocol attach */
6934
0
  if_proto_free(proto);
6935
6936
  /*
6937
   * Release proto refcnt held during lookup; the rest of
6938
   * protocol detach steps will happen when the last proto
6939
   * reference is released.
6940
   */
6941
0
  if_proto_free(proto);
6942
6943
0
end:
6944
0
  return retval;
6945
0
}
6946
6947
6948
static errno_t
6949
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
6950
    struct mbuf *packet, char *header)
6951
0
{
6952
0
#pragma unused(ifp, protocol, packet, header)
6953
0
  return ENXIO;
6954
0
}
6955
6956
static errno_t
6957
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
6958
    struct mbuf *packet)
6959
0
{
6960
0
#pragma unused(ifp, protocol, packet)
6961
0
  return ENXIO;
6962
0
}
6963
6964
static errno_t
6965
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
6966
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
6967
    char *link_layer_dest)
6968
0
{
6969
0
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
6970
0
  return ENXIO;
6971
0
}
6972
6973
static void
6974
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
6975
    const struct kev_msg *event)
6976
0
{
6977
0
#pragma unused(ifp, protocol, event)
6978
0
}
6979
6980
static errno_t
6981
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
6982
    unsigned long command, void *argument)
6983
0
{
6984
0
#pragma unused(ifp, protocol, command, argument)
6985
0
  return ENXIO;
6986
0
}
6987
6988
static errno_t
6989
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
6990
    struct sockaddr_dl *out_ll, size_t ll_len)
6991
0
{
6992
0
#pragma unused(ifp, proto_addr, out_ll, ll_len)
6993
0
  return ENXIO;
6994
0
}
6995
6996
static errno_t
6997
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
6998
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6999
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
7000
0
{
7001
0
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
7002
0
  return ENXIO;
7003
0
}
7004
7005
extern int if_next_index(void);
7006
extern int tcp_ecn_outbound;
7007
7008
errno_t
7009
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
7010
4
{
7011
4
  struct ifnet *tmp_if;
7012
4
  struct ifaddr *ifa;
7013
4
  struct if_data_internal if_data_saved;
7014
4
  struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7015
4
  struct dlil_threading_info *dl_inp;
7016
4
  thread_continue_t thfunc = NULL;
7017
4
  u_int32_t sflags = 0;
7018
4
  int err;
7019
7020
4
  if (ifp == NULL) {
7021
0
    return EINVAL;
7022
0
  }
7023
7024
  /*
7025
   * Serialize ifnet attach using dlil_ifnet_lock, in order to
7026
   * prevent the interface from being configured while it is
7027
   * embryonic, as ifnet_head_lock is dropped and reacquired
7028
   * below prior to marking the ifnet with IFRF_ATTACHED.
7029
   */
7030
4
  dlil_if_lock();
7031
4
  ifnet_head_lock_exclusive();
7032
  /* Verify we aren't already on the list */
7033
6
  TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
7034
6
    if (tmp_if == ifp) {
7035
0
      ifnet_head_done();
7036
0
      dlil_if_unlock();
7037
0
      return EEXIST;
7038
0
    }
7039
6
  }
7040
7041
4
  lck_mtx_lock_spin(&ifp->if_ref_lock);
7042
4
  if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
7043
0
    panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
7044
0
        __func__, ifp);
7045
    /* NOTREACHED */
7046
0
  }
7047
4
  lck_mtx_unlock(&ifp->if_ref_lock);
7048
7049
4
  ifnet_lock_exclusive(ifp);
7050
7051
  /* Sanity check */
7052
4
  VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7053
4
  VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
7054
4
  VERIFY(ifp->if_threads_pending == 0);
7055
7056
4
  if (ll_addr != NULL) {
7057
0
    if (ifp->if_addrlen == 0) {
7058
0
      ifp->if_addrlen = ll_addr->sdl_alen;
7059
0
    } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
7060
0
      ifnet_lock_done(ifp);
7061
0
      ifnet_head_done();
7062
0
      dlil_if_unlock();
7063
0
      return EINVAL;
7064
0
    }
7065
0
  }
7066
7067
  /*
7068
   * Allow interfaces without protocol families to attach
7069
   * only if they have the necessary fields filled out.
7070
   */
7071
4
  if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
7072
0
    DLIL_PRINTF("%s: Attempt to attach interface without "
7073
0
        "family module - %d\n", __func__, ifp->if_family);
7074
0
    ifnet_lock_done(ifp);
7075
0
    ifnet_head_done();
7076
0
    dlil_if_unlock();
7077
0
    return ENODEV;
7078
0
  }
7079
7080
  /* Allocate protocol hash table */
7081
4
  VERIFY(ifp->if_proto_hash == NULL);
7082
0
  ifp->if_proto_hash = zalloc_flags(dlif_phash_zone, Z_WAITOK | Z_ZERO);
7083
4
  if (ifp->if_proto_hash == NULL) {
7084
0
    ifnet_lock_done(ifp);
7085
0
    ifnet_head_done();
7086
0
    dlil_if_unlock();
7087
0
    return ENOBUFS;
7088
0
  }
7089
7090
4
  lck_mtx_lock_spin(&ifp->if_flt_lock);
7091
4
  VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
7092
4
  TAILQ_INIT(&ifp->if_flt_head);
7093
4
  VERIFY(ifp->if_flt_busy == 0);
7094
4
  VERIFY(ifp->if_flt_waiters == 0);
7095
0
  lck_mtx_unlock(&ifp->if_flt_lock);
7096
7097
4
  if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
7098
4
    VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
7099
4
    LIST_INIT(&ifp->if_multiaddrs);
7100
4
  }
7101
7102
4
  VERIFY(ifp->if_allhostsinm == NULL);
7103
4
  VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
7104
4
  TAILQ_INIT(&ifp->if_addrhead);
7105
7106
4
  if (ifp->if_index == 0) {
7107
4
    int idx = if_next_index();
7108
7109
4
    if (idx == -1) {
7110
0
      ifp->if_index = 0;
7111
0
      ifnet_lock_done(ifp);
7112
0
      ifnet_head_done();
7113
0
      dlil_if_unlock();
7114
0
      return ENOBUFS;
7115
0
    }
7116
4
    ifp->if_index = (uint16_t)idx;
7117
7118
    /* the lladdr passed at attach time is the permanent address */
7119
4
    if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
7120
0
        ll_addr->sdl_alen == ETHER_ADDR_LEN) {
7121
0
      bcopy(CONST_LLADDR(ll_addr),
7122
0
          dl_if->dl_if_permanent_ether,
7123
0
          ETHER_ADDR_LEN);
7124
0
      dl_if->dl_if_permanent_ether_is_set = 1;
7125
0
    }
7126
4
  }
7127
  /* There should not be anything occupying this slot */
7128
4
  VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
7129
7130
  /* allocate (if needed) and initialize a link address */
7131
0
  ifa = dlil_alloc_lladdr(ifp, ll_addr);
7132
4
  if (ifa == NULL) {
7133
0
    ifnet_lock_done(ifp);
7134
0
    ifnet_head_done();
7135
0
    dlil_if_unlock();
7136
0
    return ENOBUFS;
7137
0
  }
7138
7139
4
  VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
7140
0
  ifnet_addrs[ifp->if_index - 1] = ifa;
7141
7142
  /* make this address the first on the list */
7143
4
  IFA_LOCK(ifa);
7144
  /* hold a reference for ifnet_addrs[] */
7145
4
  IFA_ADDREF_LOCKED(ifa);
7146
  /* if_attach_link_ifa() holds a reference for ifa_link */
7147
4
  if_attach_link_ifa(ifp, ifa);
7148
4
  IFA_UNLOCK(ifa);
7149
7150
4
  TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
7151
4
  ifindex2ifnet[ifp->if_index] = ifp;
7152
7153
  /* Hold a reference to the underlying dlil_ifnet */
7154
4
  ifnet_reference(ifp);
7155
7156
  /* Clear stats (save and restore other fields that we care) */
7157
4
  if_data_saved = ifp->if_data;
7158
4
  bzero(&ifp->if_data, sizeof(ifp->if_data));
7159
4
  ifp->if_data.ifi_type = if_data_saved.ifi_type;
7160
4
  ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
7161
4
  ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
7162
4
  ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
7163
4
  ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
7164
4
  ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
7165
4
  ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
7166
4
  ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
7167
4
  ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
7168
4
  ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
7169
4
  ifnet_touch_lastchange(ifp);
7170
7171
4
  VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
7172
4
      ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
7173
4
      ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
7174
7175
  /* By default, use SFB and enable flow advisory */
7176
4
  sflags = PKTSCHEDF_QALG_SFB;
7177
4
  if (if_flowadv) {
7178
4
    sflags |= PKTSCHEDF_QALG_FLOWCTL;
7179
4
  }
7180
7181
4
  if (if_delaybased_queue) {
7182
4
    sflags |= PKTSCHEDF_QALG_DELAYBASED;
7183
4
  }
7184
7185
4
  if (ifp->if_output_sched_model ==
7186
4
      IFNET_SCHED_MODEL_DRIVER_MANAGED) {
7187
0
    sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
7188
0
  }
7189
7190
  /* Initialize transmit queue(s) */
7191
4
  err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
7192
4
  if (err != 0) {
7193
0
    panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
7194
0
        "err=%d", __func__, ifp, err);
7195
    /* NOTREACHED */
7196
0
  }
7197
7198
  /* Sanity checks on the input thread storage */
7199
4
  dl_inp = &dl_if->dl_if_inpstorage;
7200
4
  bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
7201
4
  VERIFY(dl_inp->dlth_flags == 0);
7202
4
  VERIFY(dl_inp->dlth_wtot == 0);
7203
4
  VERIFY(dl_inp->dlth_ifp == NULL);
7204
4
  VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
7205
4
  VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
7206
4
  VERIFY(!dl_inp->dlth_affinity);
7207
4
  VERIFY(ifp->if_inp == NULL);
7208
4
  VERIFY(dl_inp->dlth_thread == THREAD_NULL);
7209
4
  VERIFY(dl_inp->dlth_strategy == NULL);
7210
4
  VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
7211
4
  VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
7212
4
  VERIFY(dl_inp->dlth_affinity_tag == 0);
7213
7214
0
#if IFNET_INPUT_SANITY_CHK
7215
4
  VERIFY(dl_inp->dlth_pkts_cnt == 0);
7216
0
#endif /* IFNET_INPUT_SANITY_CHK */
7217
7218
4
  VERIFY(ifp->if_poll_thread == THREAD_NULL);
7219
0
  dlil_reset_rxpoll_params(ifp);
7220
  /*
7221
   * A specific DLIL input thread is created per non-loopback interface.
7222
   */
7223
4
  if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
7224
3
    ifp->if_inp = dl_inp;
7225
3
    ifnet_incr_pending_thread_count(ifp);
7226
3
    err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
7227
3
    if (err == ENODEV) {
7228
0
      VERIFY(thfunc == NULL);
7229
0
      ifnet_decr_pending_thread_count(ifp);
7230
3
    } else if (err != 0) {
7231
0
      panic_plain("%s: ifp=%p couldn't get an input thread; "
7232
0
          "err=%d", __func__, ifp, err);
7233
      /* NOTREACHED */
7234
0
    }
7235
3
  }
7236
  /*
7237
   * If the driver supports the new transmit model, calculate flow hash
7238
   * and create a workloop starter thread to invoke the if_start callback
7239
   * where the packets may be dequeued and transmitted.
7240
   */
7241
4
  if (ifp->if_eflags & IFEF_TXSTART) {
7242
0
    thread_precedence_policy_data_t info;
7243
0
    __unused kern_return_t kret;
7244
7245
0
    ifp->if_flowhash = ifnet_calc_flowhash(ifp);
7246
0
    VERIFY(ifp->if_flowhash != 0);
7247
0
    VERIFY(ifp->if_start_thread == THREAD_NULL);
7248
7249
0
    ifnet_set_start_cycle(ifp, NULL);
7250
0
    ifp->if_start_active = 0;
7251
0
    ifp->if_start_req = 0;
7252
0
    ifp->if_start_flags = 0;
7253
0
    VERIFY(ifp->if_start != NULL);
7254
0
    ifnet_incr_pending_thread_count(ifp);
7255
0
    if ((err = kernel_thread_start(ifnet_start_thread_func,
7256
0
        ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
7257
0
      panic_plain("%s: "
7258
0
          "ifp=%p couldn't get a start thread; "
7259
0
          "err=%d", __func__, ifp, err);
7260
      /* NOTREACHED */
7261
0
    }
7262
0
    bzero(&info, sizeof(info));
7263
0
    info.importance = 1;
7264
0
    kret = thread_policy_set(ifp->if_start_thread,
7265
0
        THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
7266
0
        THREAD_PRECEDENCE_POLICY_COUNT);
7267
0
    ASSERT(kret == KERN_SUCCESS);
7268
4
  } else {
7269
4
    ifp->if_flowhash = 0;
7270
4
  }
7271
7272
  /* Reset polling parameters */
7273
4
  ifnet_set_poll_cycle(ifp, NULL);
7274
4
  ifp->if_poll_update = 0;
7275
4
  ifp->if_poll_flags = 0;
7276
4
  ifp->if_poll_req = 0;
7277
4
  VERIFY(ifp->if_poll_thread == THREAD_NULL);
7278
7279
  /*
7280
   * If the driver supports the new receive model, create a poller
7281
   * thread to invoke if_input_poll callback where the packets may
7282
   * be dequeued from the driver and processed for reception.
7283
   * if the interface is netif compat then the poller thread is
7284
   * managed by netif.
7285
   */
7286
4
  if (thfunc == dlil_rxpoll_input_thread_func) {
7287
0
    thread_precedence_policy_data_t info;
7288
0
    __unused kern_return_t kret;
7289
0
    VERIFY(ifp->if_input_poll != NULL);
7290
0
    VERIFY(ifp->if_input_ctl != NULL);
7291
0
    ifnet_incr_pending_thread_count(ifp);
7292
0
    if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
7293
0
        &ifp->if_poll_thread)) != KERN_SUCCESS) {
7294
0
      panic_plain("%s: ifp=%p couldn't get a poll thread; "
7295
0
          "err=%d", __func__, ifp, err);
7296
      /* NOTREACHED */
7297
0
    }
7298
0
    bzero(&info, sizeof(info));
7299
0
    info.importance = 1;
7300
0
    kret = thread_policy_set(ifp->if_poll_thread,
7301
0
        THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
7302
0
        THREAD_PRECEDENCE_POLICY_COUNT);
7303
0
    ASSERT(kret == KERN_SUCCESS);
7304
0
  }
7305
7306
4
  VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7307
4
  VERIFY(ifp->if_desc.ifd_len == 0);
7308
4
  VERIFY(ifp->if_desc.ifd_desc != NULL);
7309
7310
  /* Record attach PC stacktrace */
7311
0
  ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
7312
7313
4
  ifp->if_updatemcasts = 0;
7314
4
  if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
7315
0
    struct ifmultiaddr *ifma;
7316
0
    LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
7317
0
      IFMA_LOCK(ifma);
7318
0
      if (ifma->ifma_addr->sa_family == AF_LINK ||
7319
0
          ifma->ifma_addr->sa_family == AF_UNSPEC) {
7320
0
        ifp->if_updatemcasts++;
7321
0
      }
7322
0
      IFMA_UNLOCK(ifma);
7323
0
    }
7324
7325
0
    DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
7326
0
        "membership(s)\n", if_name(ifp),
7327
0
        ifp->if_updatemcasts);
7328
0
  }
7329
7330
  /* Clear logging parameters */
7331
4
  bzero(&ifp->if_log, sizeof(ifp->if_log));
7332
7333
  /* Clear foreground/realtime activity timestamps */
7334
4
  ifp->if_fg_sendts = 0;
7335
4
  ifp->if_rt_sendts = 0;
7336
7337
4
  VERIFY(ifp->if_delegated.ifp == NULL);
7338
4
  VERIFY(ifp->if_delegated.type == 0);
7339
4
  VERIFY(ifp->if_delegated.family == 0);
7340
4
  VERIFY(ifp->if_delegated.subfamily == 0);
7341
4
  VERIFY(ifp->if_delegated.expensive == 0);
7342
4
  VERIFY(ifp->if_delegated.constrained == 0);
7343
7344
4
  VERIFY(ifp->if_agentids == NULL);
7345
4
  VERIFY(ifp->if_agentcount == 0);
7346
7347
  /* Reset interface state */
7348
0
  bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7349
4
  ifp->if_interface_state.valid_bitmask |=
7350
4
      IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7351
4
  ifp->if_interface_state.interface_availability =
7352
4
      IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
7353
7354
  /* Initialize Link Quality Metric (loopback [lo0] is always good) */
7355
4
  if (ifp == lo_ifp) {
7356
1
    ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
7357
1
    ifp->if_interface_state.valid_bitmask |=
7358
1
        IF_INTERFACE_STATE_LQM_STATE_VALID;
7359
3
  } else {
7360
3
    ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
7361
3
  }
7362
7363
  /*
7364
   * Enable ECN capability on this interface depending on the
7365
   * value of ECN global setting
7366
   */
7367
4
  if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
7368
4
    if_set_eflags(ifp, IFEF_ECN_ENABLE);
7369
4
    if_clear_eflags(ifp, IFEF_ECN_DISABLE);
7370
4
  }
7371
7372
  /*
7373
   * Built-in Cyclops always on policy for WiFi infra
7374
   */
7375
4
  if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
7376
0
    errno_t error;
7377
7378
0
    error = if_set_qosmarking_mode(ifp,
7379
0
        IFRTYPE_QOSMARKING_FASTLANE);
7380
0
    if (error != 0) {
7381
0
      DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
7382
0
          __func__, ifp->if_xname, error);
7383
0
    } else {
7384
0
      if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
7385
0
#if (DEVELOPMENT || DEBUG)
7386
0
      DLIL_PRINTF("%s fastlane enabled on %s\n",
7387
0
          __func__, ifp->if_xname);
7388
0
#endif /* (DEVELOPMENT || DEBUG) */
7389
0
    }
7390
0
  }
7391
7392
4
  ifnet_lock_done(ifp);
7393
4
  ifnet_head_done();
7394
7395
7396
4
  lck_mtx_lock(&ifp->if_cached_route_lock);
7397
  /* Enable forwarding cached route */
7398
4
  ifp->if_fwd_cacheok = 1;
7399
  /* Clean up any existing cached routes */
7400
4
  ROUTE_RELEASE(&ifp->if_fwd_route);
7401
4
  bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
7402
4
  ROUTE_RELEASE(&ifp->if_src_route);
7403
4
  bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
7404
4
  ROUTE_RELEASE(&ifp->if_src_route6);
7405
4
  bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
7406
4
  lck_mtx_unlock(&ifp->if_cached_route_lock);
7407
7408
4
  ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
7409
7410
  /*
7411
   * Allocate and attach IGMPv3/MLDv2 interface specific variables
7412
   * and trees; do this before the ifnet is marked as attached.
7413
   * The ifnet keeps the reference to the info structures even after
7414
   * the ifnet is detached, since the network-layer records still
7415
   * refer to the info structures even after that.  This also
7416
   * makes it possible for them to still function after the ifnet
7417
   * is recycled or reattached.
7418
   */
7419
4
#if INET
7420
4
  if (IGMP_IFINFO(ifp) == NULL) {
7421
4
    IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
7422
4
    VERIFY(IGMP_IFINFO(ifp) != NULL);
7423
4
  } else {
7424
0
    VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
7425
0
    igmp_domifreattach(IGMP_IFINFO(ifp));
7426
0
  }
7427
0
#endif /* INET */
7428
4
  if (MLD_IFINFO(ifp) == NULL) {
7429
4
    MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
7430
4
    VERIFY(MLD_IFINFO(ifp) != NULL);
7431
4
  } else {
7432
0
    VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
7433
0
    mld_domifreattach(MLD_IFINFO(ifp));
7434
0
  }
7435
7436
4
  VERIFY(ifp->if_data_threshold == 0);
7437
4
  VERIFY(ifp->if_dt_tcall != NULL);
7438
7439
  /*
7440
   * Wait for the created kernel threads for I/O to get
7441
   * scheduled and run at least once before we proceed
7442
   * to mark interface as attached.
7443
   */
7444
0
  lck_mtx_lock(&ifp->if_ref_lock);
7445
  // while (ifp->if_threads_pending != 0) {
7446
  //  DLIL_PRINTF("%s: Waiting for all kernel threads created for "
7447
  //      "interface %s to get scheduled at least once.\n",
7448
  //      __func__, ifp->if_xname);
7449
  //  (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
7450
  //      __func__, NULL);
7451
  //  LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
7452
  // }
7453
4
  lck_mtx_unlock(&ifp->if_ref_lock);
7454
4
  DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
7455
4
      "at least once. Proceeding.\n", __func__, ifp->if_xname);
7456
7457
  /* Final mark this ifnet as attached. */
7458
4
  lck_mtx_lock(rnh_lock);
7459
4
  ifnet_lock_exclusive(ifp);
7460
4
  lck_mtx_lock_spin(&ifp->if_ref_lock);
7461
4
  ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
7462
4
  lck_mtx_unlock(&ifp->if_ref_lock);
7463
4
  if (net_rtref) {
7464
    /* boot-args override; enable idle notification */
7465
0
    (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
7466
0
        IFRF_IDLE_NOTIFY);
7467
4
  } else {
7468
    /* apply previous request(s) to set the idle flags, if any */
7469
4
    (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
7470
4
        ifp->if_idle_new_flags_mask);
7471
4
  }
7472
4
  ifnet_lock_done(ifp);
7473
4
  lck_mtx_unlock(rnh_lock);
7474
4
  dlil_if_unlock();
7475
7476
4
#if PF
7477
  /*
7478
   * Attach packet filter to this interface, if enabled.
7479
   */
7480
4
  pf_ifnet_hook(ifp, 1);
7481
4
#endif /* PF */
7482
7483
4
  dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
7484
7485
4
  if (dlil_verbose) {
7486
2
    DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
7487
2
        (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
7488
2
  }
7489
7490
4
  return 0;
7491
4
}
7492
7493
/*
7494
 * Prepare the storage for the first/permanent link address, which must
7495
 * must have the same lifetime as the ifnet itself.  Although the link
7496
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
7497
 * its location in memory must never change as it may still be referred
7498
 * to by some parts of the system afterwards (unfortunate implementation
7499
 * artifacts inherited from BSD.)
7500
 *
7501
 * Caller must hold ifnet lock as writer.
7502
 */
7503
static struct ifaddr *
7504
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
7505
4
{
7506
4
  struct ifaddr *ifa, *oifa;
7507
4
  struct sockaddr_dl *asdl, *msdl;
7508
4
  char workbuf[IFNAMSIZ * 2];
7509
4
  int namelen, masklen, socksize;
7510
4
  struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7511
7512
4
  ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
7513
4
  VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
7514
7515
0
  namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
7516
4
      if_name(ifp));
7517
4
  masklen = offsetof(struct sockaddr_dl, sdl_data[0])
7518
4
      + ((namelen > 0) ? namelen : 0);
7519
4
  socksize = masklen + ifp->if_addrlen;
7520
4
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
7521
4
  if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
7522
4
    socksize = sizeof(struct sockaddr_dl);
7523
4
  }
7524
4
  socksize = ROUNDUP(socksize);
7525
4
#undef ROUNDUP
7526
7527
4
  ifa = ifp->if_lladdr;
7528
4
  if (socksize > DLIL_SDLMAXLEN ||
7529
4
      (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
7530
    /*
7531
     * Rare, but in the event that the link address requires
7532
     * more storage space than DLIL_SDLMAXLEN, allocate the
7533
     * largest possible storages for address and mask, such
7534
     * that we can reuse the same space when if_addrlen grows.
7535
     * This same space will be used when if_addrlen shrinks.
7536
     */
7537
0
    if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
7538
0
      int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
7539
0
      ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
7540
0
      if (ifa == NULL) {
7541
0
        return NULL;
7542
0
      }
7543
0
      ifa_lock_init(ifa);
7544
      /* Don't set IFD_ALLOC, as this is permanent */
7545
0
      ifa->ifa_debug = IFD_LINK;
7546
0
    }
7547
0
    IFA_LOCK(ifa);
7548
    /* address and mask sockaddr_dl locations */
7549
0
    asdl = (struct sockaddr_dl *)(ifa + 1);
7550
0
    bzero(asdl, SOCK_MAXADDRLEN);
7551
0
    msdl = (struct sockaddr_dl *)(void *)
7552
0
        ((char *)asdl + SOCK_MAXADDRLEN);
7553
0
    bzero(msdl, SOCK_MAXADDRLEN);
7554
4
  } else {
7555
4
    VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
7556
    /*
7557
     * Use the storage areas for address and mask within the
7558
     * dlil_ifnet structure.  This is the most common case.
7559
     */
7560
4
    if (ifa == NULL) {
7561
4
      ifa = &dl_if->dl_if_lladdr.ifa;
7562
4
      ifa_lock_init(ifa);
7563
      /* Don't set IFD_ALLOC, as this is permanent */
7564
4
      ifa->ifa_debug = IFD_LINK;
7565
4
    }
7566
4
    IFA_LOCK(ifa);
7567
    /* address and mask sockaddr_dl locations */
7568
4
    asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
7569
4
    bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
7570
4
    msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
7571
4
    bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
7572
4
  }
7573
7574
  /* hold a permanent reference for the ifnet itself */
7575
4
  IFA_ADDREF_LOCKED(ifa);
7576
4
  oifa = ifp->if_lladdr;
7577
4
  ifp->if_lladdr = ifa;
7578
7579
4
  VERIFY(ifa->ifa_debug == IFD_LINK);
7580
0
  ifa->ifa_ifp = ifp;
7581
4
  ifa->ifa_rtrequest = link_rtrequest;
7582
4
  ifa->ifa_addr = (struct sockaddr *)asdl;
7583
4
  asdl->sdl_len = (u_char)socksize;
7584
4
  asdl->sdl_family = AF_LINK;
7585
4
  if (namelen > 0) {
7586
0
    bcopy(workbuf, asdl->sdl_data, min(namelen,
7587
0
        sizeof(asdl->sdl_data)));
7588
0
    asdl->sdl_nlen = (u_char)namelen;
7589
4
  } else {
7590
4
    asdl->sdl_nlen = 0;
7591
4
  }
7592
4
  asdl->sdl_index = ifp->if_index;
7593
4
  asdl->sdl_type = ifp->if_type;
7594
4
  if (ll_addr != NULL) {
7595
0
    asdl->sdl_alen = ll_addr->sdl_alen;
7596
0
    bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
7597
4
  } else {
7598
4
    asdl->sdl_alen = 0;
7599
4
  }
7600
4
  ifa->ifa_netmask = (struct sockaddr *)msdl;
7601
4
  msdl->sdl_len = (u_char)masklen;
7602
4
  while (namelen > 0) {
7603
0
    msdl->sdl_data[--namelen] = 0xff;
7604
0
  }
7605
4
  IFA_UNLOCK(ifa);
7606
7607
4
  if (oifa != NULL) {
7608
0
    IFA_REMREF(oifa);
7609
0
  }
7610
7611
4
  return ifa;
7612
4
}
7613
7614
static void
7615
if_purgeaddrs(struct ifnet *ifp)
7616
0
{
7617
0
#if INET
7618
0
  in_purgeaddrs(ifp);
7619
0
#endif /* INET */
7620
0
  in6_purgeaddrs(ifp);
7621
0
}
7622
7623
errno_t
7624
ifnet_detach(ifnet_t ifp)
7625
0
{
7626
0
  struct ifnet *delegated_ifp;
7627
0
  struct nd_ifinfo *ndi = NULL;
7628
7629
0
  if (ifp == NULL) {
7630
0
    return EINVAL;
7631
0
  }
7632
7633
0
  ndi = ND_IFINFO(ifp);
7634
0
  if (NULL != ndi) {
7635
0
    ndi->cga_initialized = FALSE;
7636
0
  }
7637
7638
0
  lck_mtx_lock(rnh_lock);
7639
0
  ifnet_head_lock_exclusive();
7640
0
  ifnet_lock_exclusive(ifp);
7641
7642
0
  if (ifp->if_output_netem != NULL) {
7643
0
    netem_destroy(ifp->if_output_netem);
7644
0
    ifp->if_output_netem = NULL;
7645
0
  }
7646
7647
  /*
7648
   * Check to see if this interface has previously triggered
7649
   * aggressive protocol draining; if so, decrement the global
7650
   * refcnt and clear PR_AGGDRAIN on the route domain if
7651
   * there are no more of such an interface around.
7652
   */
7653
0
  (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
7654
7655
0
  lck_mtx_lock_spin(&ifp->if_ref_lock);
7656
0
  if (!(ifp->if_refflags & IFRF_ATTACHED)) {
7657
0
    lck_mtx_unlock(&ifp->if_ref_lock);
7658
0
    ifnet_lock_done(ifp);
7659
0
    ifnet_head_done();
7660
0
    lck_mtx_unlock(rnh_lock);
7661
0
    return EINVAL;
7662
0
  } else if (ifp->if_refflags & IFRF_DETACHING) {
7663
    /* Interface has already been detached */
7664
0
    lck_mtx_unlock(&ifp->if_ref_lock);
7665
0
    ifnet_lock_done(ifp);
7666
0
    ifnet_head_done();
7667
0
    lck_mtx_unlock(rnh_lock);
7668
0
    return ENXIO;
7669
0
  }
7670
0
  VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
7671
  /* Indicate this interface is being detached */
7672
0
  ifp->if_refflags &= ~IFRF_ATTACHED;
7673
0
  ifp->if_refflags |= IFRF_DETACHING;
7674
0
  lck_mtx_unlock(&ifp->if_ref_lock);
7675
7676
0
  if (dlil_verbose) {
7677
0
    DLIL_PRINTF("%s: detaching\n", if_name(ifp));
7678
0
  }
7679
7680
  /* clean up flow control entry object if there's any */
7681
0
  if (ifp->if_eflags & IFEF_TXSTART) {
7682
0
    ifnet_flowadv(ifp->if_flowhash);
7683
0
  }
7684
7685
  /* Reset ECN enable/disable flags */
7686
  /* Reset CLAT46 flag */
7687
0
  if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);
7688
7689
  /*
7690
   * We do not reset the TCP keep alive counters in case
7691
   * a TCP connection stays connection after the interface
7692
   * went down
7693
   */
7694
0
  if (ifp->if_tcp_kao_cnt > 0) {
7695
0
    os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
7696
0
        __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
7697
0
  }
7698
0
  ifp->if_tcp_kao_max = 0;
7699
7700
  /*
7701
   * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
7702
   * no longer be visible during lookups from this point.
7703
   */
7704
0
  VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
7705
0
  TAILQ_REMOVE(&ifnet_head, ifp, if_link);
7706
0
  ifp->if_link.tqe_next = NULL;
7707
0
  ifp->if_link.tqe_prev = NULL;
7708
0
  if (ifp->if_ordered_link.tqe_next != NULL ||
7709
0
      ifp->if_ordered_link.tqe_prev != NULL) {
7710
0
    ifnet_remove_from_ordered_list(ifp);
7711
0
  }
7712
0
  ifindex2ifnet[ifp->if_index] = NULL;
7713
7714
  /* 18717626 - reset router mode */
7715
0
  if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
7716
0
  ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;
7717
7718
  /* Record detach PC stacktrace */
7719
0
  ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
7720
7721
  /* Clear logging parameters */
7722
0
  bzero(&ifp->if_log, sizeof(ifp->if_log));
7723
7724
  /* Clear delegated interface info (reference released below) */
7725
0
  delegated_ifp = ifp->if_delegated.ifp;
7726
0
  bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
7727
7728
  /* Reset interface state */
7729
0
  bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7730
7731
0
  ifnet_lock_done(ifp);
7732
0
  ifnet_head_done();
7733
0
  lck_mtx_unlock(rnh_lock);
7734
7735
7736
  /* Release reference held on the delegated interface */
7737
0
  if (delegated_ifp != NULL) {
7738
0
    ifnet_release(delegated_ifp);
7739
0
  }
7740
7741
  /* Reset Link Quality Metric (unless loopback [lo0]) */
7742
0
  if (ifp != lo_ifp) {
7743
0
    if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
7744
0
  }
7745
7746
  /* Reset TCP local statistics */
7747
0
  if (ifp->if_tcp_stat != NULL) {
7748
0
    bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
7749
0
  }
7750
7751
  /* Reset UDP local statistics */
7752
0
  if (ifp->if_udp_stat != NULL) {
7753
0
    bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
7754
0
  }
7755
7756
  /* Reset ifnet IPv4 stats */
7757
0
  if (ifp->if_ipv4_stat != NULL) {
7758
0
    bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
7759
0
  }
7760
7761
  /* Reset ifnet IPv6 stats */
7762
0
  if (ifp->if_ipv6_stat != NULL) {
7763
0
    bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
7764
0
  }
7765
7766
  /* Release memory held for interface link status report */
7767
0
  if (ifp->if_link_status != NULL) {
7768
0
    FREE(ifp->if_link_status, M_TEMP);
7769
0
    ifp->if_link_status = NULL;
7770
0
  }
7771
7772
  /* Clear agent IDs */
7773
0
  if (ifp->if_agentids != NULL) {
7774
0
    FREE(ifp->if_agentids, M_NETAGENT);
7775
0
    ifp->if_agentids = NULL;
7776
0
  }
7777
0
  ifp->if_agentcount = 0;
7778
7779
7780
  /* Let BPF know we're detaching */
7781
0
  bpfdetach(ifp);
7782
7783
  /* Mark the interface as DOWN */
7784
0
  if_down(ifp);
7785
7786
  /* Disable forwarding cached route */
7787
0
  lck_mtx_lock(&ifp->if_cached_route_lock);
7788
0
  ifp->if_fwd_cacheok = 0;
7789
0
  lck_mtx_unlock(&ifp->if_cached_route_lock);
7790
7791
  /* Disable data threshold and wait for any pending event posting */
7792
0
  ifp->if_data_threshold = 0;
7793
0
  VERIFY(ifp->if_dt_tcall != NULL);
7794
0
  (void) thread_call_cancel_wait(ifp->if_dt_tcall);
7795
7796
  /*
7797
   * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
7798
   * references to the info structures and leave them attached to
7799
   * this ifnet.
7800
   */
7801
0
#if INET
7802
0
  igmp_domifdetach(ifp);
7803
0
#endif /* INET */
7804
0
  mld_domifdetach(ifp);
7805
7806
0
  dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
7807
7808
  /* Let worker thread take care of the rest, to avoid reentrancy */
7809
0
  dlil_if_lock();
7810
0
  ifnet_detaching_enqueue(ifp);
7811
0
  dlil_if_unlock();
7812
7813
0
  return 0;
7814
0
}
7815
7816
static void
7817
ifnet_detaching_enqueue(struct ifnet *ifp)
7818
0
{
7819
0
  dlil_if_lock_assert();
7820
7821
0
  ++ifnet_detaching_cnt;
7822
0
  VERIFY(ifnet_detaching_cnt != 0);
7823
0
  TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
7824
0
  wakeup((caddr_t)&ifnet_delayed_run);
7825
0
}
7826
7827
static struct ifnet *
7828
ifnet_detaching_dequeue(void)
7829
0
{
7830
0
  struct ifnet *ifp;
7831
7832
0
  dlil_if_lock_assert();
7833
7834
0
  ifp = TAILQ_FIRST(&ifnet_detaching_head);
7835
0
  VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
7836
0
  if (ifp != NULL) {
7837
0
    VERIFY(ifnet_detaching_cnt != 0);
7838
0
    --ifnet_detaching_cnt;
7839
0
    TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
7840
0
    ifp->if_detaching_link.tqe_next = NULL;
7841
0
    ifp->if_detaching_link.tqe_prev = NULL;
7842
0
  }
7843
0
  return ifp;
7844
0
}
7845
7846
__attribute__((noreturn))
7847
static void
7848
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
7849
0
{
7850
0
#pragma unused(v, wres)
7851
0
  struct ifnet *ifp;
7852
7853
0
  dlil_if_lock();
7854
0
  if (__improbable(ifnet_detaching_embryonic)) {
7855
0
    ifnet_detaching_embryonic = FALSE;
7856
    /* there's no lock ordering constrain so OK to do this here */
7857
0
    dlil_decr_pending_thread_count();
7858
0
  }
7859
7860
0
  for (;;) {
7861
0
    dlil_if_lock_assert();
7862
7863
0
    if (ifnet_detaching_cnt == 0) {
7864
0
      break;
7865
0
    }
7866
7867
0
    net_update_uptime();
7868
7869
0
    VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
7870
7871
    /* Take care of detaching ifnet */
7872
0
    ifp = ifnet_detaching_dequeue();
7873
0
    if (ifp != NULL) {
7874
0
      dlil_if_unlock();
7875
0
      ifnet_detach_final(ifp);
7876
0
      dlil_if_lock();
7877
0
    }
7878
0
  }
7879
7880
0
  (void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
7881
0
  dlil_if_unlock();
7882
0
  (void) thread_block(ifnet_detacher_thread_cont);
7883
7884
0
  VERIFY(0);      /* we should never get here */
7885
  /* NOTREACHED */
7886
0
  __builtin_unreachable();
7887
0
}
7888
7889
__dead2
7890
static void
7891
ifnet_detacher_thread_func(void *v, wait_result_t w)
7892
0
{
7893
0
#pragma unused(v, w)
7894
0
  dlil_if_lock();
7895
0
  (void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
7896
0
  ifnet_detaching_embryonic = TRUE;
7897
  /* wake up once to get out of embryonic state */
7898
0
  wakeup((caddr_t)&ifnet_delayed_run);
7899
0
  dlil_if_unlock();
7900
0
  (void) thread_block(ifnet_detacher_thread_cont);
7901
0
  VERIFY(0);
7902
  /* NOTREACHED */
7903
0
  __builtin_unreachable();
7904
0
}
7905
7906
static void
7907
ifnet_detach_final(struct ifnet *ifp)
7908
0
{
7909
0
  struct ifnet_filter *filter, *filter_next;
7910
0
  struct ifnet_filter_head fhead;
7911
0
  struct dlil_threading_info *inp;
7912
0
  struct ifaddr *ifa;
7913
0
  ifnet_detached_func if_free;
7914
0
  int i;
7915
7916
0
  lck_mtx_lock(&ifp->if_ref_lock);
7917
0
  if (!(ifp->if_refflags & IFRF_DETACHING)) {
7918
0
    panic("%s: flags mismatch (detaching not set) ifp=%p",
7919
0
        __func__, ifp);
7920
    /* NOTREACHED */
7921
0
  }
7922
7923
  /*
7924
   * Wait until the existing IO references get released
7925
   * before we proceed with ifnet_detach.  This is not a
7926
   * common case, so block without using a continuation.
7927
   */
7928
0
  while (ifp->if_refio > 0) {
7929
0
    DLIL_PRINTF("%s: Waiting for IO references on %s interface "
7930
0
        "to be released\n", __func__, if_name(ifp));
7931
0
    (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
7932
0
        (PZERO - 1), "ifnet_ioref_wait", NULL);
7933
0
  }
7934
7935
0
  VERIFY(ifp->if_datamov == 0);
7936
0
  VERIFY(ifp->if_drainers == 0);
7937
0
  VERIFY(ifp->if_suspend == 0);
7938
0
  ifp->if_refflags &= ~IFRF_READY;
7939
0
  lck_mtx_unlock(&ifp->if_ref_lock);
7940
7941
  /* Drain and destroy send queue */
7942
0
  ifclassq_teardown(ifp);
7943
7944
  /* Detach interface filters */
7945
0
  lck_mtx_lock(&ifp->if_flt_lock);
7946
0
  if_flt_monitor_enter(ifp);
7947
7948
0
  LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
7949
0
  fhead = ifp->if_flt_head;
7950
0
  TAILQ_INIT(&ifp->if_flt_head);
7951
7952
0
  for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
7953
0
    filter_next = TAILQ_NEXT(filter, filt_next);
7954
0
    lck_mtx_unlock(&ifp->if_flt_lock);
7955
7956
0
    dlil_detach_filter_internal(filter, 1);
7957
0
    lck_mtx_lock(&ifp->if_flt_lock);
7958
0
  }
7959
0
  if_flt_monitor_leave(ifp);
7960
0
  lck_mtx_unlock(&ifp->if_flt_lock);
7961
7962
  /* Tell upper layers to drop their network addresses */
7963
0
  if_purgeaddrs(ifp);
7964
7965
0
  ifnet_lock_exclusive(ifp);
7966
7967
  /* Unplumb all protocols */
7968
0
  for (i = 0; i < PROTO_HASH_SLOTS; i++) {
7969
0
    struct if_proto *proto;
7970
7971
0
    proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7972
0
    while (proto != NULL) {
7973
0
      protocol_family_t family = proto->protocol_family;
7974
0
      ifnet_lock_done(ifp);
7975
0
      proto_unplumb(family, ifp);
7976
0
      ifnet_lock_exclusive(ifp);
7977
0
      proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7978
0
    }
7979
    /* There should not be any protocols left */
7980
0
    VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
7981
0
  }
7982
0
  zfree(dlif_phash_zone, ifp->if_proto_hash);
7983
0
  ifp->if_proto_hash = NULL;
7984
7985
  /* Detach (permanent) link address from if_addrhead */
7986
0
  ifa = TAILQ_FIRST(&ifp->if_addrhead);
7987
0
  VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
7988
0
  IFA_LOCK(ifa);
7989
0
  if_detach_link_ifa(ifp, ifa);
7990
0
  IFA_UNLOCK(ifa);
7991
7992
  /* Remove (permanent) link address from ifnet_addrs[] */
7993
0
  IFA_REMREF(ifa);
7994
0
  ifnet_addrs[ifp->if_index - 1] = NULL;
7995
7996
  /* This interface should not be on {ifnet_head,detaching} */
7997
0
  VERIFY(ifp->if_link.tqe_next == NULL);
7998
0
  VERIFY(ifp->if_link.tqe_prev == NULL);
7999
0
  VERIFY(ifp->if_detaching_link.tqe_next == NULL);
8000
0
  VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
8001
0
  VERIFY(ifp->if_ordered_link.tqe_next == NULL);
8002
0
  VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
8003
8004
  /* The slot should have been emptied */
8005
0
  VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
8006
8007
  /* There should not be any addresses left */
8008
0
  VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
8009
8010
  /*
8011
   * Signal the starter thread to terminate itself.
8012
   */
8013
0
  if (ifp->if_start_thread != THREAD_NULL) {
8014
0
    lck_mtx_lock_spin(&ifp->if_start_lock);
8015
0
    ifp->if_start_flags = 0;
8016
0
    ifp->if_start_thread = THREAD_NULL;
8017
0
    wakeup_one((caddr_t)&ifp->if_start_thread);
8018
0
    lck_mtx_unlock(&ifp->if_start_lock);
8019
0
  }
8020
8021
  /*
8022
   * Signal the poller thread to terminate itself.
8023
   */
8024
0
  if (ifp->if_poll_thread != THREAD_NULL) {
8025
0
    lck_mtx_lock_spin(&ifp->if_poll_lock);
8026
0
    ifp->if_poll_thread = THREAD_NULL;
8027
0
    wakeup_one((caddr_t)&ifp->if_poll_thread);
8028
0
    lck_mtx_unlock(&ifp->if_poll_lock);
8029
0
  }
8030
8031
  /*
8032
   * If thread affinity was set for the workloop thread, we will need
8033
   * to tear down the affinity and release the extra reference count
8034
   * taken at attach time.  Does not apply to lo0 or other interfaces
8035
   * without dedicated input threads.
8036
   */
8037
0
  if ((inp = ifp->if_inp) != NULL) {
8038
0
    VERIFY(inp != dlil_main_input_thread);
8039
8040
0
    if (inp->dlth_affinity) {
8041
0
      struct thread *tp, *wtp, *ptp;
8042
8043
0
      lck_mtx_lock_spin(&inp->dlth_lock);
8044
0
      wtp = inp->dlth_driver_thread;
8045
0
      inp->dlth_driver_thread = THREAD_NULL;
8046
0
      ptp = inp->dlth_poller_thread;
8047
0
      inp->dlth_poller_thread = THREAD_NULL;
8048
0
      ASSERT(inp->dlth_thread != THREAD_NULL);
8049
0
      tp = inp->dlth_thread;    /* don't nullify now */
8050
0
      inp->dlth_affinity_tag = 0;
8051
0
      inp->dlth_affinity = FALSE;
8052
0
      lck_mtx_unlock(&inp->dlth_lock);
8053
8054
      /* Tear down poll thread affinity */
8055
0
      if (ptp != NULL) {
8056
0
        VERIFY(ifp->if_eflags & IFEF_RXPOLL);
8057
0
        VERIFY(ifp->if_xflags & IFXF_LEGACY);
8058
0
        (void) dlil_affinity_set(ptp,
8059
0
            THREAD_AFFINITY_TAG_NULL);
8060
0
        thread_deallocate(ptp);
8061
0
      }
8062
8063
      /* Tear down workloop thread affinity */
8064
0
      if (wtp != NULL) {
8065
0
        (void) dlil_affinity_set(wtp,
8066
0
            THREAD_AFFINITY_TAG_NULL);
8067
0
        thread_deallocate(wtp);
8068
0
      }
8069
8070
      /* Tear down DLIL input thread affinity */
8071
0
      (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
8072
0
      thread_deallocate(tp);
8073
0
    }
8074
8075
    /* disassociate ifp DLIL input thread */
8076
0
    ifp->if_inp = NULL;
8077
8078
    /* if the worker thread was created, tell it to terminate */
8079
0
    if (inp->dlth_thread != THREAD_NULL) {
8080
0
      lck_mtx_lock_spin(&inp->dlth_lock);
8081
0
      inp->dlth_flags |= DLIL_INPUT_TERMINATE;
8082
0
      if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
8083
0
        wakeup_one((caddr_t)&inp->dlth_flags);
8084
0
      }
8085
0
      lck_mtx_unlock(&inp->dlth_lock);
8086
0
      ifnet_lock_done(ifp);
8087
8088
      /* wait for the input thread to terminate */
8089
0
      lck_mtx_lock_spin(&inp->dlth_lock);
8090
0
      while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
8091
0
          == 0) {
8092
0
        (void) msleep(&inp->dlth_flags, &inp->dlth_lock,
8093
0
            (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
8094
0
      }
8095
0
      lck_mtx_unlock(&inp->dlth_lock);
8096
0
      ifnet_lock_exclusive(ifp);
8097
0
    }
8098
8099
    /* clean-up input thread state */
8100
0
    dlil_clean_threading_info(inp);
8101
    /* clean-up poll parameters */
8102
0
    VERIFY(ifp->if_poll_thread == THREAD_NULL);
8103
0
    dlil_reset_rxpoll_params(ifp);
8104
0
  }
8105
8106
  /* The driver might unload, so point these to ourselves */
8107
0
  if_free = ifp->if_free;
8108
0
  ifp->if_output_dlil = ifp_if_output;
8109
0
  ifp->if_output = ifp_if_output;
8110
0
  ifp->if_pre_enqueue = ifp_if_output;
8111
0
  ifp->if_start = ifp_if_start;
8112
0
  ifp->if_output_ctl = ifp_if_ctl;
8113
0
  ifp->if_input_dlil = ifp_if_input;
8114
0
  ifp->if_input_poll = ifp_if_input_poll;
8115
0
  ifp->if_input_ctl = ifp_if_ctl;
8116
0
  ifp->if_ioctl = ifp_if_ioctl;
8117
0
  ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
8118
0
  ifp->if_free = ifp_if_free;
8119
0
  ifp->if_demux = ifp_if_demux;
8120
0
  ifp->if_event = ifp_if_event;
8121
0
  ifp->if_framer_legacy = ifp_if_framer;
8122
0
  ifp->if_framer = ifp_if_framer_extended;
8123
0
  ifp->if_add_proto = ifp_if_add_proto;
8124
0
  ifp->if_del_proto = ifp_if_del_proto;
8125
0
  ifp->if_check_multi = ifp_if_check_multi;
8126
8127
  /* wipe out interface description */
8128
0
  VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
8129
0
  ifp->if_desc.ifd_len = 0;
8130
0
  VERIFY(ifp->if_desc.ifd_desc != NULL);
8131
0
  bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
8132
8133
  /* there shouldn't be any delegation by now */
8134
0
  VERIFY(ifp->if_delegated.ifp == NULL);
8135
0
  VERIFY(ifp->if_delegated.type == 0);
8136
0
  VERIFY(ifp->if_delegated.family == 0);
8137
0
  VERIFY(ifp->if_delegated.subfamily == 0);
8138
0
  VERIFY(ifp->if_delegated.expensive == 0);
8139
0
  VERIFY(ifp->if_delegated.constrained == 0);
8140
8141
  /* QoS marking get cleared */
8142
0
  if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
8143
0
  if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
8144
8145
8146
0
  ifnet_lock_done(ifp);
8147
8148
0
#if PF
8149
  /*
8150
   * Detach this interface from packet filter, if enabled.
8151
   */
8152
0
  pf_ifnet_hook(ifp, 0);
8153
0
#endif /* PF */
8154
8155
  /* Filter list should be empty */
8156
0
  lck_mtx_lock_spin(&ifp->if_flt_lock);
8157
0
  VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
8158
0
  VERIFY(ifp->if_flt_busy == 0);
8159
0
  VERIFY(ifp->if_flt_waiters == 0);
8160
0
  lck_mtx_unlock(&ifp->if_flt_lock);
8161
8162
  /* Last chance to drain send queue */
8163
0
  if_qflush(ifp, 0);
8164
8165
  /* Last chance to cleanup any cached route */
8166
0
  lck_mtx_lock(&ifp->if_cached_route_lock);
8167
0
  VERIFY(!ifp->if_fwd_cacheok);
8168
0
  ROUTE_RELEASE(&ifp->if_fwd_route);
8169
0
  bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
8170
0
  ROUTE_RELEASE(&ifp->if_src_route);
8171
0
  bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
8172
0
  ROUTE_RELEASE(&ifp->if_src_route6);
8173
0
  bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
8174
0
  lck_mtx_unlock(&ifp->if_cached_route_lock);
8175
8176
0
  VERIFY(ifp->if_data_threshold == 0);
8177
0
  VERIFY(ifp->if_dt_tcall != NULL);
8178
0
  VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
8179
8180
0
  ifnet_llreach_ifdetach(ifp);
8181
8182
0
  dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
8183
8184
  /*
8185
   * Finally, mark this ifnet as detached.
8186
   */
8187
0
  lck_mtx_lock_spin(&ifp->if_ref_lock);
8188
0
  if (!(ifp->if_refflags & IFRF_DETACHING)) {
8189
0
    panic("%s: flags mismatch (detaching not set) ifp=%p",
8190
0
        __func__, ifp);
8191
    /* NOTREACHED */
8192
0
  }
8193
0
  ifp->if_refflags &= ~IFRF_DETACHING;
8194
0
  lck_mtx_unlock(&ifp->if_ref_lock);
8195
0
  if (if_free != NULL) {
8196
0
    if_free(ifp);
8197
0
  }
8198
8199
0
  if (dlil_verbose) {
8200
0
    DLIL_PRINTF("%s: detached\n", if_name(ifp));
8201
0
  }
8202
8203
  /* Release reference held during ifnet attach */
8204
0
  ifnet_release(ifp);
8205
0
}
8206
8207
errno_t
8208
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
8209
0
{
8210
0
#pragma unused(ifp)
8211
0
  m_freem_list(m);
8212
0
  return 0;
8213
0
}
8214
8215
void
8216
ifp_if_start(struct ifnet *ifp)
8217
0
{
8218
0
  ifnet_purge(ifp);
8219
0
}
8220
8221
static errno_t
8222
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
8223
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
8224
    boolean_t poll, struct thread *tp)
8225
0
{
8226
0
#pragma unused(ifp, m_tail, s, poll, tp)
8227
0
  m_freem_list(m_head);
8228
0
  return ENXIO;
8229
0
}
8230
8231
static void
8232
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
8233
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
8234
0
{
8235
0
#pragma unused(ifp, flags, max_cnt)
8236
0
  if (m_head != NULL) {
8237
0
    *m_head = NULL;
8238
0
  }
8239
0
  if (m_tail != NULL) {
8240
0
    *m_tail = NULL;
8241
0
  }
8242
0
  if (cnt != NULL) {
8243
0
    *cnt = 0;
8244
0
  }
8245
0
  if (len != NULL) {
8246
0
    *len = 0;
8247
0
  }
8248
0
}
8249
8250
static errno_t
8251
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
8252
0
{
8253
0
#pragma unused(ifp, cmd, arglen, arg)
8254
0
  return EOPNOTSUPP;
8255
0
}
8256
8257
static errno_t
8258
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
8259
0
{
8260
0
#pragma unused(ifp, fh, pf)
8261
0
  m_freem(m);
8262
0
  return EJUSTRETURN;
8263
0
}
8264
8265
static errno_t
8266
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
8267
    const struct ifnet_demux_desc *da, u_int32_t dc)
8268
0
{
8269
0
#pragma unused(ifp, pf, da, dc)
8270
0
  return EINVAL;
8271
0
}
8272
8273
static errno_t
8274
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
8275
0
{
8276
0
#pragma unused(ifp, pf)
8277
0
  return EINVAL;
8278
0
}
8279
8280
static errno_t
8281
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
8282
0
{
8283
0
#pragma unused(ifp, sa)
8284
0
  return EOPNOTSUPP;
8285
0
}
8286
8287
#if !XNU_TARGET_OS_OSX
8288
static errno_t
8289
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8290
    const struct sockaddr *sa, const char *ll, const char *t,
8291
    u_int32_t *pre, u_int32_t *post)
8292
#else /* XNU_TARGET_OS_OSX */
8293
static errno_t
8294
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8295
    const struct sockaddr *sa, const char *ll, const char *t)
8296
#endif /* XNU_TARGET_OS_OSX */
8297
0
{
8298
0
#pragma unused(ifp, m, sa, ll, t)
8299
#if !XNU_TARGET_OS_OSX
8300
  return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
8301
#else /* XNU_TARGET_OS_OSX */
8302
0
  return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
8303
0
#endif /* XNU_TARGET_OS_OSX */
8304
0
}
8305
8306
static errno_t
8307
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
8308
    const struct sockaddr *sa, const char *ll, const char *t,
8309
    u_int32_t *pre, u_int32_t *post)
8310
0
{
8311
0
#pragma unused(ifp, sa, ll, t)
8312
0
  m_freem(*m);
8313
0
  *m = NULL;
8314
8315
0
  if (pre != NULL) {
8316
0
    *pre = 0;
8317
0
  }
8318
0
  if (post != NULL) {
8319
0
    *post = 0;
8320
0
  }
8321
8322
0
  return EJUSTRETURN;
8323
0
}
8324
8325
errno_t
8326
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
8327
0
{
8328
0
#pragma unused(ifp, cmd, arg)
8329
0
  return EOPNOTSUPP;
8330
0
}
8331
8332
static errno_t
8333
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
8334
0
{
8335
0
#pragma unused(ifp, tm, f)
8336
  /* XXX not sure what to do here */
8337
0
  return 0;
8338
0
}
8339
8340
static void
8341
ifp_if_free(struct ifnet *ifp)
8342
0
{
8343
0
#pragma unused(ifp)
8344
0
}
8345
8346
static void
8347
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
8348
0
{
8349
0
#pragma unused(ifp, e)
8350
0
}
8351
8352
int
8353
dlil_if_acquire(u_int32_t family, const void *uniqueid,
8354
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
8355
4
{
8356
4
  struct ifnet *ifp1 = NULL;
8357
4
  struct dlil_ifnet *dlifp1 = NULL;
8358
4
  struct dlil_ifnet *dlifp1_saved = NULL;
8359
4
  void *buf, *base, **pbuf;
8360
4
  int ret = 0;
8361
8362
4
  VERIFY(*ifp == NULL);
8363
0
  dlil_if_lock();
8364
  /*
8365
   * We absolutely can't have an interface with the same name
8366
   * in in-use state.
8367
   * To make sure of that list has to be traversed completely
8368
   */
8369
6
  TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
8370
6
    ifp1 = (struct ifnet *)dlifp1;
8371
8372
6
    if (ifp1->if_family != family) {
8373
5
      continue;
8374
5
    }
8375
8376
    /*
8377
     * If interface is in use, return EBUSY if either unique id
8378
     * or interface extended names are the same
8379
     */
8380
1
    lck_mtx_lock(&dlifp1->dl_if_lock);
8381
1
    if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
8382
0
      if (dlifp1->dl_if_flags & DLIF_INUSE) {
8383
0
        lck_mtx_unlock(&dlifp1->dl_if_lock);
8384
0
        ret = EBUSY;
8385
0
        goto end;
8386
0
      }
8387
0
    }
8388
8389
1
    if (uniqueid_len) {
8390
1
      if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
8391
1
          bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
8392
0
        if (dlifp1->dl_if_flags & DLIF_INUSE) {
8393
0
          lck_mtx_unlock(&dlifp1->dl_if_lock);
8394
0
          ret = EBUSY;
8395
0
          goto end;
8396
0
        } else {
8397
          /* Cache the first interface that can be recycled */
8398
0
          if (*ifp == NULL) {
8399
0
            *ifp = ifp1;
8400
0
            dlifp1_saved = dlifp1;
8401
0
          }
8402
          /*
8403
           * XXX Do not break or jump to end as we have to traverse
8404
           * the whole list to ensure there are no name collisions
8405
           */
8406
0
        }
8407
0
      }
8408
1
    }
8409
1
    lck_mtx_unlock(&dlifp1->dl_if_lock);
8410
1
  }
8411
8412
  /* If there's an interface that can be recycled, use that */
8413
4
  if (*ifp != NULL) {
8414
0
    if (dlifp1_saved != NULL) {
8415
0
      lck_mtx_lock(&dlifp1_saved->dl_if_lock);
8416
0
      dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
8417
0
      lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
8418
0
      dlifp1_saved = NULL;
8419
0
    }
8420
0
    goto end;
8421
0
  }
8422
8423
  /* no interface found, allocate a new one */
8424
4
  buf = zalloc_flags(dlif_zone, Z_WAITOK | Z_ZERO);
8425
4
  if (buf == NULL) {
8426
0
    ret = ENOMEM;
8427
0
    goto end;
8428
0
  }
8429
8430
  /* Get the 64-bit aligned base address for this object */
8431
4
  base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
8432
4
      sizeof(u_int64_t));
8433
4
  VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
8434
8435
  /*
8436
   * Wind back a pointer size from the aligned base and
8437
   * save the original address so we can free it later.
8438
   */
8439
0
  pbuf = (void **)((intptr_t)base - sizeof(void *));
8440
4
  *pbuf = buf;
8441
4
  dlifp1 = base;
8442
8443
4
  if (uniqueid_len) {
8444
4
    MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
8445
4
        M_NKE, M_WAITOK);
8446
4
    if (dlifp1->dl_if_uniqueid == NULL) {
8447
0
      zfree(dlif_zone, buf);
8448
0
      ret = ENOMEM;
8449
0
      goto end;
8450
0
    }
8451
4
    bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
8452
4
    dlifp1->dl_if_uniqueid_len = uniqueid_len;
8453
4
  }
8454
8455
4
  ifp1 = (struct ifnet *)dlifp1;
8456
4
  dlifp1->dl_if_flags = DLIF_INUSE;
8457
4
  if (ifnet_debug) {
8458
0
    dlifp1->dl_if_flags |= DLIF_DEBUG;
8459
0
    dlifp1->dl_if_trace = dlil_if_trace;
8460
0
  }
8461
4
  ifp1->if_name = dlifp1->dl_if_namestorage;
8462
4
  ifp1->if_xname = dlifp1->dl_if_xnamestorage;
8463
8464
  /* initialize interface description */
8465
4
  ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
8466
4
  ifp1->if_desc.ifd_len = 0;
8467
4
  ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
8468
8469
8470
4
  if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
8471
0
    DLIL_PRINTF("%s: failed to allocate if local stats, "
8472
0
        "error: %d\n", __func__, ret);
8473
    /* This probably shouldn't be fatal */
8474
0
    ret = 0;
8475
0
  }
8476
8477
4
  lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
8478
4
  lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
8479
4
  lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
8480
4
  lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
8481
4
  lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
8482
4
      ifnet_lock_attr);
8483
4
  lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
8484
4
#if INET
8485
4
  lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
8486
4
      ifnet_lock_attr);
8487
4
  ifp1->if_inetdata = NULL;
8488
4
#endif
8489
4
  lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
8490
4
      ifnet_lock_attr);
8491
4
  ifp1->if_inet6data = NULL;
8492
4
  lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
8493
4
      ifnet_lock_attr);
8494
4
  ifp1->if_link_status = NULL;
8495
8496
  /* for send data paths */
8497
4
  lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
8498
4
      ifnet_lock_attr);
8499
4
  lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
8500
4
      ifnet_lock_attr);
8501
4
  lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
8502
4
      ifnet_lock_attr);
8503
8504
  /* for receive data paths */
8505
4
  lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
8506
4
      ifnet_lock_attr);
8507
8508
  /* thread call allocation is done with sleeping zalloc */
8509
4
  ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
8510
4
      ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
8511
4
  if (ifp1->if_dt_tcall == NULL) {
8512
0
    panic_plain("%s: couldn't create if_dt_tcall", __func__);
8513
    /* NOTREACHED */
8514
0
  }
8515
8516
4
  TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
8517
8518
4
  *ifp = ifp1;
8519
8520
4
end:
8521
4
  dlil_if_unlock();
8522
8523
4
  VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
8524
4
      IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
8525
8526
0
  return ret;
8527
4
}
8528
8529
__private_extern__ void
8530
dlil_if_release(ifnet_t ifp)
8531
0
{
8532
0
  struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
8533
8534
0
  VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
8535
0
  if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
8536
0
    VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
8537
0
  }
8538
8539
0
  ifnet_lock_exclusive(ifp);
8540
0
  lck_mtx_lock(&dlifp->dl_if_lock);
8541
0
  dlifp->dl_if_flags &= ~DLIF_INUSE;
8542
0
  strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
8543
0
  ifp->if_name = dlifp->dl_if_namestorage;
8544
  /* Reset external name (name + unit) */
8545
0
  ifp->if_xname = dlifp->dl_if_xnamestorage;
8546
0
  snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
8547
0
      "%s?", ifp->if_name);
8548
0
  lck_mtx_unlock(&dlifp->dl_if_lock);
8549
0
  ifnet_lock_done(ifp);
8550
0
}
8551
8552
__private_extern__ void
8553
dlil_if_lock(void)
8554
136k
{
8555
136k
  lck_mtx_lock(&dlil_ifnet_lock);
8556
136k
}
8557
8558
__private_extern__ void
8559
dlil_if_unlock(void)
8560
136k
{
8561
136k
  lck_mtx_unlock(&dlil_ifnet_lock);
8562
136k
}
8563
8564
__private_extern__ void
8565
dlil_if_lock_assert(void)
8566
0
{
8567
0
  LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
8568
0
}
8569
8570
__private_extern__ void
8571
dlil_proto_unplumb_all(struct ifnet *ifp)
8572
0
{
8573
  /*
8574
   * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
8575
   * each bucket contains exactly one entry; PF_VLAN does not need an
8576
   * explicit unplumb.
8577
   *
8578
   * if_proto_hash[3] is for other protocols; we expect anything
8579
   * in this bucket to respond to the DETACHING event (which would
8580
   * have happened by now) and do the unplumb then.
8581
   */
8582
0
  (void) proto_unplumb(PF_INET, ifp);
8583
0
  (void) proto_unplumb(PF_INET6, ifp);
8584
0
}
8585
8586
static void
8587
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
8588
369k
{
8589
369k
  lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8590
369k
  lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8591
8592
369k
  route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
8593
8594
369k
  lck_mtx_unlock(&ifp->if_cached_route_lock);
8595
369k
}
8596
8597
static void
8598
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
8599
4.93k
{
8600
4.93k
  lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8601
4.93k
  lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8602
8603
4.93k
  if (ifp->if_fwd_cacheok) {
8604
4.93k
    route_copyin(src, &ifp->if_src_route, sizeof(*src));
8605
4.93k
  } else {
8606
0
    ROUTE_RELEASE(src);
8607
0
  }
8608
4.93k
  lck_mtx_unlock(&ifp->if_cached_route_lock);
8609
4.93k
}
8610
8611
static void
8612
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
8613
249k
{
8614
249k
  lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8615
249k
  lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8616
8617
249k
  route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
8618
249k
      sizeof(*dst));
8619
8620
249k
  lck_mtx_unlock(&ifp->if_cached_route_lock);
8621
249k
}
8622
8623
static void
8624
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
8625
32.1k
{
8626
32.1k
  lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8627
32.1k
  lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8628
8629
32.1k
  if (ifp->if_fwd_cacheok) {
8630
32.1k
    route_copyin((struct route *)src,
8631
32.1k
        (struct route *)&ifp->if_src_route6, sizeof(*src));
8632
32.1k
  } else {
8633
0
    ROUTE_RELEASE(src);
8634
0
  }
8635
32.1k
  lck_mtx_unlock(&ifp->if_cached_route_lock);
8636
32.1k
}
8637
8638
struct rtentry *
8639
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
8640
369k
{
8641
369k
  struct route            src_rt;
8642
369k
  struct sockaddr_in      *dst;
8643
8644
369k
  dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
8645
8646
369k
  ifp_src_route_copyout(ifp, &src_rt);
8647
8648
369k
  if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
8649
368k
    ROUTE_RELEASE(&src_rt);
8650
368k
    if (dst->sin_family != AF_INET) {
8651
29
      bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8652
29
      dst->sin_len = sizeof(src_rt.ro_dst);
8653
29
      dst->sin_family = AF_INET;
8654
29
    }
8655
368k
    dst->sin_addr = src_ip;
8656
8657
368k
    VERIFY(src_rt.ro_rt == NULL);
8658
0
    src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
8659
368k
        0, 0, ifp->if_index);
8660
8661
368k
    if (src_rt.ro_rt != NULL) {
8662
      /* retain a ref, copyin consumes one */
8663
4.93k
      struct rtentry  *rte = src_rt.ro_rt;
8664
4.93k
      RT_ADDREF(rte);
8665
4.93k
      ifp_src_route_copyin(ifp, &src_rt);
8666
4.93k
      src_rt.ro_rt = rte;
8667
4.93k
    }
8668
368k
  }
8669
8670
0
  return src_rt.ro_rt;
8671
369k
}
8672
8673
struct rtentry *
8674
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
8675
249k
{
8676
249k
  struct route_in6 src_rt;
8677
8678
249k
  ifp_src_route6_copyout(ifp, &src_rt);
8679
8680
249k
  if (ROUTE_UNUSABLE(&src_rt) ||
8681
248k
      !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
8682
81.9k
    ROUTE_RELEASE(&src_rt);
8683
81.9k
    if (src_rt.ro_dst.sin6_family != AF_INET6) {
8684
2
      bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8685
2
      src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
8686
2
      src_rt.ro_dst.sin6_family = AF_INET6;
8687
2
    }
8688
81.9k
    src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
8689
81.9k
    bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
8690
81.9k
        sizeof(src_rt.ro_dst.sin6_addr));
8691
8692
81.9k
    if (src_rt.ro_rt == NULL) {
8693
81.9k
      src_rt.ro_rt = rtalloc1_scoped(
8694
81.9k
        (struct sockaddr *)&src_rt.ro_dst, 0, 0,
8695
81.9k
        ifp->if_index);
8696
8697
81.9k
      if (src_rt.ro_rt != NULL) {
8698
        /* retain a ref, copyin consumes one */
8699
32.1k
        struct rtentry  *rte = src_rt.ro_rt;
8700
32.1k
        RT_ADDREF(rte);
8701
32.1k
        ifp_src_route6_copyin(ifp, &src_rt);
8702
32.1k
        src_rt.ro_rt = rte;
8703
32.1k
      }
8704
81.9k
    }
8705
81.9k
  }
8706
8707
249k
  return src_rt.ro_rt;
8708
249k
}
8709
8710
void
8711
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
8712
0
{
8713
0
  struct kev_dl_link_quality_metric_data ev_lqm_data;
8714
8715
0
  VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
8716
8717
  /* Normalize to edge */
8718
0
  if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
8719
0
    lqm = IFNET_LQM_THRESH_ABORT;
8720
0
    atomic_bitset_32(&tcbinfo.ipi_flags,
8721
0
        INPCBINFO_HANDLE_LQM_ABORT);
8722
0
    inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
8723
0
  } else if (lqm > IFNET_LQM_THRESH_ABORT &&
8724
0
      lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
8725
0
    lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
8726
0
  } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
8727
0
      lqm <= IFNET_LQM_THRESH_POOR) {
8728
0
    lqm = IFNET_LQM_THRESH_POOR;
8729
0
  } else if (lqm > IFNET_LQM_THRESH_POOR &&
8730
0
      lqm <= IFNET_LQM_THRESH_GOOD) {
8731
0
    lqm = IFNET_LQM_THRESH_GOOD;
8732
0
  }
8733
8734
  /*
8735
   * Take the lock if needed
8736
   */
8737
0
  if (!locked) {
8738
0
    ifnet_lock_exclusive(ifp);
8739
0
  }
8740
8741
0
  if (lqm == ifp->if_interface_state.lqm_state &&
8742
0
      (ifp->if_interface_state.valid_bitmask &
8743
0
      IF_INTERFACE_STATE_LQM_STATE_VALID)) {
8744
    /*
8745
     * Release the lock if was not held by the caller
8746
     */
8747
0
    if (!locked) {
8748
0
      ifnet_lock_done(ifp);
8749
0
    }
8750
0
    return;         /* nothing to update */
8751
0
  }
8752
0
  ifp->if_interface_state.valid_bitmask |=
8753
0
      IF_INTERFACE_STATE_LQM_STATE_VALID;
8754
0
  ifp->if_interface_state.lqm_state = (int8_t)lqm;
8755
8756
  /*
8757
   * Don't want to hold the lock when issuing kernel events
8758
   */
8759
0
  ifnet_lock_done(ifp);
8760
8761
0
  bzero(&ev_lqm_data, sizeof(ev_lqm_data));
8762
0
  ev_lqm_data.link_quality_metric = lqm;
8763
8764
0
  dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
8765
0
      (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));
8766
8767
  /*
8768
   * Reacquire the lock for the caller
8769
   */
8770
0
  if (locked) {
8771
0
    ifnet_lock_exclusive(ifp);
8772
0
  }
8773
0
}
8774
8775
static void
8776
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
8777
0
{
8778
0
  struct kev_dl_rrc_state kev;
8779
8780
0
  if (rrc_state == ifp->if_interface_state.rrc_state &&
8781
0
      (ifp->if_interface_state.valid_bitmask &
8782
0
      IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8783
0
    return;
8784
0
  }
8785
8786
0
  ifp->if_interface_state.valid_bitmask |=
8787
0
      IF_INTERFACE_STATE_RRC_STATE_VALID;
8788
8789
0
  ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;
8790
8791
  /*
8792
   * Don't want to hold the lock when issuing kernel events
8793
   */
8794
0
  ifnet_lock_done(ifp);
8795
8796
0
  bzero(&kev, sizeof(struct kev_dl_rrc_state));
8797
0
  kev.rrc_state = rrc_state;
8798
8799
0
  dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
8800
0
      (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
8801
8802
0
  ifnet_lock_exclusive(ifp);
8803
0
}
8804
8805
errno_t
8806
if_state_update(struct ifnet *ifp,
8807
    struct if_interface_state *if_interface_state)
8808
0
{
8809
0
  u_short if_index_available = 0;
8810
8811
0
  ifnet_lock_exclusive(ifp);
8812
8813
0
  if ((ifp->if_type != IFT_CELLULAR) &&
8814
0
      (if_interface_state->valid_bitmask &
8815
0
      IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8816
0
    ifnet_lock_done(ifp);
8817
0
    return ENOTSUP;
8818
0
  }
8819
0
  if ((if_interface_state->valid_bitmask &
8820
0
      IF_INTERFACE_STATE_LQM_STATE_VALID) &&
8821
0
      (if_interface_state->lqm_state < IFNET_LQM_MIN ||
8822
0
      if_interface_state->lqm_state > IFNET_LQM_MAX)) {
8823
0
    ifnet_lock_done(ifp);
8824
0
    return EINVAL;
8825
0
  }
8826
0
  if ((if_interface_state->valid_bitmask &
8827
0
      IF_INTERFACE_STATE_RRC_STATE_VALID) &&
8828
0
      if_interface_state->rrc_state !=
8829
0
      IF_INTERFACE_STATE_RRC_STATE_IDLE &&
8830
0
      if_interface_state->rrc_state !=
8831
0
      IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
8832
0
    ifnet_lock_done(ifp);
8833
0
    return EINVAL;
8834
0
  }
8835
8836
0
  if (if_interface_state->valid_bitmask &
8837
0
      IF_INTERFACE_STATE_LQM_STATE_VALID) {
8838
0
    if_lqm_update(ifp, if_interface_state->lqm_state, 1);
8839
0
  }
8840
0
  if (if_interface_state->valid_bitmask &
8841
0
      IF_INTERFACE_STATE_RRC_STATE_VALID) {
8842
0
    if_rrc_state_update(ifp, if_interface_state->rrc_state);
8843
0
  }
8844
0
  if (if_interface_state->valid_bitmask &
8845
0
      IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8846
0
    ifp->if_interface_state.valid_bitmask |=
8847
0
        IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8848
0
    ifp->if_interface_state.interface_availability =
8849
0
        if_interface_state->interface_availability;
8850
8851
0
    if (ifp->if_interface_state.interface_availability ==
8852
0
        IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
8853
0
      os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
8854
0
          __func__, if_name(ifp), ifp->if_index);
8855
0
      if_index_available = ifp->if_index;
8856
0
    } else {
8857
0
      os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
8858
0
          __func__, if_name(ifp), ifp->if_index);
8859
0
    }
8860
0
  }
8861
0
  ifnet_lock_done(ifp);
8862
8863
  /*
8864
   * Check if the TCP connections going on this interface should be
8865
   * forced to send probe packets instead of waiting for TCP timers
8866
   * to fire. This is done on an explicit notification such as
8867
   * SIOCSIFINTERFACESTATE which marks the interface as available.
8868
   */
8869
0
  if (if_index_available > 0) {
8870
0
    tcp_interface_send_probe(if_index_available);
8871
0
  }
8872
8873
0
  return 0;
8874
0
}
8875
8876
void
8877
if_get_state(struct ifnet *ifp,
8878
    struct if_interface_state *if_interface_state)
8879
1
{
8880
1
  ifnet_lock_shared(ifp);
8881
8882
1
  if_interface_state->valid_bitmask = 0;
8883
8884
1
  if (ifp->if_interface_state.valid_bitmask &
8885
1
      IF_INTERFACE_STATE_RRC_STATE_VALID) {
8886
0
    if_interface_state->valid_bitmask |=
8887
0
        IF_INTERFACE_STATE_RRC_STATE_VALID;
8888
0
    if_interface_state->rrc_state =
8889
0
        ifp->if_interface_state.rrc_state;
8890
0
  }
8891
1
  if (ifp->if_interface_state.valid_bitmask &
8892
1
      IF_INTERFACE_STATE_LQM_STATE_VALID) {
8893
1
    if_interface_state->valid_bitmask |=
8894
1
        IF_INTERFACE_STATE_LQM_STATE_VALID;
8895
1
    if_interface_state->lqm_state =
8896
1
        ifp->if_interface_state.lqm_state;
8897
1
  }
8898
1
  if (ifp->if_interface_state.valid_bitmask &
8899
1
      IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8900
1
    if_interface_state->valid_bitmask |=
8901
1
        IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8902
1
    if_interface_state->interface_availability =
8903
1
        ifp->if_interface_state.interface_availability;
8904
1
  }
8905
8906
1
  ifnet_lock_done(ifp);
8907
1
}
8908
8909
errno_t
8910
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
8911
0
{
8912
0
  if (conn_probe > 1) {
8913
0
    return EINVAL;
8914
0
  }
8915
0
  if (conn_probe == 0) {
8916
0
    if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
8917
0
  } else {
8918
0
    if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
8919
0
  }
8920
8921
0
#if NECP
8922
0
  necp_update_all_clients();
8923
0
#endif /* NECP */
8924
8925
0
  tcp_probe_connectivity(ifp, conn_probe);
8926
0
  return 0;
8927
0
}
8928
8929
/* for uuid.c */
8930
static int
8931
get_ether_index(int * ret_other_index)
8932
0
{
8933
0
  struct ifnet *ifp;
8934
0
  int en0_index = 0;
8935
0
  int other_en_index = 0;
8936
0
  int any_ether_index = 0;
8937
0
  short best_unit = 0;
8938
8939
0
  *ret_other_index = 0;
8940
0
  TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
8941
    /*
8942
     * find en0, or if not en0, the lowest unit en*, and if not
8943
     * that, any ethernet
8944
     */
8945
0
    ifnet_lock_shared(ifp);
8946
0
    if (strcmp(ifp->if_name, "en") == 0) {
8947
0
      if (ifp->if_unit == 0) {
8948
        /* found en0, we're done */
8949
0
        en0_index = ifp->if_index;
8950
0
        ifnet_lock_done(ifp);
8951
0
        break;
8952
0
      }
8953
0
      if (other_en_index == 0 || ifp->if_unit < best_unit) {
8954
0
        other_en_index = ifp->if_index;
8955
0
        best_unit = ifp->if_unit;
8956
0
      }
8957
0
    } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
8958
0
      any_ether_index = ifp->if_index;
8959
0
    }
8960
0
    ifnet_lock_done(ifp);
8961
0
  }
8962
0
  if (en0_index == 0) {
8963
0
    if (other_en_index != 0) {
8964
0
      *ret_other_index = other_en_index;
8965
0
    } else if (any_ether_index != 0) {
8966
0
      *ret_other_index = any_ether_index;
8967
0
    }
8968
0
  }
8969
0
  return en0_index;
8970
0
}
8971
8972
int
8973
uuid_get_ethernet(u_int8_t *node)
8974
0
{
8975
0
  static int en0_index;
8976
0
  struct ifnet *ifp;
8977
0
  int other_index = 0;
8978
0
  int the_index = 0;
8979
0
  int ret;
8980
8981
0
  ifnet_head_lock_shared();
8982
0
  if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
8983
0
    en0_index = get_ether_index(&other_index);
8984
0
  }
8985
0
  if (en0_index != 0) {
8986
0
    the_index = en0_index;
8987
0
  } else if (other_index != 0) {
8988
0
    the_index = other_index;
8989
0
  }
8990
0
  if (the_index != 0) {
8991
0
    struct dlil_ifnet *dl_if;
8992
8993
0
    ifp = ifindex2ifnet[the_index];
8994
0
    VERIFY(ifp != NULL);
8995
0
    dl_if = (struct dlil_ifnet *)ifp;
8996
0
    if (dl_if->dl_if_permanent_ether_is_set != 0) {
8997
      /*
8998
       * Use the permanent ethernet address if it is
8999
       * available because it will never change.
9000
       */
9001
0
      memcpy(node, dl_if->dl_if_permanent_ether,
9002
0
          ETHER_ADDR_LEN);
9003
0
    } else {
9004
0
      memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
9005
0
    }
9006
0
    ret = 0;
9007
0
  } else {
9008
0
    ret = -1;
9009
0
  }
9010
0
  ifnet_head_done();
9011
0
  return ret;
9012
0
}
9013
9014
static int
9015
sysctl_rxpoll SYSCTL_HANDLER_ARGS
9016
0
{
9017
0
#pragma unused(arg1, arg2)
9018
0
  uint32_t i;
9019
0
  int err;
9020
9021
0
  i = if_rxpoll;
9022
9023
0
  err = sysctl_handle_int(oidp, &i, 0, req);
9024
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
9025
0
    return err;
9026
0
  }
9027
9028
0
  if (net_rxpoll == 0) {
9029
0
    return ENXIO;
9030
0
  }
9031
9032
0
  if_rxpoll = i;
9033
0
  return err;
9034
0
}
9035
9036
static int
9037
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
9038
0
{
9039
0
#pragma unused(arg1, arg2)
9040
0
  uint64_t q;
9041
0
  int err;
9042
9043
0
  q = if_rxpoll_mode_holdtime;
9044
9045
0
  err = sysctl_handle_quad(oidp, &q, 0, req);
9046
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
9047
0
    return err;
9048
0
  }
9049
9050
0
  if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
9051
0
    q = IF_RXPOLL_MODE_HOLDTIME_MIN;
9052
0
  }
9053
9054
0
  if_rxpoll_mode_holdtime = q;
9055
9056
0
  return err;
9057
0
}
9058
9059
static int
9060
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
9061
0
{
9062
0
#pragma unused(arg1, arg2)
9063
0
  uint64_t q;
9064
0
  int err;
9065
9066
0
  q = if_rxpoll_sample_holdtime;
9067
9068
0
  err = sysctl_handle_quad(oidp, &q, 0, req);
9069
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
9070
0
    return err;
9071
0
  }
9072
9073
0
  if (q < IF_RXPOLL_SAMPLETIME_MIN) {
9074
0
    q = IF_RXPOLL_SAMPLETIME_MIN;
9075
0
  }
9076
9077
0
  if_rxpoll_sample_holdtime = q;
9078
9079
0
  return err;
9080
0
}
9081
9082
static int
9083
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
9084
0
{
9085
0
#pragma unused(arg1, arg2)
9086
0
  uint64_t q;
9087
0
  int err;
9088
9089
0
  q = if_rxpoll_interval_time;
9090
9091
0
  err = sysctl_handle_quad(oidp, &q, 0, req);
9092
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
9093
0
    return err;
9094
0
  }
9095
9096
0
  if (q < IF_RXPOLL_INTERVALTIME_MIN) {
9097
0
    q = IF_RXPOLL_INTERVALTIME_MIN;
9098
0
  }
9099
9100
0
  if_rxpoll_interval_time = q;
9101
9102
0
  return err;
9103
0
}
9104
9105
static int
9106
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
9107
0
{
9108
0
#pragma unused(arg1, arg2)
9109
0
  uint32_t i;
9110
0
  int err;
9111
9112
0
  i = if_sysctl_rxpoll_wlowat;
9113
9114
0
  err = sysctl_handle_int(oidp, &i, 0, req);
9115
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
9116
0
    return err;
9117
0
  }
9118
9119
0
  if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
9120
0
    return EINVAL;
9121
0
  }
9122
9123
0
  if_sysctl_rxpoll_wlowat = i;
9124
0
  return err;
9125
0
}
9126
9127
static int
9128
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
9129
0
{
9130
0
#pragma unused(arg1, arg2)
9131
0
  uint32_t i;
9132
0
  int err;
9133
9134
0
  i = if_sysctl_rxpoll_whiwat;
9135
9136
0
  err = sysctl_handle_int(oidp, &i, 0, req);
9137
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
9138
0
    return err;
9139
0
  }
9140
9141
0
  if (i <= if_sysctl_rxpoll_wlowat) {
9142
0
    return EINVAL;
9143
0
  }
9144
9145
0
  if_sysctl_rxpoll_whiwat = i;
9146
0
  return err;
9147
0
}
9148
9149
static int
9150
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
9151
0
{
9152
0
#pragma unused(arg1, arg2)
9153
0
  int i, err;
9154
9155
0
  i = if_sndq_maxlen;
9156
9157
0
  err = sysctl_handle_int(oidp, &i, 0, req);
9158
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
9159
0
    return err;
9160
0
  }
9161
9162
0
  if (i < IF_SNDQ_MINLEN) {
9163
0
    i = IF_SNDQ_MINLEN;
9164
0
  }
9165
9166
0
  if_sndq_maxlen = i;
9167
0
  return err;
9168
0
}
9169
9170
static int
9171
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
9172
0
{
9173
0
#pragma unused(arg1, arg2)
9174
0
  int i, err;
9175
9176
0
  i = if_rcvq_maxlen;
9177
9178
0
  err = sysctl_handle_int(oidp, &i, 0, req);
9179
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
9180
0
    return err;
9181
0
  }
9182
9183
0
  if (i < IF_RCVQ_MINLEN) {
9184
0
    i = IF_RCVQ_MINLEN;
9185
0
  }
9186
9187
0
  if_rcvq_maxlen = i;
9188
0
  return err;
9189
0
}
9190
9191
int
9192
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
9193
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9194
0
{
9195
0
  struct kev_dl_node_presence kev;
9196
0
  struct sockaddr_dl *sdl;
9197
0
  struct sockaddr_in6 *sin6;
9198
0
  int ret = 0;
9199
9200
0
  VERIFY(ifp);
9201
0
  VERIFY(sa);
9202
0
  VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9203
9204
0
  bzero(&kev, sizeof(kev));
9205
0
  sin6 = &kev.sin6_node_address;
9206
0
  sdl = &kev.sdl_node_address;
9207
0
  nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
9208
0
  kev.rssi = rssi;
9209
0
  kev.link_quality_metric = lqm;
9210
0
  kev.node_proximity_metric = npm;
9211
0
  bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
9212
9213
0
  ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
9214
0
  if (ret == 0) {
9215
0
    int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9216
0
        &kev.link_data, sizeof(kev));
9217
0
    if (err != 0) {
9218
0
      log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
9219
0
          "error %d\n", __func__, err);
9220
0
    }
9221
0
  }
9222
0
  return ret;
9223
0
}
9224
9225
void
9226
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
9227
0
{
9228
0
  struct kev_dl_node_absence kev = {};
9229
0
  struct sockaddr_in6 *kev_sin6 = NULL;
9230
0
  struct sockaddr_dl *kev_sdl = NULL;
9231
9232
0
  VERIFY(ifp != NULL);
9233
0
  VERIFY(sa != NULL);
9234
0
  VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9235
9236
0
  kev_sin6 = &kev.sin6_node_address;
9237
0
  kev_sdl = &kev.sdl_node_address;
9238
9239
0
  if (sa->sa_family == AF_INET6) {
9240
    /*
9241
     * If IPv6 address is given, get the link layer
9242
     * address from what was cached in the neighbor cache
9243
     */
9244
0
    VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9245
0
    bcopy(sa, kev_sin6, sa->sa_len);
9246
0
    nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
9247
0
  } else {
9248
    /*
9249
     * If passed address is AF_LINK type, derive the address
9250
     * based on the link address.
9251
     */
9252
0
    nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
9253
0
    nd6_alt_node_absent(ifp, kev_sin6, NULL);
9254
0
  }
9255
9256
0
  kev_sdl->sdl_type = ifp->if_type;
9257
0
  kev_sdl->sdl_index = ifp->if_index;
9258
9259
0
  dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
9260
0
      &kev.link_data, sizeof(kev));
9261
0
}
9262
9263
int
9264
dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
9265
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9266
0
{
9267
0
  struct kev_dl_node_presence kev = {};
9268
0
  struct sockaddr_dl *kev_sdl = NULL;
9269
0
  struct sockaddr_in6 *kev_sin6 = NULL;
9270
0
  int ret = 0;
9271
9272
0
  VERIFY(ifp != NULL);
9273
0
  VERIFY(sa != NULL && sdl != NULL);
9274
0
  VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
9275
9276
0
  kev_sin6 = &kev.sin6_node_address;
9277
0
  kev_sdl = &kev.sdl_node_address;
9278
9279
0
  VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
9280
0
  bcopy(sdl, kev_sdl, sdl->sdl_len);
9281
0
  kev_sdl->sdl_type = ifp->if_type;
9282
0
  kev_sdl->sdl_index = ifp->if_index;
9283
9284
0
  VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9285
0
  bcopy(sa, kev_sin6, sa->sa_len);
9286
9287
0
  kev.rssi = rssi;
9288
0
  kev.link_quality_metric = lqm;
9289
0
  kev.node_proximity_metric = npm;
9290
0
  bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
9291
9292
0
  ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
9293
0
  if (ret == 0) {
9294
0
    int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9295
0
        &kev.link_data, sizeof(kev));
9296
0
    if (err != 0) {
9297
0
      log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
9298
0
    }
9299
0
  }
9300
0
  return ret;
9301
0
}
9302
9303
const void *
9304
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
9305
    kauth_cred_t *credp)
9306
3.79k
{
9307
3.79k
  const u_int8_t *bytes;
9308
3.79k
  size_t size;
9309
9310
3.79k
  bytes = CONST_LLADDR(sdl);
9311
3.79k
  size = sdl->sdl_alen;
9312
9313
3.79k
#if CONFIG_MACF
9314
3.79k
  if (dlil_lladdr_ckreq) {
9315
0
    switch (sdl->sdl_type) {
9316
0
    case IFT_ETHER:
9317
0
    case IFT_IEEE1394:
9318
0
      break;
9319
0
    default:
9320
0
      credp = NULL;
9321
0
      break;
9322
0
    }
9323
0
    ;
9324
9325
0
    if (credp && mac_system_check_info(*credp, "net.link.addr")) {
9326
0
      static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
9327
0
        [0] = 2
9328
0
      };
9329
9330
0
      bytes = unspec;
9331
0
    }
9332
0
  }
9333
#else
9334
#pragma unused(credp)
9335
#endif
9336
9337
3.79k
  if (sizep != NULL) {
9338
3.79k
    *sizep = size;
9339
3.79k
  }
9340
3.79k
  return bytes;
9341
3.79k
}
9342
9343
void
9344
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
9345
    u_int8_t info[DLIL_MODARGLEN])
9346
0
{
9347
0
  struct kev_dl_issues kev;
9348
0
  struct timeval tv;
9349
9350
0
  VERIFY(ifp != NULL);
9351
0
  VERIFY(modid != NULL);
9352
0
  _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
9353
0
  _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
9354
9355
0
  bzero(&kev, sizeof(kev));
9356
9357
0
  microtime(&tv);
9358
0
  kev.timestamp = tv.tv_sec;
9359
0
  bcopy(modid, &kev.modid, DLIL_MODIDLEN);
9360
0
  if (info != NULL) {
9361
0
    bcopy(info, &kev.info, DLIL_MODARGLEN);
9362
0
  }
9363
9364
0
  dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
9365
0
      &kev.link_data, sizeof(kev));
9366
0
}
9367
9368
errno_t
9369
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9370
    struct proc *p)
9371
0
{
9372
0
  u_int32_t level = IFNET_THROTTLE_OFF;
9373
0
  errno_t result = 0;
9374
9375
0
  VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
9376
9377
0
  if (cmd == SIOCSIFOPPORTUNISTIC) {
9378
    /*
9379
     * XXX: Use priv_check_cred() instead of root check?
9380
     */
9381
0
    if ((result = proc_suser(p)) != 0) {
9382
0
      return result;
9383
0
    }
9384
9385
0
    if (ifr->ifr_opportunistic.ifo_flags ==
9386
0
        IFRIFOF_BLOCK_OPPORTUNISTIC) {
9387
0
      level = IFNET_THROTTLE_OPPORTUNISTIC;
9388
0
    } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
9389
0
      level = IFNET_THROTTLE_OFF;
9390
0
    } else {
9391
0
      result = EINVAL;
9392
0
    }
9393
9394
0
    if (result == 0) {
9395
0
      result = ifnet_set_throttle(ifp, level);
9396
0
    }
9397
0
  } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
9398
0
    ifr->ifr_opportunistic.ifo_flags = 0;
9399
0
    if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
9400
0
      ifr->ifr_opportunistic.ifo_flags |=
9401
0
          IFRIFOF_BLOCK_OPPORTUNISTIC;
9402
0
    }
9403
0
  }
9404
9405
  /*
9406
   * Return the count of current opportunistic connections
9407
   * over the interface.
9408
   */
9409
0
  if (result == 0) {
9410
0
    uint32_t flags = 0;
9411
0
    flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
9412
0
        INPCB_OPPORTUNISTIC_SETCMD : 0;
9413
0
    flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
9414
0
        INPCB_OPPORTUNISTIC_THROTTLEON : 0;
9415
0
    ifr->ifr_opportunistic.ifo_inuse =
9416
0
        udp_count_opportunistic(ifp->if_index, flags) +
9417
0
        tcp_count_opportunistic(ifp->if_index, flags);
9418
0
  }
9419
9420
0
  if (result == EALREADY) {
9421
0
    result = 0;
9422
0
  }
9423
9424
0
  return result;
9425
0
}
9426
9427
int
9428
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
9429
0
{
9430
0
  struct ifclassq *ifq;
9431
0
  int err = 0;
9432
9433
0
  if (!(ifp->if_eflags & IFEF_TXSTART)) {
9434
0
    return ENXIO;
9435
0
  }
9436
9437
0
  *level = IFNET_THROTTLE_OFF;
9438
9439
0
  ifq = &ifp->if_snd;
9440
0
  IFCQ_LOCK(ifq);
9441
  /* Throttling works only for IFCQ, not ALTQ instances */
9442
0
  if (IFCQ_IS_ENABLED(ifq)) {
9443
0
    cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
9444
9445
0
    err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
9446
0
    *level = req.level;
9447
0
  }
9448
0
  IFCQ_UNLOCK(ifq);
9449
9450
0
  return err;
9451
0
}
9452
9453
int
9454
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
9455
0
{
9456
0
  struct ifclassq *ifq;
9457
0
  int err = 0;
9458
9459
0
  if (!(ifp->if_eflags & IFEF_TXSTART)) {
9460
0
    return ENXIO;
9461
0
  }
9462
9463
0
  ifq = &ifp->if_snd;
9464
9465
0
  switch (level) {
9466
0
  case IFNET_THROTTLE_OFF:
9467
0
  case IFNET_THROTTLE_OPPORTUNISTIC:
9468
0
    break;
9469
0
  default:
9470
0
    return EINVAL;
9471
0
  }
9472
9473
0
  IFCQ_LOCK(ifq);
9474
0
  if (IFCQ_IS_ENABLED(ifq)) {
9475
0
    cqrq_throttle_t req = { 1, level };
9476
9477
0
    err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
9478
0
  }
9479
0
  IFCQ_UNLOCK(ifq);
9480
9481
0
  if (err == 0) {
9482
0
    DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
9483
0
        level);
9484
0
#if NECP
9485
0
    necp_update_all_clients();
9486
0
#endif /* NECP */
9487
0
    if (level == IFNET_THROTTLE_OFF) {
9488
0
      ifnet_start(ifp);
9489
0
    }
9490
0
  }
9491
9492
0
  return err;
9493
0
}
9494
9495
errno_t
9496
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9497
    struct proc *p)
9498
0
{
9499
0
#pragma unused(p)
9500
0
  errno_t result = 0;
9501
0
  uint32_t flags;
9502
0
  int level, category, subcategory;
9503
9504
0
  VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
9505
9506
0
  if (cmd == SIOCSIFLOG) {
9507
0
    if ((result = priv_check_cred(kauth_cred_get(),
9508
0
        PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
9509
0
      return result;
9510
0
    }
9511
9512
0
    level = ifr->ifr_log.ifl_level;
9513
0
    if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
9514
0
      result = EINVAL;
9515
0
    }
9516
9517
0
    flags = ifr->ifr_log.ifl_flags;
9518
0
    if ((flags &= IFNET_LOGF_MASK) == 0) {
9519
0
      result = EINVAL;
9520
0
    }
9521
9522
0
    category = ifr->ifr_log.ifl_category;
9523
0
    subcategory = ifr->ifr_log.ifl_subcategory;
9524
9525
0
    if (result == 0) {
9526
0
      result = ifnet_set_log(ifp, level, flags,
9527
0
          category, subcategory);
9528
0
    }
9529
0
  } else {
9530
0
    result = ifnet_get_log(ifp, &level, &flags, &category,
9531
0
        &subcategory);
9532
0
    if (result == 0) {
9533
0
      ifr->ifr_log.ifl_level = level;
9534
0
      ifr->ifr_log.ifl_flags = flags;
9535
0
      ifr->ifr_log.ifl_category = category;
9536
0
      ifr->ifr_log.ifl_subcategory = subcategory;
9537
0
    }
9538
0
  }
9539
9540
0
  return result;
9541
0
}
9542
9543
int
9544
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
9545
    int32_t category, int32_t subcategory)
9546
0
{
9547
0
  int err = 0;
9548
9549
0
  VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
9550
0
  VERIFY(flags & IFNET_LOGF_MASK);
9551
9552
  /*
9553
   * The logging level applies to all facilities; make sure to
9554
   * update them all with the most current level.
9555
   */
9556
0
  flags |= ifp->if_log.flags;
9557
9558
0
  if (ifp->if_output_ctl != NULL) {
9559
0
    struct ifnet_log_params l;
9560
9561
0
    bzero(&l, sizeof(l));
9562
0
    l.level = level;
9563
0
    l.flags = flags;
9564
0
    l.flags &= ~IFNET_LOGF_DLIL;
9565
0
    l.category = category;
9566
0
    l.subcategory = subcategory;
9567
9568
    /* Send this request to lower layers */
9569
0
    if (l.flags != 0) {
9570
0
      err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
9571
0
          sizeof(l), &l);
9572
0
    }
9573
0
  } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
9574
    /*
9575
     * If targeted to the lower layers without an output
9576
     * control callback registered on the interface, just
9577
     * silently ignore facilities other than ours.
9578
     */
9579
0
    flags &= IFNET_LOGF_DLIL;
9580
0
    if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
9581
0
      level = 0;
9582
0
    }
9583
0
  }
9584
9585
0
  if (err == 0) {
9586
0
    if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
9587
0
      ifp->if_log.flags = 0;
9588
0
    } else {
9589
0
      ifp->if_log.flags |= flags;
9590
0
    }
9591
9592
0
    log(LOG_INFO, "%s: logging level set to %d flags=%b "
9593
0
        "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
9594
0
        ifp->if_log.level, ifp->if_log.flags,
9595
0
        IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
9596
0
        category, subcategory);
9597
0
  }
9598
9599
0
  return err;
9600
0
}
9601
9602
int
9603
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
9604
    int32_t *category, int32_t *subcategory)
9605
0
{
9606
0
  if (level != NULL) {
9607
0
    *level = ifp->if_log.level;
9608
0
  }
9609
0
  if (flags != NULL) {
9610
0
    *flags = ifp->if_log.flags;
9611
0
  }
9612
0
  if (category != NULL) {
9613
0
    *category = ifp->if_log.category;
9614
0
  }
9615
0
  if (subcategory != NULL) {
9616
0
    *subcategory = ifp->if_log.subcategory;
9617
0
  }
9618
9619
0
  return 0;
9620
0
}
9621
9622
int
9623
ifnet_notify_address(struct ifnet *ifp, int af)
9624
3
{
9625
3
  struct ifnet_notify_address_params na;
9626
9627
3
#if PF
9628
3
  (void) pf_ifaddr_hook(ifp);
9629
3
#endif /* PF */
9630
9631
3
  if (ifp->if_output_ctl == NULL) {
9632
3
    return EOPNOTSUPP;
9633
3
  }
9634
9635
0
  bzero(&na, sizeof(na));
9636
0
  na.address_family = (sa_family_t)af;
9637
9638
0
  return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
9639
0
             sizeof(na), &na);
9640
3
}
9641
9642
errno_t
9643
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
9644
0
{
9645
0
  if (ifp == NULL || flowid == NULL) {
9646
0
    return EINVAL;
9647
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9648
0
      !IF_FULLY_ATTACHED(ifp)) {
9649
0
    return ENXIO;
9650
0
  }
9651
9652
0
  *flowid = ifp->if_flowhash;
9653
9654
0
  return 0;
9655
0
}
9656
9657
errno_t
9658
ifnet_disable_output(struct ifnet *ifp)
9659
0
{
9660
0
  int err;
9661
9662
0
  if (ifp == NULL) {
9663
0
    return EINVAL;
9664
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9665
0
      !IF_FULLY_ATTACHED(ifp)) {
9666
0
    return ENXIO;
9667
0
  }
9668
9669
0
  if ((err = ifnet_fc_add(ifp)) == 0) {
9670
0
    lck_mtx_lock_spin(&ifp->if_start_lock);
9671
0
    ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
9672
0
    lck_mtx_unlock(&ifp->if_start_lock);
9673
0
  }
9674
0
  return err;
9675
0
}
9676
9677
errno_t
9678
ifnet_enable_output(struct ifnet *ifp)
9679
0
{
9680
0
  if (ifp == NULL) {
9681
0
    return EINVAL;
9682
0
  } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9683
0
      !IF_FULLY_ATTACHED(ifp)) {
9684
0
    return ENXIO;
9685
0
  }
9686
9687
0
  ifnet_start_common(ifp, TRUE);
9688
0
  return 0;
9689
0
}
9690
9691
void
9692
ifnet_flowadv(uint32_t flowhash)
9693
0
{
9694
0
  struct ifnet_fc_entry *ifce;
9695
0
  struct ifnet *ifp;
9696
9697
0
  ifce = ifnet_fc_get(flowhash);
9698
0
  if (ifce == NULL) {
9699
0
    return;
9700
0
  }
9701
9702
0
  VERIFY(ifce->ifce_ifp != NULL);
9703
0
  ifp = ifce->ifce_ifp;
9704
9705
  /* flow hash gets recalculated per attach, so check */
9706
0
  if (ifnet_is_attached(ifp, 1)) {
9707
0
    if (ifp->if_flowhash == flowhash) {
9708
0
      (void) ifnet_enable_output(ifp);
9709
0
    }
9710
0
    ifnet_decr_iorefcnt(ifp);
9711
0
  }
9712
0
  ifnet_fc_entry_free(ifce);
9713
0
}
9714
9715
/*
9716
 * Function to compare ifnet_fc_entries in ifnet flow control tree
9717
 */
9718
static inline int
9719
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
9720
0
{
9721
0
  return fc1->ifce_flowhash - fc2->ifce_flowhash;
9722
0
}
9723
9724
static int
9725
ifnet_fc_add(struct ifnet *ifp)
9726
0
{
9727
0
  struct ifnet_fc_entry keyfc, *ifce;
9728
0
  uint32_t flowhash;
9729
9730
0
  VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
9731
0
  VERIFY(ifp->if_flowhash != 0);
9732
0
  flowhash = ifp->if_flowhash;
9733
9734
0
  bzero(&keyfc, sizeof(keyfc));
9735
0
  keyfc.ifce_flowhash = flowhash;
9736
9737
0
  lck_mtx_lock_spin(&ifnet_fc_lock);
9738
0
  ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9739
0
  if (ifce != NULL && ifce->ifce_ifp == ifp) {
9740
    /* Entry is already in ifnet_fc_tree, return */
9741
0
    lck_mtx_unlock(&ifnet_fc_lock);
9742
0
    return 0;
9743
0
  }
9744
9745
0
  if (ifce != NULL) {
9746
    /*
9747
     * There is a different fc entry with the same flow hash
9748
     * but different ifp pointer.  There can be a collision
9749
     * on flow hash but the probability is low.  Let's just
9750
     * avoid adding a second one when there is a collision.
9751
     */
9752
0
    lck_mtx_unlock(&ifnet_fc_lock);
9753
0
    return EAGAIN;
9754
0
  }
9755
9756
  /* become regular mutex */
9757
0
  lck_mtx_convert_spin(&ifnet_fc_lock);
9758
9759
0
  ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
9760
0
  ifce->ifce_flowhash = flowhash;
9761
0
  ifce->ifce_ifp = ifp;
9762
9763
0
  RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9764
0
  lck_mtx_unlock(&ifnet_fc_lock);
9765
0
  return 0;
9766
0
}
9767
9768
static struct ifnet_fc_entry *
9769
ifnet_fc_get(uint32_t flowhash)
9770
0
{
9771
0
  struct ifnet_fc_entry keyfc, *ifce;
9772
0
  struct ifnet *ifp;
9773
9774
0
  bzero(&keyfc, sizeof(keyfc));
9775
0
  keyfc.ifce_flowhash = flowhash;
9776
9777
0
  lck_mtx_lock_spin(&ifnet_fc_lock);
9778
0
  ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9779
0
  if (ifce == NULL) {
9780
    /* Entry is not present in ifnet_fc_tree, return */
9781
0
    lck_mtx_unlock(&ifnet_fc_lock);
9782
0
    return NULL;
9783
0
  }
9784
9785
0
  RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9786
9787
0
  VERIFY(ifce->ifce_ifp != NULL);
9788
0
  ifp = ifce->ifce_ifp;
9789
9790
  /* become regular mutex */
9791
0
  lck_mtx_convert_spin(&ifnet_fc_lock);
9792
9793
0
  if (!ifnet_is_attached(ifp, 0)) {
9794
    /*
9795
     * This ifp is not attached or in the process of being
9796
     * detached; just don't process it.
9797
     */
9798
0
    ifnet_fc_entry_free(ifce);
9799
0
    ifce = NULL;
9800
0
  }
9801
0
  lck_mtx_unlock(&ifnet_fc_lock);
9802
9803
0
  return ifce;
9804
0
}
9805
9806
static void
9807
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
9808
0
{
9809
0
  zfree(ifnet_fc_zone, ifce);
9810
0
}
9811
9812
static uint32_t
9813
ifnet_calc_flowhash(struct ifnet *ifp)
9814
0
{
9815
0
  struct ifnet_flowhash_key fh __attribute__((aligned(8)));
9816
0
  uint32_t flowhash = 0;
9817
9818
0
  if (ifnet_flowhash_seed == 0) {
9819
0
    ifnet_flowhash_seed = RandomULong();
9820
0
  }
9821
9822
0
  bzero(&fh, sizeof(fh));
9823
9824
0
  (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
9825
0
  fh.ifk_unit = ifp->if_unit;
9826
0
  fh.ifk_flags = ifp->if_flags;
9827
0
  fh.ifk_eflags = ifp->if_eflags;
9828
0
  fh.ifk_capabilities = ifp->if_capabilities;
9829
0
  fh.ifk_capenable = ifp->if_capenable;
9830
0
  fh.ifk_output_sched_model = ifp->if_output_sched_model;
9831
0
  fh.ifk_rand1 = RandomULong();
9832
0
  fh.ifk_rand2 = RandomULong();
9833
9834
0
try_again:
9835
0
  flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
9836
0
  if (flowhash == 0) {
9837
    /* try to get a non-zero flowhash */
9838
0
    ifnet_flowhash_seed = RandomULong();
9839
0
    goto try_again;
9840
0
  }
9841
9842
0
  return flowhash;
9843
0
}
9844
9845
int
9846
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
9847
    uint16_t flags, uint8_t *data)
9848
1
{
9849
1
#pragma unused(flags)
9850
1
  int error = 0;
9851
9852
1
  switch (family) {
9853
0
  case AF_INET:
9854
0
    if_inetdata_lock_exclusive(ifp);
9855
0
    if (IN_IFEXTRA(ifp) != NULL) {
9856
0
      if (len == 0) {
9857
        /* Allow clearing the signature */
9858
0
        IN_IFEXTRA(ifp)->netsig_len = 0;
9859
0
        bzero(IN_IFEXTRA(ifp)->netsig,
9860
0
            sizeof(IN_IFEXTRA(ifp)->netsig));
9861
0
        if_inetdata_lock_done(ifp);
9862
0
        break;
9863
0
      } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
9864
0
        error = EINVAL;
9865
0
        if_inetdata_lock_done(ifp);
9866
0
        break;
9867
0
      }
9868
0
      IN_IFEXTRA(ifp)->netsig_len = len;
9869
0
      bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
9870
0
    } else {
9871
0
      error = ENOMEM;
9872
0
    }
9873
0
    if_inetdata_lock_done(ifp);
9874
0
    break;
9875
9876
0
  case AF_INET6:
9877
0
    if_inet6data_lock_exclusive(ifp);
9878
0
    if (IN6_IFEXTRA(ifp) != NULL) {
9879
0
      if (len == 0) {
9880
        /* Allow clearing the signature */
9881
0
        IN6_IFEXTRA(ifp)->netsig_len = 0;
9882
0
        bzero(IN6_IFEXTRA(ifp)->netsig,
9883
0
            sizeof(IN6_IFEXTRA(ifp)->netsig));
9884
0
        if_inet6data_lock_done(ifp);
9885
0
        break;
9886
0
      } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
9887
0
        error = EINVAL;
9888
0
        if_inet6data_lock_done(ifp);
9889
0
        break;
9890
0
      }
9891
0
      IN6_IFEXTRA(ifp)->netsig_len = len;
9892
0
      bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
9893
0
    } else {
9894
0
      error = ENOMEM;
9895
0
    }
9896
0
    if_inet6data_lock_done(ifp);
9897
0
    break;
9898
9899
1
  default:
9900
1
    error = EINVAL;
9901
1
    break;
9902
1
  }
9903
9904
1
  return error;
9905
1
}
9906
9907
int
9908
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
9909
    uint16_t *flags, uint8_t *data)
9910
0
{
9911
0
  int error = 0;
9912
9913
0
  if (ifp == NULL || len == NULL || data == NULL) {
9914
0
    return EINVAL;
9915
0
  }
9916
9917
0
  switch (family) {
9918
0
  case AF_INET:
9919
0
    if_inetdata_lock_shared(ifp);
9920
0
    if (IN_IFEXTRA(ifp) != NULL) {
9921
0
      if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
9922
0
        error = EINVAL;
9923
0
        if_inetdata_lock_done(ifp);
9924
0
        break;
9925
0
      }
9926
0
      if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
9927
0
        bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
9928
0
      } else {
9929
0
        error = ENOENT;
9930
0
      }
9931
0
    } else {
9932
0
      error = ENOMEM;
9933
0
    }
9934
0
    if_inetdata_lock_done(ifp);
9935
0
    break;
9936
9937
0
  case AF_INET6:
9938
0
    if_inet6data_lock_shared(ifp);
9939
0
    if (IN6_IFEXTRA(ifp) != NULL) {
9940
0
      if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
9941
0
        error = EINVAL;
9942
0
        if_inet6data_lock_done(ifp);
9943
0
        break;
9944
0
      }
9945
0
      if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
9946
0
        bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
9947
0
      } else {
9948
0
        error = ENOENT;
9949
0
      }
9950
0
    } else {
9951
0
      error = ENOMEM;
9952
0
    }
9953
0
    if_inet6data_lock_done(ifp);
9954
0
    break;
9955
9956
0
  default:
9957
0
    error = EINVAL;
9958
0
    break;
9959
0
  }
9960
9961
0
  if (error == 0 && flags != NULL) {
9962
0
    *flags = 0;
9963
0
  }
9964
9965
0
  return error;
9966
0
}
9967
9968
int
9969
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
9970
1
{
9971
1
  int i, error = 0, one_set = 0;
9972
9973
1
  if_inet6data_lock_exclusive(ifp);
9974
9975
1
  if (IN6_IFEXTRA(ifp) == NULL) {
9976
0
    error = ENOMEM;
9977
0
    goto out;
9978
0
  }
9979
9980
1
  for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
9981
1
    uint32_t prefix_len =
9982
1
        prefixes[i].prefix_len;
9983
1
    struct in6_addr *prefix =
9984
1
        &prefixes[i].ipv6_prefix;
9985
9986
1
    if (prefix_len == 0) {
9987
0
      clat_log0((LOG_DEBUG,
9988
0
          "NAT64 prefixes purged from Interface %s\n",
9989
0
          if_name(ifp)));
9990
      /* Allow clearing the signature */
9991
0
      IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
9992
0
      bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9993
0
          sizeof(struct in6_addr));
9994
9995
0
      continue;
9996
1
    } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
9997
1
        prefix_len != NAT64_PREFIX_LEN_40 &&
9998
1
        prefix_len != NAT64_PREFIX_LEN_48 &&
9999
1
        prefix_len != NAT64_PREFIX_LEN_56 &&
10000
1
        prefix_len != NAT64_PREFIX_LEN_64 &&
10001
1
        prefix_len != NAT64_PREFIX_LEN_96) {
10002
1
      clat_log0((LOG_DEBUG,
10003
1
          "NAT64 prefixlen is incorrect %d\n", prefix_len));
10004
1
      error = EINVAL;
10005
1
      goto out;
10006
1
    }
10007
10008
0
    if (IN6_IS_SCOPE_EMBED(prefix)) {
10009
0
      clat_log0((LOG_DEBUG,
10010
0
          "NAT64 prefix has interface/link local scope.\n"));
10011
0
      error = EINVAL;
10012
0
      goto out;
10013
0
    }
10014
10015
0
    IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
10016
0
    bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
10017
0
        sizeof(struct in6_addr));
10018
0
    clat_log0((LOG_DEBUG,
10019
0
        "NAT64 prefix set to %s with prefixlen: %d\n",
10020
0
        ip6_sprintf(prefix), prefix_len));
10021
0
    one_set = 1;
10022
0
  }
10023
10024
1
out:
10025
1
  if_inet6data_lock_done(ifp);
10026
10027
1
  if (error == 0 && one_set != 0) {
10028
0
    necp_update_all_clients();
10029
0
  }
10030
10031
1
  return error;
10032
1
}
10033
10034
int
10035
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
10036
0
{
10037
0
  int i, found_one = 0, error = 0;
10038
10039
0
  if (ifp == NULL) {
10040
0
    return EINVAL;
10041
0
  }
10042
10043
0
  if_inet6data_lock_shared(ifp);
10044
10045
0
  if (IN6_IFEXTRA(ifp) == NULL) {
10046
0
    error = ENOMEM;
10047
0
    goto out;
10048
0
  }
10049
10050
0
  for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
10051
0
    if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
10052
0
      found_one = 1;
10053
0
    }
10054
0
  }
10055
10056
0
  if (found_one == 0) {
10057
0
    error = ENOENT;
10058
0
    goto out;
10059
0
  }
10060
10061
0
  if (prefixes) {
10062
0
    bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
10063
0
        sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
10064
0
  }
10065
10066
0
out:
10067
0
  if_inet6data_lock_done(ifp);
10068
10069
0
  return error;
10070
0
}
10071
10072
static void
10073
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
10074
    protocol_family_t pf)
10075
0
{
10076
0
#pragma unused(ifp)
10077
0
  uint32_t did_sw;
10078
10079
0
  if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
10080
0
      (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
10081
0
    return;
10082
0
  }
10083
10084
0
  switch (pf) {
10085
0
  case PF_INET:
10086
0
    did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
10087
0
    if (did_sw & CSUM_DELAY_IP) {
10088
0
      hwcksum_dbg_finalized_hdr++;
10089
0
    }
10090
0
    if (did_sw & CSUM_DELAY_DATA) {
10091
0
      hwcksum_dbg_finalized_data++;
10092
0
    }
10093
0
    break;
10094
0
  case PF_INET6:
10095
    /*
10096
     * Checksum offload should not have been enabled when
10097
     * extension headers exist; that also means that we
10098
     * cannot force-finalize packets with extension headers.
10099
     * Indicate to the callee should it skip such case by
10100
     * setting optlen to -1.
10101
     */
10102
0
    did_sw = in6_finalize_cksum(m, hoff, -1, -1,
10103
0
        m->m_pkthdr.csum_flags);
10104
0
    if (did_sw & CSUM_DELAY_IPV6_DATA) {
10105
0
      hwcksum_dbg_finalized_data++;
10106
0
    }
10107
0
    break;
10108
0
  default:
10109
0
    return;
10110
0
  }
10111
0
}
10112
10113
static void
10114
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
10115
    protocol_family_t pf)
10116
0
{
10117
0
  uint16_t sum = 0;
10118
0
  uint32_t hlen;
10119
10120
0
  if (frame_header == NULL ||
10121
0
      frame_header < (char *)mbuf_datastart(m) ||
10122
0
      frame_header > (char *)m->m_data) {
10123
0
    DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
10124
0
        "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
10125
0
        (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
10126
0
        (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
10127
0
        (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
10128
0
        (uint64_t)VM_KERNEL_ADDRPERM(m));
10129
0
    return;
10130
0
  }
10131
0
  hlen = (uint32_t)(m->m_data - frame_header);
10132
10133
0
  switch (pf) {
10134
0
  case PF_INET:
10135
0
  case PF_INET6:
10136
0
    break;
10137
0
  default:
10138
0
    return;
10139
0
  }
10140
10141
  /*
10142
   * Force partial checksum offload; useful to simulate cases
10143
   * where the hardware does not support partial checksum offload,
10144
   * in order to validate correctness throughout the layers above.
10145
   */
10146
0
  if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
10147
0
    uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
10148
10149
0
    if (foff > (uint32_t)m->m_pkthdr.len) {
10150
0
      return;
10151
0
    }
10152
10153
0
    m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
10154
10155
    /* Compute 16-bit 1's complement sum from forced offset */
10156
0
    sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
10157
10158
0
    m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
10159
0
    m->m_pkthdr.csum_rx_val = sum;
10160
0
    m->m_pkthdr.csum_rx_start = (uint16_t)(foff + hlen);
10161
10162
0
    hwcksum_dbg_partial_forced++;
10163
0
    hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
10164
0
  }
10165
10166
  /*
10167
   * Partial checksum offload verification (and adjustment);
10168
   * useful to validate and test cases where the hardware
10169
   * supports partial checksum offload.
10170
   */
10171
0
  if ((m->m_pkthdr.csum_flags &
10172
0
      (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
10173
0
      (CSUM_DATA_VALID | CSUM_PARTIAL)) {
10174
0
    uint32_t rxoff;
10175
10176
    /* Start offset must begin after frame header */
10177
0
    rxoff = m->m_pkthdr.csum_rx_start;
10178
0
    if (hlen > rxoff) {
10179
0
      hwcksum_dbg_bad_rxoff++;
10180
0
      if (dlil_verbose) {
10181
0
        DLIL_PRINTF("%s: partial cksum start offset %d "
10182
0
            "is less than frame header length %d for "
10183
0
            "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
10184
0
            (uint64_t)VM_KERNEL_ADDRPERM(m));
10185
0
      }
10186
0
      return;
10187
0
    }
10188
0
    rxoff -= hlen;
10189
10190
0
    if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
10191
      /*
10192
       * Compute the expected 16-bit 1's complement sum;
10193
       * skip this if we've already computed it above
10194
       * when partial checksum offload is forced.
10195
       */
10196
0
      sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
10197
10198
      /* Hardware or driver is buggy */
10199
0
      if (sum != m->m_pkthdr.csum_rx_val) {
10200
0
        hwcksum_dbg_bad_cksum++;
10201
0
        if (dlil_verbose) {
10202
0
          DLIL_PRINTF("%s: bad partial cksum value "
10203
0
              "0x%x (expected 0x%x) for mbuf "
10204
0
              "0x%llx [rx_start %d]\n",
10205
0
              if_name(ifp),
10206
0
              m->m_pkthdr.csum_rx_val, sum,
10207
0
              (uint64_t)VM_KERNEL_ADDRPERM(m),
10208
0
              m->m_pkthdr.csum_rx_start);
10209
0
        }
10210
0
        return;
10211
0
      }
10212
0
    }
10213
0
    hwcksum_dbg_verified++;
10214
10215
    /*
10216
     * This code allows us to emulate various hardwares that
10217
     * perform 16-bit 1's complement sum beginning at various
10218
     * start offset values.
10219
     */
10220
0
    if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
10221
0
      uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
10222
10223
0
      if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
10224
0
        return;
10225
0
      }
10226
10227
0
      sum = m_adj_sum16(m, rxoff, aoff,
10228
0
          m_pktlen(m) - aoff, sum);
10229
10230
0
      m->m_pkthdr.csum_rx_val = sum;
10231
0
      m->m_pkthdr.csum_rx_start = (uint16_t)(aoff + hlen);
10232
10233
0
      hwcksum_dbg_adjusted++;
10234
0
    }
10235
0
  }
10236
0
}
10237
10238
static int
10239
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
10240
0
{
10241
0
#pragma unused(arg1, arg2)
10242
0
  u_int32_t i;
10243
0
  int err;
10244
10245
0
  i = hwcksum_dbg_mode;
10246
10247
0
  err = sysctl_handle_int(oidp, &i, 0, req);
10248
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
10249
0
    return err;
10250
0
  }
10251
10252
0
  if (hwcksum_dbg == 0) {
10253
0
    return ENODEV;
10254
0
  }
10255
10256
0
  if ((i & ~HWCKSUM_DBG_MASK) != 0) {
10257
0
    return EINVAL;
10258
0
  }
10259
10260
0
  hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
10261
10262
0
  return err;
10263
0
}
10264
10265
static int
10266
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
10267
0
{
10268
0
#pragma unused(arg1, arg2)
10269
0
  u_int32_t i;
10270
0
  int err;
10271
10272
0
  i = hwcksum_dbg_partial_rxoff_forced;
10273
10274
0
  err = sysctl_handle_int(oidp, &i, 0, req);
10275
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
10276
0
    return err;
10277
0
  }
10278
10279
0
  if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
10280
0
    return ENODEV;
10281
0
  }
10282
10283
0
  hwcksum_dbg_partial_rxoff_forced = i;
10284
10285
0
  return err;
10286
0
}
10287
10288
static int
10289
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
10290
0
{
10291
0
#pragma unused(arg1, arg2)
10292
0
  u_int32_t i;
10293
0
  int err;
10294
10295
0
  i = hwcksum_dbg_partial_rxoff_adj;
10296
10297
0
  err = sysctl_handle_int(oidp, &i, 0, req);
10298
0
  if (err != 0 || req->newptr == USER_ADDR_NULL) {
10299
0
    return err;
10300
0
  }
10301
10302
0
  if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
10303
0
    return ENODEV;
10304
0
  }
10305
10306
0
  hwcksum_dbg_partial_rxoff_adj = i;
10307
10308
0
  return err;
10309
0
}
10310
10311
static int
10312
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
10313
0
{
10314
0
#pragma unused(oidp, arg1, arg2)
10315
0
  int err;
10316
10317
0
  if (req->oldptr == USER_ADDR_NULL) {
10318
0
  }
10319
0
  if (req->newptr != USER_ADDR_NULL) {
10320
0
    return EPERM;
10321
0
  }
10322
0
  err = SYSCTL_OUT(req, &tx_chain_len_stats,
10323
0
      sizeof(struct chain_len_stats));
10324
10325
0
  return err;
10326
0
}
10327
10328
10329
#if DEBUG || DEVELOPMENT
10330
/* Blob for sum16 verification */
10331
static uint8_t sumdata[] = {
10332
  0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
10333
  0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
10334
  0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
10335
  0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
10336
  0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
10337
  0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
10338
  0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
10339
  0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
10340
  0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
10341
  0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
10342
  0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
10343
  0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
10344
  0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
10345
  0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
10346
  0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
10347
  0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
10348
  0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
10349
  0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
10350
  0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
10351
  0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
10352
  0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
10353
  0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
10354
  0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
10355
  0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
10356
  0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
10357
  0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
10358
  0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
10359
  0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
10360
  0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
10361
  0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
10362
  0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
10363
  0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
10364
  0xc8, 0x28, 0x02, 0x00, 0x00
10365
};
10366
10367
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
10368
static struct {
10369
  boolean_t       init;
10370
  uint16_t        len;
10371
  uint16_t        sumr;   /* reference */
10372
  uint16_t        sumrp;  /* reference, precomputed */
10373
} sumtbl[] = {
10374
  { FALSE, 0, 0, 0x0000 },
10375
  { FALSE, 1, 0, 0x001f },
10376
  { FALSE, 2, 0, 0x8b1f },
10377
  { FALSE, 3, 0, 0x8b27 },
10378
  { FALSE, 7, 0, 0x790e },
10379
  { FALSE, 11, 0, 0xcb6d },
10380
  { FALSE, 20, 0, 0x20dd },
10381
  { FALSE, 27, 0, 0xbabd },
10382
  { FALSE, 32, 0, 0xf3e8 },
10383
  { FALSE, 37, 0, 0x197d },
10384
  { FALSE, 43, 0, 0x9eae },
10385
  { FALSE, 64, 0, 0x4678 },
10386
  { FALSE, 127, 0, 0x9399 },
10387
  { FALSE, 256, 0, 0xd147 },
10388
  { FALSE, 325, 0, 0x0358 },
10389
};
10390
16
#define SUMTBL_MAX      ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
10391
10392
static void
10393
dlil_verify_sum16(void)
10394
1
{
10395
1
  struct mbuf *m;
10396
1
  uint8_t *buf;
10397
1
  int n;
10398
10399
  /* Make sure test data plus extra room for alignment fits in cluster */
10400
1
  _CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
10401
10402
1
  kprintf("DLIL: running SUM16 self-tests ... ");
10403
10404
1
  m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
10405
1
  m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
10406
10407
1
  buf = mtod(m, uint8_t *);               /* base address */
10408
10409
16
  for (n = 0; n < SUMTBL_MAX; n++) {
10410
15
    uint16_t len = sumtbl[n].len;
10411
15
    int i;
10412
10413
    /* Verify for all possible alignments */
10414
135
    for (i = 0; i < (int)sizeof(uint64_t); i++) {
10415
120
      uint16_t sum, sumr;
10416
120
      uint8_t *c;
10417
10418
      /* Copy over test data to mbuf */
10419
120
      VERIFY(len <= sizeof(sumdata));
10420
0
      c = buf + i;
10421
120
      bcopy(sumdata, c, len);
10422
10423
      /* Zero-offset test (align by data pointer) */
10424
120
      m->m_data = (caddr_t)c;
10425
120
      m->m_len = len;
10426
120
      sum = m_sum16(m, 0, len);
10427
10428
120
      if (!sumtbl[n].init) {
10429
15
        sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
10430
15
        sumtbl[n].sumr = sumr;
10431
15
        sumtbl[n].init = TRUE;
10432
105
      } else {
10433
105
        sumr = sumtbl[n].sumr;
10434
105
      }
10435
10436
      /* Something is horribly broken; stop now */
10437
120
      if (sumr != sumtbl[n].sumrp) {
10438
0
        panic_plain("\n%s: broken in_cksum_mbuf_ref() "
10439
0
            "for len=%d align=%d sum=0x%04x "
10440
0
            "[expected=0x%04x]\n", __func__,
10441
0
            len, i, sum, sumr);
10442
        /* NOTREACHED */
10443
120
      } else if (sum != sumr) {
10444
        // panic_plain("\n%s: broken m_sum16() for len=%d "
10445
        //     "align=%d sum=0x%04x [expected=0x%04x]\n",
10446
        //     __func__, len, i, sum, sumr);
10447
        /* NOTREACHED */
10448
112
      }
10449
10450
      /* Alignment test by offset (fixed data pointer) */
10451
120
      m->m_data = (caddr_t)buf;
10452
120
      m->m_len = i + len;
10453
120
      sum = m_sum16(m, i, len);
10454
10455
      /* Something is horribly broken; stop now */
10456
120
      if (sum != sumr) {
10457
        // panic_plain("\n%s: broken m_sum16() for len=%d "
10458
        //     "offset=%d sum=0x%04x [expected=0x%04x]\n",
10459
        //     __func__, len, i, sum, sumr);
10460
        /* NOTREACHED */
10461
112
      }
10462
120
#if INET
10463
      /* Simple sum16 contiguous buffer test by aligment */
10464
120
      sum = b_sum16(c, len);
10465
10466
      /* Something is horribly broken; stop now */
10467
120
      if (sum != sumr) {
10468
        // panic_plain("\n%s: broken b_sum16() for len=%d "
10469
        //     "align=%d sum=0x%04x [expected=0x%04x]\n",
10470
        //     __func__, len, i, sum, sumr);
10471
        /* NOTREACHED */
10472
112
      }
10473
120
#endif /* INET */
10474
120
    }
10475
15
  }
10476
1
  m_freem(m);
10477
10478
1
  kprintf("PASSED\n");
10479
1
}
10480
#endif /* DEBUG || DEVELOPMENT */
10481
10482
0
#define CASE_STRINGIFY(x) case x: return #x
10483
10484
__private_extern__ const char *
10485
dlil_kev_dl_code_str(u_int32_t event_code)
10486
0
{
10487
0
  switch (event_code) {
10488
0
    CASE_STRINGIFY(KEV_DL_SIFFLAGS);
10489
0
    CASE_STRINGIFY(KEV_DL_SIFMETRICS);
10490
0
    CASE_STRINGIFY(KEV_DL_SIFMTU);
10491
0
    CASE_STRINGIFY(KEV_DL_SIFPHYS);
10492
0
    CASE_STRINGIFY(KEV_DL_SIFMEDIA);
10493
0
    CASE_STRINGIFY(KEV_DL_SIFGENERIC);
10494
0
    CASE_STRINGIFY(KEV_DL_ADDMULTI);
10495
0
    CASE_STRINGIFY(KEV_DL_DELMULTI);
10496
0
    CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
10497
0
    CASE_STRINGIFY(KEV_DL_IF_DETACHING);
10498
0
    CASE_STRINGIFY(KEV_DL_IF_DETACHED);
10499
0
    CASE_STRINGIFY(KEV_DL_LINK_OFF);
10500
0
    CASE_STRINGIFY(KEV_DL_LINK_ON);
10501
0
    CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
10502
0
    CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
10503
0
    CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
10504
0
    CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
10505
0
    CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
10506
0
    CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
10507
0
    CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
10508
0
    CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
10509
0
    CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
10510
0
    CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
10511
0
    CASE_STRINGIFY(KEV_DL_ISSUES);
10512
0
    CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
10513
0
  default:
10514
0
    break;
10515
0
  }
10516
0
  return "";
10517
0
}
10518
10519
static void
10520
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
10521
0
{
10522
0
#pragma unused(arg1)
10523
0
  struct ifnet *ifp = arg0;
10524
10525
0
  if (ifnet_is_attached(ifp, 1)) {
10526
0
    nstat_ifnet_threshold_reached(ifp->if_index);
10527
0
    ifnet_decr_iorefcnt(ifp);
10528
0
  }
10529
0
}
10530
10531
void
10532
ifnet_notify_data_threshold(struct ifnet *ifp)
10533
0
{
10534
0
  uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
10535
0
  uint64_t oldbytes = ifp->if_dt_bytes;
10536
10537
0
  ASSERT(ifp->if_dt_tcall != NULL);
10538
10539
  /*
10540
   * If we went over the threshold, notify NetworkStatistics.
10541
   * We rate-limit it based on the threshold interval value.
10542
   */
10543
0
  if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
10544
0
      OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
10545
0
      !thread_call_isactive(ifp->if_dt_tcall)) {
10546
0
    uint64_t tival = (threshold_interval * NSEC_PER_SEC);
10547
0
    uint64_t now = mach_absolute_time(), deadline = now;
10548
0
    uint64_t ival;
10549
10550
0
    if (tival != 0) {
10551
0
      nanoseconds_to_absolutetime(tival, &ival);
10552
0
      clock_deadline_for_periodic_event(ival, now, &deadline);
10553
0
      (void) thread_call_enter_delayed(ifp->if_dt_tcall,
10554
0
          deadline);
10555
0
    } else {
10556
0
      (void) thread_call_enter(ifp->if_dt_tcall);
10557
0
    }
10558
0
  }
10559
0
}
10560
10561
#if (DEVELOPMENT || DEBUG)
10562
/*
10563
 * The sysctl variable name contains the input parameters of
10564
 * ifnet_get_keepalive_offload_frames()
10565
 *  ifp (interface index): name[0]
10566
 *  frames_array_count:    name[1]
10567
 *  frame_data_offset:     name[2]
10568
 * The return length gives used_frames_count
10569
 */
10570
static int
10571
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
10572
0
{
10573
0
#pragma unused(oidp)
10574
0
  int *name = (int *)arg1;
10575
0
  u_int namelen = arg2;
10576
0
  int idx;
10577
0
  ifnet_t ifp = NULL;
10578
0
  u_int32_t frames_array_count;
10579
0
  size_t frame_data_offset;
10580
0
  u_int32_t used_frames_count;
10581
0
  struct ifnet_keepalive_offload_frame *frames_array = NULL;
10582
0
  int error = 0;
10583
0
  u_int32_t i;
10584
10585
  /*
10586
   * Only root can get look at other people TCP frames
10587
   */
10588
0
  error = proc_suser(current_proc());
10589
0
  if (error != 0) {
10590
0
    goto done;
10591
0
  }
10592
  /*
10593
   * Validate the input parameters
10594
   */
10595
0
  if (req->newptr != USER_ADDR_NULL) {
10596
0
    error = EPERM;
10597
0
    goto done;
10598
0
  }
10599
0
  if (namelen != 3) {
10600
0
    error = EINVAL;
10601
0
    goto done;
10602
0
  }
10603
0
  if (req->oldptr == USER_ADDR_NULL) {
10604
0
    error = EINVAL;
10605
0
    goto done;
10606
0
  }
10607
0
  if (req->oldlen == 0) {
10608
0
    error = EINVAL;
10609
0
    goto done;
10610
0
  }
10611
0
  idx = name[0];
10612
0
  frames_array_count = name[1];
10613
0
  frame_data_offset = name[2];
10614
10615
  /* Make sure the passed buffer is large enough */
10616
0
  if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
10617
0
      req->oldlen) {
10618
0
    error = ENOMEM;
10619
0
    goto done;
10620
0
  }
10621
10622
0
  ifnet_head_lock_shared();
10623
0
  if (!IF_INDEX_IN_RANGE(idx)) {
10624
0
    ifnet_head_done();
10625
0
    error = ENOENT;
10626
0
    goto done;
10627
0
  }
10628
0
  ifp = ifindex2ifnet[idx];
10629
0
  ifnet_head_done();
10630
10631
0
  frames_array = _MALLOC(frames_array_count *
10632
0
      sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
10633
0
  if (frames_array == NULL) {
10634
0
    error = ENOMEM;
10635
0
    goto done;
10636
0
  }
10637
10638
0
  error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
10639
0
      frames_array_count, frame_data_offset, &used_frames_count);
10640
0
  if (error != 0) {
10641
0
    DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
10642
0
        __func__, error);
10643
0
    goto done;
10644
0
  }
10645
10646
0
  for (i = 0; i < used_frames_count; i++) {
10647
0
    error = SYSCTL_OUT(req, frames_array + i,
10648
0
        sizeof(struct ifnet_keepalive_offload_frame));
10649
0
    if (error != 0) {
10650
0
      goto done;
10651
0
    }
10652
0
  }
10653
0
done:
10654
0
  if (frames_array != NULL) {
10655
0
    _FREE(frames_array, M_TEMP);
10656
0
  }
10657
0
  return error;
10658
0
}
10659
#endif /* DEVELOPMENT || DEBUG */
10660
10661
void
10662
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
10663
    struct ifnet *ifp)
10664
0
{
10665
0
  tcp_update_stats_per_flow(ifs, ifp);
10666
0
}
10667
10668
static inline u_int32_t
10669
_set_flags(u_int32_t *flags_p, u_int32_t set_flags)
10670
17.7k
{
10671
17.7k
  return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
10672
17.7k
}
10673
10674
static inline void
10675
_clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
10676
17.8k
{
10677
17.8k
  OSBitAndAtomic(~clear_flags, flags_p);
10678
17.8k
}
10679
10680
__private_extern__ u_int32_t
10681
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
10682
17.7k
{
10683
17.7k
  return _set_flags(&interface->if_eflags, set_flags);
10684
17.7k
}
10685
10686
__private_extern__ void
10687
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
10688
17.8k
{
10689
17.8k
  _clear_flags(&interface->if_eflags, clear_flags);
10690
17.8k
}
10691
10692
__private_extern__ u_int32_t
10693
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
10694
4
{
10695
4
  return _set_flags(&interface->if_xflags, set_flags);
10696
4
}
10697
10698
__private_extern__ void
10699
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
10700
4
{
10701
4
  _clear_flags(&interface->if_xflags, clear_flags);
10702
4
}