/src/SockFuzzer/third_party/xnu/bsd/net/route.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
3 | | * |
4 | | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | | * |
6 | | * This file contains Original Code and/or Modifications of Original Code |
7 | | * as defined in and that are subject to the Apple Public Source License |
8 | | * Version 2.0 (the 'License'). You may not use this file except in |
9 | | * compliance with the License. The rights granted to you under the License |
10 | | * may not be used to create, or enable the creation or redistribution of, |
11 | | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | | * circumvent, violate, or enable the circumvention or violation of, any |
13 | | * terms of an Apple operating system software license agreement. |
14 | | * |
15 | | * Please obtain a copy of the License at |
16 | | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | | * |
18 | | * The Original Code and all software distributed under the License are |
19 | | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | | * Please see the License for the specific language governing rights and |
24 | | * limitations under the License. |
25 | | * |
26 | | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | | */ |
28 | | /* |
29 | | * Copyright (c) 1980, 1986, 1991, 1993 |
30 | | * The Regents of the University of California. All rights reserved. |
31 | | * |
32 | | * Redistribution and use in source and binary forms, with or without |
33 | | * modification, are permitted provided that the following conditions |
34 | | * are met: |
35 | | * 1. Redistributions of source code must retain the above copyright |
36 | | * notice, this list of conditions and the following disclaimer. |
37 | | * 2. Redistributions in binary form must reproduce the above copyright |
38 | | * notice, this list of conditions and the following disclaimer in the |
39 | | * documentation and/or other materials provided with the distribution. |
40 | | * 3. All advertising materials mentioning features or use of this software |
41 | | * must display the following acknowledgement: |
42 | | * This product includes software developed by the University of |
43 | | * California, Berkeley and its contributors. |
44 | | * 4. Neither the name of the University nor the names of its contributors |
45 | | * may be used to endorse or promote products derived from this software |
46 | | * without specific prior written permission. |
47 | | * |
48 | | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
49 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
50 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
51 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
52 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
53 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
54 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
55 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
56 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
57 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
58 | | * SUCH DAMAGE. |
59 | | * |
60 | | * @(#)route.c 8.2 (Berkeley) 11/15/93 |
61 | | * $FreeBSD: src/sys/net/route.c,v 1.59.2.3 2001/07/29 19:18:02 ume Exp $ |
62 | | */ |
63 | | |
64 | | #include <sys/param.h> |
65 | | #include <sys/sysctl.h> |
66 | | #include <sys/systm.h> |
67 | | #include <sys/malloc.h> |
68 | | #include <sys/mbuf.h> |
69 | | #include <sys/socket.h> |
70 | | #include <sys/domain.h> |
71 | | #include <sys/stat.h> |
72 | | #include <sys/ubc.h> |
73 | | #include <sys/vnode.h> |
74 | | #include <sys/syslog.h> |
75 | | #include <sys/queue.h> |
76 | | #include <sys/mcache.h> |
77 | | #include <sys/priv.h> |
78 | | #include <sys/protosw.h> |
79 | | #include <sys/sdt.h> |
80 | | #include <sys/kernel.h> |
81 | | #include <kern/locks.h> |
82 | | #include <kern/zalloc.h> |
83 | | |
84 | | #include <net/dlil.h> |
85 | | #include <net/if.h> |
86 | | #include <net/route.h> |
87 | | #include <net/ntstat.h> |
88 | | #include <net/nwk_wq.h> |
89 | | #if NECP |
90 | | #include <net/necp.h> |
91 | | #endif /* NECP */ |
92 | | |
93 | | #include <netinet/in.h> |
94 | | #include <netinet/in_var.h> |
95 | | #include <netinet/ip_var.h> |
96 | | #include <netinet/ip.h> |
97 | | #include <netinet/ip6.h> |
98 | | #include <netinet/in_arp.h> |
99 | | |
100 | | #include <netinet6/ip6_var.h> |
101 | | #include <netinet6/in6_var.h> |
102 | | #include <netinet6/nd6.h> |
103 | | |
104 | | #include <net/if_dl.h> |
105 | | |
106 | | #include <libkern/OSAtomic.h> |
107 | | #include <libkern/OSDebug.h> |
108 | | |
109 | | #include <pexpert/pexpert.h> |
110 | | |
111 | | #if CONFIG_MACF |
112 | | #include <sys/kauth.h> |
113 | | #endif |
114 | | |
115 | | /* |
116 | | * Synchronization notes: |
117 | | * |
118 | | * Routing entries fall under two locking domains: the global routing table |
119 | | * lock (rnh_lock) and the per-entry lock (rt_lock); the latter is a mutex that |
120 | | * resides (statically defined) in the rtentry structure. |
121 | | * |
122 | | * The locking domains for routing are defined as follows: |
123 | | * |
124 | | * The global routing lock is used to serialize all accesses to the radix |
125 | | * trees defined by rt_tables[], as well as the tree of masks. This includes |
126 | | * lookups, insertions and removals of nodes to/from the respective tree. |
127 | | * It is also used to protect certain fields in the route entry that aren't |
128 | | * often modified and/or require global serialization (more details below.) |
129 | | * |
130 | | * The per-route entry lock is used to serialize accesses to several routing |
131 | | * entry fields (more details below.) Acquiring and releasing this lock is |
132 | | * done via RT_LOCK() and RT_UNLOCK() routines. |
133 | | * |
134 | | * In cases where both rnh_lock and rt_lock must be held, the former must be |
135 | | * acquired first in order to maintain lock ordering. It is not a requirement |
136 | | * that rnh_lock always be held when rt_lock is taken, but whenever both |
137 | | * must be acquired in succession, this ordering must be followed. |
138 | | * |
139 | | * The fields of the rtentry structure are protected in the following way: |
140 | | * |
141 | | * rt_nodes[] |
142 | | * |
143 | | * - Routing table lock (rnh_lock). |
144 | | * |
145 | | * rt_parent, rt_mask, rt_llinfo_free, rt_tree_genid |
146 | | * |
147 | | * - Set once during creation and never changes; no locks to read. |
148 | | * |
149 | | * rt_flags, rt_genmask, rt_llinfo, rt_rmx, rt_refcnt, rt_gwroute |
150 | | * |
151 | | * - Routing entry lock (rt_lock) for read/write access. |
152 | | * |
153 | | * - Some values of rt_flags are either set once at creation time, |
154 | | * or aren't currently used, and thus checking against them can |
155 | | * be done without rt_lock: RTF_GATEWAY, RTF_HOST, RTF_DYNAMIC, |
156 | | * RTF_DONE, RTF_XRESOLVE, RTF_STATIC, RTF_BLACKHOLE, RTF_ANNOUNCE, |
157 | | * RTF_USETRAILERS, RTF_WASCLONED, RTF_PINNED, RTF_LOCAL, |
158 | | * RTF_BROADCAST, RTF_MULTICAST, RTF_IFSCOPE, RTF_IFREF. |
159 | | * |
160 | | * rt_key, rt_gateway, rt_ifp, rt_ifa |
161 | | * |
162 | | * - Always written/modified with both rnh_lock and rt_lock held. |
163 | | * |
164 | | * - May be read freely with rnh_lock held, else must hold rt_lock |
165 | | * for read access; holding both locks for read is also okay. |
166 | | * |
167 | | * - In the event rnh_lock is not acquired, or cannot be acquired |
168 | | * across the operation, setting RTF_CONDEMNED on a route |
169 | | * entry will prevent its rt_key, rt_gateway, rt_ifp and rt_ifa |
170 | | * from being modified. This is typically done on a route that |
171 | | * has been chosen for a removal (from the tree) prior to dropping |
172 | | * the rt_lock, so that those values will remain the same until |
173 | | * the route is freed. |
174 | | * |
175 | | * When rnh_lock is held, rt_setgate(), rt_setif(), and rtsetifa() are |
176 | | * single-threaded, thus exclusive. This flag will also prevent the |
177 | | * route from being looked up via rt_lookup(). |
178 | | * |
179 | | * rt_genid |
180 | | * |
181 | | * - Assumes that 32-bit writes are atomic; no locks. |
182 | | * |
183 | | * rt_dlt, rt_output |
184 | | * |
185 | | * - Currently unused; no locks. |
186 | | * |
187 | | * Operations on a route entry can be described as follows: |
188 | | * |
189 | | * CREATE an entry with reference count set to 0 as part of RTM_ADD/RESOLVE. |
190 | | * |
191 | | * INSERTION of an entry into the radix tree holds the rnh_lock, checks |
192 | | * for duplicates and then adds the entry. rtrequest returns the entry |
193 | | * after bumping up the reference count to 1 (for the caller). |
194 | | * |
195 | | * LOOKUP of an entry holds the rnh_lock and bumps up the reference count |
196 | | * before returning; it is valid to also bump up the reference count using |
197 | | * RT_ADDREF after the lookup has returned an entry. |
198 | | * |
199 | | * REMOVAL of an entry from the radix tree holds the rnh_lock, removes the |
200 | | * entry but does not decrement the reference count. Removal happens when |
201 | | * the route is explicitly deleted (RTM_DELETE) or when it is in the cached |
202 | | * state and it expires. The route is said to be "down" when it is no |
203 | | * longer present in the tree. Freeing the entry will happen on the last |
204 | | * reference release of such a "down" route. |
205 | | * |
206 | | * RT_ADDREF/RT_REMREF operate on the routing entry, atomically |
207 | | * incrementing/decrementing its reference count, rt_refcnt. |
208 | | * rt_refcnt is modified only using this routine. The general rule is to |
209 | | * do RT_ADDREF in the function that is passing the entry as an argument, |
210 | | * in order to prevent the entry from being freed by the callee. |
211 | | */ |
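The rules above imply a common caller-side shape. A minimal sketch, assuming a kernel caller (the wrapper function itself is hypothetical) and the rt_lookup(), RT_LOCK()/RT_UNLOCK() and rtfree_locked() primitives declared in this file:

    static struct rtentry *
    lookup_and_inspect(struct sockaddr *dst, struct radix_node_head *rnh,
        unsigned int ifscope)
    {
            struct rtentry *rt;

            lck_mtx_lock(rnh_lock);         /* global lock first */
            rt = rt_lookup(FALSE, dst, NULL, rnh, ifscope); /* ref bumped */
            if (rt != NULL) {
                    RT_LOCK(rt);            /* then the per-entry lock */
                    /* rt_flags, rt_rmx, rt_gwroute may be read here */
                    RT_UNLOCK(rt);
            }
            lck_mtx_unlock(rnh_lock);
            return rt;      /* caller later drops the reference via rtfree() */
    }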
212 | | |
213 | 4.75k | #define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) |
214 | | |
215 | | extern void kdp_set_gateway_mac(void *gatewaymac); |
216 | | |
217 | | __private_extern__ struct rtstat rtstat = { |
218 | | .rts_badredirect = 0, |
219 | | .rts_dynamic = 0, |
220 | | .rts_newgateway = 0, |
221 | | .rts_unreach = 0, |
222 | | .rts_wildcard = 0, |
223 | | .rts_badrtgwroute = 0 |
224 | | }; |
225 | | struct radix_node_head *rt_tables[AF_MAX + 1]; |
226 | | |
227 | | decl_lck_mtx_data(, rnh_lock_data); /* global routing tables mutex */ |
228 | | lck_mtx_t *rnh_lock = &rnh_lock_data; |
229 | | static lck_attr_t *rnh_lock_attr; |
230 | | static lck_grp_t *rnh_lock_grp; |
231 | | static lck_grp_attr_t *rnh_lock_grp_attr; |
232 | | |
233 | | /* Lock group and attribute for routing entry locks */ |
234 | | static lck_attr_t *rte_mtx_attr; |
235 | | static lck_grp_t *rte_mtx_grp; |
236 | | static lck_grp_attr_t *rte_mtx_grp_attr; |
237 | | |
238 | | int rttrash = 0; /* routes not in table but not freed */ |
239 | | |
240 | | boolean_t trigger_v6_defrtr_select = FALSE; |
241 | | unsigned int rte_debug = 0; |
242 | | |
243 | | /* Possible flags for rte_debug */ |
244 | 55.4M | #define RTD_DEBUG 0x1 /* enable or disable rtentry debug facility */ |
245 | 0 | #define RTD_TRACE 0x2 /* trace alloc, free, refcnt and lock */ |
246 | 0 | #define RTD_NO_FREE 0x4 /* don't free (good to catch corruptions) */ |
247 | | |
248 | 2 | #define RTE_NAME "rtentry" /* name for zone and rt_lock */ |
249 | | |
250 | | static struct zone *rte_zone; /* special zone for rtentry */ |
251 | | #define RTE_ZONE_MAX 65536 /* maximum elements in zone */ |
252 | 1 | #define RTE_ZONE_NAME RTE_NAME /* name of rtentry zone */ |
253 | | |
254 | 0 | #define RTD_INUSE 0xFEEDFACE /* entry is in use */ |
255 | 0 | #define RTD_FREED 0xDEADBEEF /* entry is freed */ |
256 | | |
257 | 32.5k | #define MAX_SCOPE_ADDR_STR_LEN (MAX_IPv6_STR_LEN + 6) |
258 | | |
259 | | /* For gdb */ |
260 | | __private_extern__ unsigned int ctrace_stack_size = CTRACE_STACK_SIZE; |
261 | | __private_extern__ unsigned int ctrace_hist_size = CTRACE_HIST_SIZE; |
262 | | |
263 | | /* |
264 | | * Debug variant of rtentry structure. |
265 | | */ |
266 | | struct rtentry_dbg { |
267 | | struct rtentry rtd_entry; /* rtentry */ |
268 | | struct rtentry rtd_entry_saved; /* saved rtentry */ |
269 | | uint32_t rtd_inuse; /* in use pattern */ |
270 | | uint16_t rtd_refhold_cnt; /* # of rtref */ |
271 | | uint16_t rtd_refrele_cnt; /* # of rtunref */ |
272 | | uint32_t rtd_lock_cnt; /* # of locks */ |
273 | | uint32_t rtd_unlock_cnt; /* # of unlocks */ |
274 | | /* |
275 | | * Alloc and free callers. |
276 | | */ |
277 | | ctrace_t rtd_alloc; |
278 | | ctrace_t rtd_free; |
279 | | /* |
280 | | * Circular lists of rtref and rtunref callers. |
281 | | */ |
282 | | ctrace_t rtd_refhold[CTRACE_HIST_SIZE]; |
283 | | ctrace_t rtd_refrele[CTRACE_HIST_SIZE]; |
284 | | /* |
285 | | * Circular lists of locks and unlocks. |
286 | | */ |
287 | | ctrace_t rtd_lock[CTRACE_HIST_SIZE]; |
288 | | ctrace_t rtd_unlock[CTRACE_HIST_SIZE]; |
289 | | /* |
290 | | * Trash list linkage |
291 | | */ |
292 | | TAILQ_ENTRY(rtentry_dbg) rtd_trash_link; |
293 | | }; |
294 | | |
295 | | /* List of trash route entries protected by rnh_lock */ |
296 | | static TAILQ_HEAD(, rtentry_dbg) rttrash_head; |
297 | | |
298 | | static void rte_lock_init(struct rtentry *); |
299 | | static void rte_lock_destroy(struct rtentry *); |
300 | | static inline struct rtentry *rte_alloc_debug(void); |
301 | | static inline void rte_free_debug(struct rtentry *); |
302 | | static inline void rte_lock_debug(struct rtentry_dbg *); |
303 | | static inline void rte_unlock_debug(struct rtentry_dbg *); |
304 | | static void rt_maskedcopy(const struct sockaddr *, |
305 | | struct sockaddr *, const struct sockaddr *); |
306 | | static void rtable_init(void **); |
307 | | static inline void rtref_audit(struct rtentry_dbg *); |
308 | | static inline void rtunref_audit(struct rtentry_dbg *); |
309 | | static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, uint32_t, |
310 | | unsigned int); |
311 | | static int rtrequest_common_locked(int, struct sockaddr *, |
312 | | struct sockaddr *, struct sockaddr *, int, struct rtentry **, |
313 | | unsigned int); |
314 | | static struct rtentry *rtalloc1_locked(struct sockaddr *, int, uint32_t); |
315 | | static void rtalloc_ign_common_locked(struct route *, uint32_t, unsigned int); |
316 | | static inline void sin6_set_ifscope(struct sockaddr *, unsigned int); |
317 | | static inline void sin6_set_embedded_ifscope(struct sockaddr *, unsigned int); |
318 | | static inline unsigned int sin6_get_embedded_ifscope(struct sockaddr *); |
319 | | static struct sockaddr *ma_copy(int, struct sockaddr *, |
320 | | struct sockaddr_storage *, unsigned int); |
321 | | static struct sockaddr *sa_trim(struct sockaddr *, int); |
322 | | static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *, |
323 | | unsigned int); |
324 | | static struct radix_node *node_lookup_default(int); |
325 | | static struct rtentry *rt_lookup_common(boolean_t, boolean_t, struct sockaddr *, |
326 | | struct sockaddr *, struct radix_node_head *, unsigned int); |
327 | | static int rn_match_ifscope(struct radix_node *, void *); |
328 | | static struct ifaddr *ifa_ifwithroute_common_locked(int, |
329 | | const struct sockaddr *, const struct sockaddr *, unsigned int); |
330 | | static struct rtentry *rte_alloc(void); |
331 | | static void rte_free(struct rtentry *); |
332 | | static void rtfree_common(struct rtentry *, boolean_t); |
333 | | static void rte_if_ref(struct ifnet *, int); |
334 | | static void rt_set_idleref(struct rtentry *); |
335 | | static void rt_clear_idleref(struct rtentry *); |
336 | | static void route_event_callback(void *); |
337 | | static void rt_str4(struct rtentry *, char *, uint32_t, char *, uint32_t); |
338 | | static void rt_str6(struct rtentry *, char *, uint32_t, char *, uint32_t); |
339 | | static boolean_t route_ignore_protocol_cloning_for_dst(struct rtentry *, struct sockaddr *); |
340 | | |
341 | | uint32_t route_genid_inet = 0; |
342 | | uint32_t route_genid_inet6 = 0; |
343 | | |
344 | 2.15M | #define ASSERT_SINIFSCOPE(sa) { \ |
345 | 2.15M | if ((sa)->sa_family != AF_INET || \ |
346 | 2.15M | (sa)->sa_len < sizeof (struct sockaddr_in)) \ |
347 | 2.15M | panic("%s: bad sockaddr_in %p\n", __func__, sa); \ |
348 | 2.15M | } |
349 | | |
350 | 937k | #define ASSERT_SIN6IFSCOPE(sa) { \ |
351 | 937k | if ((sa)->sa_family != AF_INET6 || \ |
352 | 937k | (sa)->sa_len < sizeof (struct sockaddr_in6)) \ |
353 | 937k | panic("%s: bad sockaddr_in6 %p\n", __func__, sa); \ |
354 | 937k | } |
355 | | |
356 | | /* |
357 | | * Argument to leaf-matching routine; at present it is scoped routing |
358 | | * specific but can be expanded in future to include other search filters. |
359 | | */ |
360 | | struct matchleaf_arg { |
361 | | unsigned int ifscope; /* interface scope */ |
362 | | }; |
363 | | |
364 | | /* |
365 | | * For looking up the non-scoped default route (sockaddr instead |
366 | | * of sockaddr_in for convenience). |
367 | | */ |
368 | | static struct sockaddr sin_def = { |
369 | | .sa_len = sizeof(struct sockaddr_in), |
370 | | .sa_family = AF_INET, |
371 | | .sa_data = { 0, } |
372 | | }; |
373 | | |
374 | | static struct sockaddr_in6 sin6_def = { |
375 | | .sin6_len = sizeof(struct sockaddr_in6), |
376 | | .sin6_family = AF_INET6, |
377 | | .sin6_port = 0, |
378 | | .sin6_flowinfo = 0, |
379 | | .sin6_addr = IN6ADDR_ANY_INIT, |
380 | | .sin6_scope_id = 0 |
381 | | }; |
382 | | |
383 | | /* |
384 | | * Interface index (scope) of the primary interface; determined at |
385 | | * the time when the default, non-scoped route gets added, changed |
386 | | * or deleted. Protected by rnh_lock. |
387 | | */ |
388 | | static unsigned int primary_ifscope = IFSCOPE_NONE; |
389 | | static unsigned int primary6_ifscope = IFSCOPE_NONE; |
390 | | |
391 | | #define INET_DEFAULT(sa) \ |
392 | 95.9k | ((sa)->sa_family == AF_INET && SIN(sa)->sin_addr.s_addr == 0) |
393 | | |
394 | | #define INET6_DEFAULT(sa) \ |
395 | 47.9k | ((sa)->sa_family == AF_INET6 && \ |
396 | 47.9k | IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr)) |
397 | | |
398 | 172k | #define SA_DEFAULT(sa) (INET_DEFAULT(sa) || INET6_DEFAULT(sa)) |
399 | 807k | #define RT(r) ((struct rtentry *)r) |
400 | 104k | #define RN(r) ((struct radix_node *)r) |
401 | 48.3k | #define RT_HOST(r) (RT(r)->rt_flags & RTF_HOST) |
402 | | |
403 | | unsigned int rt_verbose = 0; |
404 | | #if (DEVELOPMENT || DEBUG) |
405 | | SYSCTL_DECL(_net_route); |
406 | | SYSCTL_UINT(_net_route, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED, |
407 | | &rt_verbose, 0, ""); |
408 | | #endif /* (DEVELOPMENT || DEBUG) */ |
409 | | |
410 | | static void |
411 | | rtable_init(void **table) |
412 | 1 | { |
413 | 1 | struct domain *dom; |
414 | | |
415 | 1 | domain_proto_mtx_lock_assert_held(); |
416 | | |
417 | 9 | TAILQ_FOREACH(dom, &domains, dom_entry) { |
418 | 9 | if (dom->dom_rtattach != NULL) { |
419 | 2 | dom->dom_rtattach(&table[dom->dom_family], |
420 | 2 | dom->dom_rtoffset); |
421 | 2 | } |
422 | 9 | } |
423 | 1 | } |
424 | | |
425 | | /* |
426 | | * Called by route_dinit(). |
427 | | */ |
428 | | void |
429 | | route_init(void) |
430 | 1 | { |
431 | 1 | int size; |
432 | | |
433 | 1 | _CASSERT(offsetof(struct route, ro_rt) == |
434 | 1 | offsetof(struct route_in6, ro_rt)); |
435 | 1 | _CASSERT(offsetof(struct route, ro_lle) == |
436 | 1 | offsetof(struct route_in6, ro_lle)); |
437 | 1 | _CASSERT(offsetof(struct route, ro_srcia) == |
438 | 1 | offsetof(struct route_in6, ro_srcia)); |
439 | 1 | _CASSERT(offsetof(struct route, ro_flags) == |
440 | 1 | offsetof(struct route_in6, ro_flags)); |
441 | 1 | _CASSERT(offsetof(struct route, ro_dst) == |
442 | 1 | offsetof(struct route_in6, ro_dst)); |
443 | | |
444 | 1 | PE_parse_boot_argn("rte_debug", &rte_debug, sizeof(rte_debug)); |
445 | 1 | if (rte_debug != 0) { |
446 | 0 | rte_debug |= RTD_DEBUG; |
447 | 0 | } |
448 | | |
449 | 1 | rnh_lock_grp_attr = lck_grp_attr_alloc_init(); |
450 | 1 | rnh_lock_grp = lck_grp_alloc_init("route", rnh_lock_grp_attr); |
451 | 1 | rnh_lock_attr = lck_attr_alloc_init(); |
452 | 1 | lck_mtx_init(rnh_lock, rnh_lock_grp, rnh_lock_attr); |
453 | | |
454 | 1 | rte_mtx_grp_attr = lck_grp_attr_alloc_init(); |
455 | 1 | rte_mtx_grp = lck_grp_alloc_init(RTE_NAME, rte_mtx_grp_attr); |
456 | 1 | rte_mtx_attr = lck_attr_alloc_init(); |
457 | | |
458 | 1 | lck_mtx_lock(rnh_lock); |
459 | 1 | rn_init(); /* initialize all zeroes, all ones, mask table */ |
460 | 1 | lck_mtx_unlock(rnh_lock); |
461 | 1 | rtable_init((void **)rt_tables); |
462 | | |
463 | 1 | if (rte_debug & RTD_DEBUG) { |
464 | 0 | size = sizeof(struct rtentry_dbg); |
465 | 1 | } else { |
466 | 1 | size = sizeof(struct rtentry); |
467 | 1 | } |
468 | | |
469 | 1 | rte_zone = zone_create(RTE_ZONE_NAME, size, ZC_NOENCRYPT); |
470 | | |
471 | 1 | TAILQ_INIT(&rttrash_head); |
472 | 1 | } |
473 | | |
474 | | /* |
475 | | * Given a route, determine whether or not it is the non-scoped default |
476 | | * route; dst typically comes from rt_key(rt) but may be coming from |
477 | | * a separate place when rt is in the process of being created. |
478 | | */ |
479 | | boolean_t |
480 | | rt_primary_default(struct rtentry *rt, struct sockaddr *dst) |
481 | 13.9k | { |
482 | 13.9k | return SA_DEFAULT(dst) && !(rt->rt_flags & RTF_IFSCOPE); |
483 | 13.9k | } |
484 | | |
485 | | /* |
486 | | * Set the ifscope of the primary interface; caller holds rnh_lock. |
487 | | */ |
488 | | void |
489 | | set_primary_ifscope(int af, unsigned int ifscope) |
490 | 0 | { |
491 | 0 | if (af == AF_INET) { |
492 | 0 | primary_ifscope = ifscope; |
493 | 0 | } else { |
494 | 0 | primary6_ifscope = ifscope; |
495 | 0 | } |
496 | 0 | } |
497 | | |
498 | | /* |
499 | | * Return the ifscope of the primary interface; caller holds rnh_lock. |
500 | | */ |
501 | | unsigned int |
502 | | get_primary_ifscope(int af) |
503 | 102k | { |
504 | 102k | return af == AF_INET ? primary_ifscope : primary6_ifscope; |
505 | 102k | } |
506 | | |
507 | | /* |
508 | | * Set the scope ID of a given sockaddr_in. |
509 | | */ |
510 | | void |
511 | | sin_set_ifscope(struct sockaddr *sa, unsigned int ifscope) |
512 | 1.29M | { |
513 | | /* Caller must pass in sockaddr_in */ |
514 | 1.29M | ASSERT_SINIFSCOPE(sa); |
515 | | |
516 | 1.29M | SINIFSCOPE(sa)->sin_scope_id = ifscope; |
517 | 1.29M | } |
518 | | |
519 | | /* |
520 | | * Set the scope ID of a given sockaddr_in6. |
521 | | */ |
522 | | static inline void |
523 | | sin6_set_ifscope(struct sockaddr *sa, unsigned int ifscope) |
524 | 447k | { |
525 | | /* Caller must pass in sockaddr_in6 */ |
526 | 447k | ASSERT_SIN6IFSCOPE(sa); |
527 | | |
528 | 447k | SIN6IFSCOPE(sa)->sin6_scope_id = ifscope; |
529 | 447k | } |
530 | | |
531 | | /* |
532 | | * Given a sockaddr_in, return the scope ID to the caller. |
533 | | */ |
534 | | unsigned int |
535 | | sin_get_ifscope(struct sockaddr *sa) |
536 | 862k | { |
537 | | /* Caller must pass in sockaddr_in */ |
538 | 862k | ASSERT_SINIFSCOPE(sa); |
539 | | |
540 | 862k | return SINIFSCOPE(sa)->sin_scope_id; |
541 | 862k | } |
542 | | |
543 | | /* |
544 | | * Given a sockaddr_in6, return the scope ID to the caller. |
545 | | */ |
546 | | unsigned int |
547 | | sin6_get_ifscope(struct sockaddr *sa) |
548 | 309k | { |
549 | | /* Caller must pass in sockaddr_in6 */ |
550 | 309k | ASSERT_SIN6IFSCOPE(sa); |
551 | | |
552 | 309k | return SIN6IFSCOPE(sa)->sin6_scope_id; |
553 | 309k | } |
554 | | |
555 | | static inline void |
556 | | sin6_set_embedded_ifscope(struct sockaddr *sa, unsigned int ifscope) |
557 | 37.7k | { |
558 | | /* Caller must pass in sockaddr_in6 */ |
559 | 37.7k | ASSERT_SIN6IFSCOPE(sa); |
560 | 37.7k | VERIFY(IN6_IS_SCOPE_EMBED(&(SIN6(sa)->sin6_addr))); |
561 | | |
562 | 37.7k | SIN6(sa)->sin6_addr.s6_addr16[1] = htons(ifscope); |
563 | 37.7k | } |
564 | | |
565 | | static inline unsigned int |
566 | | sin6_get_embedded_ifscope(struct sockaddr *sa) |
567 | 142k | { |
568 | | /* Caller must pass in sockaddr_in6 */ |
569 | 142k | ASSERT_SIN6IFSCOPE(sa); |
570 | | |
571 | 142k | return ntohs(SIN6(sa)->sin6_addr.s6_addr16[1]); |
572 | 142k | } |
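The embedded form keeps the interface index in the second 16-bit word of the in6_addr itself (KAME style), alongside the sin6_scope_id field. A minimal round-trip sketch, with a hypothetical interface index of 4:

    struct sockaddr_in6 sin6 = {
            .sin6_len = sizeof(struct sockaddr_in6),
            .sin6_family = AF_INET6,
    };

    sin6.sin6_addr.s6_addr[0] = 0xfe;       /* fe80::1, link-local */
    sin6.sin6_addr.s6_addr[1] = 0x80;
    sin6.sin6_addr.s6_addr[15] = 0x01;

    sin6_set_embedded_ifscope(SA(&sin6), 4);        /* internally fe80:4::1 */
    VERIFY(sin6_get_embedded_ifscope(SA(&sin6)) == 4);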
573 | | |
574 | | /* |
575 | | * Copy a sockaddr_{in,in6} src to a dst storage and set scope ID into dst. |
576 | | * |
577 | | * To clear the scope ID, pass in a NULL pifscope. To set the scope ID, pass |
578 | | * in a non-NULL pifscope with non-zero ifscope. Otherwise if pifscope is |
579 | | * non-NULL and ifscope is IFSCOPE_NONE, the existing scope ID is left intact. |
580 | | * In any case, the effective scope ID value is returned to the caller via |
581 | | * pifscope, if it is non-NULL. |
582 | | */ |
583 | | struct sockaddr * |
584 | | sa_copy(struct sockaddr *src, struct sockaddr_storage *dst, |
585 | | unsigned int *pifscope) |
586 | 1.86M | { |
587 | 1.86M | int af = src->sa_family; |
588 | 1.86M | unsigned int ifscope = (pifscope != NULL) ? *pifscope : IFSCOPE_NONE; |
589 | | |
590 | 1.86M | VERIFY(af == AF_INET || af == AF_INET6); |
591 | | |
592 | 0 | bzero(dst, sizeof(*dst)); |
593 | | |
594 | 1.86M | if (af == AF_INET) { |
595 | 1.36M | bcopy(src, dst, sizeof(struct sockaddr_in)); |
596 | 1.36M | dst->ss_len = sizeof(struct sockaddr_in); |
597 | 1.36M | if (pifscope == NULL || ifscope != IFSCOPE_NONE) { |
598 | 1.29M | sin_set_ifscope(SA(dst), ifscope); |
599 | 1.29M | } |
600 | 1.36M | } else { |
601 | 496k | bcopy(src, dst, sizeof(struct sockaddr_in6)); |
602 | 496k | dst->ss_len = sizeof(struct sockaddr_in6); |
603 | 496k | if (pifscope != NULL && |
604 | 496k | IN6_IS_SCOPE_EMBED(&SIN6(dst)->sin6_addr)) { |
605 | 142k | unsigned int eifscope; |
606 | | /* |
607 | | * If the address contains the embedded scope ID, |
608 | | * use that as the value for sin6_scope_id as long as |
609 | | * the caller doesn't insist on clearing it (by |
610 | | * passing NULL) or setting it. |
611 | | */ |
612 | 142k | eifscope = sin6_get_embedded_ifscope(SA(dst)); |
613 | 142k | if (eifscope != IFSCOPE_NONE && ifscope == IFSCOPE_NONE) { |
614 | 2.12k | ifscope = eifscope; |
615 | 2.12k | } |
616 | 142k | if (ifscope != IFSCOPE_NONE) { |
617 | | /* Set ifscope from pifscope or eifscope */ |
618 | 142k | sin6_set_ifscope(SA(dst), ifscope); |
619 | 142k | } else { |
620 | | /* If sin6_scope_id has a value, use that one */ |
621 | 0 | ifscope = sin6_get_ifscope(SA(dst)); |
622 | 0 | } |
623 | | /* |
624 | | * If sin6_scope_id is set but the address doesn't |
625 | | * contain the equivalent embedded value, set it. |
626 | | */ |
627 | 142k | if (ifscope != IFSCOPE_NONE && eifscope != ifscope) { |
628 | 37.7k | sin6_set_embedded_ifscope(SA(dst), ifscope); |
629 | 37.7k | } |
630 | 353k | } else if (pifscope == NULL || ifscope != IFSCOPE_NONE) { |
631 | 304k | sin6_set_ifscope(SA(dst), ifscope); |
632 | 304k | } |
633 | 496k | } |
634 | | |
635 | 1.86M | if (pifscope != NULL) { |
636 | 1.17M | *pifscope = (af == AF_INET) ? sin_get_ifscope(SA(dst)) : |
637 | 1.17M | sin6_get_ifscope(SA(dst)); |
638 | 1.17M | } |
639 | | |
640 | 1.86M | return SA(dst); |
641 | 1.86M | } |
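A short usage sketch of the three pifscope modes documented above (the caller and the interface index are hypothetical):

    struct sockaddr_storage ss;
    unsigned int ifscope;

    (void) sa_copy(src, &ss, NULL);         /* copy and clear the scope ID */

    ifscope = 4;                            /* assumed interface index */
    (void) sa_copy(src, &ss, &ifscope);     /* copy and force scope ID 4 */

    ifscope = IFSCOPE_NONE;                 /* keep any existing scope ID; */
    (void) sa_copy(src, &ss, &ifscope);     /* effective value returned here */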
642 | | |
643 | | /* |
644 | | * Copy a mask from src to a dst storage and set scope ID into dst. |
645 | | */ |
646 | | static struct sockaddr * |
647 | | ma_copy(int af, struct sockaddr *src, struct sockaddr_storage *dst, |
648 | | unsigned int ifscope) |
649 | 8.68k | { |
650 | 8.68k | VERIFY(af == AF_INET || af == AF_INET6); |
651 | | |
652 | 0 | bzero(dst, sizeof(*dst)); |
653 | 8.68k | rt_maskedcopy(src, SA(dst), src); |
654 | | |
655 | | /* |
656 | | * The length of the mask sockaddr would need to be adjusted |
657 | | * to cover the additional {sin,sin6}_ifscope field; when ifscope |
658 | | * is IFSCOPE_NONE, we'd end up clearing the scope ID field on |
659 | | * the destination mask in addition to extending the length |
660 | | * of the sockaddr, as a side effect. This is okay, as any |
661 | | * trailing zeroes would be skipped by rn_addmask prior to |
662 | | * inserting or looking up the mask in the mask tree. |
663 | | */ |
664 | 8.68k | if (af == AF_INET) { |
665 | 8.68k | SINIFSCOPE(dst)->sin_scope_id = ifscope; |
666 | 8.68k | SINIFSCOPE(dst)->sin_len = |
667 | 8.68k | offsetof(struct sockaddr_inifscope, sin_scope_id) + |
668 | 8.68k | sizeof(SINIFSCOPE(dst)->sin_scope_id); |
669 | 8.68k | } else { |
670 | 3 | SIN6IFSCOPE(dst)->sin6_scope_id = ifscope; |
671 | 3 | SIN6IFSCOPE(dst)->sin6_len = |
672 | 3 | offsetof(struct sockaddr_in6, sin6_scope_id) + |
673 | 3 | sizeof(SIN6IFSCOPE(dst)->sin6_scope_id); |
674 | 3 | } |
675 | | |
676 | 8.68k | return SA(dst); |
677 | 8.68k | } |
678 | | |
679 | | /* |
680 | | * Trim trailing zeroes on a sockaddr and update its length. |
681 | | */ |
682 | | static struct sockaddr * |
683 | | sa_trim(struct sockaddr *sa, int skip) |
684 | 0 | { |
685 | 0 | caddr_t cp, base = (caddr_t)sa + skip; |
686 | |
687 | 0 | if (sa->sa_len <= skip) { |
688 | 0 | return sa; |
689 | 0 | } |
690 | | |
691 | 0 | for (cp = base + (sa->sa_len - skip); cp > base && cp[-1] == 0;) { |
692 | 0 | cp--; |
693 | 0 | } |
694 | |
695 | 0 | sa->sa_len = (cp - base) + skip; |
696 | 0 | if (sa->sa_len < skip) { |
697 | | /* Must not happen, and if so, panic */ |
698 | 0 | panic("%s: broken logic (sa_len %d < skip %d )", __func__, |
699 | 0 | sa->sa_len, skip); |
700 | | /* NOTREACHED */ |
701 | 0 | } else if (sa->sa_len == skip) { |
702 | | /* If we end up with all zeroes, then there's no mask */ |
703 | 0 | sa->sa_len = 0; |
704 | 0 | } |
705 | | |
706 | 0 | return sa; |
707 | 0 | } |
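In the scrubbing path these two helpers are inverses: ma_copy() grows sa_len to cover the scope ID field, and sa_trim() shrinks it back once that field is zeroed. A minimal sketch for an AF_INET mask, mirroring rtm_scrub() below (the ifscope value is hypothetical):

    struct sockaddr_storage ms;
    struct sockaddr *m;

    m = ma_copy(AF_INET, mask, &ms, 4);     /* sa_len now covers sin_scope_id */

    SINIFSCOPE(m)->sin_scope_id = IFSCOPE_NONE;
    m = sa_trim(m, offsetof(struct sockaddr_in, sin_addr));
    /* trailing zeroes, including the cleared scope ID, are trimmed off */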
708 | | |
709 | | /* |
710 | | * Called by rtm_msg{1,2} routines to "scrub" socket address structures of |
711 | | * kernel private information, so that clients of the routing socket will |
712 | | * not be confused by the presence of the information, or the side effect of |
713 | | * the increased length due to that. The source sockaddr is not modified; |
714 | | * instead, the scrubbing happens on the destination sockaddr storage that |
715 | | * is passed in by the caller. |
716 | | * |
717 | | * Scrubbing entails: |
718 | | * - removing embedded scope identifiers from network mask and destination |
719 | | * IPv4 and IPv6 socket addresses |
720 | | * - optionally removing global scope interface hardware addresses from |
721 | | * link-layer interface addresses when the MAC framework check fails. |
722 | | */ |
723 | | struct sockaddr * |
724 | | rtm_scrub(int type, int idx, struct sockaddr *hint, struct sockaddr *sa, |
725 | | void *buf, uint32_t buflen, kauth_cred_t *credp) |
726 | 0 | { |
727 | 0 | struct sockaddr_storage *ss = (struct sockaddr_storage *)buf; |
728 | 0 | struct sockaddr *ret = sa; |
729 | |
730 | 0 | VERIFY(buf != NULL && buflen >= sizeof(*ss)); |
731 | 0 | bzero(buf, buflen); |
732 | |
733 | 0 | switch (idx) { |
734 | 0 | case RTAX_DST: |
735 | | /* |
736 | | * If this is for an AF_INET/AF_INET6 destination address, |
737 | | * call sa_copy() to clear the scope ID field. |
738 | | */ |
739 | 0 | if (sa->sa_family == AF_INET && |
740 | 0 | SINIFSCOPE(sa)->sin_scope_id != IFSCOPE_NONE) { |
741 | 0 | ret = sa_copy(sa, ss, NULL); |
742 | 0 | } else if (sa->sa_family == AF_INET6 && |
743 | 0 | SIN6IFSCOPE(sa)->sin6_scope_id != IFSCOPE_NONE) { |
744 | 0 | ret = sa_copy(sa, ss, NULL); |
745 | 0 | } |
746 | 0 | break; |
747 | | |
748 | 0 | case RTAX_NETMASK: { |
749 | 0 | int skip, af; |
750 | | /* |
751 | | * If this is for a mask, we can't tell whether or not there |
752 | | * is a valid scope ID value, as the span of bytes between |
753 | | * sa_len and the beginning of the mask (offset of sin_addr in |
754 | | * the case of AF_INET, or sin6_addr for AF_INET6) may be |
755 | | * filled with all-ones by rn_addmask(), and hence we cannot |
756 | | * rely on sa_family. Because of this, we use the sa_family |
757 | | * of the hint sockaddr (RTAX_{DST,IFA}) as an indicator of |
758 | | * whether or not the mask is to be treated as one for AF_INET |
759 | | * or AF_INET6. Clearing the scope ID field involves setting |
760 | | * it to IFSCOPE_NONE followed by calling sa_trim() to trim |
761 | | * trailing zeroes from the storage sockaddr, which reverses |
762 | | * what was done earlier by ma_copy() on the source sockaddr. |
763 | | */ |
764 | 0 | if (hint == NULL || |
765 | 0 | ((af = hint->sa_family) != AF_INET && af != AF_INET6)) { |
766 | 0 | break; /* nothing to do */ |
767 | 0 | } |
768 | 0 | skip = (af == AF_INET) ? |
769 | 0 | offsetof(struct sockaddr_in, sin_addr) : |
770 | 0 | offsetof(struct sockaddr_in6, sin6_addr); |
771 | |
772 | 0 | if (sa->sa_len > skip && sa->sa_len <= sizeof(*ss)) { |
773 | 0 | bcopy(sa, ss, sa->sa_len); |
774 | | /* |
775 | | * Don't use {sin,sin6}_set_ifscope() as sa_family |
776 | | * and sa_len for the netmask might not be set to |
777 | | * the corresponding expected values of the hint. |
778 | | */ |
779 | 0 | if (hint->sa_family == AF_INET) { |
780 | 0 | SINIFSCOPE(ss)->sin_scope_id = IFSCOPE_NONE; |
781 | 0 | } else { |
782 | 0 | SIN6IFSCOPE(ss)->sin6_scope_id = IFSCOPE_NONE; |
783 | 0 | } |
784 | 0 | ret = sa_trim(SA(ss), skip); |
785 | | |
786 | | /* |
787 | | * For AF_INET6 mask, set sa_len appropriately unless |
788 | | * this is requested via sysctl_dumpentry(), in which |
789 | | * case we return the raw value. |
790 | | */ |
791 | 0 | if (hint->sa_family == AF_INET6 && |
792 | 0 | type != RTM_GET && type != RTM_GET2) { |
793 | 0 | SA(ret)->sa_len = sizeof(struct sockaddr_in6); |
794 | 0 | } |
795 | 0 | } |
796 | 0 | break; |
797 | 0 | } |
798 | 0 | case RTAX_GATEWAY: { |
799 | | /* |
800 | | * Break if the gateway is not AF_LINK type (indirect routes) |
801 | | * |
802 | | * Else, if is, check if it is resolved. If not yet resolved |
803 | | * simply break else scrub the link layer address. |
804 | | */ |
805 | 0 | if ((sa->sa_family != AF_LINK) || (SDL(sa)->sdl_alen == 0)) { |
806 | 0 | break; |
807 | 0 | } |
808 | 0 | OS_FALLTHROUGH; |
809 | 0 | } |
810 | | |
811 | 0 | case RTAX_IFP: { |
812 | 0 | if (sa->sa_family == AF_LINK && credp) { |
813 | 0 | struct sockaddr_dl *sdl = SDL(buf); |
814 | 0 | const void *bytes; |
815 | 0 | size_t size; |
816 | | |
817 | | /* caller should handle worst case: SOCK_MAXADDRLEN */ |
818 | 0 | VERIFY(buflen >= sa->sa_len); |
819 | | |
820 | 0 | bcopy(sa, sdl, sa->sa_len); |
821 | 0 | bytes = dlil_ifaddr_bytes(sdl, &size, credp); |
822 | 0 | if (bytes != CONST_LLADDR(sdl)) { |
823 | 0 | VERIFY(sdl->sdl_alen == size); |
824 | 0 | bcopy(bytes, LLADDR(sdl), size); |
825 | 0 | } |
826 | 0 | ret = (struct sockaddr *)sdl; |
827 | 0 | } |
828 | 0 | break; |
829 | 0 | } |
830 | 0 | default: |
831 | 0 | break; |
832 | 0 | } |
833 | | |
834 | 0 | return ret; |
835 | 0 | } |
836 | | |
837 | | /* |
838 | | * Callback leaf-matching routine for rn_matchaddr_args used |
839 | | * for looking up an exact match for a scoped route entry. |
840 | | */ |
841 | | static int |
842 | | rn_match_ifscope(struct radix_node *rn, void *arg) |
843 | 50.0k | { |
844 | 50.0k | struct rtentry *rt = (struct rtentry *)rn; |
845 | 50.0k | struct matchleaf_arg *ma = arg; |
846 | 50.0k | int af = rt_key(rt)->sa_family; |
847 | | |
848 | 50.0k | if (!(rt->rt_flags & RTF_IFSCOPE) || (af != AF_INET && af != AF_INET6)) { |
849 | 1.17k | return 0; |
850 | 1.17k | } |
851 | | |
852 | 48.9k | return af == AF_INET ? |
853 | 699 | (SINIFSCOPE(rt_key(rt))->sin_scope_id == ma->ifscope) : |
854 | 48.9k | (SIN6IFSCOPE(rt_key(rt))->sin6_scope_id == ma->ifscope); |
855 | 50.0k | } |
856 | | |
857 | | /* |
858 | | * Atomically increment route generation counter |
859 | | */ |
860 | | void |
861 | | routegenid_update(void) |
862 | 0 | { |
863 | 0 | routegenid_inet_update(); |
864 | 0 | routegenid_inet6_update(); |
865 | 0 | } |
866 | | |
867 | | void |
868 | | routegenid_inet_update(void) |
869 | 18.6k | { |
870 | 18.6k | atomic_add_32(&route_genid_inet, 1); |
871 | 18.6k | } |
872 | | |
873 | | void |
874 | | routegenid_inet6_update(void) |
875 | 26 | { |
876 | 26 | atomic_add_32(&route_genid_inet6, 1); |
877 | 26 | } |
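Cached routes snapshot these counters (see RT_GENID_SYNC below); a mismatch marks the cache stale. A minimal sketch of the consumer-side revalidation pattern, assuming the ROUTE_UNUSABLE and ROUTE_RELEASE macros from route.h:

    if (ro->ro_rt == NULL || ROUTE_UNUSABLE(ro)) {
            ROUTE_RELEASE(ro);      /* drop the stale cached entry, if any */
            rtalloc_ign(ro, 0);     /* re-resolve; genid is re-synced */
    }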
878 | | |
879 | | /* |
880 | | * Packet routing routines. |
881 | | */ |
882 | | void |
883 | | rtalloc(struct route *ro) |
884 | 0 | { |
885 | 0 | rtalloc_ign(ro, 0); |
886 | 0 | } |
887 | | |
888 | | void |
889 | | rtalloc_scoped(struct route *ro, unsigned int ifscope) |
890 | 41.1k | { |
891 | 41.1k | rtalloc_scoped_ign(ro, 0, ifscope); |
892 | 41.1k | } |
893 | | |
894 | | static void |
895 | | rtalloc_ign_common_locked(struct route *ro, uint32_t ignore, |
896 | | unsigned int ifscope) |
897 | 99.5k | { |
898 | 99.5k | struct rtentry *rt; |
899 | | |
900 | 99.5k | if ((rt = ro->ro_rt) != NULL) { |
901 | 0 | RT_LOCK_SPIN(rt); |
902 | 0 | if (rt->rt_ifp != NULL && !ROUTE_UNUSABLE(ro)) { |
903 | 0 | RT_UNLOCK(rt); |
904 | 0 | return; |
905 | 0 | } |
906 | 0 | RT_UNLOCK(rt); |
907 | 0 | ROUTE_RELEASE_LOCKED(ro); /* rnh_lock already held */ |
908 | 0 | } |
909 | 99.5k | ro->ro_rt = rtalloc1_common_locked(&ro->ro_dst, 1, ignore, ifscope); |
910 | 99.5k | if (ro->ro_rt != NULL) { |
911 | 44.2k | RT_GENID_SYNC(ro->ro_rt); |
912 | 44.2k | RT_LOCK_ASSERT_NOTHELD(ro->ro_rt); |
913 | 44.2k | } |
914 | 99.5k | } |
915 | | |
916 | | void |
917 | | rtalloc_ign(struct route *ro, uint32_t ignore) |
918 | 39.6k | { |
919 | 39.6k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); |
920 | 39.6k | lck_mtx_lock(rnh_lock); |
921 | 39.6k | rtalloc_ign_common_locked(ro, ignore, IFSCOPE_NONE); |
922 | 39.6k | lck_mtx_unlock(rnh_lock); |
923 | 39.6k | } |
924 | | |
925 | | void |
926 | | rtalloc_scoped_ign(struct route *ro, uint32_t ignore, unsigned int ifscope) |
927 | 59.8k | { |
928 | 59.8k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); |
929 | 59.8k | lck_mtx_lock(rnh_lock); |
930 | 59.8k | rtalloc_ign_common_locked(ro, ignore, ifscope); |
931 | 59.8k | lck_mtx_unlock(rnh_lock); |
932 | 59.8k | } |
933 | | |
934 | | static struct rtentry * |
935 | | rtalloc1_locked(struct sockaddr *dst, int report, uint32_t ignflags) |
936 | 1 | { |
937 | 1 | return rtalloc1_common_locked(dst, report, ignflags, IFSCOPE_NONE); |
938 | 1 | } |
939 | | |
940 | | struct rtentry * |
941 | | rtalloc1_scoped_locked(struct sockaddr *dst, int report, uint32_t ignflags, |
942 | | unsigned int ifscope) |
943 | 505k | { |
944 | 505k | return rtalloc1_common_locked(dst, report, ignflags, ifscope); |
945 | 505k | } |
946 | | |
947 | | static boolean_t |
948 | | route_ignore_protocol_cloning_for_dst(struct rtentry *rt, struct sockaddr *dst) |
949 | 98.8k | { |
950 | | /* |
951 | | * For now keep protocol cloning for any type of IPv4 |
952 | | * destination. |
953 | | */ |
954 | 98.8k | if (dst->sa_family != AF_INET6) { |
955 | 8.73k | return FALSE; |
956 | 8.73k | } |
957 | | |
958 | | /* |
959 | | * Limit protocol route creation for IPv6 ULA destinations |
960 | | * cloned from the default route. |
961 | | * Just to be safe, even though it doesn't affect routability, |
962 | | * still allow protocol-cloned routes if we happen to hit the |
963 | | * default route over a companion link for a ULA destination. |
964 | | */ |
965 | 90.1k | if (!IFNET_IS_COMPANION_LINK(rt->rt_ifp) && |
966 | 90.1k | (rt->rt_flags & RTF_GATEWAY) && |
967 | 90.1k | (rt->rt_flags & RTF_PRCLONING) && |
968 | 90.1k | SA_DEFAULT(rt_key(rt)) && |
969 | 90.1k | IN6_IS_ADDR_UNIQUE_LOCAL(&SIN6(dst)->sin6_addr)) { |
970 | 0 | return TRUE; |
971 | 0 | } |
972 | 90.1k | return FALSE; |
973 | 90.1k | } |
974 | | |
975 | | struct rtentry * |
976 | | rtalloc1_common_locked(struct sockaddr *dst, int report, uint32_t ignflags, |
977 | | unsigned int ifscope) |
978 | 605k | { |
979 | 605k | struct radix_node_head *rnh = rt_tables[dst->sa_family]; |
980 | 605k | struct rtentry *rt, *newrt = NULL; |
981 | 605k | struct rt_addrinfo info; |
982 | 605k | uint32_t nflags; |
983 | 605k | int err = 0, msgtype = RTM_MISS; |
984 | | |
985 | 605k | if (rnh == NULL) { |
986 | 0 | goto unreachable; |
987 | 0 | } |
988 | | |
989 | | /* |
990 | | * Find the longest prefix or exact (in the scoped case) address match; |
991 | | * callee adds a reference to entry and checks for root node as well |
992 | | */ |
993 | 605k | rt = rt_lookup(FALSE, dst, NULL, rnh, ifscope); |
994 | 605k | if (rt == NULL) { |
995 | 506k | goto unreachable; |
996 | 506k | } |
997 | | |
998 | | /* |
999 | | * Explicitly ignore protocol cloning for certain destinations. |
1000 | | * Some checks below are kind of redundant, as for now, RTF_PRCLONING |
1001 | | * is only set on indirect (RTF_GATEWAY) routes. |
1002 | | * Also, we do this only when the route lookup above resulted in the |
1003 | | * default route. |
1004 | | * This ensures that the resulting indirect host route doesn't |
1005 | | * interfere when the routing table gets configured with an indirect |
1006 | | * or direct subnet route that is more specific than the current |
1007 | | * parent route of the resulting protocol-cloned route. |
1008 | | * |
1009 | | * At the crux of it all, it is a problem that we maintain host cache |
1010 | | * in the routing table. We should revisit this for a generic solution. |
1011 | | */ |
1012 | 98.8k | if (route_ignore_protocol_cloning_for_dst(rt, dst)) { |
1013 | 0 | ignflags |= RTF_PRCLONING; |
1014 | 0 | } |
1015 | | |
1016 | 98.8k | RT_LOCK_SPIN(rt); |
1017 | 98.8k | newrt = rt; |
1018 | 98.8k | nflags = rt->rt_flags & ~ignflags; |
1019 | 98.8k | RT_UNLOCK(rt); |
1020 | | |
1021 | 98.8k | if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) { |
1022 | | /* |
1023 | | * We are apparently adding (report = 0 in delete). |
1024 | | * If it requires that it be cloned, do so. |
1025 | | * (This implies it wasn't a HOST route.) |
1026 | | */ |
1027 | 3.24k | err = rtrequest_locked(RTM_RESOLVE, dst, NULL, NULL, 0, &newrt); |
1028 | 3.24k | if (err) { |
1029 | | /* |
1030 | | * If the cloning didn't succeed, maybe what we |
1031 | | * have from lookup above will do. Return that; |
1032 | | * no need to hold another reference since it's |
1033 | | * already done. |
1034 | | */ |
1035 | 1.00k | newrt = rt; |
1036 | 1.00k | goto miss; |
1037 | 1.00k | } |
1038 | | |
1039 | | /* |
1040 | | * We cloned it; drop the original route found during lookup. |
1041 | | * The resulting cloned route (newrt) now has an extra |
1042 | | * reference held during rtrequest. |
1043 | | */ |
1044 | 2.23k | rtfree_locked(rt); |
1045 | | |
1046 | | /* |
1047 | | * If the newly created cloned route is a direct host route |
1048 | | * then also check if it is to a router or not. |
1049 | | * If it is, then set the RTF_ROUTER flag on the host route |
1050 | | * for the gateway. |
1051 | | * |
1052 | | * XXX It is possible for the default route to be created after |
1053 | | * the cloned route to the router's IP is created. |
1054 | | * We can handle that corner case with special handling for RTM_ADD |
1055 | | * of the default route. |
1056 | | */ |
1057 | 2.23k | if ((newrt->rt_flags & (RTF_HOST | RTF_LLINFO)) == |
1058 | 2.23k | (RTF_HOST | RTF_LLINFO)) { |
1059 | 0 | struct rtentry *defrt = NULL; |
1060 | 0 | struct sockaddr_storage def_key; |
1061 | |
1062 | 0 | bzero(&def_key, sizeof(def_key)); |
1063 | 0 | def_key.ss_len = rt_key(newrt)->sa_len; |
1064 | 0 | def_key.ss_family = rt_key(newrt)->sa_family; |
1065 | |
1066 | 0 | defrt = rtalloc1_scoped_locked((struct sockaddr *)&def_key, |
1067 | 0 | 0, 0, newrt->rt_ifp->if_index); |
1068 | |
1069 | 0 | if (defrt) { |
1070 | 0 | if (equal(rt_key(newrt), defrt->rt_gateway)) { |
1071 | 0 | newrt->rt_flags |= RTF_ROUTER; |
1072 | 0 | } |
1073 | 0 | rtfree_locked(defrt); |
1074 | 0 | } |
1075 | 0 | } |
1076 | | |
1077 | 2.23k | if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) { |
1078 | | /* |
1079 | | * If the new route specifies it be |
1080 | | * externally resolved, then go do that. |
1081 | | */ |
1082 | 0 | msgtype = RTM_RESOLVE; |
1083 | 0 | goto miss; |
1084 | 0 | } |
1085 | 2.23k | } |
1086 | 97.8k | goto done; |
1087 | | |
1088 | 506k | unreachable: |
1089 | | /* |
1090 | | * Either we hit the root or couldn't find any match, |
1091 | | * which basically means "can't get there from here". |
1092 | | */ |
1093 | 506k | rtstat.rts_unreach++; |
1094 | | |
1095 | 507k | miss: |
1096 | 507k | if (report) { |
1097 | | /* |
1098 | | * If required, report the failure to the supervising |
1099 | | * Authorities. |
1100 | | * For a delete, this is not an error. (report == 0) |
1101 | | */ |
1102 | 56.2k | bzero((caddr_t)&info, sizeof(info)); |
1103 | 56.2k | info.rti_info[RTAX_DST] = dst; |
1104 | 56.2k | rt_missmsg(msgtype, &info, 0, err); |
1105 | 56.2k | } |
1106 | 605k | done: |
1107 | 605k | return newrt; |
1108 | 507k | } |
1109 | | |
1110 | | struct rtentry * |
1111 | | rtalloc1(struct sockaddr *dst, int report, uint32_t ignflags) |
1112 | 1 | { |
1113 | 1 | struct rtentry *entry; |
1114 | 1 | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); |
1115 | 1 | lck_mtx_lock(rnh_lock); |
1116 | 1 | entry = rtalloc1_locked(dst, report, ignflags); |
1117 | 1 | lck_mtx_unlock(rnh_lock); |
1118 | 1 | return entry; |
1119 | 1 | } |
1120 | | |
1121 | | struct rtentry * |
1122 | | rtalloc1_scoped(struct sockaddr *dst, int report, uint32_t ignflags, |
1123 | | unsigned int ifscope) |
1124 | 503k | { |
1125 | 503k | struct rtentry *entry; |
1126 | 503k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); |
1127 | 503k | lck_mtx_lock(rnh_lock); |
1128 | 503k | entry = rtalloc1_scoped_locked(dst, report, ignflags, ifscope); |
1129 | 503k | lck_mtx_unlock(rnh_lock); |
1130 | 503k | return entry; |
1131 | 503k | } |
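Every entry returned by these wrappers carries a reference taken on behalf of the caller, which must eventually be dropped with rtfree(). A minimal caller sketch (dst and ifscope are hypothetical):

    struct rtentry *rt;

    rt = rtalloc1_scoped(dst, 1 /* report */, 0 /* ignflags */, ifscope);
    if (rt != NULL) {
            RT_LOCK(rt);
            /* e.g. read rt->rt_ifp and rt->rt_gateway under rt_lock */
            RT_UNLOCK(rt);
            rtfree(rt);             /* release the lookup reference */
    }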
1132 | | |
1133 | | /* |
1134 | | * Remove a reference count from an rtentry. |
1135 | | * If the count gets low enough, take it out of the routing table |
1136 | | */ |
1137 | | void |
1138 | | rtfree_locked(struct rtentry *rt) |
1139 | 20.4k | { |
1140 | 20.4k | rtfree_common(rt, TRUE); |
1141 | 20.4k | } |
1142 | | |
1143 | | static void |
1144 | | rtfree_common(struct rtentry *rt, boolean_t locked) |
1145 | 813k | { |
1146 | 813k | struct radix_node_head *rnh; |
1147 | | |
1148 | 813k | LCK_MTX_ASSERT(rnh_lock, locked ? |
1149 | 813k | LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED); |
1150 | | |
1151 | | /* |
1152 | | * Atomically decrement the reference count and if it reaches 0, |
1153 | | * and there is a close function defined, call the close function. |
1154 | | */ |
1155 | 813k | RT_LOCK_SPIN(rt); |
1156 | 813k | if (rtunref(rt) > 0) { |
1157 | 772k | RT_UNLOCK(rt); |
1158 | 772k | return; |
1159 | 772k | } |
1160 | | |
1161 | | /* |
1162 | | * To avoid violating lock ordering, we must drop rt_lock before |
1163 | | * trying to acquire the global rnh_lock. If we are called with |
1164 | | * rnh_lock held, then we already have exclusive access; otherwise |
1165 | | * we do the lock dance. |
1166 | | */ |
1167 | 40.2k | if (!locked) { |
1168 | | /* |
1169 | | * Note that we check it again below after grabbing rnh_lock, |
1170 | | * since it is possible that another thread doing a lookup wins |
1171 | | * the race, grabs the rnh_lock first, and bumps up reference |
1172 | | * count in which case the route should be left alone as it is |
1173 | | * still in use. It's also possible that another thread frees |
1174 | | * the route after we drop rt_lock; to prevent the route from |
1175 | | * being freed, we hold an extra reference. |
1176 | | */ |
1177 | 35.4k | RT_ADDREF_LOCKED(rt); |
1178 | 35.4k | RT_UNLOCK(rt); |
1179 | 35.4k | lck_mtx_lock(rnh_lock); |
1180 | 35.4k | RT_LOCK_SPIN(rt); |
1181 | 35.4k | if (rtunref(rt) > 0) { |
1182 | | /* We've lost the race, so abort */ |
1183 | 0 | RT_UNLOCK(rt); |
1184 | 0 | goto done; |
1185 | 0 | } |
1186 | 35.4k | } |
1187 | | |
1188 | | /* |
1189 | | * We may be blocked on other lock(s) as part of freeing |
1190 | | * the entry below, so convert from spin to full mutex. |
1191 | | */ |
1192 | 40.2k | RT_CONVERT_LOCK(rt); |
1193 | | |
1194 | 40.2k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
1195 | | |
1196 | | /* Negative refcnt must never happen */ |
1197 | 40.2k | if (rt->rt_refcnt != 0) { |
1198 | 0 | panic("rt %p invalid refcnt %d", rt, rt->rt_refcnt); |
1199 | | /* NOTREACHED */ |
1200 | 0 | } |
1201 | | /* Idle refcnt must have been dropped during rtunref() */ |
1202 | 40.2k | VERIFY(!(rt->rt_flags & RTF_IFREF)); |
1203 | | |
1204 | | /* |
1205 | | * Find the tree for that address family. |
1206 | | * Note: in the case of IGMP packets, there might not be an rnh. |
1207 | | */ |
1208 | 40.2k | rnh = rt_tables[rt_key(rt)->sa_family]; |
1209 | | |
1210 | | /* |
1211 | | * On last reference give the "close method" a chance to cleanup |
1212 | | * private state. This also permits (for IPv4 and IPv6) a chance |
1213 | | * to decide if the routing table entry should be purged immediately |
1214 | | * or at a later time. When an immediate purge is to happen the |
1215 | | * close routine typically issues RTM_DELETE which clears the RTF_UP |
1216 | | * flag on the entry so that the code below reclaims the storage. |
1217 | | */ |
1218 | 40.2k | if (rnh != NULL && rnh->rnh_close != NULL) { |
1219 | 40.2k | rnh->rnh_close((struct radix_node *)rt, rnh); |
1220 | 40.2k | } |
1221 | | |
1222 | | /* |
1223 | | * If we are no longer "up" (and ref == 0) then we can free the |
1224 | | * resources associated with the route. |
1225 | | */ |
1226 | 40.2k | if (!(rt->rt_flags & RTF_UP)) { |
1227 | 6.96k | struct rtentry *rt_parent; |
1228 | 6.96k | struct ifaddr *rt_ifa; |
1229 | | |
1230 | 6.96k | rt->rt_flags |= RTF_DEAD; |
1231 | 6.96k | if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) { |
1232 | 0 | panic("rt %p freed while in radix tree\n", rt); |
1233 | | /* NOTREACHED */ |
1234 | 0 | } |
1235 | | /* |
1236 | | * the rtentry must have been removed from the routing table |
1237 | | * so it is represented in rttrash; remove that now. |
1238 | | */ |
1239 | 6.96k | (void) OSDecrementAtomic(&rttrash); |
1240 | 6.96k | if (rte_debug & RTD_DEBUG) { |
1241 | 0 | TAILQ_REMOVE(&rttrash_head, (struct rtentry_dbg *)rt, |
1242 | 0 | rtd_trash_link); |
1243 | 0 | } |
1244 | | |
1245 | | /* |
1246 | | * Release references on items we hold, |
1247 | | * e.g. other routes and ifaddrs. |
1248 | | */ |
1249 | 6.96k | if ((rt_parent = rt->rt_parent) != NULL) { |
1250 | 2.23k | rt->rt_parent = NULL; |
1251 | 2.23k | } |
1252 | | |
1253 | 6.96k | if ((rt_ifa = rt->rt_ifa) != NULL) { |
1254 | 6.96k | rt->rt_ifa = NULL; |
1255 | 6.96k | } |
1256 | | |
1257 | | /* |
1258 | | * Now free any attached link-layer info. |
1259 | | */ |
1260 | 6.96k | if (rt->rt_llinfo != NULL) { |
1261 | 0 | if (rt->rt_llinfo_free != NULL) { |
1262 | 0 | (*rt->rt_llinfo_free)(rt->rt_llinfo); |
1263 | 0 | } else { |
1264 | 0 | R_Free(rt->rt_llinfo); |
1265 | 0 | } |
1266 | 0 | rt->rt_llinfo = NULL; |
1267 | 0 | } |
1268 | | |
1269 | | /* Destroy eventhandler lists context */ |
1270 | 6.96k | eventhandler_lists_ctxt_destroy(&rt->rt_evhdlr_ctxt); |
1271 | | |
1272 | | /* |
1273 | | * Route is no longer in the tree and refcnt is 0; |
1274 | | * we have exclusive access, so destroy it. |
1275 | | */ |
1276 | 6.96k | RT_UNLOCK(rt); |
1277 | 6.96k | rte_lock_destroy(rt); |
1278 | | |
1279 | 6.96k | if (rt_parent != NULL) { |
1280 | 2.23k | rtfree_locked(rt_parent); |
1281 | 2.23k | } |
1282 | | |
1283 | 6.96k | if (rt_ifa != NULL) { |
1284 | 6.96k | IFA_REMREF(rt_ifa); |
1285 | 6.96k | } |
1286 | | |
1287 | | /* |
1288 | | * The key is separately alloc'd so free it (see rt_setgate()). |
1289 | | * This also frees the gateway, as they are always malloc'd |
1290 | | * together. |
1291 | | */ |
1292 | 6.96k | R_Free(rt_key(rt)); |
1293 | | |
1294 | | /* |
1295 | | * Free any statistics that may have been allocated |
1296 | | */ |
1297 | 6.96k | nstat_route_detach(rt); |
1298 | | |
1299 | | /* |
1300 | | * and the rtentry itself of course |
1301 | | */ |
1302 | 6.96k | rte_free(rt); |
1303 | 33.2k | } else { |
1304 | | /* |
1305 | | * The "close method" has been called, but the route is |
1306 | | * still in the radix tree with zero refcnt, i.e. "up" |
1307 | | * and in the cached state. |
1308 | | */ |
1309 | 33.2k | RT_UNLOCK(rt); |
1310 | 33.2k | } |
1311 | 40.2k | done: |
1312 | 40.2k | if (!locked) { |
1313 | 35.4k | lck_mtx_unlock(rnh_lock); |
1314 | 35.4k | } |
1315 | 40.2k | } |
1316 | | |
1317 | | void |
1318 | | rtfree(struct rtentry *rt) |
1319 | 792k | { |
1320 | 792k | rtfree_common(rt, FALSE); |
1321 | 792k | } |
1322 | | |
1323 | | /* |
1324 | | * Decrements the refcount but does not free the route when |
1325 | | * the refcount reaches zero. Unless you have a really good reason, |
1326 | | * use rtfree, not rtunref. |
1327 | | */ |
1328 | | int |
1329 | | rtunref(struct rtentry *p) |
1330 | 895k | { |
1331 | 895k | RT_LOCK_ASSERT_HELD(p); |
1332 | | |
1333 | 895k | if (p->rt_refcnt == 0) { |
1334 | 0 | panic("%s(%p) bad refcnt\n", __func__, p); |
1335 | | /* NOTREACHED */ |
1336 | 895k | } else if (--p->rt_refcnt == 0) { |
1337 | | /* |
1338 | | * Release any idle reference count held on the interface; |
1339 | | * if the route is eligible, still UP and the refcnt becomes |
1340 | | * non-zero at some point in the future before it is purged from |
1341 | | * the routing table, rt_set_idleref() will undo this. |
1342 | | */ |
1343 | 98.8k | rt_clear_idleref(p); |
1344 | 98.8k | } |
1345 | | |
1346 | 895k | if (rte_debug & RTD_DEBUG) { |
1347 | 0 | rtunref_audit((struct rtentry_dbg *)p); |
1348 | 0 | } |
1349 | | |
1350 | | /* Return new value */ |
1351 | 895k | return p->rt_refcnt; |
1352 | 895k | } |
1353 | | |
1354 | | static inline void |
1355 | | rtunref_audit(struct rtentry_dbg *rte) |
1356 | 0 | { |
1357 | 0 | uint16_t idx; |
1358 | |
1359 | 0 | if (rte->rtd_inuse != RTD_INUSE) { |
1360 | 0 | panic("rtunref: on freed rte=%p\n", rte); |
1361 | | /* NOTREACHED */ |
1362 | 0 | } |
1363 | 0 | idx = atomic_add_16_ov(&rte->rtd_refrele_cnt, 1) % CTRACE_HIST_SIZE; |
1364 | 0 | if (rte_debug & RTD_TRACE) { |
1365 | 0 | ctrace_record(&rte->rtd_refrele[idx]); |
1366 | 0 | } |
1367 | 0 | } |
1368 | | |
1369 | | /* |
1370 | | * Add a reference count to an rtentry. |
1371 | | */ |
1372 | | void |
1373 | | rtref(struct rtentry *p) |
1374 | 895k | { |
1375 | 895k | RT_LOCK_ASSERT_HELD(p); |
1376 | | |
1377 | 895k | VERIFY((p->rt_flags & RTF_DEAD) == 0); |
1378 | 895k | if (++p->rt_refcnt == 0) { |
1379 | 0 | panic("%s(%p) bad refcnt\n", __func__, p); |
1380 | | /* NOTREACHED */ |
1381 | 895k | } else if (p->rt_refcnt == 1) { |
1382 | | /* |
1383 | | * Hold an idle reference count on the interface, |
1384 | | * if the route is eligible for it. |
1385 | | */ |
1386 | 98.8k | rt_set_idleref(p); |
1387 | 98.8k | } |
1388 | | |
1389 | 895k | if (rte_debug & RTD_DEBUG) { |
1390 | 0 | rtref_audit((struct rtentry_dbg *)p); |
1391 | 0 | } |
1392 | 895k | } |
1393 | | |
1394 | | static inline void |
1395 | | rtref_audit(struct rtentry_dbg *rte) |
1396 | 0 | { |
1397 | 0 | uint16_t idx; |
1398 | |
1399 | 0 | if (rte->rtd_inuse != RTD_INUSE) { |
1400 | 0 | panic("rtref_audit: on freed rte=%p\n", rte); |
1401 | | /* NOTREACHED */ |
1402 | 0 | } |
1403 | 0 | idx = atomic_add_16_ov(&rte->rtd_refhold_cnt, 1) % CTRACE_HIST_SIZE; |
1404 | 0 | if (rte_debug & RTD_TRACE) { |
1405 | 0 | ctrace_record(&rte->rtd_refhold[idx]); |
1406 | 0 | } |
1407 | 0 | } |
1408 | | |
1409 | | void |
1410 | | rtsetifa(struct rtentry *rt, struct ifaddr *ifa) |
1411 | 7.99k | { |
1412 | 7.99k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
1413 | | |
1414 | 7.99k | RT_LOCK_ASSERT_HELD(rt); |
1415 | | |
1416 | 7.99k | if (rt->rt_ifa == ifa) { |
1417 | 0 | return; |
1418 | 0 | } |
1419 | | |
1420 | | /* Become a regular mutex, just in case */ |
1421 | 7.99k | RT_CONVERT_LOCK(rt); |
1422 | | |
1423 | | /* Release the old ifa */ |
1424 | 7.99k | if (rt->rt_ifa) { |
1425 | 0 | IFA_REMREF(rt->rt_ifa); |
1426 | 0 | } |
1427 | | |
1428 | | /* Set rt_ifa */ |
1429 | 7.99k | rt->rt_ifa = ifa; |
1430 | | |
1431 | | /* Take a reference to the ifa */ |
1432 | 7.99k | if (rt->rt_ifa) { |
1433 | 7.99k | IFA_ADDREF(rt->rt_ifa); |
1434 | 7.99k | } |
1435 | 7.99k | } |
1436 | | |
1437 | | /* |
1438 | | * Force a routing table entry to the specified |
1439 | | * destination to go through the given gateway. |
1440 | | * Normally called as a result of a routing redirect |
1441 | | * message from the network layer. |
1442 | | */ |
1443 | | void |
1444 | | rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, |
1445 | | struct sockaddr *netmask, int flags, struct sockaddr *src, |
1446 | | struct rtentry **rtp) |
1447 | 0 | { |
1448 | 0 | struct rtentry *rt = NULL; |
1449 | 0 | int error = 0; |
1450 | 0 | short *stat = 0; |
1451 | 0 | struct rt_addrinfo info; |
1452 | 0 | struct ifaddr *ifa = NULL; |
1453 | 0 | unsigned int ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE; |
1454 | 0 | struct sockaddr_storage ss; |
1455 | 0 | int af = src->sa_family; |
1456 | |
1457 | 0 | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); |
1458 | 0 | lck_mtx_lock(rnh_lock); |
1459 | | |
1460 | | /* |
1461 | | * Transform src into the internal routing table form for |
1462 | | * comparison against rt_gateway below. |
1463 | | */ |
1464 | 0 | if ((af == AF_INET) || (af == AF_INET6)) { |
1465 | 0 | src = sa_copy(src, &ss, &ifscope); |
1466 | 0 | } |
1467 | | |
1468 | | /* |
1469 | | * Verify the gateway is directly reachable; if scoped routing |
1470 | | * is enabled, verify that it is reachable from the interface |
1471 | | * on which the ICMP redirect arrived. |
1472 | | */ |
1473 | 0 | if ((ifa = ifa_ifwithnet_scoped(gateway, ifscope)) == NULL) { |
1474 | 0 | error = ENETUNREACH; |
1475 | 0 | goto out; |
1476 | 0 | } |
1477 | | |
1478 | | /* Lookup route to the destination (from the original IP header) */ |
1479 | 0 | rt = rtalloc1_scoped_locked(dst, 0, RTF_CLONING | RTF_PRCLONING, ifscope); |
1480 | 0 | if (rt != NULL) { |
1481 | 0 | RT_LOCK(rt); |
1482 | 0 | } |
1483 | | |
1484 | | /* |
1485 | | * If the redirect isn't from our current router for this dst, |
1486 | | * it's either old or wrong. If it redirects us to ourselves, |
1487 | | * we have a routing loop, perhaps as a result of an interface |
1488 | | * going down recently. Holding rnh_lock here prevents the |
1489 | | * possibility of rt_ifa/ifa's ifa_addr from changing (e.g. |
1490 | | * in_ifinit), so okay to access ifa_addr without locking. |
1491 | | */ |
1492 | 0 | if (!(flags & RTF_DONE) && rt != NULL && |
1493 | 0 | (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr, |
1494 | 0 | ifa->ifa_addr))) { |
1495 | 0 | error = EINVAL; |
1496 | 0 | } else { |
1497 | 0 | IFA_REMREF(ifa); |
1498 | 0 | if ((ifa = ifa_ifwithaddr(gateway))) { |
1499 | 0 | IFA_REMREF(ifa); |
1500 | 0 | ifa = NULL; |
1501 | 0 | error = EHOSTUNREACH; |
1502 | 0 | } |
1503 | 0 | } |
1504 | |
1505 | 0 | if (ifa) { |
1506 | 0 | IFA_REMREF(ifa); |
1507 | 0 | ifa = NULL; |
1508 | 0 | } |
1509 | |
1510 | 0 | if (error) { |
1511 | 0 | if (rt != NULL) { |
1512 | 0 | RT_UNLOCK(rt); |
1513 | 0 | } |
1514 | 0 | goto done; |
1515 | 0 | } |
1516 | | |
1517 | | /* |
1518 | | * Create a new entry if we just got back a wildcard entry |
1519 | | * or the lookup failed. This is necessary for hosts |
1520 | | * which use routing redirects generated by smart gateways |
1521 | | * to dynamically build the routing tables. |
1522 | | */ |
1523 | 0 | if ((rt == NULL) || (rt_mask(rt) != NULL && rt_mask(rt)->sa_len < 2)) { |
1524 | 0 | goto create; |
1525 | 0 | } |
1526 | | /* |
1527 | | * Don't listen to the redirect if it's |
1528 | | * for a route to an interface. |
1529 | | */ |
1530 | 0 | RT_LOCK_ASSERT_HELD(rt); |
1531 | 0 | if (rt->rt_flags & RTF_GATEWAY) { |
1532 | 0 | if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { |
1533 | | /* |
1534 | | * Changing from route to net => route to host. |
1535 | | * Create new route, rather than smashing route |
1536 | | * to net; similar to cloned routes, the newly |
1537 | | * created host route is scoped as well. |
1538 | | */ |
1539 | 0 | create: |
1540 | 0 | if (rt != NULL) { |
1541 | 0 | RT_UNLOCK(rt); |
1542 | 0 | } |
1543 | 0 | flags |= RTF_GATEWAY | RTF_DYNAMIC; |
1544 | 0 | error = rtrequest_scoped_locked(RTM_ADD, dst, |
1545 | 0 | gateway, netmask, flags, NULL, ifscope); |
1546 | 0 | stat = &rtstat.rts_dynamic; |
1547 | 0 | } else { |
1548 | | /* |
1549 | | * Smash the current notion of the gateway to |
1550 | | * this destination. Should check about netmask!!! |
1551 | | */ |
1552 | 0 | rt->rt_flags |= RTF_MODIFIED; |
1553 | 0 | flags |= RTF_MODIFIED; |
1554 | 0 | stat = &rtstat.rts_newgateway; |
1555 | | /* |
1556 | | * add the key and gateway (in one malloc'd chunk). |
1557 | | */ |
1558 | 0 | error = rt_setgate(rt, rt_key(rt), gateway); |
1559 | 0 | RT_UNLOCK(rt); |
1560 | 0 | } |
1561 | 0 | } else { |
1562 | 0 | RT_UNLOCK(rt); |
1563 | 0 | error = EHOSTUNREACH; |
1564 | 0 | } |
1565 | 0 | done: |
1566 | 0 | if (rt != NULL) { |
1567 | 0 | RT_LOCK_ASSERT_NOTHELD(rt); |
1568 | 0 | if (!error) { |
1569 | | /* Enqueue event to refresh flow route entries */ |
1570 | 0 | route_event_enqueue_nwk_wq_entry(rt, NULL, ROUTE_ENTRY_REFRESH, NULL, FALSE); |
1571 | 0 | if (rtp) { |
1572 | 0 | *rtp = rt; |
1573 | 0 | } else { |
1574 | 0 | rtfree_locked(rt); |
1575 | 0 | } |
1576 | 0 | } else { |
1577 | 0 | rtfree_locked(rt); |
1578 | 0 | } |
1579 | 0 | } |
1580 | 0 | out: |
1581 | 0 | if (error) { |
1582 | 0 | rtstat.rts_badredirect++; |
1583 | 0 | } else { |
1584 | 0 | if (stat != NULL) { |
1585 | 0 | (*stat)++; |
1586 | 0 | } |
1587 | |
1588 | 0 | if (af == AF_INET) { |
1589 | 0 | routegenid_inet_update(); |
1590 | 0 | } else if (af == AF_INET6) { |
1591 | 0 | routegenid_inet6_update(); |
1592 | 0 | } |
1593 | 0 | } |
1594 | 0 | lck_mtx_unlock(rnh_lock); |
1595 | 0 | bzero((caddr_t)&info, sizeof(info)); |
1596 | 0 | info.rti_info[RTAX_DST] = dst; |
1597 | 0 | info.rti_info[RTAX_GATEWAY] = gateway; |
1598 | 0 | info.rti_info[RTAX_NETMASK] = netmask; |
1599 | 0 | info.rti_info[RTAX_AUTHOR] = src; |
1600 | 0 | rt_missmsg(RTM_REDIRECT, &info, flags, error); |
1601 | 0 | } |
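
rtredirect() is normally driven from the ICMP input path. A hedged sketch of such a caller; the helper name and sockaddr plumbing are illustrative, not taken from this file:

static void
example_icmp_redirect(struct ifnet *ifp, struct sockaddr_in *dst,
    struct sockaddr_in *newgw, struct sockaddr_in *router)
{
	struct rtentry *rt = NULL;

	/* Host redirect from `router`, pointing `dst` at `newgw` */
	rtredirect(ifp, (struct sockaddr *)dst, (struct sockaddr *)newgw,
	    NULL, RTF_GATEWAY | RTF_HOST, (struct sockaddr *)router, &rt);
	if (rt != NULL) {
		rtfree(rt);     /* *rtp comes back with a reference held */
	}
}
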
1602 | | |
1603 | | /* |
1604 | | * Routing table ioctl interface. |
1605 | | */ |
1606 | | int |
1607 | | rtioctl(unsigned long req, caddr_t data, struct proc *p) |
1608 | 0 | { |
1609 | 0 | #pragma unused(p, req, data) |
1610 | 0 | return ENXIO; |
1611 | 0 | } |
1612 | | |
1613 | | struct ifaddr * |
1614 | | ifa_ifwithroute( |
1615 | | int flags, |
1616 | | const struct sockaddr *dst, |
1617 | | const struct sockaddr *gateway) |
1618 | 0 | { |
1619 | 0 | struct ifaddr *ifa; |
1620 | |
1621 | 0 | lck_mtx_lock(rnh_lock); |
1622 | 0 | ifa = ifa_ifwithroute_locked(flags, dst, gateway); |
1623 | 0 | lck_mtx_unlock(rnh_lock); |
1624 | |
1625 | 0 | return ifa; |
1626 | 0 | } |
1627 | | |
1628 | | struct ifaddr * |
1629 | | ifa_ifwithroute_locked(int flags, const struct sockaddr *dst, |
1630 | | const struct sockaddr *gateway) |
1631 | 4.73k | { |
1632 | 4.73k | return ifa_ifwithroute_common_locked((flags & ~RTF_IFSCOPE), dst, |
1633 | 4.73k | gateway, IFSCOPE_NONE); |
1634 | 4.73k | } |
1635 | | |
1636 | | struct ifaddr * |
1637 | | ifa_ifwithroute_scoped_locked(int flags, const struct sockaddr *dst, |
1638 | | const struct sockaddr *gateway, unsigned int ifscope) |
1639 | 24 | { |
1640 | 24 | if (ifscope != IFSCOPE_NONE) { |
1641 | 24 | flags |= RTF_IFSCOPE; |
1642 | 24 | } else { |
1643 | 0 | flags &= ~RTF_IFSCOPE; |
1644 | 0 | } |
1645 | | |
1646 | 24 | return ifa_ifwithroute_common_locked(flags, dst, gateway, ifscope); |
1647 | 24 | } |
1648 | | |
1649 | | static struct ifaddr * |
1650 | | ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst, |
1651 | | const struct sockaddr *gw, unsigned int ifscope) |
1652 | 4.75k | { |
1653 | 4.75k | struct ifaddr *ifa = NULL; |
1654 | 4.75k | struct rtentry *rt = NULL; |
1655 | 4.75k | struct sockaddr_storage dst_ss, gw_ss; |
1656 | | |
1657 | 4.75k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
1658 | | |
1659 | | /* |
1660 | | * Just in case the sockaddr passed in by the caller |
1661 | | * contains a scope ID, make sure to clear it since |
1662 | | * interface addresses aren't scoped. |
1663 | | */ |
1664 | 4.75k | if (dst != NULL && |
1665 | 4.75k | ((dst->sa_family == AF_INET) || |
1666 | 4.75k | (dst->sa_family == AF_INET6))) { |
1667 | 4.75k | dst = sa_copy(SA((uintptr_t)dst), &dst_ss, NULL); |
1668 | 4.75k | } |
1669 | | |
1670 | 4.75k | if (gw != NULL && |
1671 | 4.75k | ((gw->sa_family == AF_INET) || |
1672 | 4.75k | (gw->sa_family == AF_INET6))) { |
1673 | 4.75k | gw = sa_copy(SA((uintptr_t)gw), &gw_ss, NULL); |
1674 | 4.75k | } |
1675 | | |
1676 | 4.75k | if (!(flags & RTF_GATEWAY)) { |
1677 | | /* |
1678 | | * If we are adding a route to an interface, |
1679 | | * and the interface is a pt to pt link |
1680 | | * we should search for the destination |
1681 | | * as our clue to the interface. Otherwise |
1682 | | * we can use the local address. |
1683 | | */ |
1684 | 4.75k | if (flags & RTF_HOST) { |
1685 | 423 | ifa = ifa_ifwithdstaddr(dst); |
1686 | 423 | } |
1687 | 4.75k | if (ifa == NULL) { |
1688 | 4.75k | ifa = ifa_ifwithaddr_scoped(gw, ifscope); |
1689 | 4.75k | } |
1690 | 4.75k | } else { |
1691 | | /* |
1692 | | * If we are adding a route to a remote net |
1693 | | * or host, the gateway may still be on the |
1694 | | * other end of a pt to pt link. |
1695 | | */ |
1696 | 0 | ifa = ifa_ifwithdstaddr(gw); |
1697 | 0 | } |
1698 | 4.75k | if (ifa == NULL) { |
1699 | 0 | ifa = ifa_ifwithnet_scoped(gw, ifscope); |
1700 | 0 | } |
1701 | 4.75k | if (ifa == NULL) { |
1702 | | /* Workaround to avoid gcc warning regarding const variable */ |
1703 | 0 | rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)dst, |
1704 | 0 | 0, 0, ifscope); |
1705 | 0 | if (rt != NULL) { |
1706 | 0 | RT_LOCK_SPIN(rt); |
1707 | 0 | ifa = rt->rt_ifa; |
1708 | 0 | if (ifa != NULL) { |
1709 | | /* Become a regular mutex */ |
1710 | 0 | RT_CONVERT_LOCK(rt); |
1711 | 0 | IFA_ADDREF(ifa); |
1712 | 0 | } |
1713 | 0 | RT_REMREF_LOCKED(rt); |
1714 | 0 | RT_UNLOCK(rt); |
1715 | 0 | rt = NULL; |
1716 | 0 | } |
1717 | 0 | } |
1718 | | /* |
1719 | | * Holding rnh_lock here prevents the possibility of ifa from |
1720 | | * changing (e.g. in_ifinit), so it is safe to access its |
1721 | | * ifa_addr (here and down below) without locking. |
1722 | | */ |
1723 | 4.75k | if (ifa != NULL && ifa->ifa_addr->sa_family != dst->sa_family) { |
1724 | 0 | struct ifaddr *newifa; |
1725 | | /* Callee adds reference to newifa upon success */ |
1726 | 0 | newifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); |
1727 | 0 | if (newifa != NULL) { |
1728 | 0 | IFA_REMREF(ifa); |
1729 | 0 | ifa = newifa; |
1730 | 0 | } |
1731 | 0 | } |
1732 | | /* |
1733 | | * If we are adding a gateway, it is quite possible that the |
1734 | | * routing table has a static entry in place for the gateway, |
1735 | | * that may not agree with info garnered from the interfaces. |
1736 | | * The routing table should carry more precedence than the |
1737 | | * interfaces in this matter. Must be careful not to stomp |
1738 | | * on new entries from rtinit, hence (ifa->ifa_addr != gw). |
1739 | | */ |
1740 | 4.75k | if ((ifa == NULL || |
1741 | 4.75k | !equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gw)) && |
1742 | 4.75k | (rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)gw, |
1743 | 0 | 0, 0, ifscope)) != NULL) { |
1744 | 0 | if (ifa != NULL) { |
1745 | 0 | IFA_REMREF(ifa); |
1746 | 0 | } |
1747 | 0 | RT_LOCK_SPIN(rt); |
1748 | 0 | ifa = rt->rt_ifa; |
1749 | 0 | if (ifa != NULL) { |
1750 | | /* Become a regular mutex */ |
1751 | 0 | RT_CONVERT_LOCK(rt); |
1752 | 0 | IFA_ADDREF(ifa); |
1753 | 0 | } |
1754 | 0 | RT_REMREF_LOCKED(rt); |
1755 | 0 | RT_UNLOCK(rt); |
1756 | 0 | } |
1757 | | /* |
1758 | | * If an interface scope was specified, the interface index of |
1759 | | * the found ifaddr must be equivalent to that of the scope; |
1760 | | * otherwise there is no match. |
1761 | | */ |
1762 | 4.75k | if ((flags & RTF_IFSCOPE) && |
1763 | 4.75k | ifa != NULL && ifa->ifa_ifp->if_index != ifscope) { |
1764 | 0 | IFA_REMREF(ifa); |
1765 | 0 | ifa = NULL; |
1766 | 0 | } |
1767 | | |
1768 | | /* |
1769 | | * ifa's address family must match destination's address family |
1770 | | * after all is said and done. |
1771 | | */ |
1772 | 4.75k | if (ifa != NULL && |
1773 | 4.75k | ifa->ifa_addr->sa_family != dst->sa_family) { |
1774 | 0 | IFA_REMREF(ifa); |
1775 | 0 | ifa = NULL; |
1776 | 0 | } |
1777 | | |
1778 | 4.75k | return ifa; |
1779 | 4.75k | } |
1780 | | |
1781 | | static int rt_fixdelete(struct radix_node *, void *); |
1782 | | static int rt_fixchange(struct radix_node *, void *); |
1783 | | |
1784 | | struct rtfc_arg { |
1785 | | struct rtentry *rt0; |
1786 | | struct radix_node_head *rnh; |
1787 | | }; |
1788 | | |
1789 | | int |
1790 | | rtrequest_locked(int req, struct sockaddr *dst, struct sockaddr *gateway, |
1791 | | struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) |
1792 | 19.1k | { |
1793 | 19.1k | return rtrequest_common_locked(req, dst, gateway, netmask, |
1794 | 19.1k | (flags & ~RTF_IFSCOPE), ret_nrt, IFSCOPE_NONE); |
1795 | 19.1k | } |
1796 | | |
1797 | | int |
1798 | | rtrequest_scoped_locked(int req, struct sockaddr *dst, |
1799 | | struct sockaddr *gateway, struct sockaddr *netmask, int flags, |
1800 | | struct rtentry **ret_nrt, unsigned int ifscope) |
1801 | 24 | { |
1802 | 24 | if (ifscope != IFSCOPE_NONE) { |
1803 | 24 | flags |= RTF_IFSCOPE; |
1804 | 24 | } else { |
1805 | 0 | flags &= ~RTF_IFSCOPE; |
1806 | 0 | } |
1807 | | |
1808 | 24 | return rtrequest_common_locked(req, dst, gateway, netmask, |
1809 | 24 | flags, ret_nrt, ifscope); |
1810 | 24 | } |
1811 | | |
1812 | | /* |
1813 | | * Do appropriate manipulations of a routing tree given all the bits of |
1814 | | * info needed. |
1815 | | * |
1816 | | * Storing the scope ID in the radix key is an internal job that should be |
1817 | | * left to routines in this module. Callers should specify the scope value |
1818 | | * to the "scoped" variants of route routines instead of manipulating the |
1819 | | * key itself. This is typically done when creating a scoped route, e.g. |
1820 | | * rtrequest(RTM_ADD). Once such a route is created and marked with the |
1821 | | * RTF_IFSCOPE flag, callers can simply use its rt_key(rt) to clone it |
1822 | | * (RTM_RESOLVE) or to remove it (RTM_DELETE). An exception to this is |
1823 | | * during certain routing socket operations where the search key might be |
1824 | | * derived from the routing message itself, in which case the caller must |
1825 | | * specify the destination address and scope value for RTM_ADD/RTM_DELETE. |
1826 | | */ |
1827 | | static int |
1828 | | rtrequest_common_locked(int req, struct sockaddr *dst0, |
1829 | | struct sockaddr *gateway, struct sockaddr *netmask, int flags, |
1830 | | struct rtentry **ret_nrt, unsigned int ifscope) |
1831 | 19.1k | { |
1832 | 19.1k | int error = 0; |
1833 | 19.1k | struct rtentry *rt; |
1834 | 19.1k | struct radix_node *rn; |
1835 | 19.1k | struct radix_node_head *rnh; |
1836 | 19.1k | struct ifaddr *ifa = NULL; |
1837 | 19.1k | struct sockaddr *ndst, *dst = dst0; |
1838 | 19.1k | struct sockaddr_storage ss, mask; |
1839 | 19.1k | struct timeval caltime; |
1840 | 19.1k | int af = dst->sa_family; |
1841 | 19.1k | void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *); |
1842 | | |
1843 | 19.1k | #define senderr(x) { error = x; goto bad; } |
1844 | | |
1845 | 19.1k | DTRACE_ROUTE6(rtrequest, int, req, struct sockaddr *, dst0, |
1846 | 19.1k | struct sockaddr *, gateway, struct sockaddr *, netmask, |
1847 | 19.1k | int, flags, unsigned int, ifscope); |
1848 | | |
1849 | 19.1k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
1850 | | |
1851 | | #if !(DEVELOPMENT || DEBUG) |
1852 | | /* |
1853 | | * Setting the global internet flag external is only for testing |
1854 | | */ |
1855 | | flags &= ~RTF_GLOBAL; |
1856 | | #endif /* !(DEVELOPMENT || DEBUG) */ |
1857 | | |
1858 | | /* |
1859 | | * Find the correct routing tree to use for this Address Family |
1860 | | */ |
1861 | 19.1k | if ((rnh = rt_tables[af]) == NULL) { |
1862 | 0 | senderr(ESRCH); |
1863 | 0 | } |
1864 | | /* |
1865 | | * If we are adding a host route then we don't want to put |
1866 | | * a netmask in the tree |
1867 | | */ |
1868 | 19.1k | if (flags & RTF_HOST) { |
1869 | 7.25k | netmask = NULL; |
1870 | 7.25k | } |
1871 | | |
1872 | | /* |
1873 | | * If Scoped Routing is enabled, use a local copy of the destination |
1874 | | * address to store the scope ID into. This logic is repeated below |
1875 | | * in the RTM_RESOLVE handler since the caller does not normally |
1876 | | * specify such a flag during a resolve, as well as for the handling |
1877 | | * of IPv4 link-local addresses; instead, it passes in the route used |
1878 | | * for cloning, from which the scope info is derived. Note also that |
1879 | | * in the case of RTM_DELETE, the address passed in by the caller |
1880 | | * might already contain the scope ID info when it is the key itself, |
1881 | | * thus making RTF_IFSCOPE unnecessary; one instance where it is |
1882 | | * explicitly set is inside route_output() as part of handling a |
1883 | | * routing socket request. |
1884 | | */ |
1885 | 19.1k | if (req != RTM_RESOLVE && ((af == AF_INET) || (af == AF_INET6))) { |
1886 | | /* Transform dst into the internal routing table form */ |
1887 | 15.9k | dst = sa_copy(dst, &ss, &ifscope); |
1888 | | |
1889 | | /* Transform netmask into the internal routing table form */ |
1890 | 15.9k | if (netmask != NULL) { |
1891 | 8.68k | netmask = ma_copy(af, netmask, &mask, ifscope); |
1892 | 8.68k | } |
1893 | | |
1894 | 15.9k | if (ifscope != IFSCOPE_NONE) { |
1895 | 2.26k | flags |= RTF_IFSCOPE; |
1896 | 2.26k | } |
1897 | 15.9k | } else if ((flags & RTF_IFSCOPE) && |
1898 | 3.24k | (af != AF_INET && af != AF_INET6)) { |
1899 | 0 | senderr(EINVAL); |
1900 | 0 | } |
1901 | | |
1902 | 19.1k | if (ifscope == IFSCOPE_NONE) { |
1903 | 16.9k | flags &= ~RTF_IFSCOPE; |
1904 | 16.9k | } |
1905 | | |
1906 | 19.1k | switch (req) { |
1907 | 11.1k | case RTM_DELETE: { |
1908 | 11.1k | struct rtentry *gwrt = NULL; |
1909 | 11.1k | boolean_t was_router = FALSE; |
1910 | 11.1k | uint32_t old_rt_refcnt = 0; |
1911 | | /* |
1912 | | * Remove the item from the tree and return it. |
1913 | | * Complain if it is not there and do no more processing. |
1914 | | */ |
1915 | 11.1k | if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL) { |
1916 | 4.22k | senderr(ESRCH); |
1917 | 0 | } |
1918 | 6.96k | if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) { |
1919 | 0 | panic("rtrequest delete"); |
1920 | | /* NOTREACHED */ |
1921 | 0 | } |
1922 | 6.96k | rt = (struct rtentry *)rn; |
1923 | | |
1924 | 6.96k | RT_LOCK(rt); |
1925 | 6.96k | old_rt_refcnt = rt->rt_refcnt; |
1926 | 6.96k | rt->rt_flags &= ~RTF_UP; |
1927 | | /* |
1928 | | * Release any idle reference count held on the interface |
1929 | | * as this route is no longer externally visible. |
1930 | | */ |
1931 | 6.96k | rt_clear_idleref(rt); |
1932 | | /* |
1933 | | * Take an extra reference to handle the deletion of a route |
1934 | | * entry whose reference count is already 0; e.g. an expiring |
1935 | | * cloned route entry or an entry that was added to the table |
1936 | | * with 0 reference. If the caller is interested in this route, |
1937 | | * we will return it with the reference intact. Otherwise we |
1938 | | * will decrement the reference via rtfree_locked() and then |
1939 | | * possibly deallocate it. |
1940 | | */ |
1941 | 6.96k | RT_ADDREF_LOCKED(rt); |
1942 | | |
1943 | | /* |
1944 | | * For consistency, in case the caller didn't set the flag. |
1945 | | */ |
1946 | 6.96k | rt->rt_flags |= RTF_CONDEMNED; |
1947 | | |
1948 | | /* |
1949 | | * Clear RTF_ROUTER if it's set. |
1950 | | */ |
1951 | 6.96k | if (rt->rt_flags & RTF_ROUTER) { |
1952 | 0 | was_router = TRUE; |
1953 | 0 | VERIFY(rt->rt_flags & RTF_HOST); |
1954 | 0 | rt->rt_flags &= ~RTF_ROUTER; |
1955 | 0 | } |
1956 | | |
1957 | | /* |
1958 | | * Enqueue work item to invoke callback for this route entry |
1959 | | * |
1960 | | * If the old count is 0, it implies that last reference is being |
1961 | | * removed and there's no one listening for this route event. |
1962 | | */ |
1963 | 6.96k | if (old_rt_refcnt != 0) { |
1964 | 6.52k | route_event_enqueue_nwk_wq_entry(rt, NULL, |
1965 | 6.52k | ROUTE_ENTRY_DELETED, NULL, TRUE); |
1966 | 6.52k | } |
1967 | | |
1968 | | /* |
1969 | | * Now search what's left of the subtree for any cloned |
1970 | | * routes which might have been formed from this node. |
1971 | | */ |
1972 | 6.96k | if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) && |
1973 | 6.96k | rt_mask(rt)) { |
1974 | 136 | RT_UNLOCK(rt); |
1975 | 136 | rnh->rnh_walktree_from(rnh, dst, rt_mask(rt), |
1976 | 136 | rt_fixdelete, rt); |
1977 | 136 | RT_LOCK(rt); |
1978 | 136 | } |
1979 | | |
1980 | 6.96k | if (was_router) { |
1981 | 0 | struct route_event rt_ev; |
1982 | 0 | route_event_init(&rt_ev, rt, NULL, ROUTE_LLENTRY_DELETED); |
1983 | 0 | RT_UNLOCK(rt); |
1984 | 0 | (void) rnh->rnh_walktree(rnh, |
1985 | 0 | route_event_walktree, (void *)&rt_ev); |
1986 | 0 | RT_LOCK(rt); |
1987 | 0 | } |
1988 | | |
1989 | | /* |
1990 | | * Remove any external references we may have. |
1991 | | */ |
1992 | 6.96k | if ((gwrt = rt->rt_gwroute) != NULL) { |
1993 | 0 | rt->rt_gwroute = NULL; |
1994 | 0 | } |
1995 | | |
1996 | | /* |
1997 | | * give the protocol a chance to keep things in sync. |
1998 | | */ |
1999 | 6.96k | if ((ifa = rt->rt_ifa) != NULL) { |
2000 | 6.96k | IFA_LOCK_SPIN(ifa); |
2001 | 6.96k | ifa_rtrequest = ifa->ifa_rtrequest; |
2002 | 6.96k | IFA_UNLOCK(ifa); |
2003 | 6.96k | if (ifa_rtrequest != NULL) { |
2004 | 6.96k | ifa_rtrequest(RTM_DELETE, rt, NULL); |
2005 | 6.96k | } |
2006 | | /* keep reference on rt_ifa */ |
2007 | 6.96k | ifa = NULL; |
2008 | 6.96k | } |
2009 | | |
2010 | | /* |
2011 | | * one more rtentry floating around that is not |
2012 | | * linked to the routing table. |
2013 | | */ |
2014 | 6.96k | (void) OSIncrementAtomic(&rttrash); |
2015 | 6.96k | if (rte_debug & RTD_DEBUG) { |
2016 | 0 | TAILQ_INSERT_TAIL(&rttrash_head, |
2017 | 0 | (struct rtentry_dbg *)rt, rtd_trash_link); |
2018 | 0 | } |
2019 | | |
2020 | | /* |
2021 | | * If this is the (non-scoped) default route, clear |
2022 | | * the interface index used for the primary ifscope. |
2023 | | */ |
2024 | 6.96k | if (rt_primary_default(rt, rt_key(rt))) { |
2025 | 0 | set_primary_ifscope(rt_key(rt)->sa_family, |
2026 | 0 | IFSCOPE_NONE); |
2027 | 0 | if ((rt->rt_flags & RTF_STATIC) && |
2028 | 0 | rt_key(rt)->sa_family == PF_INET6) { |
2029 | 0 | trigger_v6_defrtr_select = TRUE; |
2030 | 0 | } |
2031 | 0 | } |
2032 | | |
2033 | 6.96k | #if NECP |
2034 | | /* |
2035 | | * If this is a change in a default route, update |
2036 | | * necp client watchers to re-evaluate |
2037 | | */ |
2038 | 6.96k | if (SA_DEFAULT(rt_key(rt))) { |
2039 | 0 | if (rt->rt_ifp != NULL) { |
2040 | 0 | ifnet_touch_lastupdown(rt->rt_ifp); |
2041 | 0 | } |
2042 | 0 | necp_update_all_clients(); |
2043 | 0 | } |
2044 | 6.96k | #endif /* NECP */ |
2045 | | |
2046 | 6.96k | RT_UNLOCK(rt); |
2047 | | |
2048 | | /* |
2049 | | * This might result in another rtentry being freed if |
2050 | | * we held its last reference. Do this after the rtentry |
2051 | | * lock is dropped above, as it could lead to the same |
2052 | | * lock being acquired if gwrt is a clone of rt. |
2053 | | */ |
2054 | 6.96k | if (gwrt != NULL) { |
2055 | 0 | rtfree_locked(gwrt); |
2056 | 0 | } |
2057 | | |
2058 | | /* |
2059 | | * If the caller wants it, then it can have it, |
2060 | | * but it's up to it to free the rtentry as we won't be |
2061 | | * doing it. |
2062 | | */ |
2063 | 6.96k | if (ret_nrt != NULL) { |
2064 | | /* Return the route to caller with reference intact */ |
2065 | 510 | *ret_nrt = rt; |
2066 | 6.45k | } else { |
2067 | | /* Dereference or deallocate the route */ |
2068 | 6.45k | rtfree_locked(rt); |
2069 | 6.45k | } |
2070 | 6.96k | if (af == AF_INET) { |
2071 | 6.96k | routegenid_inet_update(); |
2072 | 6.96k | } else if (af == AF_INET6) { |
2073 | 0 | routegenid_inet6_update(); |
2074 | 0 | } |
2075 | 6.96k | break; |
2076 | 6.96k | } |
2077 | 3.24k | case RTM_RESOLVE: |
2078 | 3.24k | if (ret_nrt == NULL || (rt = *ret_nrt) == NULL) { |
2079 | 0 | senderr(EINVAL); |
2080 | 0 | } |
2081 | | /* |
2082 | | * According to the UNIX conformance tests, we need to return |
2083 | | * ENETUNREACH when the parent route is RTF_REJECT. |
2084 | | * However, there isn't any point in cloning RTF_REJECT |
2085 | | * routes, so we immediately return an error. |
2086 | | */ |
2087 | 3.24k | if (rt->rt_flags & RTF_REJECT) { |
2088 | 0 | if (rt->rt_flags & RTF_HOST) { |
2089 | 0 | senderr(EHOSTUNREACH); |
2090 | 0 | } else { |
2091 | 0 | senderr(ENETUNREACH); |
2092 | 0 | } |
2093 | 0 | } |
2094 | | /* |
2095 | | * If cloning, we have the parent route given by the caller |
2096 | | * and will use its rt_gateway, rt_rmx as part of the cloning |
2097 | | * process below. Since rnh_lock is held at this point, the |
2098 | | * parent's rt_ifa and rt_gateway will not change, and its |
2099 | | * relevant rt_flags will not change as well. The only thing |
2100 | | * that could change are the metrics, and thus we hold the |
2101 | | * parent route's rt_lock later on during the actual copying |
2102 | | * of rt_rmx. |
2103 | | */ |
2104 | 3.24k | ifa = rt->rt_ifa; |
2105 | 3.24k | IFA_ADDREF(ifa); |
2106 | 3.24k | flags = rt->rt_flags & |
2107 | 3.24k | ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC); |
2108 | 3.24k | flags |= RTF_WASCLONED; |
2109 | 3.24k | gateway = rt->rt_gateway; |
2110 | 3.24k | if ((netmask = rt->rt_genmask) == NULL) { |
2111 | 3.24k | flags |= RTF_HOST; |
2112 | 3.24k | } |
2113 | | |
2114 | 3.24k | if (af != AF_INET && af != AF_INET6) { |
2115 | 0 | goto makeroute; |
2116 | 0 | } |
2117 | | |
2118 | | /* |
2119 | | * When scoped routing is enabled, cloned entries are |
2120 | | * always scoped according to the interface portion of |
2121 | | * the parent route. The exception to this are IPv4 |
2122 | | * link local addresses, or those routes that are cloned |
2123 | | * from a RTF_PROXY route. For the latter, the clone |
2124 | | * gets to keep the RTF_PROXY flag. |
2125 | | */ |
2126 | 3.24k | if ((af == AF_INET && |
2127 | 3.24k | IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) || |
2128 | 3.24k | (rt->rt_flags & RTF_PROXY)) { |
2129 | 0 | ifscope = IFSCOPE_NONE; |
2130 | 0 | flags &= ~RTF_IFSCOPE; |
2131 | | /* |
2132 | | * These types of cloned routes aren't currently |
2133 | | * eligible for idle interface reference counting. |
2134 | | */ |
2135 | 0 | flags |= RTF_NOIFREF; |
2136 | 3.24k | } else { |
2137 | 3.24k | if (flags & RTF_IFSCOPE) { |
2138 | 0 | ifscope = (af == AF_INET) ? |
2139 | 0 | sin_get_ifscope(rt_key(rt)) : |
2140 | 0 | sin6_get_ifscope(rt_key(rt)); |
2141 | 3.24k | } else { |
2142 | 3.24k | ifscope = rt->rt_ifp->if_index; |
2143 | 3.24k | flags |= RTF_IFSCOPE; |
2144 | 3.24k | } |
2145 | 3.24k | VERIFY(ifscope != IFSCOPE_NONE); |
2146 | 3.24k | } |
2147 | | |
2148 | | /* |
2149 | | * Transform dst into the internal routing table form, |
2150 | | * clearing out the scope ID field if ifscope isn't set. |
2151 | | */ |
2152 | 3.24k | dst = sa_copy(dst, &ss, (ifscope == IFSCOPE_NONE) ? |
2153 | 3.24k | NULL : &ifscope); |
2154 | | |
2155 | | /* Transform netmask into the internal routing table form */ |
2156 | 3.24k | if (netmask != NULL) { |
2157 | 0 | netmask = ma_copy(af, netmask, &mask, ifscope); |
2158 | 0 | } |
2159 | | |
2160 | 3.24k | goto makeroute; |
2161 | | |
2162 | 4.75k | case RTM_ADD: |
2163 | 4.75k | if ((flags & RTF_GATEWAY) && !gateway) { |
2164 | 0 | panic("rtrequest: RTF_GATEWAY but no gateway"); |
2165 | | /* NOTREACHED */ |
2166 | 0 | } |
2167 | 4.75k | if (flags & RTF_IFSCOPE) { |
2168 | 24 | ifa = ifa_ifwithroute_scoped_locked(flags, dst0, |
2169 | 24 | gateway, ifscope); |
2170 | 4.73k | } else { |
2171 | 4.73k | ifa = ifa_ifwithroute_locked(flags, dst0, gateway); |
2172 | 4.73k | } |
2173 | 4.75k | if (ifa == NULL) { |
2174 | 0 | senderr(ENETUNREACH); |
2175 | 0 | } |
2176 | 7.99k | makeroute: |
2177 | | /* |
2178 | | * We land up here for both RTM_RESOLVE and RTM_ADD |
2179 | | * when we decide to create a route. |
2180 | | */ |
2181 | 7.99k | if ((rt = rte_alloc()) == NULL) { |
2182 | 0 | senderr(ENOBUFS); |
2183 | 0 | } |
2184 | 7.99k | Bzero(rt, sizeof(*rt)); |
2185 | 7.99k | rte_lock_init(rt); |
2186 | 7.99k | eventhandler_lists_ctxt_init(&rt->rt_evhdlr_ctxt); |
2187 | 7.99k | getmicrotime(&caltime); |
2188 | 7.99k | rt->base_calendartime = caltime.tv_sec; |
2189 | 7.99k | rt->base_uptime = net_uptime(); |
2190 | 7.99k | RT_LOCK(rt); |
2191 | 7.99k | rt->rt_flags = RTF_UP | flags; |
2192 | | |
2193 | | /* |
2194 | | * Point the generation ID to the tree's. |
2195 | | */ |
2196 | 7.99k | switch (af) { |
2197 | 7.97k | case AF_INET: |
2198 | 7.97k | rt->rt_tree_genid = &route_genid_inet; |
2199 | 7.97k | break; |
2200 | 25 | case AF_INET6: |
2201 | 25 | rt->rt_tree_genid = &route_genid_inet6; |
2202 | 25 | break; |
2203 | 0 | default: |
2204 | 0 | break; |
2205 | 7.99k | } |
2206 | | |
2207 | | /* |
2208 | | * Add the gateway, possibly re-malloc-ing the storage for it; |
2209 | | * also add the rt_gwroute if possible. |
2210 | | */ |
2211 | 7.99k | if ((error = rt_setgate(rt, dst, gateway)) != 0) { |
2212 | 0 | int tmp = error; |
2213 | 0 | RT_UNLOCK(rt); |
2214 | 0 | nstat_route_detach(rt); |
2215 | 0 | rte_lock_destroy(rt); |
2216 | 0 | rte_free(rt); |
2217 | 0 | senderr(tmp); |
2218 | 0 | } |
2219 | | |
2220 | | /* |
2221 | | * point to the (possibly newly malloc'd) dest address. |
2222 | | */ |
2223 | 7.99k | ndst = rt_key(rt); |
2224 | | |
2225 | | /* |
2226 | | * make sure it contains the value we want (masked if needed). |
2227 | | */ |
2228 | 7.99k | if (netmask) { |
2229 | 4.33k | rt_maskedcopy(dst, ndst, netmask); |
2230 | 4.33k | } else { |
2231 | 3.66k | Bcopy(dst, ndst, dst->sa_len); |
2232 | 3.66k | } |
2233 | | |
2234 | | /* |
2235 | | * Note that we now have a reference to the ifa. |
2236 | | * This moved from below so that rnh->rnh_addaddr() can |
2237 | | * examine the ifa and ifa->ifa_ifp if it so desires. |
2238 | | */ |
2239 | 7.99k | rtsetifa(rt, ifa); |
2240 | 7.99k | rt->rt_ifp = rt->rt_ifa->ifa_ifp; |
2241 | | |
2242 | | /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ |
2243 | | |
2244 | 7.99k | rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask, |
2245 | 7.99k | rnh, rt->rt_nodes); |
2246 | 7.99k | if (rn == 0) { |
2247 | 1.00k | struct rtentry *rt2; |
2248 | | /* |
2249 | | * Uh-oh, we already have one of these in the tree. |
2250 | | * We do a special hack: if the route that's already |
2251 | | * there was generated by the protocol-cloning |
2252 | | * mechanism, then we just blow it away and retry |
2253 | | * the insertion of the new one. |
2254 | | */ |
2255 | 1.00k | if (flags & RTF_IFSCOPE) { |
2256 | 1.00k | rt2 = rtalloc1_scoped_locked(dst0, 0, |
2257 | 1.00k | RTF_CLONING | RTF_PRCLONING, ifscope); |
2258 | 1.00k | } else { |
2259 | 0 | rt2 = rtalloc1_locked(dst, 0, |
2260 | 0 | RTF_CLONING | RTF_PRCLONING); |
2261 | 0 | } |
2262 | 1.00k | if (rt2 && rt2->rt_parent) { |
2263 | | /* |
2264 | | * rnh_lock is held here, so rt_key and |
2265 | | * rt_gateway of rt2 will not change. |
2266 | | */ |
2267 | 0 | (void) rtrequest_locked(RTM_DELETE, rt_key(rt2), |
2268 | 0 | rt2->rt_gateway, rt_mask(rt2), |
2269 | 0 | rt2->rt_flags, 0); |
2270 | 0 | rtfree_locked(rt2); |
2271 | 0 | rn = rnh->rnh_addaddr((caddr_t)ndst, |
2272 | 0 | (caddr_t)netmask, rnh, rt->rt_nodes); |
2273 | 1.00k | } else if (rt2) { |
2274 | | /* undo the extra ref we got */ |
2275 | 1.00k | rtfree_locked(rt2); |
2276 | 1.00k | } |
2277 | 1.00k | } |
2278 | | |
2279 | | /* |
2280 | | * If it still failed to go into the tree, |
2281 | | * then un-make it (this should be a function) |
2282 | | */ |
2283 | 7.99k | if (rn == NULL) { |
2284 | | /* Clear gateway route */ |
2285 | 1.00k | rt_set_gwroute(rt, rt_key(rt), NULL); |
2286 | 1.00k | if (rt->rt_ifa) { |
2287 | 1.00k | IFA_REMREF(rt->rt_ifa); |
2288 | 1.00k | rt->rt_ifa = NULL; |
2289 | 1.00k | } |
2290 | 1.00k | R_Free(rt_key(rt)); |
2291 | 1.00k | RT_UNLOCK(rt); |
2292 | 1.00k | nstat_route_detach(rt); |
2293 | 1.00k | rte_lock_destroy(rt); |
2294 | 1.00k | rte_free(rt); |
2295 | 1.00k | senderr(EEXIST); |
2296 | 0 | } |
2297 | | |
2298 | 6.99k | rt->rt_parent = NULL; |
2299 | | |
2300 | | /* |
2301 | | * If we got here from RESOLVE, then we are cloning so clone |
2302 | | * the rest, and note that we are a clone (and increment the |
2303 | | * parent's references). rnh_lock is still held, which prevents |
2304 | | * a lookup from returning the newly-created route. Hence |
2305 | | * holding and releasing the parent's rt_lock while still |
2306 | | * holding the route's rt_lock is safe since the new route |
2307 | | * is not yet externally visible. |
2308 | | */ |
2309 | 6.99k | if (req == RTM_RESOLVE) { |
2310 | 2.23k | RT_LOCK_SPIN(*ret_nrt); |
2311 | 2.23k | VERIFY((*ret_nrt)->rt_expire == 0 || |
2312 | 2.23k | (*ret_nrt)->rt_rmx.rmx_expire != 0); |
2313 | 2.23k | VERIFY((*ret_nrt)->rt_expire != 0 || |
2314 | 2.23k | (*ret_nrt)->rt_rmx.rmx_expire == 0); |
2315 | 0 | rt->rt_rmx = (*ret_nrt)->rt_rmx; |
2316 | 2.23k | rt_setexpire(rt, (*ret_nrt)->rt_expire); |
2317 | 2.23k | if ((*ret_nrt)->rt_flags & |
2318 | 2.23k | (RTF_CLONING | RTF_PRCLONING)) { |
2319 | 2.23k | rt->rt_parent = (*ret_nrt); |
2320 | 2.23k | RT_ADDREF_LOCKED(*ret_nrt); |
2321 | 2.23k | } |
2322 | 2.23k | RT_UNLOCK(*ret_nrt); |
2323 | 2.23k | } |
2324 | | |
2325 | | /* |
2326 | | * if this protocol has something to add to this then |
2327 | | * allow it to do that as well. |
2328 | | */ |
2329 | 6.99k | IFA_LOCK_SPIN(ifa); |
2330 | 6.99k | ifa_rtrequest = ifa->ifa_rtrequest; |
2331 | 6.99k | IFA_UNLOCK(ifa); |
2332 | 6.99k | if (ifa_rtrequest != NULL) { |
2333 | 6.99k | ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : NULL)); |
2334 | 6.99k | } |
2335 | 6.99k | IFA_REMREF(ifa); |
2336 | 6.99k | ifa = NULL; |
2337 | | |
2338 | | /* |
2339 | | * If this is the (non-scoped) default route, record |
2340 | | * the interface index used for the primary ifscope. |
2341 | | */ |
2342 | 6.99k | if (rt_primary_default(rt, rt_key(rt))) { |
2343 | 0 | set_primary_ifscope(rt_key(rt)->sa_family, |
2344 | 0 | rt->rt_ifp->if_index); |
2345 | 0 | } |
2346 | | |
2347 | 6.99k | #if NECP |
2348 | | /* |
2349 | | * If this is a change in a default route, update |
2350 | | * necp client watchers to re-evaluate |
2351 | | */ |
2352 | 6.99k | if (SA_DEFAULT(rt_key(rt))) { |
2353 | | /* |
2354 | | * Mark default routes as (potentially) leading to the global internet; |
2355 | | * this can be used for policy decisions. |
2356 | | * The clone routes will inherit this flag. |
2357 | | * We check against the host flag as this works for default routes that have |
2358 | | * a gateway and default routes when all subnets are local. |
2359 | | */ |
2360 | 0 | if (req == RTM_ADD && (rt->rt_flags & RTF_HOST) == 0) { |
2361 | 0 | rt->rt_flags |= RTF_GLOBAL; |
2362 | 0 | } |
2363 | 0 | if (rt->rt_ifp != NULL) { |
2364 | 0 | ifnet_touch_lastupdown(rt->rt_ifp); |
2365 | 0 | } |
2366 | 0 | necp_update_all_clients(); |
2367 | 0 | } |
2368 | 6.99k | #endif /* NECP */ |
2369 | | |
2370 | | /* |
2371 | | * actually return a resultant rtentry and |
2372 | | * give the caller a single reference. |
2373 | | */ |
2374 | 6.99k | if (ret_nrt) { |
2375 | 6.98k | *ret_nrt = rt; |
2376 | 6.98k | RT_ADDREF_LOCKED(rt); |
2377 | 6.98k | } |
2378 | | |
2379 | 6.99k | if (af == AF_INET) { |
2380 | 6.96k | routegenid_inet_update(); |
2381 | 6.96k | } else if (af == AF_INET6) { |
2382 | 25 | routegenid_inet6_update(); |
2383 | 25 | } |
2384 | | |
2385 | 6.99k | RT_GENID_SYNC(rt); |
2386 | | |
2387 | | /* |
2388 | | * We repeat the same procedures from rt_setgate() here |
2389 | | * because they weren't completed when we called it earlier, |
2390 | | * since the node was embryonic. |
2391 | | */ |
2392 | 6.99k | if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL) { |
2393 | 0 | rt_set_gwroute(rt, rt_key(rt), rt->rt_gwroute); |
2394 | 0 | } |
2395 | | |
2396 | 6.99k | if (req == RTM_ADD && |
2397 | 6.99k | !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) { |
2398 | 4.33k | struct rtfc_arg arg; |
2399 | 4.33k | arg.rnh = rnh; |
2400 | 4.33k | arg.rt0 = rt; |
2401 | 4.33k | RT_UNLOCK(rt); |
2402 | 4.33k | rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), |
2403 | 4.33k | rt_fixchange, &arg); |
2404 | 4.33k | } else { |
2405 | 2.66k | RT_UNLOCK(rt); |
2406 | 2.66k | } |
2407 | | |
2408 | 6.99k | nstat_route_new_entry(rt); |
2409 | 6.99k | break; |
2410 | 19.1k | } |
2411 | 19.1k | bad: |
2412 | 19.1k | if (ifa) { |
2413 | 1.00k | IFA_REMREF(ifa); |
2414 | 1.00k | } |
2415 | 19.1k | return error; |
2416 | 19.1k | } |
2417 | | #undef senderr |
2418 | | |
2419 | | int |
2420 | | rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway, |
2421 | | struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) |
2422 | 0 | { |
2423 | 0 | int error; |
2424 | 0 | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); |
2425 | 0 | lck_mtx_lock(rnh_lock); |
2426 | 0 | error = rtrequest_locked(req, dst, gateway, netmask, flags, ret_nrt); |
2427 | 0 | lck_mtx_unlock(rnh_lock); |
2428 | 0 | return error; |
2429 | 0 | } |
2430 | | |
2431 | | int |
2432 | | rtrequest_scoped(int req, struct sockaddr *dst, struct sockaddr *gateway, |
2433 | | struct sockaddr *netmask, int flags, struct rtentry **ret_nrt, |
2434 | | unsigned int ifscope) |
2435 | 3 | { |
2436 | 3 | int error; |
2437 | 3 | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); |
2438 | 3 | lck_mtx_lock(rnh_lock); |
2439 | 3 | error = rtrequest_scoped_locked(req, dst, gateway, netmask, flags, |
2440 | 3 | ret_nrt, ifscope); |
2441 | 3 | lck_mtx_unlock(rnh_lock); |
2442 | 3 | return error; |
2443 | 3 | } |
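
Per the scope-ID comment above rtrequest_common_locked(), callers hand the scope to these "scoped" variants instead of encoding it into the radix key themselves. A hypothetical example of adding a scoped, static host route (all values illustrative):

static int
example_add_scoped_host_route(struct sockaddr *dst, struct sockaddr *gw,
    unsigned int ifscope)
{
	struct rtentry *rt = NULL;
	int error;

	error = rtrequest_scoped(RTM_ADD, dst, gw, NULL,
	    RTF_HOST | RTF_GATEWAY | RTF_STATIC, &rt, ifscope);
	if (error == 0 && rt != NULL) {
		rtfree(rt);     /* drop the single reference returned */
	}
	return error;
}
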
2444 | | |
2445 | | /* |
2446 | | * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family'' |
2447 | | * (i.e., the routes related to it by the operation of cloning). This |
2448 | | * routine is iterated over all potential former-child-routes by way of |
2449 | | * rnh->rnh_walktree_from() above, and those that actually are children of |
2450 | | * the late parent (passed in as VP here) are themselves deleted. |
2451 | | */ |
2452 | | static int |
2453 | | rt_fixdelete(struct radix_node *rn, void *vp) |
2454 | 125 | { |
2455 | 125 | struct rtentry *rt = (struct rtentry *)rn; |
2456 | 125 | struct rtentry *rt0 = vp; |
2457 | | |
2458 | 125 | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
2459 | | |
2460 | 125 | RT_LOCK(rt); |
2461 | 125 | if (rt->rt_parent == rt0 && |
2462 | 125 | !(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) { |
2463 | | /* |
2464 | | * Safe to drop rt_lock and use rt_key, since holding |
2465 | | * rnh_lock here prevents another thread from calling |
2466 | | * rt_setgate() on this route. |
2467 | | */ |
2468 | 125 | RT_UNLOCK(rt); |
2469 | 125 | return rtrequest_locked(RTM_DELETE, rt_key(rt), NULL, |
2470 | 125 | rt_mask(rt), rt->rt_flags, NULL); |
2471 | 125 | } |
2472 | 0 | RT_UNLOCK(rt); |
2473 | 0 | return 0; |
2474 | 125 | } |
2475 | | |
2476 | | /* |
2477 | | * This routine is called from rt_setgate() to do the analogous thing for |
2478 | | * adds and changes. There is the added complication in this case of a |
2479 | | * middle insert; i.e., insertion of a new network route between an older |
2480 | | * network route and (cloned) host routes. For this reason, a simple check |
2481 | | * of rt->rt_parent is insufficient; each candidate route must be tested |
2482 | | * against the (mask, value) of the new route (passed as before in vp) |
2483 | | * to see if the new route matches it. |
2484 | | * |
2485 | | * XXX - it may be possible to do fixdelete() for changes and reserve this |
2486 | | * routine just for adds. I'm not sure why I thought it was necessary to do |
2487 | | * changes this way. |
2488 | | */ |
2489 | | static int |
2490 | | rt_fixchange(struct radix_node *rn, void *vp) |
2491 | 4.33k | { |
2492 | 4.33k | struct rtentry *rt = (struct rtentry *)rn; |
2493 | 4.33k | struct rtfc_arg *ap = vp; |
2494 | 4.33k | struct rtentry *rt0 = ap->rt0; |
2495 | 4.33k | struct radix_node_head *rnh = ap->rnh; |
2496 | 4.33k | u_char *xk1, *xm1, *xk2, *xmp; |
2497 | 4.33k | int i, len; |
2498 | | |
2499 | 4.33k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
2500 | | |
2501 | 4.33k | RT_LOCK(rt); |
2502 | | |
2503 | 4.33k | if (!rt->rt_parent || |
2504 | 4.33k | (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) { |
2505 | 4.33k | RT_UNLOCK(rt); |
2506 | 4.33k | return 0; |
2507 | 4.33k | } |
2508 | | |
2509 | 0 | if (rt->rt_parent == rt0) { |
2510 | 0 | goto delete_rt; |
2511 | 0 | } |
2512 | | |
2513 | | /* |
2514 | | * There probably is a function somewhere which does this... |
2515 | | * if not, there should be. |
2516 | | */ |
2517 | 0 | len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len); |
2518 | |
2519 | 0 | xk1 = (u_char *)rt_key(rt0); |
2520 | 0 | xm1 = (u_char *)rt_mask(rt0); |
2521 | 0 | xk2 = (u_char *)rt_key(rt); |
2522 | | |
2523 | | /* |
2524 | | * Avoid applying a less specific route; do this only if the parent |
2525 | | * route (rt->rt_parent) is a network route, since otherwise its mask |
2526 | | * will be NULL if it is a cloning host route. |
2527 | | */ |
2528 | 0 | if ((xmp = (u_char *)rt_mask(rt->rt_parent)) != NULL) { |
2529 | 0 | int mlen = rt_mask(rt->rt_parent)->sa_len; |
2530 | 0 | if (mlen > rt_mask(rt0)->sa_len) { |
2531 | 0 | RT_UNLOCK(rt); |
2532 | 0 | return 0; |
2533 | 0 | } |
2534 | | |
2535 | 0 | for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) { |
2536 | 0 | if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) { |
2537 | 0 | RT_UNLOCK(rt); |
2538 | 0 | return 0; |
2539 | 0 | } |
2540 | 0 | } |
2541 | 0 | } |
2542 | | |
2543 | 0 | for (i = rnh->rnh_treetop->rn_offset; i < len; i++) { |
2544 | 0 | if ((xk2[i] & xm1[i]) != xk1[i]) { |
2545 | 0 | RT_UNLOCK(rt); |
2546 | 0 | return 0; |
2547 | 0 | } |
2548 | 0 | } |
2549 | | |
2550 | | /* |
2551 | | * OK, this node is a clone, and matches the node currently being |
2552 | | * changed/added under the node's mask. So, get rid of it. |
2553 | | */ |
2554 | 0 | delete_rt: |
2555 | | /* |
2556 | | * Safe to drop rt_lock and use rt_key, since holding rnh_lock here |
2557 | | * prevents another thread from calling rt_setgate() on this route. |
2558 | | */ |
2559 | 0 | RT_UNLOCK(rt); |
2560 | 0 | return rtrequest_locked(RTM_DELETE, rt_key(rt), NULL, |
2561 | 0 | rt_mask(rt), rt->rt_flags, NULL); |
2562 | 0 | } |
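
The mask test in rt_fixchange() looks opaque, but `xmp & ~(xmp ^ xm1)` simplifies to `xmp & xm1`, so the loop rejects a candidate unless every bit set in the parent's mask also appears in the new route's mask (a subset check). A standalone user-space demonstration of the identity (illustrative arithmetic check, not kernel code):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint8_t xmp = 0xff;     /* parent mask byte, e.g. from a /8 */
	uint8_t xm1 = 0xf0;     /* new route's mask byte, e.g. a /4 */

	/* identity: xmp & ~(xmp ^ xm1) == xmp & xm1 */
	assert((xmp & ~(xmp ^ xm1)) == (xmp & xm1));

	/* the /8 byte is not a subset of the /4 byte: test trips */
	assert((xmp & ~(xmp ^ xm1)) != xmp);

	/* swapped roles: every /4 bit lies inside the /8 byte */
	assert((xm1 & ~(xm1 ^ xmp)) == xm1);
	return 0;
}
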
2563 | | |
2564 | | /* |
2565 | | * Round up sockaddr len to multiples of 32 bytes. This will reduce |
2566 | | * or even eliminate the need to re-allocate the chunk of memory used |
2567 | | * for rt_key and rt_gateway in the event the gateway portion changes. |
2568 | | * Certain code paths (e.g. IPsec) are notorious for caching the address |
2569 | | * of rt_gateway; this rounding-up would help ensure that the gateway |
2570 | | * portion never gets deallocated (though it may change contents) and |
2571 | | * thus greatly simplifies things. |
2572 | | */ |
2573 | 16.0k | #define SA_SIZE(x) (-(-((uintptr_t)(x)) & -(32))) |
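
The negate-mask-negate trick rounds its argument up to the next multiple of 32 for any positive length. A quick standalone check of the macro's behavior (user-space demo, not kernel code):

#include <assert.h>
#include <stdint.h>

#define SA_SIZE(x) (-(-((uintptr_t)(x)) & -(32)))

int
main(void)
{
	assert(SA_SIZE(1)  == 32);      /* small lengths pad to one unit */
	assert(SA_SIZE(16) == 32);      /* sizeof(struct sockaddr_in)    */
	assert(SA_SIZE(28) == 32);      /* sizeof(struct sockaddr_in6)   */
	assert(SA_SIZE(32) == 32);      /* exact multiples are unchanged */
	assert(SA_SIZE(33) == 64);      /* first length needing two      */
	return 0;
}
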
2574 | | |
2575 | | /* |
2576 | | * Sets the gateway and/or gateway route portion of a route; may be |
2577 | | * called on an existing route to modify the gateway portion. Both |
2578 | | * rt_key and rt_gateway are allocated out of the same memory chunk. |
2579 | | * Route entry lock must be held by caller; this routine will return |
2580 | | * with the lock held. |
2581 | | */ |
2582 | | int |
2583 | | rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) |
2584 | 8.01k | { |
2585 | 8.01k | int dlen = SA_SIZE(dst->sa_len), glen = SA_SIZE(gate->sa_len); |
2586 | 8.01k | struct radix_node_head *rnh = NULL; |
2587 | 8.01k | boolean_t loop = FALSE; |
2588 | | |
2589 | 8.01k | if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) { |
2590 | 0 | return EINVAL; |
2591 | 0 | } |
2592 | | |
2593 | 8.01k | rnh = rt_tables[dst->sa_family]; |
2594 | 8.01k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
2595 | 8.01k | RT_LOCK_ASSERT_HELD(rt); |
2596 | | |
2597 | | /* |
2598 | | * If this is for a route that is on its way of being removed, |
2599 | | * or is temporarily frozen, reject the modification request. |
2600 | | */ |
2601 | 8.01k | if (rt->rt_flags & RTF_CONDEMNED) { |
2602 | 0 | return EBUSY; |
2603 | 0 | } |
2604 | | |
2605 | | /* Add an extra ref for ourselves */ |
2606 | 8.01k | RT_ADDREF_LOCKED(rt); |
2607 | | |
2608 | 8.01k | if (rt->rt_flags & RTF_GATEWAY) { |
2609 | 0 | if ((dst->sa_len == gate->sa_len) && |
2610 | 0 | (dst->sa_family == AF_INET || dst->sa_family == AF_INET6)) { |
2611 | 0 | struct sockaddr_storage dst_ss, gate_ss; |
2612 | |
2613 | 0 | (void) sa_copy(dst, &dst_ss, NULL); |
2614 | 0 | (void) sa_copy(gate, &gate_ss, NULL); |
2615 | |
2616 | 0 | loop = equal(SA(&dst_ss), SA(&gate_ss)); |
2617 | 0 | } else { |
2618 | 0 | loop = (dst->sa_len == gate->sa_len && |
2619 | 0 | equal(dst, gate)); |
2620 | 0 | } |
2621 | 0 | } |
2622 | | |
2623 | | /* |
2624 | | * A (cloning) network route with the destination equal to the gateway |
2625 | | * will create an endless loop (see notes below), so disallow it. |
2626 | | */ |
2627 | 8.01k | if (((rt->rt_flags & (RTF_HOST | RTF_GATEWAY | RTF_LLINFO)) == |
2628 | 8.01k | RTF_GATEWAY) && loop) { |
2629 | | /* Release extra ref */ |
2630 | 0 | RT_REMREF_LOCKED(rt); |
2631 | 0 | return EADDRNOTAVAIL; |
2632 | 0 | } |
2633 | | |
2634 | | /* |
2635 | | * A host route with the destination equal to the gateway |
2636 | | * will interfere with keeping LLINFO in the routing |
2637 | | * table, so disallow it. |
2638 | | */ |
2639 | 8.01k | if (((rt->rt_flags & (RTF_HOST | RTF_GATEWAY | RTF_LLINFO)) == |
2640 | 8.01k | (RTF_HOST | RTF_GATEWAY)) && loop) { |
2641 | | /* |
2642 | | * The route might already exist if this is an RTM_CHANGE |
2643 | | * or a routing redirect, so try to delete it. |
2644 | | */ |
2645 | 0 | if (rt_key(rt) != NULL) { |
2646 | | /* |
2647 | | * Safe to drop rt_lock and use rt_key, rt_gateway, |
2648 | | * since holding rnh_lock here prevents another thread |
2649 | | * from calling rt_setgate() on this route. |
2650 | | */ |
2651 | 0 | RT_UNLOCK(rt); |
2652 | 0 | (void) rtrequest_locked(RTM_DELETE, rt_key(rt), |
2653 | 0 | rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); |
2654 | 0 | RT_LOCK(rt); |
2655 | 0 | } |
2656 | | /* Release extra ref */ |
2657 | 0 | RT_REMREF_LOCKED(rt); |
2658 | 0 | return EADDRNOTAVAIL; |
2659 | 0 | } |
2660 | | |
2661 | | /* |
2662 | | * The destination is not directly reachable. Get a route |
2663 | | * to the next-hop gateway and store it in rt_gwroute. |
2664 | | */ |
2665 | 8.01k | if (rt->rt_flags & RTF_GATEWAY) { |
2666 | 0 | struct rtentry *gwrt; |
2667 | 0 | unsigned int ifscope; |
2668 | |
2669 | 0 | if (dst->sa_family == AF_INET) { |
2670 | 0 | ifscope = sin_get_ifscope(dst); |
2671 | 0 | } else if (dst->sa_family == AF_INET6) { |
2672 | 0 | ifscope = sin6_get_ifscope(dst); |
2673 | 0 | } else { |
2674 | 0 | ifscope = IFSCOPE_NONE; |
2675 | 0 | } |
2676 | |
2677 | 0 | RT_UNLOCK(rt); |
2678 | | /* |
2679 | | * Don't ignore RTF_CLONING, since we prefer that rt_gwroute |
2680 | | * points to a clone rather than a cloning route; see above |
2681 | | * check for cloning loop avoidance (dst == gate). |
2682 | | */ |
2683 | 0 | gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope); |
2684 | 0 | if (gwrt != NULL) { |
2685 | 0 | RT_LOCK_ASSERT_NOTHELD(gwrt); |
2686 | 0 | } |
2687 | 0 | RT_LOCK(rt); |
2688 | | |
2689 | | /* |
2690 | | * Cloning loop avoidance: |
2691 | | * |
2692 | | * In the presence of protocol-cloning and bad configuration, |
2693 | | * it is possible to get stuck in bottomless mutual recursion |
2694 | | * (rtrequest rt_setgate rtalloc1). We avoid this by not |
2695 | | * allowing protocol-cloning to operate for gateways (which |
2696 | | * is probably the correct choice anyway), and avoid the |
2697 | | * resulting reference loops by disallowing any route to run |
2698 | | * through itself as a gateway. This is obviously mandatory |
2699 | | * when we get rt->rt_output(). It implies that a route to |
2700 | | * the gateway must already be present in the system in order |
2701 | | * for the gateway to be referred to by another route. |
2702 | | */ |
2703 | 0 | if (gwrt == rt) { |
2704 | 0 | RT_REMREF_LOCKED(gwrt); |
2705 | | /* Release extra ref */ |
2706 | 0 | RT_REMREF_LOCKED(rt); |
2707 | 0 | return EADDRINUSE; /* failure */ |
2708 | 0 | } |
2709 | | |
2710 | | /* |
2711 | | * If scoped, the gateway route must use the same interface; |
2712 | | * we're holding rnh_lock now, so rt_gateway and rt_ifp of gwrt |
2713 | | * should not change and are freely accessible. |
2714 | | */ |
2715 | 0 | if (ifscope != IFSCOPE_NONE && (rt->rt_flags & RTF_IFSCOPE) && |
2716 | 0 | gwrt != NULL && gwrt->rt_ifp != NULL && |
2717 | 0 | gwrt->rt_ifp->if_index != ifscope) { |
2718 | 0 | rtfree_locked(gwrt); /* rt != gwrt, no deadlock */ |
2719 | | /* Release extra ref */ |
2720 | 0 | RT_REMREF_LOCKED(rt); |
2721 | 0 | return (rt->rt_flags & RTF_HOST) ? |
2722 | 0 | EHOSTUNREACH : ENETUNREACH; |
2723 | 0 | } |
2724 | | |
2725 | | /* Check again since we dropped the lock above */ |
2726 | 0 | if (rt->rt_flags & RTF_CONDEMNED) { |
2727 | 0 | if (gwrt != NULL) { |
2728 | 0 | rtfree_locked(gwrt); |
2729 | 0 | } |
2730 | | /* Release extra ref */ |
2731 | 0 | RT_REMREF_LOCKED(rt); |
2732 | 0 | return EBUSY; |
2733 | 0 | } |
2734 | | |
2735 | | /* Set gateway route; callee adds ref to gwrt if non-NULL */ |
2736 | 0 | rt_set_gwroute(rt, dst, gwrt); |
2737 | | |
2738 | | /* |
2739 | | * In case the (non-scoped) default route gets modified via |
2740 | | * an ICMP redirect, record the interface index used for the |
2741 | | * primary ifscope. Also done in rt_setif() to take care |
2742 | | * of the non-redirect cases. |
2743 | | */ |
2744 | 0 | if (rt_primary_default(rt, dst) && rt->rt_ifp != NULL) { |
2745 | 0 | set_primary_ifscope(dst->sa_family, |
2746 | 0 | rt->rt_ifp->if_index); |
2747 | 0 | } |
2748 | |
2749 | 0 | #if NECP |
2750 | | /* |
2751 | | * If this is a change in a default route, update |
2752 | | * necp client watchers to re-evaluate |
2753 | | */ |
2754 | 0 | if (SA_DEFAULT(dst)) { |
2755 | 0 | necp_update_all_clients(); |
2756 | 0 | } |
2757 | 0 | #endif /* NECP */ |
2758 | | |
2759 | | /* |
2760 | | * Tell the kernel debugger about the new default gateway |
2761 | | * if the gateway route uses the primary interface, or |
2762 | | * if we are in a transient state before the non-scoped |
2763 | | * default gateway is installed (similar to how the system |
2764 | | * was behaving in the past). In future, it would be good |
2765 | | * to do all this only when KDP is enabled. |
2766 | | */ |
2767 | 0 | if ((dst->sa_family == AF_INET) && |
2768 | 0 | gwrt != NULL && gwrt->rt_gateway->sa_family == AF_LINK && |
2769 | 0 | (gwrt->rt_ifp->if_index == get_primary_ifscope(AF_INET) || |
2770 | 0 | get_primary_ifscope(AF_INET) == IFSCOPE_NONE)) { |
2771 | 0 | kdp_set_gateway_mac(SDL((void *)gwrt->rt_gateway)-> |
2772 | 0 | sdl_data); |
2773 | 0 | } |
2774 | | |
2775 | | /* Release extra ref from rtalloc1() */ |
2776 | 0 | if (gwrt != NULL) { |
2777 | 0 | RT_REMREF(gwrt); |
2778 | 0 | } |
2779 | 0 | } |
2780 | | |
2781 | | /* |
2782 | | * Prepare to store the gateway in rt_gateway. Both dst and gateway |
2783 | | * are stored one after the other in the same malloc'd chunk. If we |
2784 | | * have room, reuse the old buffer since rt_gateway already points |
2785 | | * to the right place. Otherwise, malloc a new block and update |
2786 | | * the 'dst' address and point rt_gateway to the right place. |
2787 | | */ |
2788 | 8.01k | if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway->sa_len)) { |
2789 | 7.99k | caddr_t new; |
2790 | | |
2791 | | /* The underlying allocation is done with M_WAITOK set */ |
2792 | 7.99k | R_Malloc(new, caddr_t, dlen + glen); |
2793 | 7.99k | if (new == NULL) { |
2794 | | /* Clear gateway route */ |
2795 | 0 | rt_set_gwroute(rt, dst, NULL); |
2796 | | /* Release extra ref */ |
2797 | 0 | RT_REMREF_LOCKED(rt); |
2798 | 0 | return ENOBUFS; |
2799 | 0 | } |
2800 | | |
2801 | | /* |
2802 | | * Copy from 'dst' and not rt_key(rt) because we can get |
2803 | | * here to initialize a newly allocated route entry, in |
2804 | | * which case rt_key(rt) is NULL (and so is rt_gateway). |
2805 | | */ |
2806 | 7.99k | bzero(new, dlen + glen); |
2807 | 7.99k | Bcopy(dst, new, dst->sa_len); |
2808 | 7.99k | R_Free(rt_key(rt)); /* free old block; NULL is okay */ |
2809 | 7.99k | rt->rt_nodes->rn_key = new; |
2810 | 7.99k | rt->rt_gateway = (struct sockaddr *)(new + dlen); |
2811 | 7.99k | } |
2812 | | |
2813 | | /* |
2814 | | * Copy the new gateway value into the memory chunk. |
2815 | | */ |
2816 | 8.01k | Bcopy(gate, rt->rt_gateway, gate->sa_len); |
2817 | | |
2818 | | /* |
2819 | | * For consistency between rt_gateway and rt_key(gwrt). |
2820 | | */ |
2821 | 8.01k | if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL && |
2822 | 8.01k | (rt->rt_gwroute->rt_flags & RTF_IFSCOPE)) { |
2823 | 0 | if (rt->rt_gateway->sa_family == AF_INET && |
2824 | 0 | rt_key(rt->rt_gwroute)->sa_family == AF_INET) { |
2825 | 0 | sin_set_ifscope(rt->rt_gateway, |
2826 | 0 | sin_get_ifscope(rt_key(rt->rt_gwroute))); |
2827 | 0 | } else if (rt->rt_gateway->sa_family == AF_INET6 && |
2828 | 0 | rt_key(rt->rt_gwroute)->sa_family == AF_INET6) { |
2829 | 0 | sin6_set_ifscope(rt->rt_gateway, |
2830 | 0 | sin6_get_ifscope(rt_key(rt->rt_gwroute))); |
2831 | 0 | } |
2832 | 0 | } |
2833 | | |
2834 | | /* |
2835 | | * This isn't going to do anything useful for host routes, so |
2836 | | * don't bother. Also make sure we have a reasonable mask |
2837 | | * (we don't yet have one during adds). |
2838 | | */ |
2839 | 8.01k | if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) { |
2840 | 0 | struct rtfc_arg arg; |
2841 | 0 | arg.rnh = rnh; |
2842 | 0 | arg.rt0 = rt; |
2843 | 0 | RT_UNLOCK(rt); |
2844 | 0 | rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), |
2845 | 0 | rt_fixchange, &arg); |
2846 | 0 | RT_LOCK(rt); |
2847 | 0 | } |
2848 | | |
2849 | | /* Release extra ref */ |
2850 | 8.01k | RT_REMREF_LOCKED(rt); |
2851 | 8.01k | return 0; |
2852 | 8.01k | } |
2853 | | |
2854 | | #undef SA_SIZE |
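
A sketch of the single R_Malloc'd chunk that rt_setgate() maintains; because both halves are SA_SIZE()-rounded, a gateway of equal or smaller rounded size is copied in place and the rt_gateway address cached by callers stays valid:

/*
 * rn_key -> [ dst: dlen bytes ][ gateway: glen bytes ]
 *                               ^
 *                               rt_gateway = rn_key + dlen
 *
 * dlen = SA_SIZE(dst->sa_len), glen = SA_SIZE(gate->sa_len)
 */
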
2855 | | |
2856 | | void |
2857 | | rt_set_gwroute(struct rtentry *rt, struct sockaddr *dst, struct rtentry *gwrt) |
2858 | 1.00k | { |
2859 | 1.00k | boolean_t gwrt_isrouter; |
2860 | | |
2861 | 1.00k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
2862 | 1.00k | RT_LOCK_ASSERT_HELD(rt); |
2863 | | |
2864 | 1.00k | if (gwrt != NULL) { |
2865 | 0 | RT_ADDREF(gwrt); /* for this routine */ |
2866 | 0 | } |
2867 | | /* |
2868 | | * Get rid of existing gateway route; if rt_gwroute is already |
2869 | | * set to gwrt, this is slightly redundant (though safe since |
2870 | | * we held an extra ref above) but makes the code simpler. |
2871 | | */ |
2872 | 1.00k | if (rt->rt_gwroute != NULL) { |
2873 | 0 | struct rtentry *ogwrt = rt->rt_gwroute; |
2874 | |
2875 | 0 | VERIFY(rt != ogwrt); /* sanity check */ |
2876 | 0 | rt->rt_gwroute = NULL; |
2877 | 0 | RT_UNLOCK(rt); |
2878 | 0 | rtfree_locked(ogwrt); |
2879 | 0 | RT_LOCK(rt); |
2880 | 0 | VERIFY(rt->rt_gwroute == NULL); |
2881 | 0 | } |
2882 | | |
2883 | | /* |
2884 | | * And associate the new gateway route. |
2885 | | */ |
2886 | 1.00k | if ((rt->rt_gwroute = gwrt) != NULL) { |
2887 | 0 | RT_ADDREF(gwrt); /* for rt */ |
2888 | |
2889 | 0 | if (rt->rt_flags & RTF_WASCLONED) { |
2890 | | /* rt_parent might be NULL if rt is embryonic */ |
2891 | 0 | gwrt_isrouter = (rt->rt_parent != NULL && |
2892 | 0 | SA_DEFAULT(rt_key(rt->rt_parent)) && |
2893 | 0 | !RT_HOST(rt->rt_parent)); |
2894 | 0 | } else { |
2895 | 0 | gwrt_isrouter = (SA_DEFAULT(dst) && !RT_HOST(rt)); |
2896 | 0 | } |
2897 | | |
2898 | | /* If gwrt points to a default router, mark it accordingly */ |
2899 | 0 | if (gwrt_isrouter && RT_HOST(gwrt) && |
2900 | 0 | !(gwrt->rt_flags & RTF_ROUTER)) { |
2901 | 0 | RT_LOCK(gwrt); |
2902 | 0 | gwrt->rt_flags |= RTF_ROUTER; |
2903 | 0 | RT_UNLOCK(gwrt); |
2904 | 0 | } |
2905 | |
2906 | 0 | RT_REMREF(gwrt); /* for this routine */ |
2907 | 0 | } |
2908 | 1.00k | } |
2909 | | |
2910 | | static void |
2911 | | rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, |
2912 | | const struct sockaddr *netmask) |
2913 | 13.1k | { |
2914 | 13.1k | const char *netmaskp = &netmask->sa_data[0]; |
2915 | 13.1k | const char *srcp = &src->sa_data[0]; |
2916 | 13.1k | char *dstp = &dst->sa_data[0]; |
2917 | 13.1k | const char *maskend = (char *)dst |
2918 | 13.1k | + MIN(netmask->sa_len, src->sa_len); |
2919 | 13.1k | const char *srcend = (char *)dst + src->sa_len; |
2920 | | |
2921 | 13.1k | dst->sa_len = src->sa_len; |
2922 | 13.1k | dst->sa_family = src->sa_family; |
2923 | | |
2924 | 83.1k | while (dstp < maskend) { |
2925 | 69.9k | *dstp++ = *srcp++ & *netmaskp++; |
2926 | 69.9k | } |
2927 | 13.1k | if (dstp < srcend) { |
2928 | 4.49k | memset(dstp, 0, (size_t)(srcend - dstp)); |
2929 | 4.49k | } |
2930 | 13.1k | } |
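The masking loop above is easiest to see on a concrete address. Below is a minimal standalone userspace sketch (not part of XNU; main() and the buffer names are mine) of the same byte-wise AND applied to an AF_INET address:

#include <stdio.h>
#include <arpa/inet.h>

int
main(void)
{
	struct in_addr src, mask, dst;
	char buf[INET_ADDRSTRLEN];

	inet_pton(AF_INET, "192.0.2.77", &src);     /* host address */
	inet_pton(AF_INET, "255.255.255.0", &mask); /* /24 netmask */

	/* Same idea as the while loop in rt_maskedcopy(): dst = src & mask */
	const unsigned char *s = (const unsigned char *)&src;
	const unsigned char *m = (const unsigned char *)&mask;
	unsigned char *d = (unsigned char *)&dst;
	for (size_t i = 0; i < sizeof(dst); i++) {
		d[i] = s[i] & m[i];
	}

	inet_ntop(AF_INET, &dst, buf, sizeof(buf));
	printf("masked: %s\n", buf); /* prints "masked: 192.0.2.0" */
	return 0;
}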
2931 | | |
2932 | | /* |
2933 | | * Lookup an AF_INET/AF_INET6 scoped or non-scoped route depending on the |
2934 | | * ifscope value passed in by the caller (IFSCOPE_NONE implies non-scoped). |
2935 | | */ |
2936 | | static struct radix_node * |
2937 | | node_lookup(struct sockaddr *dst, struct sockaddr *netmask, |
2938 | | unsigned int ifscope) |
2939 | 1.21M | { |
2940 | 1.21M | struct radix_node_head *rnh; |
2941 | 1.21M | struct radix_node *rn; |
2942 | 1.21M | struct sockaddr_storage ss, mask; |
2943 | 1.21M | int af = dst->sa_family; |
2944 | 1.21M | struct matchleaf_arg ma = { .ifscope = ifscope }; |
2945 | 1.21M | rn_matchf_t *f = rn_match_ifscope; |
2946 | 1.21M | void *w = &ma; |
2947 | | |
2948 | 1.21M | if (af != AF_INET && af != AF_INET6) { |
2949 | 0 | return NULL; |
2950 | 0 | } |
2951 | | |
2952 | 1.21M | rnh = rt_tables[af]; |
2953 | | |
2954 | | /* |
2955 | | * Transform dst into the internal routing table form, |
2956 | | * clearing out the scope ID field if ifscope isn't set. |
2957 | | */ |
2958 | 1.21M | dst = sa_copy(dst, &ss, (ifscope == IFSCOPE_NONE) ? NULL : &ifscope); |
2959 | | |
2960 | | /* Transform netmask into the internal routing table form */ |
2961 | 1.21M | if (netmask != NULL) { |
2962 | 0 | netmask = ma_copy(af, netmask, &mask, ifscope); |
2963 | 0 | } |
2964 | | |
2965 | 1.21M | if (ifscope == IFSCOPE_NONE) { |
2966 | 681k | f = w = NULL; |
2967 | 681k | } |
2968 | | |
2969 | 1.21M | rn = rnh->rnh_lookup_args(dst, netmask, rnh, f, w); |
2970 | 1.21M | if (rn != NULL && (rn->rn_flags & RNF_ROOT)) { |
2971 | 0 | rn = NULL; |
2972 | 0 | } |
2973 | | |
2974 | 1.21M | return rn; |
2975 | 1.21M | } |
2976 | | |
2977 | | /* |
2978 | | * Lookup the AF_INET/AF_INET6 non-scoped default route. |
2979 | | */ |
2980 | | static struct radix_node * |
2981 | | node_lookup_default(int af) |
2982 | 514k | { |
2983 | 514k | struct radix_node_head *rnh; |
2984 | | |
2985 | 514k | VERIFY(af == AF_INET || af == AF_INET6); |
2986 | 0 | rnh = rt_tables[af]; |
2987 | | |
2988 | 514k | return af == AF_INET ? rnh->rnh_lookup(&sin_def, NULL, rnh) : |
2989 | 514k | rnh->rnh_lookup(&sin6_def, NULL, rnh); |
2990 | 514k | } |
2991 | | |
2992 | | boolean_t |
2993 | | rt_ifa_is_dst(struct sockaddr *dst, struct ifaddr *ifa) |
2994 | 0 | { |
2995 | 0 | boolean_t result = FALSE; |
2996 | |
2997 | 0 | if (ifa == NULL || ifa->ifa_addr == NULL) { |
2998 | 0 | return result; |
2999 | 0 | } |
3000 | | |
3001 | 0 | IFA_LOCK_SPIN(ifa); |
3002 | |
3003 | 0 | if (dst->sa_family == ifa->ifa_addr->sa_family && |
3004 | 0 | ((dst->sa_family == AF_INET && |
3005 | 0 | SIN(dst)->sin_addr.s_addr == |
3006 | 0 | SIN(ifa->ifa_addr)->sin_addr.s_addr) || |
3007 | 0 | (dst->sa_family == AF_INET6 && |
3008 | 0 | SA6_ARE_ADDR_EQUAL(SIN6(dst), SIN6(ifa->ifa_addr))))) { |
3009 | 0 | result = TRUE; |
3010 | 0 | } |
3011 | |
3012 | 0 | IFA_UNLOCK(ifa); |
3013 | |
3014 | 0 | return result; |
3015 | 0 | } |
3016 | | |
3017 | | /* |
3018 | | * Common routine to lookup/match a route. It invokes the lookup/matchaddr |
3019 | | * callback which could be address family-specific. The main difference |
3020 | | * between the two (at least for AF_INET/AF_INET6) is that a lookup does |
3021 | | * not alter the expiring state of a route, whereas a match would unexpire |
3022 | | * or revalidate the route. |
3023 | | * |
3024 | | * The optional scope or interface index property of a route allows for a |
3025 | | * per-interface route instance. This permits multiple route entries having |
3026 | | * the same destination (but not necessarily the same gateway) to exist in |
3027 | | * the routing table; each of these entries is specific to the corresponding |
3028 | | * interface. This is made possible by storing the scope ID value into the |
3029 | | * radix key, thus making each route entry unique. These scoped entries |
3030 | | * exist along with the regular, non-scoped entries in the same radix tree |
3031 | | * for a given address family (AF_INET/AF_INET6); the scope logically |
3032 | | * partitions it into multiple per-interface sub-trees. |
3033 | | * |
3034 | | * When a scoped route lookup is performed, the routing table is searched for |
3035 | | * the best match that would result in a route using the same interface as the |
3036 | | * one associated with the scope (the exceptions to this are routes that point
3037 | | * to the loopback interface). The search rule follows the longest matching |
3038 | | * prefix with the additional interface constraint. |
3039 | | */ |
3040 | | static struct rtentry * |
3041 | | rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst, |
3042 | | struct sockaddr *netmask, struct radix_node_head *rnh, unsigned int ifscope) |
3043 | 619k | { |
3044 | 619k | struct radix_node *rn0, *rn = NULL; |
3045 | 619k | int af = dst->sa_family; |
3046 | 619k | struct sockaddr_storage dst_ss; |
3047 | 619k | struct sockaddr_storage mask_ss; |
3048 | 619k | boolean_t dontcare; |
3049 | 619k | #if (DEVELOPMENT || DEBUG) |
3050 | 619k | char dbuf[MAX_SCOPE_ADDR_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; |
3051 | 619k | char s_dst[MAX_IPv6_STR_LEN], s_netmask[MAX_IPv6_STR_LEN]; |
3052 | 619k | #endif |
3053 | 619k | VERIFY(!coarse || ifscope == IFSCOPE_NONE); |
3054 | | |
3055 | 619k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
3056 | | /* |
3057 | | * While we have rnh_lock held, see if we need to schedule the timer. |
3058 | | */ |
3059 | 619k | if (nd6_sched_timeout_want) { |
3060 | 0 | nd6_sched_timeout(NULL, NULL); |
3061 | 0 | } |
3062 | | |
3063 | 619k | if (!lookup_only) { |
3064 | 605k | netmask = NULL; |
3065 | 605k | } |
3066 | | |
3067 | | /* |
3068 | | * Non-scoped route lookup. |
3069 | | */ |
3070 | 619k | if (af != AF_INET && af != AF_INET6) { |
3071 | 0 | rn = rnh->rnh_matchaddr(dst, rnh); |
3072 | | |
3073 | | /* |
3074 | | * Don't return a root node; also, rnh_matchaddr callback |
3075 | | * would have done the necessary work to clear RTPRF_OURS |
3076 | | * for certain protocol families. |
3077 | | */ |
3078 | 0 | if (rn != NULL && (rn->rn_flags & RNF_ROOT)) { |
3079 | 0 | rn = NULL; |
3080 | 0 | } |
3081 | 0 | if (rn != NULL) { |
3082 | 0 | RT_LOCK_SPIN(RT(rn)); |
3083 | 0 | if (!(RT(rn)->rt_flags & RTF_CONDEMNED)) { |
3084 | 0 | RT_ADDREF_LOCKED(RT(rn)); |
3085 | 0 | RT_UNLOCK(RT(rn)); |
3086 | 0 | } else { |
3087 | 0 | RT_UNLOCK(RT(rn)); |
3088 | 0 | rn = NULL; |
3089 | 0 | } |
3090 | 0 | } |
3091 | 0 | return RT(rn); |
3092 | 0 | } |
3093 | | |
3094 | | /* Transform dst/netmask into the internal routing table form */ |
3095 | 619k | dst = sa_copy(dst, &dst_ss, &ifscope); |
3096 | 619k | if (netmask != NULL) { |
3097 | 0 | netmask = ma_copy(af, netmask, &mask_ss, ifscope); |
3098 | 0 | } |
3099 | 619k | dontcare = (ifscope == IFSCOPE_NONE); |
3100 | | |
3101 | 619k | #if (DEVELOPMENT || DEBUG) |
3102 | 619k | if (rt_verbose) { |
3103 | 0 | if (af == AF_INET) { |
3104 | 0 | (void) inet_ntop(af, &SIN(dst)->sin_addr.s_addr, |
3105 | 0 | s_dst, sizeof(s_dst)); |
3106 | 0 | } else { |
3107 | 0 | (void) inet_ntop(af, &SIN6(dst)->sin6_addr, |
3108 | 0 | s_dst, sizeof(s_dst)); |
3109 | 0 | } |
3110 | |
3111 | 0 | if (netmask != NULL && af == AF_INET) { |
3112 | 0 | (void) inet_ntop(af, &SIN(netmask)->sin_addr.s_addr, |
3113 | 0 | s_netmask, sizeof(s_netmask)); |
3114 | 0 | } else if (netmask != NULL &&
3115 | 0 | af == AF_INET6) {
3116 | 0 | (void) inet_ntop(af, &SIN6(netmask)->sin6_addr, |
3117 | 0 | s_netmask, sizeof(s_netmask)); |
3118 | 0 | } else { |
3119 | 0 | *s_netmask = '\0'; |
3120 | 0 | } |
3121 | 0 | printf("%s (%d, %d, %s, %s, %u)\n", |
3122 | 0 | __func__, lookup_only, coarse, s_dst, s_netmask, ifscope); |
3123 | 0 | } |
3124 | 619k | #endif |
3125 | | |
3126 | | /* |
3127 | | * Scoped route lookup: |
3128 | | * |
3129 | | * We first perform a non-scoped lookup for the original result. |
3130 | | * Afterwards, depending on whether or not the caller has specified |
3131 | | * a scope, we perform a more specific scoped search and fallback |
3132 | | * a scope, we perform a more specific scoped search and fall back
3133 | | */ |
3134 | 619k | rn0 = rn = node_lookup(dst, netmask, IFSCOPE_NONE); |
3135 | | |
3136 | | /* |
3137 | | * If the caller did not specify a scope, use the primary scope |
3138 | | * derived from the system's non-scoped default route. If, for |
3139 | | * any reason, there is no primary interface, ifscope will be |
3140 | | * set to IFSCOPE_NONE; if the above lookup resulted in a route, |
3141 | | * we'll do a more-specific search below, scoped to the interface |
3142 | | * of that route. |
3143 | | */ |
3144 | 619k | if (dontcare) { |
3145 | 102k | ifscope = get_primary_ifscope(af); |
3146 | 102k | } |
3147 | | |
3148 | | /* |
3149 | | * Keep the original result if either of the following is true: |
3150 | | * |
3151 | | * 1) The interface portion of the route has the same interface |
3152 | | * index as the scope value and it is marked with RTF_IFSCOPE. |
3153 | | * 2) The route uses the loopback interface, in which case the |
3154 | | * destination (host/net) is local/loopback. |
3155 | | * |
3156 | | * Otherwise, do a more specific search using the scope;
3157 | | * we're holding rnh_lock now, so rt_ifp should not change. |
3158 | | */ |
3159 | 619k | if (rn != NULL) { |
3160 | 56.0k | struct rtentry *rt = RT(rn); |
3161 | 56.0k | #if (DEVELOPMENT || DEBUG) |
3162 | 56.0k | if (rt_verbose) { |
3163 | 0 | rt_str(rt, dbuf, sizeof(dbuf), gbuf, sizeof(gbuf)); |
3164 | 0 | printf("%s unscoped search %p to %s->%s->%s ifa_ifp %s\n", |
3165 | 0 | __func__, rt, |
3166 | 0 | dbuf, gbuf, |
3167 | 0 | (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", |
3168 | 0 | (rt->rt_ifa->ifa_ifp != NULL) ? |
3169 | 0 | rt->rt_ifa->ifa_ifp->if_xname : ""); |
3170 | 0 | } |
3171 | 56.0k | #endif |
3172 | 56.0k | if (!(rt->rt_ifp->if_flags & IFF_LOOPBACK) || |
3173 | 56.0k | (rt->rt_flags & RTF_GATEWAY)) { |
3174 | 32.1k | if (rt->rt_ifp->if_index != ifscope) { |
3175 | | /* |
3176 | | * Wrong interface; keep the original result |
3177 | | * only if the caller did not specify a scope, |
3178 | | * and do a more specific scoped search using |
3179 | | * the scope of the found route. Otherwise, |
3180 | | * start again from scratch. |
3181 | | * |
3182 | | * For loopback scope we keep the unscoped |
3183 | | * route for local addresses.
3184 | | */ |
3185 | 25.1k | rn = NULL; |
3186 | 25.1k | if (dontcare) { |
3187 | 25.1k | ifscope = rt->rt_ifp->if_index; |
3188 | 25.1k | } else if (ifscope != lo_ifp->if_index || |
3189 | 0 | rt_ifa_is_dst(dst, rt->rt_ifa) == FALSE) { |
3190 | 0 | rn0 = NULL; |
3191 | 0 | } |
3192 | 25.1k | } else if (!(rt->rt_flags & RTF_IFSCOPE)) { |
3193 | | /* |
3194 | | * Right interface, except that this route |
3195 | | * isn't marked with RTF_IFSCOPE. Do a more |
3196 | | * specific scoped search. Keep the original |
3197 | | * result and return it in case the scoped
3198 | | * search fails. |
3199 | | */ |
3200 | 6.96k | rn = NULL; |
3201 | 6.96k | } |
3202 | 32.1k | } |
3203 | 56.0k | } |
3204 | | |
3205 | | /* |
3206 | | * Scoped search. Find the most specific entry having the same |
3207 | | * interface scope as the one requested. The following will result |
3208 | | * in searching for the longest prefix scoped match. |
3209 | | */ |
3210 | 619k | if (rn == NULL) { |
3211 | 595k | rn = node_lookup(dst, netmask, ifscope); |
3212 | 595k | #if (DEVELOPMENT || DEBUG) |
3213 | 595k | if (rt_verbose && rn != NULL) { |
3214 | 0 | struct rtentry *rt = RT(rn); |
3215 | |
3216 | 0 | rt_str(rt, dbuf, sizeof(dbuf), gbuf, sizeof(gbuf)); |
3217 | 0 | printf("%s scoped search %p to %s->%s->%s ifa %s\n", |
3218 | 0 | __func__, rt, |
3219 | 0 | dbuf, gbuf, |
3220 | 0 | (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", |
3221 | 0 | (rt->rt_ifa->ifa_ifp != NULL) ? |
3222 | 0 | rt->rt_ifa->ifa_ifp->if_xname : ""); |
3223 | 0 | } |
3224 | 595k | #endif |
3225 | 595k | } |
3226 | | /* |
3227 | | * Use the original result if either of the following is true: |
3228 | | * |
3229 | | * 1) The scoped search did not yield any result. |
3230 | | * 2) The caller insists on performing a coarse-grained lookup. |
3231 | | * 3) The result from the scoped search is a scoped default route, |
3232 | | * and the original (non-scoped) result is not a default route, |
3233 | | * i.e. the original result is a more specific host/net route. |
3234 | | * 4) The scoped search yielded a net route but the original |
3235 | | * result is a host route, i.e. the original result is treated |
3236 | | * as a more specific route. |
3237 | | */ |
3238 | 619k | if (rn == NULL || coarse || (rn0 != NULL && |
3239 | 68.2k | ((SA_DEFAULT(rt_key(RT(rn))) && !SA_DEFAULT(rt_key(RT(rn0)))) || |
3240 | 550k | (!RT_HOST(rn) && RT_HOST(rn0))))) { |
3241 | 550k | rn = rn0; |
3242 | 550k | } |
3243 | | |
3244 | | /* |
3245 | | * If we still don't have a route, use the non-scoped default |
3246 | | * route as long as the interface portion satisfies the scope.
3247 | | */ |
3248 | 619k | if (rn == NULL && (rn = node_lookup_default(af)) != NULL && |
3249 | 619k | RT(rn)->rt_ifp->if_index != ifscope) { |
3250 | 0 | rn = NULL; |
3251 | 0 | } |
3252 | | |
3253 | 619k | if (rn != NULL) { |
3254 | | /* |
3255 | | * Manually clear RTPRF_OURS using rt_validate() and |
3256 | | * bump up the reference count after, and not before; |
3257 | | * we only get here for AF_INET/AF_INET6. node_lookup() |
3258 | | * has done the check against RNF_ROOT, so we can be sure |
3259 | | * that we're not returning a root node here. |
3260 | | */ |
3261 | 104k | RT_LOCK_SPIN(RT(rn)); |
3262 | 104k | if (rt_validate(RT(rn))) { |
3263 | 104k | RT_ADDREF_LOCKED(RT(rn)); |
3264 | 104k | RT_UNLOCK(RT(rn)); |
3265 | 104k | } else { |
3266 | 0 | RT_UNLOCK(RT(rn)); |
3267 | 0 | rn = NULL; |
3268 | 0 | } |
3269 | 104k | } |
3270 | 619k | #if (DEVELOPMENT || DEBUG) |
3271 | 619k | if (rt_verbose) { |
3272 | 0 | if (rn == NULL) { |
3273 | 0 | printf("%s %u return NULL\n", __func__, ifscope); |
3274 | 0 | } else { |
3275 | 0 | struct rtentry *rt = RT(rn); |
3276 | |
3277 | 0 | rt_str(rt, dbuf, sizeof(dbuf), gbuf, sizeof(gbuf)); |
3278 | |
3279 | 0 | printf("%s %u return %p to %s->%s->%s ifa_ifp %s\n", |
3280 | 0 | __func__, ifscope, rt, |
3281 | 0 | dbuf, gbuf, |
3282 | 0 | (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", |
3283 | 0 | (rt->rt_ifa->ifa_ifp != NULL) ? |
3284 | 0 | rt->rt_ifa->ifa_ifp->if_xname : ""); |
3285 | 0 | } |
3286 | 0 | } |
3287 | 619k | #endif |
3288 | 619k | return RT(rn); |
3289 | 619k | } |
3290 | | |
3291 | | struct rtentry * |
3292 | | rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, |
3293 | | struct radix_node_head *rnh, unsigned int ifscope) |
3294 | 614k | { |
3295 | 614k | return rt_lookup_common(lookup_only, FALSE, dst, netmask, |
3296 | 614k | rnh, ifscope); |
3297 | 614k | } |
3298 | | |
3299 | | struct rtentry * |
3300 | | rt_lookup_coarse(boolean_t lookup_only, struct sockaddr *dst, |
3301 | | struct sockaddr *netmask, struct radix_node_head *rnh) |
3302 | 4.73k | { |
3303 | 4.73k | return rt_lookup_common(lookup_only, TRUE, dst, netmask, |
3304 | 4.73k | rnh, IFSCOPE_NONE); |
3305 | 4.73k | } |
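As a usage sketch of the two wrappers above (the caller below is hypothetical; real callers either hold rnh_lock already or take it as shown, per the assertion in rt_lookup_common()):

static struct rtentry *
example_scoped_lookup(struct sockaddr *dst, unsigned int ifindex)
{
	struct rtentry *rt;

	lck_mtx_lock(rnh_lock);
	/* lookup_only == TRUE: don't unexpire/revalidate the entry */
	rt = rt_lookup(TRUE, dst, NULL, rt_tables[dst->sa_family], ifindex);
	lck_mtx_unlock(rnh_lock);

	/* On success the caller owns one reference; drop it with rtfree() */
	return rt;
}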
3306 | | |
3307 | | boolean_t |
3308 | | rt_validate(struct rtentry *rt) |
3309 | 104k | { |
3310 | 104k | RT_LOCK_ASSERT_HELD(rt); |
3311 | | |
3312 | 104k | if ((rt->rt_flags & (RTF_UP | RTF_CONDEMNED)) == RTF_UP) { |
3313 | 104k | int af = rt_key(rt)->sa_family; |
3314 | | |
3315 | 104k | if (af == AF_INET) { |
3316 | 14.0k | (void) in_validate(RN(rt)); |
3317 | 90.1k | } else if (af == AF_INET6) { |
3318 | 90.1k | (void) in6_validate(RN(rt)); |
3319 | 90.1k | } |
3320 | 104k | } else { |
3321 | 0 | rt = NULL; |
3322 | 0 | } |
3323 | | |
3324 | 104k | return rt != NULL; |
3325 | 104k | } |
3326 | | |
3327 | | /* |
3328 | | * Set up a routing table entry, normally |
3329 | | * for an interface. |
3330 | | */ |
3331 | | int |
3332 | | rtinit(struct ifaddr *ifa, int cmd, int flags) |
3333 | 4.73k | { |
3334 | 4.73k | int error; |
3335 | | |
3336 | 4.73k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); |
3337 | | |
3338 | 4.73k | lck_mtx_lock(rnh_lock); |
3339 | 4.73k | error = rtinit_locked(ifa, cmd, flags); |
3340 | 4.73k | lck_mtx_unlock(rnh_lock); |
3341 | | |
3342 | 4.73k | return error; |
3343 | 4.73k | } |
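A sketch of how address-configuration code might drive rtinit(); the wrapper below is hypothetical, and the flag choice mirrors the typical subnet-route case rather than any one real caller:

static int
example_ifa_route(struct ifaddr *ifa, boolean_t add)
{
	if (add) {
		/* Install the subnet/prefix route for a new interface address */
		return rtinit(ifa, RTM_ADD, RTF_UP | RTF_CLONING);
	}
	/* Remove it again when the address goes away */
	return rtinit(ifa, RTM_DELETE, 0);
}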
3344 | | |
3345 | | int |
3346 | | rtinit_locked(struct ifaddr *ifa, int cmd, int flags) |
3347 | 9.46k | { |
3348 | 9.46k | struct radix_node_head *rnh; |
3349 | 9.46k | uint8_t nbuf[128]; /* long enough for IPv6 */ |
3350 | 9.46k | #if (DEVELOPMENT || DEBUG) |
3351 | 9.46k | char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; |
3352 | 9.46k | char abuf[MAX_IPv6_STR_LEN]; |
3353 | 9.46k | #endif |
3354 | 9.46k | struct rtentry *rt = NULL; |
3355 | 9.46k | struct sockaddr *dst; |
3356 | 9.46k | struct sockaddr *netmask; |
3357 | 9.46k | int error = 0; |
3358 | | |
3359 | | /* |
3360 | | * Holding rnh_lock here prevents the possibility of ifa from |
3361 | | * changing (e.g. in_ifinit), so it is safe to access its |
3362 | | * ifa_{dst}addr (here and down below) without locking. |
3363 | | */ |
3364 | 9.46k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
3365 | | |
3366 | 9.46k | if (flags & RTF_HOST) { |
3367 | 4.96k | dst = ifa->ifa_dstaddr; |
3368 | 4.96k | netmask = NULL; |
3369 | 4.96k | } else { |
3370 | 4.49k | dst = ifa->ifa_addr; |
3371 | 4.49k | netmask = ifa->ifa_netmask; |
3372 | 4.49k | } |
3373 | | |
3374 | 9.46k | if (dst->sa_len == 0) { |
3375 | 0 | log(LOG_ERR, "%s: %s failed, invalid dst sa_len %d\n", |
3376 | 0 | __func__, rtm2str(cmd), dst->sa_len); |
3377 | 0 | error = EINVAL; |
3378 | 0 | goto done; |
3379 | 0 | } |
3380 | 9.46k | if (netmask != NULL && netmask->sa_len > sizeof(nbuf)) { |
3381 | 0 | log(LOG_ERR, "%s: %s failed, mask sa_len %d too large\n", |
3382 | 0 | __func__, rtm2str(cmd), netmask->sa_len);
3383 | 0 | error = EINVAL; |
3384 | 0 | goto done; |
3385 | 0 | } |
3386 | | |
3387 | 9.46k | #if (DEVELOPMENT || DEBUG) |
3388 | 9.46k | if (dst->sa_family == AF_INET) { |
3389 | 9.46k | (void) inet_ntop(AF_INET, &SIN(dst)->sin_addr.s_addr, |
3390 | 9.46k | abuf, sizeof(abuf)); |
3391 | 9.46k | } else if (dst->sa_family == AF_INET6) { |
3392 | 1 | (void) inet_ntop(AF_INET6, &SIN6(dst)->sin6_addr, |
3393 | 1 | abuf, sizeof(abuf)); |
3394 | 1 | } |
3395 | 9.46k | #endif /* (DEVELOPMENT || DEBUG) */ |
3396 | | |
3397 | 9.46k | if ((rnh = rt_tables[dst->sa_family]) == NULL) { |
3398 | 0 | error = EINVAL; |
3399 | 0 | goto done; |
3400 | 0 | } |
3401 | | |
3402 | | /* |
3403 | | * If it's a delete, check that if it exists, it's on the correct |
3404 | | * interface or we might scrub a route to another ifa which would |
3405 | | * be confusing at best and possibly worse. |
3406 | | */ |
3407 | 9.46k | if (cmd == RTM_DELETE) { |
3408 | | /* |
3409 | | * It's a delete, so it should already exist.
3410 | | * If it's a net, mask off the host bits |
3411 | | * (Assuming we have a mask) |
3412 | | */ |
3413 | 4.73k | if (netmask != NULL) { |
3414 | 163 | rt_maskedcopy(dst, SA(nbuf), netmask); |
3415 | 163 | dst = SA(nbuf); |
3416 | 163 | } |
3417 | | /* |
3418 | | * Get an rtentry that is in the routing tree and contains |
3419 | | * the correct info. Note that we perform a coarse-grained |
3420 | | * lookup here, in case there is a scoped variant of the |
3421 | | * subnet/prefix route which we should ignore, as we never |
3422 | | * add a scoped subnet/prefix route as part of adding an |
3423 | | * interface address. |
3424 | | */ |
3425 | 4.73k | rt = rt_lookup_coarse(TRUE, dst, NULL, rnh); |
3426 | 4.73k | if (rt != NULL) { |
3427 | 4.70k | #if (DEVELOPMENT || DEBUG) |
3428 | 4.70k | rt_str(rt, dbuf, sizeof(dbuf), gbuf, sizeof(gbuf)); |
3429 | 4.70k | #endif |
3430 | | /* |
3431 | | * OK, so we found the rtentry. It has an extra reference
3432 | | * for us at this stage; we won't need that, so
3433 | | * lop it off now.
3434 | | */ |
3435 | 4.70k | RT_LOCK(rt); |
3436 | 4.70k | if (rt->rt_ifa != ifa) { |
3437 | | /* |
3438 | | * If the interface address in the rtentry |
3439 | | * doesn't match the interface we are using, |
3440 | | * then we don't want to delete it, so return |
3441 | | * an error. This seems to be the only point |
3442 | | * of this whole RTM_DELETE clause. |
3443 | | */ |
3444 | 0 | #if (DEVELOPMENT || DEBUG) |
3445 | 0 | if (rt_verbose) { |
3446 | 0 | log(LOG_DEBUG, "%s: not removing " |
3447 | 0 | "route to %s->%s->%s, flags %b, " |
3448 | 0 | "ifaddr %s, rt_ifa 0x%llx != " |
3449 | 0 | "ifa 0x%llx\n", __func__, dbuf, |
3450 | 0 | gbuf, ((rt->rt_ifp != NULL) ? |
3451 | 0 | rt->rt_ifp->if_xname : ""), |
3452 | 0 | rt->rt_flags, RTF_BITS, abuf, |
3453 | 0 | (uint64_t)VM_KERNEL_ADDRPERM( |
3454 | 0 | rt->rt_ifa), |
3455 | 0 | (uint64_t)VM_KERNEL_ADDRPERM(ifa)); |
3456 | 0 | } |
3457 | 0 | #endif /* (DEVELOPMENT || DEBUG) */ |
3458 | 0 | RT_REMREF_LOCKED(rt); |
3459 | 0 | RT_UNLOCK(rt); |
3460 | 0 | rt = NULL; |
3461 | 0 | error = ((flags & RTF_HOST) ? |
3462 | 0 | EHOSTUNREACH : ENETUNREACH); |
3463 | 0 | goto done; |
3464 | 4.70k | } else if (rt->rt_flags & RTF_STATIC) { |
3465 | | /* |
3466 | | * Don't remove the subnet/prefix route if |
3467 | | * this was manually added from above. |
3468 | | */ |
3469 | 0 | #if (DEVELOPMENT || DEBUG) |
3470 | 0 | if (rt_verbose) { |
3471 | 0 | log(LOG_DEBUG, "%s: not removing " |
3472 | 0 | "static route to %s->%s->%s, " |
3473 | 0 | "flags %b, ifaddr %s\n", __func__, |
3474 | 0 | dbuf, gbuf, ((rt->rt_ifp != NULL) ? |
3475 | 0 | rt->rt_ifp->if_xname : ""), |
3476 | 0 | rt->rt_flags, RTF_BITS, abuf); |
3477 | 0 | } |
3478 | 0 | #endif /* (DEVELOPMENT || DEBUG) */ |
3479 | 0 | RT_REMREF_LOCKED(rt); |
3480 | 0 | RT_UNLOCK(rt); |
3481 | 0 | rt = NULL; |
3482 | 0 | error = EBUSY; |
3483 | 0 | goto done; |
3484 | 0 | } |
3485 | 4.70k | #if (DEVELOPMENT || DEBUG) |
3486 | 4.70k | if (rt_verbose) { |
3487 | 0 | log(LOG_DEBUG, "%s: removing route to " |
3488 | 0 | "%s->%s->%s, flags %b, ifaddr %s\n", |
3489 | 0 | __func__, dbuf, gbuf, |
3490 | 0 | ((rt->rt_ifp != NULL) ? |
3491 | 0 | rt->rt_ifp->if_xname : ""), |
3492 | 0 | rt->rt_flags, RTF_BITS, abuf); |
3493 | 0 | } |
3494 | 4.70k | #endif /* (DEVELOPMENT || DEBUG) */ |
3495 | 4.70k | RT_REMREF_LOCKED(rt); |
3496 | 4.70k | RT_UNLOCK(rt); |
3497 | 4.70k | rt = NULL; |
3498 | 4.70k | } |
3499 | 4.73k | } |
3500 | | /* |
3501 | | * Do the actual request |
3502 | | */ |
3503 | 9.46k | if ((error = rtrequest_locked(cmd, dst, ifa->ifa_addr, netmask, |
3504 | 9.46k | flags | ifa->ifa_flags, &rt)) != 0) { |
3505 | 4.22k | goto done; |
3506 | 4.22k | } |
3507 | | |
3508 | 5.24k | VERIFY(rt != NULL); |
3509 | 0 | #if (DEVELOPMENT || DEBUG) |
3510 | 0 | rt_str(rt, dbuf, sizeof(dbuf), gbuf, sizeof(gbuf)); |
3511 | 5.24k | #endif /* (DEVELOPMENT || DEBUG) */ |
3512 | 5.24k | switch (cmd) { |
3513 | 510 | case RTM_DELETE: |
3514 | | /* |
3515 | | * If we are deleting, and we found an entry, then it's |
3516 | | * been removed from the tree. Notify any listening |
3517 | | * routing agents of the change and throw it away. |
3518 | | */ |
3519 | 510 | RT_LOCK(rt); |
3520 | 510 | rt_newaddrmsg(cmd, ifa, error, rt); |
3521 | 510 | RT_UNLOCK(rt); |
3522 | 510 | #if (DEVELOPMENT || DEBUG) |
3523 | 510 | if (rt_verbose) { |
3524 | 0 | log(LOG_DEBUG, "%s: removed route to %s->%s->%s, " |
3525 | 0 | "flags %b, ifaddr %s\n", __func__, dbuf, gbuf, |
3526 | 0 | ((rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : ""), |
3527 | 0 | rt->rt_flags, RTF_BITS, abuf); |
3528 | 0 | } |
3529 | 510 | #endif /* (DEVELOPMENT || DEBUG) */ |
3530 | 510 | rtfree_locked(rt); |
3531 | 510 | break; |
3532 | | |
3533 | 4.73k | case RTM_ADD: |
3534 | | /* |
3535 | | * We are adding, and we have a returned routing entry. |
3536 | | * We need to sanity check the result. If it came back |
3537 | | * with an unexpected interface, then it must have already |
3538 | | * existed or something. |
3539 | | */ |
3540 | 4.73k | RT_LOCK(rt); |
3541 | 4.73k | if (rt->rt_ifa != ifa) { |
3542 | 0 | void (*ifa_rtrequest) |
3543 | 0 | (int, struct rtentry *, struct sockaddr *); |
3544 | 0 | #if (DEVELOPMENT || DEBUG) |
3545 | 0 | if (rt_verbose) { |
3546 | 0 | if (!(rt->rt_ifa->ifa_ifp->if_flags & |
3547 | 0 | (IFF_POINTOPOINT | IFF_LOOPBACK))) { |
3548 | 0 | log(LOG_ERR, "%s: %s route to %s->%s->%s, " |
3549 | 0 | "flags %b, ifaddr %s, rt_ifa 0x%llx != " |
3550 | 0 | "ifa 0x%llx\n", __func__, rtm2str(cmd), |
3551 | 0 | dbuf, gbuf, ((rt->rt_ifp != NULL) ? |
3552 | 0 | rt->rt_ifp->if_xname : ""), rt->rt_flags, |
3553 | 0 | RTF_BITS, abuf, |
3554 | 0 | (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_ifa), |
3555 | 0 | (uint64_t)VM_KERNEL_ADDRPERM(ifa)); |
3556 | 0 | } |
3557 | |
3558 | 0 | log(LOG_DEBUG, "%s: %s route to %s->%s->%s, " |
3559 | 0 | "flags %b, ifaddr %s, rt_ifa was 0x%llx " |
3560 | 0 | "now 0x%llx\n", __func__, rtm2str(cmd), |
3561 | 0 | dbuf, gbuf, ((rt->rt_ifp != NULL) ? |
3562 | 0 | rt->rt_ifp->if_xname : ""), rt->rt_flags, |
3563 | 0 | RTF_BITS, abuf, |
3564 | 0 | (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_ifa), |
3565 | 0 | (uint64_t)VM_KERNEL_ADDRPERM(ifa)); |
3566 | 0 | } |
3567 | 0 | #endif /* (DEVELOPMENT || DEBUG) */ |
3568 | | |
3569 | | /* |
3570 | | * Ask that the protocol in question |
3571 | | * remove anything it has associated with |
3572 | | * this route and ifaddr. |
3573 | | */ |
3574 | 0 | ifa_rtrequest = rt->rt_ifa->ifa_rtrequest; |
3575 | 0 | if (ifa_rtrequest != NULL) { |
3576 | 0 | ifa_rtrequest(RTM_DELETE, rt, NULL); |
3577 | 0 | } |
3578 | | /* |
3579 | | * Set the route's ifa. |
3580 | | */ |
3581 | 0 | rtsetifa(rt, ifa); |
3582 | |
3583 | 0 | if (rt->rt_ifp != ifa->ifa_ifp) { |
3584 | | /* |
3585 | | * Purge any link-layer info caching. |
3586 | | */ |
3587 | 0 | if (rt->rt_llinfo_purge != NULL) { |
3588 | 0 | rt->rt_llinfo_purge(rt); |
3589 | 0 | } |
3590 | | /* |
3591 | | * Adjust route ref count for the interfaces. |
3592 | | */ |
3593 | 0 | if (rt->rt_if_ref_fn != NULL) { |
3594 | 0 | rt->rt_if_ref_fn(ifa->ifa_ifp, 1); |
3595 | 0 | rt->rt_if_ref_fn(rt->rt_ifp, -1); |
3596 | 0 | } |
3597 | 0 | } |
3598 | | |
3599 | | /* |
3600 | | * And substitute in references to the ifaddr |
3601 | | * we are adding. |
3602 | | */ |
3603 | 0 | rt->rt_ifp = ifa->ifa_ifp; |
3604 | | /* |
3605 | | * If rmx_mtu is not locked, update it |
3606 | | * to the MTU used by the new interface. |
3607 | | */ |
3608 | 0 | if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) { |
3609 | 0 | rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; |
3610 | 0 | if (dst->sa_family == AF_INET && |
3611 | 0 | INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) { |
3612 | 0 | rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp); |
3613 | | /* Further adjust the size for CLAT46 expansion */ |
3614 | 0 | rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD; |
3615 | 0 | } |
3616 | 0 | } |
3617 | | |
3618 | | /* |
3619 | | * Now ask the protocol to check if it needs |
3620 | | * any special processing in its new form. |
3621 | | */ |
3622 | 0 | ifa_rtrequest = ifa->ifa_rtrequest; |
3623 | 0 | if (ifa_rtrequest != NULL) { |
3624 | 0 | ifa_rtrequest(RTM_ADD, rt, NULL); |
3625 | 0 | } |
3626 | 4.73k | } else { |
3627 | 4.73k | #if (DEVELOPMENT || DEBUG) |
3628 | 4.73k | if (rt_verbose) { |
3629 | 0 | log(LOG_DEBUG, "%s: added route to %s->%s->%s, " |
3630 | 0 | "flags %b, ifaddr %s\n", __func__, dbuf, |
3631 | 0 | gbuf, ((rt->rt_ifp != NULL) ? |
3632 | 0 | rt->rt_ifp->if_xname : ""), rt->rt_flags, |
3633 | 0 | RTF_BITS, abuf); |
3634 | 0 | } |
3635 | 4.73k | #endif /* (DEVELOPMENT || DEBUG) */ |
3636 | 4.73k | } |
3637 | | /* |
3638 | | * notify any listening routing agents of the change
3639 | | */ |
3640 | 4.73k | rt_newaddrmsg(cmd, ifa, error, rt); |
3641 | | /* |
3642 | | * We just wanted to add it; we don't actually need a |
3643 | | * reference. This will result in a route that's added |
3644 | | * to the routing table without a reference count. The |
3645 | | * RTM_DELETE code will do the necessary step to adjust |
3646 | | * the reference count at deletion time. |
3647 | | */ |
3648 | 4.73k | RT_REMREF_LOCKED(rt); |
3649 | 4.73k | RT_UNLOCK(rt); |
3650 | 4.73k | break; |
3651 | | |
3652 | 0 | default: |
3653 | 0 | VERIFY(0); |
3654 | | /* NOTREACHED */ |
3655 | 5.24k | } |
3656 | 9.46k | done: |
3657 | 9.46k | return error; |
3658 | 5.24k | } |
3659 | | |
3660 | | static void |
3661 | | rt_set_idleref(struct rtentry *rt) |
3662 | 98.8k | { |
3663 | 98.8k | RT_LOCK_ASSERT_HELD(rt); |
3664 | | |
3665 | | /* |
3666 | | * We currently keep idle refcnt only on unicast cloned routes |
3667 | | * that aren't marked with RTF_NOIFREF. |
3668 | | */ |
3669 | 98.8k | if (rt->rt_parent != NULL && !(rt->rt_flags & |
3670 | 6.82k | (RTF_NOIFREF | RTF_BROADCAST | RTF_MULTICAST)) && |
3671 | 98.8k | (rt->rt_flags & (RTF_UP | RTF_WASCLONED | RTF_IFREF)) == |
3672 | 6.82k | (RTF_UP | RTF_WASCLONED)) { |
3673 | 6.69k | rt_clear_idleref(rt); /* drop existing refcnt if any */ |
3674 | 6.69k | rt->rt_if_ref_fn = rte_if_ref; |
3675 | | /* Become a regular mutex, just in case */ |
3676 | 6.69k | RT_CONVERT_LOCK(rt); |
3677 | 6.69k | rt->rt_if_ref_fn(rt->rt_ifp, 1); |
3678 | 6.69k | rt->rt_flags |= RTF_IFREF; |
3679 | 6.69k | } |
3680 | 98.8k | } |
3681 | | |
3682 | | void |
3683 | | rt_clear_idleref(struct rtentry *rt) |
3684 | 112k | { |
3685 | 112k | RT_LOCK_ASSERT_HELD(rt); |
3686 | | |
3687 | 112k | if (rt->rt_if_ref_fn != NULL) { |
3688 | 6.69k | VERIFY((rt->rt_flags & (RTF_NOIFREF | RTF_IFREF)) == RTF_IFREF); |
3689 | | /* Become a regular mutex, just in case */ |
3690 | 6.69k | RT_CONVERT_LOCK(rt); |
3691 | 6.69k | rt->rt_if_ref_fn(rt->rt_ifp, -1); |
3692 | 6.69k | rt->rt_flags &= ~RTF_IFREF; |
3693 | 6.69k | rt->rt_if_ref_fn = NULL; |
3694 | 6.69k | } |
3695 | 112k | } |
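rt_set_idleref() and rt_clear_idleref() always move RTF_IFREF and rt_if_ref_fn together, so the interface's if_route_refcnt stays balanced. A sketch of the pairing (hypothetical caller; in practice rt_set_idleref() is invoked internally when a cloned route is brought into use):

static void
example_idleref_cycle(struct rtentry *rt)
{
	RT_LOCK(rt);
	rt_set_idleref(rt);   /* +1 on rt->rt_ifp->if_route_refcnt if eligible */
	/* ... route now counts against the interface's idle detection ... */
	rt_clear_idleref(rt); /* -1 again, and RTF_IFREF is cleared */
	RT_UNLOCK(rt);
}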
3696 | | |
3697 | | void |
3698 | | rt_set_proxy(struct rtentry *rt, boolean_t set) |
3699 | 0 | { |
3700 | 0 | lck_mtx_lock(rnh_lock); |
3701 | 0 | RT_LOCK(rt); |
3702 | | /* |
3703 | | * Search for any cloned routes which might have |
3704 | | * been formed from this node, and delete them. |
3705 | | */ |
3706 | 0 | if (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) { |
3707 | 0 | struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family]; |
3708 | |
3709 | 0 | if (set) { |
3710 | 0 | rt->rt_flags |= RTF_PROXY; |
3711 | 0 | } else { |
3712 | 0 | rt->rt_flags &= ~RTF_PROXY; |
3713 | 0 | } |
3714 | |
3715 | 0 | RT_UNLOCK(rt); |
3716 | 0 | if (rnh != NULL && rt_mask(rt)) { |
3717 | 0 | rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), |
3718 | 0 | rt_fixdelete, rt); |
3719 | 0 | } |
3720 | 0 | } else { |
3721 | 0 | RT_UNLOCK(rt); |
3722 | 0 | } |
3723 | 0 | lck_mtx_unlock(rnh_lock); |
3724 | 0 | } |
3725 | | |
3726 | | static void |
3727 | | rte_lock_init(struct rtentry *rt) |
3728 | 7.99k | { |
3729 | 7.99k | lck_mtx_init(&rt->rt_lock, rte_mtx_grp, rte_mtx_attr); |
3730 | 7.99k | } |
3731 | | |
3732 | | static void |
3733 | | rte_lock_destroy(struct rtentry *rt) |
3734 | 7.97k | { |
3735 | 7.97k | RT_LOCK_ASSERT_NOTHELD(rt); |
3736 | 7.97k | lck_mtx_destroy(&rt->rt_lock, rte_mtx_grp); |
3737 | 7.97k | } |
3738 | | |
3739 | | void |
3740 | | rt_lock(struct rtentry *rt, boolean_t spin) |
3741 | 26.8M | { |
3742 | 26.8M | RT_LOCK_ASSERT_NOTHELD(rt); |
3743 | 26.8M | if (spin) { |
3744 | 9.65M | lck_mtx_lock_spin(&rt->rt_lock); |
3745 | 17.1M | } else { |
3746 | 17.1M | lck_mtx_lock(&rt->rt_lock); |
3747 | 17.1M | } |
3748 | 26.8M | if (rte_debug & RTD_DEBUG) { |
3749 | 0 | rte_lock_debug((struct rtentry_dbg *)rt); |
3750 | 0 | } |
3751 | 26.8M | } |
3752 | | |
3753 | | void |
3754 | | rt_unlock(struct rtentry *rt) |
3755 | 26.8M | { |
3756 | 26.8M | if (rte_debug & RTD_DEBUG) { |
3757 | 0 | rte_unlock_debug((struct rtentry_dbg *)rt); |
3758 | 0 | } |
3759 | 26.8M | lck_mtx_unlock(&rt->rt_lock); |
3760 | 26.8M | } |
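Because rt_lock() can be taken in spin mode, callers throughout this file follow a two-step discipline: RT_LOCK_SPIN() for a quick check, then RT_CONVERT_LOCK() before any work that may block. A minimal sketch (the function name is hypothetical):

static void
example_lock_discipline(struct rtentry *rt)
{
	RT_LOCK_SPIN(rt);            /* cheap hold for a quick flag check */
	if (rt->rt_flags & RTF_UP) {
		RT_CONVERT_LOCK(rt); /* become a regular mutex, just in case */
		/* ... work that may block while the lock is held ... */
	}
	RT_UNLOCK(rt);
}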
3761 | | |
3762 | | static inline void |
3763 | | rte_lock_debug(struct rtentry_dbg *rte) |
3764 | 0 | { |
3765 | 0 | uint32_t idx; |
3766 | |
3767 | 0 | RT_LOCK_ASSERT_HELD((struct rtentry *)rte); |
3768 | 0 | idx = atomic_add_32_ov(&rte->rtd_lock_cnt, 1) % CTRACE_HIST_SIZE; |
3769 | 0 | if (rte_debug & RTD_TRACE) { |
3770 | 0 | ctrace_record(&rte->rtd_lock[idx]); |
3771 | 0 | } |
3772 | 0 | } |
3773 | | |
3774 | | static inline void |
3775 | | rte_unlock_debug(struct rtentry_dbg *rte) |
3776 | 0 | { |
3777 | 0 | uint32_t idx; |
3778 | |
3779 | 0 | RT_LOCK_ASSERT_HELD((struct rtentry *)rte); |
3780 | 0 | idx = atomic_add_32_ov(&rte->rtd_unlock_cnt, 1) % CTRACE_HIST_SIZE; |
3781 | 0 | if (rte_debug & RTD_TRACE) { |
3782 | 0 | ctrace_record(&rte->rtd_unlock[idx]); |
3783 | 0 | } |
3784 | 0 | } |
3785 | | |
3786 | | static struct rtentry * |
3787 | | rte_alloc(void) |
3788 | 7.99k | { |
3789 | 7.99k | if (rte_debug & RTD_DEBUG) { |
3790 | 0 | return rte_alloc_debug(); |
3791 | 0 | } |
3792 | | |
3793 | 7.99k | return (struct rtentry *)zalloc(rte_zone); |
3794 | 7.99k | } |
3795 | | |
3796 | | static void |
3797 | | rte_free(struct rtentry *p) |
3798 | 7.97k | { |
3799 | 7.97k | if (rte_debug & RTD_DEBUG) { |
3800 | 0 | rte_free_debug(p); |
3801 | 0 | return; |
3802 | 0 | } |
3803 | | |
3804 | 7.97k | if (p->rt_refcnt != 0) { |
3805 | 0 | panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt); |
3806 | | /* NOTREACHED */ |
3807 | 0 | } |
3808 | | |
3809 | 7.97k | zfree(rte_zone, p); |
3810 | 7.97k | } |
3811 | | |
3812 | | static void |
3813 | | rte_if_ref(struct ifnet *ifp, int cnt) |
3814 | 13.3k | { |
3815 | 13.3k | struct kev_msg ev_msg; |
3816 | 13.3k | struct net_event_data ev_data; |
3817 | 13.3k | uint32_t old; |
3818 | | |
3819 | | /* Force cnt to 1 increment/decrement */ |
3820 | 13.3k | if (cnt < -1 || cnt > 1) { |
3821 | 0 | panic("%s: invalid count argument (%d)", __func__, cnt); |
3822 | | /* NOTREACHED */ |
3823 | 0 | } |
3824 | 13.3k | old = atomic_add_32_ov(&ifp->if_route_refcnt, cnt); |
3825 | 13.3k | if (cnt < 0 && old == 0) { |
3826 | 0 | panic("%s: ifp=%p negative route refcnt!", __func__, ifp); |
3827 | | /* NOTREACHED */ |
3828 | 0 | } |
3829 | | /* |
3830 | | * The following is done without first holding the ifnet lock, |
3831 | | * for performance reasons. The relevant ifnet fields, with |
3832 | | * the exception of the if_idle_flags, are never changed |
3833 | | * during the lifetime of the ifnet. The if_idle_flags |
3834 | | * may possibly be modified, so in the event that the value |
3835 | | * is stale because IFRF_IDLE_NOTIFY was cleared, we'd end up |
3836 | | * sending the event anyway. This is harmless as it is just |
3837 | | * a notification to the monitoring agent in user space, and |
3838 | | * it is expected to check via SIOCGIFGETRTREFCNT again anyway. |
3839 | | */ |
3840 | 13.3k | if ((ifp->if_idle_flags & IFRF_IDLE_NOTIFY) && cnt < 0 && old == 1) { |
3841 | 0 | bzero(&ev_msg, sizeof(ev_msg)); |
3842 | 0 | bzero(&ev_data, sizeof(ev_data)); |
3843 | |
3844 | 0 | ev_msg.vendor_code = KEV_VENDOR_APPLE; |
3845 | 0 | ev_msg.kev_class = KEV_NETWORK_CLASS; |
3846 | 0 | ev_msg.kev_subclass = KEV_DL_SUBCLASS; |
3847 | 0 | ev_msg.event_code = KEV_DL_IF_IDLE_ROUTE_REFCNT; |
3848 | |
3849 | 0 | strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ); |
3850 | |
3851 | 0 | ev_data.if_family = ifp->if_family; |
3852 | 0 | ev_data.if_unit = ifp->if_unit; |
3853 | 0 | ev_msg.dv[0].data_length = sizeof(struct net_event_data); |
3854 | 0 | ev_msg.dv[0].data_ptr = &ev_data; |
3855 | |
3856 | 0 | dlil_post_complete_msg(NULL, &ev_msg); |
3857 | 0 | } |
3858 | 13.3k | } |
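For reference, the KEV_DL_IF_IDLE_ROUTE_REFCNT notification posted above can be observed from user space with a Darwin kernel-event socket. The listener below is a hedged sketch; the header locations and filter setup reflect my reading of the Darwin headers and are not taken from this file:

#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/kern_event.h>   /* SIOCSKEVFILT, struct kern_event_msg */
#include <net/if.h>           /* KEV_DL_SUBCLASS, KEV_DL_IF_IDLE_ROUTE_REFCNT */

int
main(void)
{
	struct kev_request req = {
		.vendor_code  = KEV_VENDOR_APPLE,
		.kev_class    = KEV_NETWORK_CLASS,
		.kev_subclass = KEV_DL_SUBCLASS,
	};
	char buf[1024];
	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);

	if (fd < 0 || ioctl(fd, SIOCSKEVFILT, &req) < 0) {
		perror("kernel event socket");
		return 1;
	}
	for (;;) {
		ssize_t n = read(fd, buf, sizeof(buf));
		if (n <= 0) {
			break;
		}
		const struct kern_event_msg *msg = (const void *)buf;
		if (msg->event_code == KEV_DL_IF_IDLE_ROUTE_REFCNT) {
			printf("idle route refcnt notification\n");
		}
	}
	close(fd);
	return 0;
}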
3859 | | |
3860 | | static inline struct rtentry * |
3861 | | rte_alloc_debug(void) |
3862 | 0 | { |
3863 | 0 | struct rtentry_dbg *rte; |
3864 | |
3865 | 0 | rte = ((struct rtentry_dbg *)zalloc(rte_zone)); |
3866 | 0 | if (rte != NULL) { |
3867 | 0 | bzero(rte, sizeof(*rte)); |
3868 | 0 | if (rte_debug & RTD_TRACE) { |
3869 | 0 | ctrace_record(&rte->rtd_alloc); |
3870 | 0 | } |
3871 | 0 | rte->rtd_inuse = RTD_INUSE; |
3872 | 0 | } |
3873 | 0 | return (struct rtentry *)rte; |
3874 | 0 | } |
3875 | | |
3876 | | static inline void |
3877 | | rte_free_debug(struct rtentry *p) |
3878 | 0 | { |
3879 | 0 | struct rtentry_dbg *rte = (struct rtentry_dbg *)p; |
3880 | |
3881 | 0 | if (p->rt_refcnt != 0) { |
3882 | 0 | panic("rte_free: rte=%p refcnt=%d\n", p, p->rt_refcnt); |
3883 | | /* NOTREACHED */ |
3884 | 0 | } |
3885 | 0 | if (rte->rtd_inuse == RTD_FREED) { |
3886 | 0 | panic("rte_free: double free rte=%p\n", rte); |
3887 | | /* NOTREACHED */ |
3888 | 0 | } else if (rte->rtd_inuse != RTD_INUSE) { |
3889 | 0 | panic("rte_free: corrupted rte=%p\n", rte); |
3890 | | /* NOTREACHED */ |
3891 | 0 | } |
3892 | 0 | bcopy((caddr_t)p, (caddr_t)&rte->rtd_entry_saved, sizeof(*p)); |
3893 | | /* Preserve rt_lock to help catch use-after-free cases */ |
3894 | 0 | bzero((caddr_t)p, offsetof(struct rtentry, rt_lock)); |
3895 | |
3896 | 0 | rte->rtd_inuse = RTD_FREED; |
3897 | |
3898 | 0 | if (rte_debug & RTD_TRACE) { |
3899 | 0 | ctrace_record(&rte->rtd_free); |
3900 | 0 | } |
3901 | |
3902 | 0 | if (!(rte_debug & RTD_NO_FREE)) { |
3903 | 0 | zfree(rte_zone, p); |
3904 | 0 | } |
3905 | 0 | } |
3906 | | |
3907 | | void |
3908 | | ctrace_record(ctrace_t *tr) |
3909 | 2 | { |
3910 | 2 | tr->th = current_thread(); |
3911 | 2 | bzero(tr->pc, sizeof(tr->pc)); |
3912 | 2 | (void) OSBacktrace(tr->pc, CTRACE_STACK_SIZE); |
3913 | 2 | } |
3914 | | |
3915 | | void |
3916 | | route_copyout(struct route *dst, const struct route *src, size_t length) |
3917 | 667k | { |
3918 | | /* Copy everything (rt, srcif, flags, dst) from src */ |
3919 | 667k | bcopy(src, dst, length); |
3920 | | |
3921 | | /* Hold one reference for the local copy of struct route */ |
3922 | 667k | if (dst->ro_rt != NULL) { |
3923 | 667k | RT_ADDREF(dst->ro_rt); |
3924 | 667k | } |
3925 | | |
3926 | | /* Hold one reference for the local copy of struct lle */ |
3927 | 667k | if (dst->ro_lle != NULL) { |
3928 | 0 | LLE_ADDREF(dst->ro_lle); |
3929 | 0 | } |
3930 | | |
3931 | | /* Hold one reference for the local copy of struct ifaddr */ |
3932 | 667k | if (dst->ro_srcia != NULL) { |
3933 | 0 | IFA_ADDREF(dst->ro_srcia); |
3934 | 0 | } |
3935 | 667k | } |
3936 | | |
3937 | | void |
3938 | | route_copyin(struct route *src, struct route *dst, size_t length) |
3939 | 34.1k | { |
3940 | | /* |
3941 | | * No cached route at the destination? |
3942 | | * If none, then remove old references if present |
3943 | | * and copy entire src route. |
3944 | | */ |
3945 | 34.1k | if (dst->ro_rt == NULL) { |
3946 | | /* |
3947 | | * Ditch the cached link layer reference (dst) |
3948 | | * since we're about to take everything there is in src |
3949 | | */ |
3950 | 2 | if (dst->ro_lle != NULL) { |
3951 | 0 | LLE_REMREF(dst->ro_lle); |
3952 | 0 | } |
3953 | | /* |
3954 | | * Ditch the address in the cached copy (dst) since |
3955 | | * we're about to take everything there is in src. |
3956 | | */ |
3957 | 2 | if (dst->ro_srcia != NULL) { |
3958 | 0 | IFA_REMREF(dst->ro_srcia); |
3959 | 0 | } |
3960 | | /* |
3961 | | * Copy everything (rt, ro_lle, srcia, flags, dst) from src; the |
3962 | | * references to rt and/or srcia were held at the time |
3963 | | * of storage and are kept intact. |
3964 | | */ |
3965 | 2 | bcopy(src, dst, length); |
3966 | 2 | goto done; |
3967 | 2 | } |
3968 | | |
3969 | | /* |
3970 | | * We know dst->ro_rt is not NULL here. |
3971 | | * If the src->ro_rt is the same, update ro_lle, srcia and flags |
3972 | | * and ditch the route in the local copy. |
3973 | | */ |
3974 | 34.1k | if (dst->ro_rt == src->ro_rt) { |
3975 | 2.48k | dst->ro_flags = src->ro_flags; |
3976 | | |
3977 | 2.48k | if (dst->ro_lle != src->ro_lle) { |
3978 | 0 | if (dst->ro_lle != NULL) { |
3979 | 0 | LLE_REMREF(dst->ro_lle); |
3980 | 0 | } |
3981 | 0 | dst->ro_lle = src->ro_lle; |
3982 | 2.48k | } else if (src->ro_lle != NULL) { |
3983 | 0 | LLE_REMREF(src->ro_lle); |
3984 | 0 | } |
3985 | | |
3986 | 2.48k | if (dst->ro_srcia != src->ro_srcia) { |
3987 | 0 | if (dst->ro_srcia != NULL) { |
3988 | 0 | IFA_REMREF(dst->ro_srcia); |
3989 | 0 | } |
3990 | 0 | dst->ro_srcia = src->ro_srcia; |
3991 | 2.48k | } else if (src->ro_srcia != NULL) { |
3992 | 0 | IFA_REMREF(src->ro_srcia); |
3993 | 0 | } |
3994 | 2.48k | rtfree(src->ro_rt); |
3995 | 2.48k | goto done; |
3996 | 2.48k | } |
3997 | | |
3998 | | /* |
3999 | | * If dst's ro_rt is not equal to src's, and src's
4000 | | * ro_rt is not NULL, then remove old references
4001 | | * if present and copy the entire src route.
4002 | | */ |
4003 | 31.6k | if (src->ro_rt != NULL) { |
4004 | 31.6k | rtfree(dst->ro_rt); |
4005 | | |
4006 | 31.6k | if (dst->ro_lle != NULL) { |
4007 | 0 | LLE_REMREF(dst->ro_lle); |
4008 | 0 | } |
4009 | 31.6k | if (dst->ro_srcia != NULL) { |
4010 | 0 | IFA_REMREF(dst->ro_srcia); |
4011 | 0 | } |
4012 | 31.6k | bcopy(src, dst, length); |
4013 | 31.6k | goto done; |
4014 | 31.6k | } |
4015 | | |
4016 | | /* |
4017 | | * Here, dst's cached route is not NULL but source's is. |
4018 | | * Just get rid of all the other cached references in src.
4019 | | */ |
4020 | 0 | if (src->ro_srcia != NULL) { |
4021 | | /* |
4022 | | * Ditch src address in the local copy (src) since we're |
4023 | | * not caching the route entry anyway (ro_rt is NULL). |
4024 | | */ |
4025 | 0 | IFA_REMREF(src->ro_srcia); |
4026 | 0 | } |
4027 | 0 | if (src->ro_lle != NULL) { |
4028 | | /* |
4029 | | * Ditch cache lle in the local copy (src) since we're |
4030 | | * not caching the route anyway (ro_rt is NULL). |
4031 | | */ |
4032 | 0 | LLE_REMREF(src->ro_lle); |
4033 | 0 | } |
4034 | 34.1k | done: |
4035 | | /* This function consumes the references on src */ |
4036 | 34.1k | src->ro_lle = NULL; |
4037 | 34.1k | src->ro_rt = NULL; |
4038 | 34.1k | src->ro_srcia = NULL; |
4039 | 34.1k | } |
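route_copyout() and route_copyin() are designed as a pair: snapshot a cached route into a local struct route, work on the copy without the owner's lock, then merge the references back. A sketch with a hypothetical caller:

static void
example_cached_route_use(struct route *cached)
{
	struct route local;

	route_copyout(&local, cached, sizeof(local)); /* takes refs for the copy */

	/* ... use local.ro_rt here without the owner's lock held ... */

	route_copyin(&local, cached, sizeof(local));  /* consumes the local refs */
}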
4040 | | |
4041 | | /* |
4042 | | * route_to_gwroute will find the gateway route for a given route. |
4043 | | * |
4044 | | * If the route is down, look the route up again. |
4045 | | * If the route goes through a gateway, get the route to the gateway. |
4046 | | * If the gateway route is down, look it up again. |
4047 | | * If the route is set to reject, verify it hasn't expired. |
4048 | | * |
4049 | | * If the returned route is non-NULL, the caller is responsible for |
4050 | | * releasing the reference and unlocking the route. |
4051 | | */ |
4052 | 0 | #define senderr(e) { error = (e); goto bad; } |
4053 | | errno_t |
4054 | | route_to_gwroute(const struct sockaddr *net_dest, struct rtentry *hint0, |
4055 | | struct rtentry **out_route) |
4056 | 0 | { |
4057 | 0 | uint64_t timenow; |
4058 | 0 | struct rtentry *rt = hint0, *hint = hint0; |
4059 | 0 | errno_t error = 0; |
4060 | 0 | unsigned int ifindex; |
4061 | 0 | boolean_t gwroute; |
4062 | |
4063 | 0 | *out_route = NULL; |
4064 | |
4065 | 0 | if (rt == NULL) { |
4066 | 0 | return 0; |
4067 | 0 | } |
4068 | | |
4069 | | /* |
4070 | | * Next hop determination. Because we may involve the gateway route |
4071 | | * in addition to the original route, locking is rather complicated. |
4072 | | * The general concept is that regardless of whether the route points |
4073 | | * to the original route or to the gateway route, this routine takes |
4074 | | * an extra reference on such a route. This extra reference will be |
4075 | | * released at the end. |
4076 | | * |
4077 | | * Care must be taken to ensure that the "hint0" route never gets freed |
4078 | | * via rtfree(), since the caller may have stored it inside a struct |
4079 | | * route with a reference held for that placeholder. |
4080 | | */ |
4081 | 0 | RT_LOCK_SPIN(rt); |
4082 | 0 | ifindex = rt->rt_ifp->if_index; |
4083 | 0 | RT_ADDREF_LOCKED(rt); |
4084 | 0 | if (!(rt->rt_flags & RTF_UP)) { |
4085 | 0 | RT_REMREF_LOCKED(rt); |
4086 | 0 | RT_UNLOCK(rt); |
4087 | | /* route is down, find a new one */ |
4088 | 0 | hint = rt = rtalloc1_scoped((struct sockaddr *) |
4089 | 0 | (size_t)net_dest, 1, 0, ifindex); |
4090 | 0 | if (hint != NULL) { |
4091 | 0 | RT_LOCK_SPIN(rt); |
4092 | 0 | ifindex = rt->rt_ifp->if_index; |
4093 | 0 | } else { |
4094 | 0 | senderr(EHOSTUNREACH); |
4095 | 0 | } |
4096 | 0 | } |
4097 | | |
4098 | | /* |
4099 | | * We have a reference to "rt" by now; it will either |
4100 | | * be released or freed at the end of this routine. |
4101 | | */ |
4102 | 0 | RT_LOCK_ASSERT_HELD(rt); |
4103 | 0 | if ((gwroute = (rt->rt_flags & RTF_GATEWAY))) { |
4104 | 0 | struct rtentry *gwrt = rt->rt_gwroute; |
4105 | 0 | struct sockaddr_storage ss; |
4106 | 0 | struct sockaddr *gw = (struct sockaddr *)&ss; |
4107 | |
4108 | 0 | VERIFY(rt == hint); |
4109 | 0 | RT_ADDREF_LOCKED(hint); |
4110 | | |
4111 | | /* If there's no gateway rt, look it up */ |
4112 | 0 | if (gwrt == NULL) { |
4113 | 0 | bcopy(rt->rt_gateway, gw, MIN(sizeof(ss), |
4114 | 0 | rt->rt_gateway->sa_len)); |
4115 | 0 | gw->sa_len = MIN(sizeof(ss), rt->rt_gateway->sa_len); |
4116 | 0 | RT_UNLOCK(rt); |
4117 | 0 | goto lookup; |
4118 | 0 | } |
4119 | | /* Become a regular mutex */ |
4120 | 0 | RT_CONVERT_LOCK(rt); |
4121 | | |
4122 | | /* |
4123 | | * Take gwrt's lock while holding route's lock; |
4124 | | * this is okay since gwrt never points back |
4125 | | * to "rt", so no lock ordering issues. |
4126 | | */ |
4127 | 0 | RT_LOCK_SPIN(gwrt); |
4128 | 0 | if (!(gwrt->rt_flags & RTF_UP)) { |
4129 | 0 | rt->rt_gwroute = NULL; |
4130 | 0 | RT_UNLOCK(gwrt); |
4131 | 0 | bcopy(rt->rt_gateway, gw, MIN(sizeof(ss), |
4132 | 0 | rt->rt_gateway->sa_len)); |
4133 | 0 | gw->sa_len = MIN(sizeof(ss), rt->rt_gateway->sa_len); |
4134 | 0 | RT_UNLOCK(rt); |
4135 | 0 | rtfree(gwrt); |
4136 | 0 | lookup: |
4137 | 0 | lck_mtx_lock(rnh_lock); |
4138 | 0 | gwrt = rtalloc1_scoped_locked(gw, 1, 0, ifindex); |
4139 | |
4140 | 0 | RT_LOCK(rt); |
4141 | | /* |
4142 | | * Bail out if the route is down, no route |
4143 | | * to gateway, circular route, or if the |
4144 | | * gateway portion of "rt" has changed. |
4145 | | */ |
4146 | 0 | if (!(rt->rt_flags & RTF_UP) || gwrt == NULL || |
4147 | 0 | gwrt == rt || !equal(gw, rt->rt_gateway)) { |
4148 | 0 | if (gwrt == rt) { |
4149 | 0 | RT_REMREF_LOCKED(gwrt); |
4150 | 0 | gwrt = NULL; |
4151 | 0 | } |
4152 | 0 | VERIFY(rt == hint); |
4153 | 0 | RT_REMREF_LOCKED(hint); |
4154 | 0 | hint = NULL; |
4155 | 0 | RT_UNLOCK(rt); |
4156 | 0 | if (gwrt != NULL) { |
4157 | 0 | rtfree_locked(gwrt); |
4158 | 0 | } |
4159 | 0 | lck_mtx_unlock(rnh_lock); |
4160 | 0 | senderr(EHOSTUNREACH); |
4161 | 0 | } |
4162 | 0 | VERIFY(gwrt != NULL); |
4163 | | /* |
4164 | | * Set gateway route; callee adds ref to gwrt; |
4165 | | * gwrt has an extra ref from rtalloc1() for |
4166 | | * this routine. |
4167 | | */ |
4168 | 0 | rt_set_gwroute(rt, rt_key(rt), gwrt); |
4169 | 0 | VERIFY(rt == hint); |
4170 | 0 | RT_REMREF_LOCKED(rt); /* hint still holds a refcnt */ |
4171 | 0 | RT_UNLOCK(rt); |
4172 | 0 | lck_mtx_unlock(rnh_lock); |
4173 | 0 | rt = gwrt; |
4174 | 0 | } else { |
4175 | 0 | RT_ADDREF_LOCKED(gwrt); |
4176 | 0 | RT_UNLOCK(gwrt); |
4177 | 0 | VERIFY(rt == hint); |
4178 | 0 | RT_REMREF_LOCKED(rt); /* hint still holds a refcnt */ |
4179 | 0 | RT_UNLOCK(rt); |
4180 | 0 | rt = gwrt; |
4181 | 0 | } |
4182 | 0 | VERIFY(rt == gwrt && rt != hint); |
4183 | | |
4184 | | /* |
4185 | | * This is an opportunity to revalidate the parent route's |
4186 | | * rt_gwroute, in case it now points to a dead route entry. |
4187 | | * Parent route won't go away since the clone (hint) holds |
4188 | | * a reference to it. rt == gwrt. |
4189 | | */ |
4190 | 0 | RT_LOCK_SPIN(hint); |
4191 | 0 | if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) == |
4192 | 0 | (RTF_WASCLONED | RTF_UP)) { |
4193 | 0 | struct rtentry *prt = hint->rt_parent; |
4194 | 0 | VERIFY(prt != NULL); |
4195 | | |
4196 | 0 | RT_CONVERT_LOCK(hint); |
4197 | 0 | RT_ADDREF(prt); |
4198 | 0 | RT_UNLOCK(hint); |
4199 | 0 | rt_revalidate_gwroute(prt, rt); |
4200 | 0 | RT_REMREF(prt); |
4201 | 0 | } else { |
4202 | 0 | RT_UNLOCK(hint); |
4203 | 0 | } |
4204 | | |
4205 | | /* Clean up "hint" now; see notes above regarding hint0 */ |
4206 | 0 | if (hint == hint0) { |
4207 | 0 | RT_REMREF(hint); |
4208 | 0 | } else { |
4209 | 0 | rtfree(hint); |
4210 | 0 | } |
4211 | 0 | hint = NULL; |
4212 | | |
4213 | | /* rt == gwrt; if it is now down, give up */ |
4214 | 0 | RT_LOCK_SPIN(rt); |
4215 | 0 | if (!(rt->rt_flags & RTF_UP)) { |
4216 | 0 | RT_UNLOCK(rt); |
4217 | 0 | senderr(EHOSTUNREACH); |
4218 | 0 | } |
4219 | 0 | } |
4220 | | |
4221 | 0 | if (rt->rt_flags & RTF_REJECT) { |
4222 | 0 | VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0); |
4223 | 0 | VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0); |
4224 | 0 | timenow = net_uptime(); |
4225 | 0 | if (rt->rt_expire == 0 || timenow < rt->rt_expire) { |
4226 | 0 | RT_UNLOCK(rt); |
4227 | 0 | senderr(!gwroute ? EHOSTDOWN : EHOSTUNREACH); |
4228 | 0 | } |
4229 | 0 | } |
4230 | | |
4231 | | /* Become a regular mutex */ |
4232 | 0 | RT_CONVERT_LOCK(rt); |
4233 | | |
4234 | | /* Caller is responsible for cleaning up "rt" */ |
4235 | 0 | *out_route = rt; |
4236 | 0 | return 0; |
4237 | | |
4238 | 0 | bad: |
4239 | | /* Clean up route (either it is "rt" or "gwrt") */ |
4240 | 0 | if (rt != NULL) { |
4241 | 0 | RT_LOCK_SPIN(rt); |
4242 | 0 | if (rt == hint0) { |
4243 | 0 | RT_REMREF_LOCKED(rt); |
4244 | 0 | RT_UNLOCK(rt); |
4245 | 0 | } else { |
4246 | 0 | RT_UNLOCK(rt); |
4247 | 0 | rtfree(rt); |
4248 | 0 | } |
4249 | 0 | } |
4250 | 0 | return error; |
4251 | 0 | } |
4252 | | #undef senderr |
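A sketch of how an output path consumes route_to_gwroute(). The function below is hypothetical, but the cleanup follows the contract stated above: a non-NULL result comes back locked, with a reference held for the caller:

static errno_t
example_next_hop(const struct sockaddr *dst, struct rtentry *hint)
{
	struct rtentry *route = NULL;
	errno_t error = route_to_gwroute(dst, hint, &route);

	if (error != 0) {
		return error;              /* e.g. EHOSTUNREACH */
	}
	if (route != NULL) {
		/* ... transmit via route->rt_ifp / route->rt_gateway ... */
		RT_REMREF_LOCKED(route);   /* drop the reference held for us */
		RT_UNLOCK(route);          /* and release the lock */
	}
	return 0;
}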
4253 | | |
4254 | | void |
4255 | | rt_revalidate_gwroute(struct rtentry *rt, struct rtentry *gwrt) |
4256 | 0 | { |
4257 | 0 | VERIFY(gwrt != NULL); |
4258 | | |
4259 | 0 | RT_LOCK_SPIN(rt); |
4260 | 0 | if ((rt->rt_flags & (RTF_GATEWAY | RTF_UP)) == (RTF_GATEWAY | RTF_UP) && |
4261 | 0 | rt->rt_ifp == gwrt->rt_ifp && rt->rt_gateway->sa_family == |
4262 | 0 | rt_key(gwrt)->sa_family && (rt->rt_gwroute == NULL || |
4263 | 0 | !(rt->rt_gwroute->rt_flags & RTF_UP))) { |
4264 | 0 | boolean_t isequal; |
4265 | 0 | VERIFY(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)); |
4266 | | |
4267 | 0 | if (rt->rt_gateway->sa_family == AF_INET || |
4268 | 0 | rt->rt_gateway->sa_family == AF_INET6) { |
4269 | 0 | struct sockaddr_storage key_ss, gw_ss; |
4270 | | /* |
4271 | | * We need to compare rt_key and rt_gateway; create |
4272 | | * local copies to get rid of any ifscope association. |
4273 | | */ |
4274 | 0 | (void) sa_copy(rt_key(gwrt), &key_ss, NULL); |
4275 | 0 | (void) sa_copy(rt->rt_gateway, &gw_ss, NULL); |
4276 | |
4277 | 0 | isequal = equal(SA(&key_ss), SA(&gw_ss)); |
4278 | 0 | } else { |
4279 | 0 | isequal = equal(rt_key(gwrt), rt->rt_gateway); |
4280 | 0 | } |
4281 | | |
4282 | | /* If they are the same, update gwrt */ |
4283 | 0 | if (isequal) { |
4284 | 0 | RT_UNLOCK(rt); |
4285 | 0 | lck_mtx_lock(rnh_lock); |
4286 | 0 | RT_LOCK(rt); |
4287 | 0 | rt_set_gwroute(rt, rt_key(rt), gwrt); |
4288 | 0 | RT_UNLOCK(rt); |
4289 | 0 | lck_mtx_unlock(rnh_lock); |
4290 | 0 | } else { |
4291 | 0 | RT_UNLOCK(rt); |
4292 | 0 | } |
4293 | 0 | } else { |
4294 | 0 | RT_UNLOCK(rt); |
4295 | 0 | } |
4296 | 0 | } |
4297 | | |
4298 | | static void |
4299 | | rt_str4(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) |
4300 | 16.2k | { |
4301 | 16.2k | VERIFY(rt_key(rt)->sa_family == AF_INET); |
4302 | | |
4303 | 16.2k | if (ds != NULL) { |
4304 | 16.2k | (void) inet_ntop(AF_INET, |
4305 | 16.2k | &SIN(rt_key(rt))->sin_addr.s_addr, ds, dslen); |
4306 | 16.2k | if (dslen >= MAX_SCOPE_ADDR_STR_LEN && |
4307 | 16.2k | SINIFSCOPE(rt_key(rt))->sin_scope_id != IFSCOPE_NONE) { |
4308 | 0 | char scpstr[16]; |
4309 | |
4310 | 0 | snprintf(scpstr, sizeof(scpstr), "@%u", |
4311 | 0 | SINIFSCOPE(rt_key(rt))->sin_scope_id); |
4312 | |
4313 | 0 | strlcat(ds, scpstr, dslen); |
4314 | 0 | } |
4315 | 16.2k | } |
4316 | | |
4317 | 16.2k | if (gs != NULL) { |
4318 | 16.2k | if (rt->rt_flags & RTF_GATEWAY) { |
4319 | 0 | (void) inet_ntop(AF_INET, |
4320 | 0 | &SIN(rt->rt_gateway)->sin_addr.s_addr, gs, gslen); |
4321 | 16.2k | } else if (rt->rt_ifp != NULL) { |
4322 | 16.2k | snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit); |
4323 | 16.2k | } else { |
4324 | 0 | snprintf(gs, gslen, "%s", "link"); |
4325 | 0 | } |
4326 | 16.2k | } |
4327 | 16.2k | } |
4328 | | |
4329 | | static void |
4330 | | rt_str6(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) |
4331 | 1 | { |
4332 | 1 | VERIFY(rt_key(rt)->sa_family == AF_INET6); |
4333 | | |
4334 | 1 | if (ds != NULL) { |
4335 | 1 | (void) inet_ntop(AF_INET6, |
4336 | 1 | &SIN6(rt_key(rt))->sin6_addr, ds, dslen); |
4337 | 1 | if (dslen >= MAX_SCOPE_ADDR_STR_LEN && |
4338 | 1 | SIN6IFSCOPE(rt_key(rt))->sin6_scope_id != IFSCOPE_NONE) { |
4339 | 0 | char scpstr[16]; |
4340 | |
4341 | 0 | snprintf(scpstr, sizeof(scpstr), "@%u", |
4342 | 0 | SIN6IFSCOPE(rt_key(rt))->sin6_scope_id); |
4343 | |
4344 | 0 | strlcat(ds, scpstr, dslen); |
4345 | 0 | } |
4346 | 1 | } |
4347 | | |
4348 | 1 | if (gs != NULL) { |
4349 | 1 | if (rt->rt_flags & RTF_GATEWAY) { |
4350 | 0 | (void) inet_ntop(AF_INET6, |
4351 | 0 | &SIN6(rt->rt_gateway)->sin6_addr, gs, gslen); |
4352 | 1 | } else if (rt->rt_ifp != NULL) { |
4353 | 1 | snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit); |
4354 | 1 | } else { |
4355 | 0 | snprintf(gs, gslen, "%s", "link"); |
4356 | 0 | } |
4357 | 1 | } |
4358 | 1 | } |
4359 | | |
4360 | | void |
4361 | | rt_str(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) |
4362 | 16.2k | { |
4363 | 16.2k | switch (rt_key(rt)->sa_family) { |
4364 | 16.2k | case AF_INET: |
4365 | 16.2k | rt_str4(rt, ds, dslen, gs, gslen); |
4366 | 16.2k | break; |
4367 | 1 | case AF_INET6: |
4368 | 1 | rt_str6(rt, ds, dslen, gs, gslen); |
4369 | 1 | break; |
4370 | 0 | default: |
4371 | 0 | if (ds != NULL) { |
4372 | 0 | bzero(ds, dslen); |
4373 | 0 | } |
4374 | 0 | if (gs != NULL) { |
4375 | 0 | bzero(gs, gslen); |
4376 | 0 | } |
4377 | 0 | break; |
4378 | 16.2k | } |
4379 | 16.2k | } |
4380 | | |
4381 | | void |
4382 | | route_event_init(struct route_event *p_route_ev, struct rtentry *rt, |
4383 | | struct rtentry *gwrt, int route_ev_code) |
4384 | 914 | { |
4385 | 914 | VERIFY(p_route_ev != NULL); |
4386 | 0 | bzero(p_route_ev, sizeof(*p_route_ev)); |
4387 | | |
4388 | 914 | p_route_ev->rt = rt; |
4389 | 914 | p_route_ev->gwrt = gwrt; |
4390 | 914 | p_route_ev->route_event_code = route_ev_code; |
4391 | 914 | } |
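route_event_init() is typically paired with route_event_walktree() below to fan an event out to every route whose rt_gwroute matches a given gateway route. A hedged sketch (hypothetical wrapper; the event code is only an example, and rnh_lock must be held, as the walktree callback asserts):

static void
example_broadcast_gw_event(struct radix_node_head *rnh, struct rtentry *gwrt)
{
	struct route_event rt_ev;

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	route_event_init(&rt_ev, gwrt, NULL, ROUTE_LLENTRY_CHANGED);
	/* Enqueues a work-queue entry for each matching indirect route */
	(void) rnh->rnh_walktree(rnh, route_event_walktree, &rt_ev);
}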
4392 | | |
4393 | | static void |
4394 | | route_event_callback(void *arg) |
4395 | 11.5k | { |
4396 | 11.5k | struct route_event *p_rt_ev = (struct route_event *)arg; |
4397 | 11.5k | struct rtentry *rt = p_rt_ev->rt; |
4398 | 11.5k | eventhandler_tag evtag = p_rt_ev->evtag; |
4399 | 11.5k | int route_ev_code = p_rt_ev->route_event_code; |
4400 | | |
4401 | 11.5k | if (route_ev_code == ROUTE_EVHDLR_DEREGISTER) { |
4402 | 0 | VERIFY(evtag != NULL); |
4403 | 0 | EVENTHANDLER_DEREGISTER(&rt->rt_evhdlr_ctxt, route_event, |
4404 | 0 | evtag); |
4405 | 0 | rtfree(rt); |
4406 | 0 | return; |
4407 | 0 | } |
4408 | | |
4409 | 11.5k | EVENTHANDLER_INVOKE(&rt->rt_evhdlr_ctxt, route_event, rt_key(rt), |
4410 | 0 | route_ev_code, (struct sockaddr *)&p_rt_ev->rt_addr, |
4411 | 0 | rt->rt_flags); |
4412 | | |
4413 | | /* The code enqueuing the route event held a reference */ |
4414 | 0 | rtfree(rt); |
4415 | | /* XXX No reference is taken on gwrt */ |
4416 | 11.5k | } |
4417 | | |
4418 | | int |
4419 | | route_event_walktree(struct radix_node *rn, void *arg) |
4420 | 18.7k | { |
4421 | 18.7k | struct route_event *p_route_ev = (struct route_event *)arg; |
4422 | 18.7k | struct rtentry *rt = (struct rtentry *)rn; |
4423 | 18.7k | struct rtentry *gwrt = p_route_ev->rt; |
4424 | | |
4425 | 18.7k | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
4426 | | |
4427 | 18.7k | RT_LOCK(rt); |
4428 | | |
4429 | | /* Return if the entry is pending cleanup */ |
4430 | 18.7k | if (rt->rt_flags & RTPRF_OURS) { |
4431 | 0 | RT_UNLOCK(rt); |
4432 | 0 | return 0; |
4433 | 0 | } |
4434 | | |
4435 | | /* Return if it is not an indirect route */ |
4436 | 18.7k | if (!(rt->rt_flags & RTF_GATEWAY)) { |
4437 | 18.7k | RT_UNLOCK(rt); |
4438 | 18.7k | return 0; |
4439 | 18.7k | } |
4440 | | |
4441 | 0 | if (rt->rt_gwroute != gwrt) { |
4442 | 0 | RT_UNLOCK(rt); |
4443 | 0 | return 0; |
4444 | 0 | } |
4445 | | |
4446 | 0 | route_event_enqueue_nwk_wq_entry(rt, gwrt, p_route_ev->route_event_code, |
4447 | 0 | NULL, TRUE); |
4448 | 0 | RT_UNLOCK(rt); |
4449 | |
4450 | 0 | return 0; |
4451 | 0 | } |
4452 | | |
4453 | | struct route_event_nwk_wq_entry { |
4454 | | struct nwk_wq_entry nwk_wqe; |
4455 | | struct route_event rt_ev_arg; |
4456 | | }; |
4457 | | |
4458 | | void |
4459 | | route_event_enqueue_nwk_wq_entry(struct rtentry *rt, struct rtentry *gwrt, |
4460 | | uint32_t route_event_code, eventhandler_tag evtag, boolean_t rt_locked) |
4461 | 11.5k | { |
4462 | 11.5k | struct route_event_nwk_wq_entry *p_rt_ev = NULL; |
4463 | 11.5k | struct sockaddr *p_gw_saddr = NULL; |
4464 | | |
4465 | 11.5k | MALLOC(p_rt_ev, struct route_event_nwk_wq_entry *, |
4466 | 11.5k | sizeof(struct route_event_nwk_wq_entry), |
4467 | 11.5k | M_NWKWQ, M_WAITOK | M_ZERO); |
4468 | | |
4469 | | /*
4470 | | * If the intent is to de-register, don't take a
4471 | | * reference; route event registration already holds
4472 | | * a reference on the route.
4473 | | */
4474 | 11.5k | if (route_event_code != ROUTE_EVHDLR_DEREGISTER) { |
4475 | | /* The reference is released by route_event_callback */ |
4476 | 11.5k | if (rt_locked) { |
4477 | 11.5k | RT_ADDREF_LOCKED(rt); |
4478 | 11.5k | } else { |
4479 | 0 | RT_ADDREF(rt); |
4480 | 0 | } |
4481 | 11.5k | } |
4482 | | |
4483 | 11.5k | p_rt_ev->rt_ev_arg.rt = rt; |
4484 | 11.5k | p_rt_ev->rt_ev_arg.gwrt = gwrt; |
4485 | 11.5k | p_rt_ev->rt_ev_arg.evtag = evtag; |
4486 | | |
4487 | 11.5k | if (gwrt != NULL) { |
4488 | 0 | p_gw_saddr = gwrt->rt_gateway; |
4489 | 11.5k | } else { |
4490 | 11.5k | p_gw_saddr = rt->rt_gateway; |
4491 | 11.5k | } |
4492 | | |
4493 | 11.5k | VERIFY(p_gw_saddr->sa_len <= sizeof(p_rt_ev->rt_ev_arg.rt_addr)); |
4494 | 0 | bcopy(p_gw_saddr, &(p_rt_ev->rt_ev_arg.rt_addr), p_gw_saddr->sa_len); |
4495 | | |
4496 | 11.5k | p_rt_ev->rt_ev_arg.route_event_code = route_event_code; |
4497 | 11.5k | p_rt_ev->nwk_wqe.func = route_event_callback; |
4498 | 11.5k | p_rt_ev->nwk_wqe.is_arg_managed = TRUE; |
4499 | 11.5k | p_rt_ev->nwk_wqe.arg = &p_rt_ev->rt_ev_arg; |
4500 | 11.5k | nwk_wq_enqueue((struct nwk_wq_entry*)p_rt_ev); |
4501 | 11.5k | } |
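
The enqueue path copies the gateway sockaddr by value into the work-queue entry, since rt_gateway may be reallocated or freed before the callback runs, and it relies on nwk_wqe being the first member of route_event_nwk_wq_entry so the cast passed to nwk_wq_enqueue() addresses the whole allocation (is_arg_managed lets the work queue free it). A small standalone sketch of that first-member containment pattern, illustrative only:

/*
 * Hypothetical sketch of the containment pattern above: because the
 * embedded entry is the first member, a pointer to the container and
 * a pointer to the entry are interchangeable via a cast.
 */
#include <stddef.h>
#include <assert.h>

struct entry { void (*func)(void *); void *arg; };
struct container { struct entry e; int payload; };

int
main(void)
{
	struct container c;

	/* first-member layout guarantees offset 0 */
	assert(offsetof(struct container, e) == 0);
	assert((void *)&c == (void *)&c.e);
	return 0;
}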
4502 | | |
4503 | | const char * |
4504 | | route_event2str(int route_event) |
4505 | 0 | { |
4506 | 0 | const char *route_event_str = "ROUTE_EVENT_UNKNOWN"; |
4507 | 0 | switch (route_event) { |
4508 | 0 | case ROUTE_STATUS_UPDATE: |
4509 | 0 | route_event_str = "ROUTE_STATUS_UPDATE"; |
4510 | 0 | break; |
4511 | 0 | case ROUTE_ENTRY_REFRESH: |
4512 | 0 | route_event_str = "ROUTE_ENTRY_REFRESH"; |
4513 | 0 | break; |
4514 | 0 | case ROUTE_ENTRY_DELETED: |
4515 | 0 | route_event_str = "ROUTE_ENTRY_DELETED"; |
4516 | 0 | break; |
4517 | 0 | case ROUTE_LLENTRY_RESOLVED: |
4518 | 0 | route_event_str = "ROUTE_LLENTRY_RESOLVED"; |
4519 | 0 | break; |
4520 | 0 | case ROUTE_LLENTRY_UNREACH: |
4521 | 0 | route_event_str = "ROUTE_LLENTRY_UNREACH"; |
4522 | 0 | break; |
4523 | 0 | case ROUTE_LLENTRY_CHANGED: |
4524 | 0 | route_event_str = "ROUTE_LLENTRY_CHANGED"; |
4525 | 0 | break; |
4526 | 0 | case ROUTE_LLENTRY_STALE: |
4527 | 0 | route_event_str = "ROUTE_LLENTRY_STALE"; |
4528 | 0 | break; |
4529 | 0 | case ROUTE_LLENTRY_TIMEDOUT: |
4530 | 0 | route_event_str = "ROUTE_LLENTRY_TIMEDOUT"; |
4531 | 0 | break; |
4532 | 0 | case ROUTE_LLENTRY_DELETED: |
4533 | 0 | route_event_str = "ROUTE_LLENTRY_DELETED"; |
4534 | 0 | break; |
4535 | 0 | case ROUTE_LLENTRY_EXPIRED: |
4536 | 0 | route_event_str = "ROUTE_LLENTRY_EXPIRED"; |
4537 | 0 | break; |
4538 | 0 | case ROUTE_LLENTRY_PROBED: |
4539 | 0 | route_event_str = "ROUTE_LLENTRY_PROBED"; |
4540 | 0 | break; |
4541 | 0 | case ROUTE_EVHDLR_DEREGISTER: |
4542 | 0 | route_event_str = "ROUTE_EVHDLR_DEREGISTER"; |
4543 | 0 | break; |
4544 | 0 | default: |
4545 | | /* Init'd to ROUTE_EVENT_UNKNOWN */ |
4546 | 0 | break; |
4547 | 0 | } |
4548 | 0 | return route_event_str; |
4549 | 0 | } |
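
route_event2str() maps event codes to stable strings for diagnostics, defaulting to "ROUTE_EVENT_UNKNOWN" for anything unrecognized. A trivial, hypothetical logging fragment:

/*
 * Hypothetical logging fragment: render an event code for a kernel
 * log line; unknown codes fall back to "ROUTE_EVENT_UNKNOWN".
 */
printf("%s: route event %s\n", __func__,
    route_event2str(ROUTE_LLENTRY_RESOLVED));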
4550 | | |
4551 | | int |
4552 | | route_op_entitlement_check(struct socket *so, |
4553 | | kauth_cred_t cred, |
4554 | | int route_op_type, |
4555 | | boolean_t allow_root) |
4556 | 0 | { |
4557 | 0 | if (so != NULL) { |
4558 | 0 | if (route_op_type == ROUTE_OP_READ) { |
4559 | | /*
4560 | | * If needed, we can later extend this for more
4561 | | * granular entitlements and return a bit set of
4562 | | * allowed accesses.
4563 | | */
4564 | 0 | if (soopt_cred_check(so, PRIV_NET_RESTRICTED_ROUTE_NC_READ, |
4565 | 0 | allow_root, false) == 0) { |
4566 | 0 | return 0; |
4567 | 0 | } else { |
4568 | 0 | return -1; |
4569 | 0 | } |
4570 | 0 | } |
4571 | 0 | } else if (cred != NULL) { |
4572 | 0 | uid_t uid = kauth_cred_getuid(cred); |
4573 | | |
4574 | | /* uid is 0 for root */ |
4575 | 0 | if (uid != 0 || !allow_root) { |
4576 | 0 | if (route_op_type == ROUTE_OP_READ) { |
4577 | 0 | if (priv_check_cred(cred, |
4578 | 0 | PRIV_NET_RESTRICTED_ROUTE_NC_READ, 0) == 0) { |
4579 | 0 | return 0; |
4580 | 0 | } else { |
4581 | 0 | return -1; |
4582 | 0 | } |
4583 | 0 | } |
4584 | 0 | } |
4585 | 0 | } |
4586 | 0 | return -1; |
4587 | 0 | } |
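
route_op_entitlement_check() returns 0 only when the socket or credential passes the PRIV_NET_RESTRICTED_ROUTE_NC_READ privilege check; every other path, including any write-type operation, returns -1. A hedged caller fragment for the socket path, mirroring how a restricted routing read might be gated:

/*
 * Hypothetical caller fragment (kernel context): deny a restricted
 * neighbor-cache read unless the socket holds the entitlement;
 * allow_root == TRUE lets the superuser through the socket path.
 */
if (route_op_entitlement_check(so, NULL, ROUTE_OP_READ, TRUE) != 0) {
	return EPERM;
}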