/src/frr/zebra/kernel_netlink.c
Line | Count | Source |
1 | | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | | /* Kernel communication using netlink interface. |
3 | | * Copyright (C) 1999 Kunihiro Ishiguro |
4 | | */ |
5 | | |
6 | | #include <zebra.h> |
7 | | |
8 | | #if defined(HANDLE_NETLINK_FUZZING) |
9 | | #include <stdio.h> |
10 | | #include <string.h> |
11 | | #include "libfrr.h" |
12 | | #endif /* HANDLE_NETLINK_FUZZING */ |
13 | | |
14 | | #ifdef HAVE_NETLINK |
15 | | |
16 | | #include "linklist.h" |
17 | | #include "if.h" |
18 | | #include "log.h" |
19 | | #include "prefix.h" |
20 | | #include "connected.h" |
21 | | #include "table.h" |
22 | | #include "memory.h" |
23 | | #include "rib.h" |
24 | | #include "frrevent.h" |
25 | | #include "privs.h" |
26 | | #include "nexthop.h" |
27 | | #include "vrf.h" |
28 | | #include "mpls.h" |
29 | | #include "lib_errors.h" |
30 | | #include "hash.h" |
31 | | |
32 | | #include "zebra/zebra_router.h" |
33 | | #include "zebra/zebra_ns.h" |
34 | | #include "zebra/zebra_vrf.h" |
35 | | #include "zebra/rt.h" |
36 | | #include "zebra/debug.h" |
37 | | #include "zebra/kernel_netlink.h" |
38 | | #include "zebra/rt_netlink.h" |
39 | | #include "zebra/if_netlink.h" |
40 | | #include "zebra/rule_netlink.h" |
41 | | #include "zebra/tc_netlink.h" |
42 | | #include "zebra/netconf_netlink.h" |
43 | | #include "zebra/zebra_errors.h" |
44 | | |
/* Fallback definition for headers that do not provide SO_RCVBUFFORCE. */
#ifndef SO_RCVBUFFORCE
#define SO_RCVBUFFORCE  (33)
#endif

/* Hack for GNU libc version 2. */
#ifndef MSG_TRUNC
#define MSG_TRUNC      0x20
#endif /* MSG_TRUNC */

/*
 * Address of the first byte after netlink message 'nmsg', i.e. the message
 * start plus its aligned nlmsg_len, typed as an rtattr pointer.
 */
#ifndef NLMSG_TAIL
#define NLMSG_TAIL(nmsg)                                                       \
	((struct rtattr *)(((uint8_t *)(nmsg))                                 \
			   + NLMSG_ALIGN((nmsg)->nlmsg_len)))
#endif

/*
 * Address of the first byte after attribute 'rta', i.e. the attribute start
 * plus its aligned rta_len.
 */
#ifndef RTA_TAIL
#define RTA_TAIL(rta)                                                          \
	((struct rtattr *)(((uint8_t *)(rta)) + RTA_ALIGN((rta)->rta_len)))
#endif

/* Fallback definition for headers that do not provide RTNL_FAMILY_IP6MR. */
#ifndef RTNL_FAMILY_IP6MR
#define RTNL_FAMILY_IP6MR 129
#endif

/* Fallback definition for headers that do not provide RTPROT_MROUTED. */
#ifndef RTPROT_MROUTED
#define RTPROT_MROUTED 17
#endif

/* Default size of the batch buffer: 16 netlink packet buffers. */
#define NL_DEFAULT_BATCH_BUFSIZE (16 * NL_PKT_BUF_SIZE)

/*
 * We limit the batch's size to a number smaller than the length of the
 * underlying buffer since the last message that wouldn't fit the batch would go
 * over the upper boundary and then it would have to be encoded again into a new
 * buffer. If the difference between the limit and the length of the buffer is
 * big enough (bigger than the biggest Netlink message) then this situation
 * won't occur.
 */
#define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
84 | | |
/*
 * Netlink message type -> printable name; consulted by nl_msg_type_to_str().
 * The table is terminated by a zeroed entry.
 */
static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
					   {RTM_DELROUTE, "RTM_DELROUTE"},
					   {RTM_GETROUTE, "RTM_GETROUTE"},
					   {RTM_NEWLINK, "RTM_NEWLINK"},
					   {RTM_SETLINK, "RTM_SETLINK"},
					   {RTM_DELLINK, "RTM_DELLINK"},
					   {RTM_GETLINK, "RTM_GETLINK"},
					   {RTM_NEWADDR, "RTM_NEWADDR"},
					   {RTM_DELADDR, "RTM_DELADDR"},
					   {RTM_GETADDR, "RTM_GETADDR"},
					   {RTM_NEWNEIGH, "RTM_NEWNEIGH"},
					   {RTM_DELNEIGH, "RTM_DELNEIGH"},
					   {RTM_GETNEIGH, "RTM_GETNEIGH"},
					   {RTM_NEWRULE, "RTM_NEWRULE"},
					   {RTM_DELRULE, "RTM_DELRULE"},
					   {RTM_GETRULE, "RTM_GETRULE"},
					   {RTM_NEWNEXTHOP, "RTM_NEWNEXTHOP"},
					   {RTM_DELNEXTHOP, "RTM_DELNEXTHOP"},
					   {RTM_GETNEXTHOP, "RTM_GETNEXTHOP"},
					   {RTM_NEWNETCONF, "RTM_NEWNETCONF"},
					   {RTM_DELNETCONF, "RTM_DELNETCONF"},
					   {RTM_NEWTUNNEL, "RTM_NEWTUNNEL"},
					   {RTM_DELTUNNEL, "RTM_DELTUNNEL"},
					   {RTM_GETTUNNEL, "RTM_GETTUNNEL"},
					   {RTM_NEWQDISC, "RTM_NEWQDISC"},
					   {RTM_DELQDISC, "RTM_DELQDISC"},
					   {RTM_GETQDISC, "RTM_GETQDISC"},
					   {RTM_NEWTCLASS, "RTM_NEWTCLASS"},
					   {RTM_DELTCLASS, "RTM_DELTCLASS"},
					   {RTM_GETTCLASS, "RTM_GETTCLASS"},
					   {RTM_NEWTFILTER, "RTM_NEWTFILTER"},
					   {RTM_DELTFILTER, "RTM_DELTFILTER"},
					   {RTM_GETTFILTER, "RTM_GETTFILTER"},
					   {RTM_NEWVLAN, "RTM_NEWVLAN"},
					   {RTM_DELVLAN, "RTM_DELVLAN"},
					   {RTM_GETVLAN, "RTM_GETVLAN"},
					   {0}};
122 | | |
/*
 * Route protocol (RTPROT_*) -> printable name; consulted by
 * nl_rtproto_to_str(). Note that RTPROT_STATIC and RTPROT_ZSTATIC both map
 * to "static". The table is terminated by a zeroed entry.
 */
static const struct message rtproto_str[] = {
	{RTPROT_REDIRECT, "redirect"},
	{RTPROT_KERNEL, "kernel"},
	{RTPROT_BOOT, "boot"},
	{RTPROT_STATIC, "static"},
	{RTPROT_GATED, "GateD"},
	{RTPROT_RA, "router advertisement"},
	{RTPROT_MRT, "MRT"},
	{RTPROT_ZEBRA, "Zebra"},
#ifdef RTPROT_BIRD
	{RTPROT_BIRD, "BIRD"},
#endif /* RTPROT_BIRD */
	{RTPROT_MROUTED, "mroute"},
	{RTPROT_BGP, "BGP"},
	{RTPROT_OSPF, "OSPF"},
	{RTPROT_ISIS, "IS-IS"},
	{RTPROT_RIP, "RIP"},
	{RTPROT_RIPNG, "RIPNG"},
	{RTPROT_ZSTATIC, "static"},
	{0}};
143 | | |
/*
 * Address family -> printable name; consulted by nl_family_to_str().
 * The table is terminated by a zeroed entry.
 */
static const struct message family_str[] = {{AF_INET, "ipv4"},
					    {AF_INET6, "ipv6"},
					    {AF_BRIDGE, "bridge"},
					    {RTNL_FAMILY_IPMR, "ipv4MR"},
					    {RTNL_FAMILY_IP6MR, "ipv6MR"},
					    {0}};
150 | | |
/*
 * Route type (RTN_*) -> printable name; consulted by nl_rttype_to_str().
 * The table is terminated by a zeroed entry.
 */
static const struct message rttype_str[] = {{RTN_UNSPEC, "none"},
					    {RTN_UNICAST, "unicast"},
					    {RTN_LOCAL, "local"},
					    {RTN_BROADCAST, "broadcast"},
					    {RTN_ANYCAST, "anycast"},
					    {RTN_MULTICAST, "multicast"},
					    {RTN_BLACKHOLE, "blackhole"},
					    {RTN_UNREACHABLE, "unreachable"},
					    {RTN_PROHIBIT, "prohibited"},
					    {RTN_THROW, "throw"},
					    {RTN_NAT, "nat"},
					    {RTN_XRESOLVE, "resolver"},
					    {0}};
164 | | |
/* Event loop defined outside this file. */
extern struct event_loop *master;

/* Privilege state used with frr_with_privs() for privileged socket calls. */
extern struct zebra_privs_t zserv_privs;

/* Memory type used for the per-socket receive buffers (see netlink_socket()). */
DEFINE_MTYPE_STATIC(ZEBRA, NL_BUF, "Zebra Netlink buffers");

/* Hashtable and mutex to allow lookup of nlsock structs by socket/fd value.
 * We have both the main and dplane pthreads using these structs, so we have
 * to protect the hash with a lock.
 */
static struct hash *nlsock_hash;
pthread_mutex_t nlsock_mutex;

/* Lock and unlock wrappers for nlsock hash */
#define NLSOCK_LOCK() pthread_mutex_lock(&nlsock_mutex)
#define NLSOCK_UNLOCK() pthread_mutex_unlock(&nlsock_mutex)

/*
 * NOTE(review): presumably the transmit buffer shared by batched sends and
 * its current size; the code that allocates and uses them is not visible in
 * this part of the file -- confirm.
 */
size_t nl_batch_tx_bufsize;
char *nl_batch_tx_buf;

/*
 * Configured batch buffer size and send threshold. They are read by
 * netlink_config_write_helper() and written by
 * netlink_set_batch_buffer_size(), in both cases with relaxed atomics.
 */
_Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE;
_Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
187 | | |
/*
 * State of one batch of netlink messages.
 *
 * NOTE(review): the code that fills in and consumes this struct is outside
 * the visible part of the file; the per-field notes below are inferred from
 * the field names and should be confirmed against that code.
 */
struct nl_batch {
	void *buf;     /* presumably the start of the batch buffer */
	size_t bufsiz; /* presumably the total size of 'buf' in bytes */
	size_t limit;  /* presumably the send threshold for this batch */

	void *buf_head; /* presumably the current write position in 'buf' */
	size_t curlen;  /* presumably the number of bytes currently queued */
	size_t msgcnt;  /* presumably the number of messages currently queued */

	const struct zebra_dplane_info *zns;

	struct dplane_ctx_list_head ctx_list;

	/*
	 * Pointer to the queue of completed contexts outbound back
	 * towards the dataplane module.
	 */
	struct dplane_ctx_list_head *ctx_out_q;
};
207 | | |
208 | | int netlink_config_write_helper(struct vty *vty) |
209 | 0 | { |
210 | 0 | uint32_t size = |
211 | 0 | atomic_load_explicit(&nl_batch_bufsize, memory_order_relaxed); |
212 | 0 | uint32_t threshold = atomic_load_explicit(&nl_batch_send_threshold, |
213 | 0 | memory_order_relaxed); |
214 | |
|
215 | 0 | if (size != NL_DEFAULT_BATCH_BUFSIZE |
216 | 0 | || threshold != NL_DEFAULT_BATCH_SEND_THRESHOLD) |
217 | 0 | vty_out(vty, "zebra kernel netlink batch-tx-buf %u %u\n", size, |
218 | 0 | threshold); |
219 | |
|
220 | 0 | if (if_netlink_frr_protodown_r_bit_is_set()) |
221 | 0 | vty_out(vty, "zebra protodown reason-bit %u\n", |
222 | 0 | if_netlink_get_frr_protodown_r_bit()); |
223 | |
|
224 | 0 | return 0; |
225 | 0 | } |
226 | | |
227 | | void netlink_set_batch_buffer_size(uint32_t size, uint32_t threshold, bool set) |
228 | 0 | { |
229 | 0 | if (!set) { |
230 | 0 | size = NL_DEFAULT_BATCH_BUFSIZE; |
231 | 0 | threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD; |
232 | 0 | } |
233 | |
|
234 | 0 | atomic_store_explicit(&nl_batch_bufsize, size, memory_order_relaxed); |
235 | 0 | atomic_store_explicit(&nl_batch_send_threshold, threshold, |
236 | 0 | memory_order_relaxed); |
237 | 0 | } |
238 | | |
239 | | int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns_id, int startup) |
240 | 0 | { |
241 | | /* |
242 | | * This is an error condition that must be handled during |
243 | | * development. |
244 | | * |
245 | | * The netlink_talk_filter function is used for communication |
246 | | * down the netlink_cmd pipe and we are expecting |
247 | | * an ack being received. So if we get here |
248 | | * then we did not receive the ack and instead |
249 | | * received some other message in an unexpected |
250 | | * way. |
251 | | */ |
252 | 0 | zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u", __func__, |
253 | 0 | h->nlmsg_type, nl_msg_type_to_str(h->nlmsg_type), ns_id); |
254 | 0 | return 0; |
255 | 0 | } |
256 | | |
257 | | static int netlink_recvbuf(struct nlsock *nl, uint32_t newsize) |
258 | 0 | { |
259 | 0 | uint32_t oldsize; |
260 | 0 | socklen_t newlen = sizeof(newsize); |
261 | 0 | socklen_t oldlen = sizeof(oldsize); |
262 | 0 | int ret; |
263 | 0 |
|
264 | 0 | ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen); |
265 | 0 | if (ret < 0) { |
266 | 0 | flog_err_sys(EC_LIB_SOCKET, |
267 | 0 | "Can't get %s receive buffer size: %s", nl->name, |
268 | 0 | safe_strerror(errno)); |
269 | 0 | return -1; |
270 | 0 | } |
271 | 0 |
|
272 | 0 | /* Try force option (linux >= 2.6.14) and fall back to normal set */ |
273 | 0 | frr_with_privs(&zserv_privs) { |
274 | 0 | ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUFFORCE, |
275 | 0 | &rcvbufsize, sizeof(rcvbufsize)); |
276 | 0 | } |
277 | 0 | if (ret < 0) |
278 | 0 | ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsize, |
279 | 0 | sizeof(rcvbufsize)); |
280 | 0 | if (ret < 0) { |
281 | 0 | flog_err_sys(EC_LIB_SOCKET, |
282 | 0 | "Can't set %s receive buffer size: %s", nl->name, |
283 | 0 | safe_strerror(errno)); |
284 | 0 | return -1; |
285 | 0 | } |
286 | 0 |
|
287 | 0 | ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen); |
288 | 0 | if (ret < 0) { |
289 | 0 | flog_err_sys(EC_LIB_SOCKET, |
290 | 0 | "Can't get %s receive buffer size: %s", nl->name, |
291 | 0 | safe_strerror(errno)); |
292 | 0 | return -1; |
293 | 0 | } |
294 | 0 | return 0; |
295 | 0 | } |
296 | | |
297 | | static const char *group2str(uint32_t group) |
298 | 0 | { |
299 | 0 | switch (group) { |
300 | 0 | case RTNLGRP_TUNNEL: |
301 | 0 | return "RTNLGRP_TUNNEL"; |
302 | 0 | default: |
303 | 0 | return "UNKNOWN"; |
304 | 0 | } |
305 | 0 | } |
306 | | |
307 | | /* Make socket for Linux netlink interface. */ |
308 | | static int netlink_socket(struct nlsock *nl, unsigned long groups, |
309 | | uint32_t ext_groups[], uint8_t ext_group_size, |
310 | | ns_id_t ns_id) |
311 | 0 | { |
312 | 0 | int ret; |
313 | 0 | struct sockaddr_nl snl; |
314 | 0 | int sock; |
315 | 0 | int namelen; |
316 | 0 |
|
317 | 0 | frr_with_privs(&zserv_privs) { |
318 | 0 | sock = ns_socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE, ns_id); |
319 | 0 | if (sock < 0) { |
320 | 0 | zlog_err("Can't open %s socket: %s", nl->name, |
321 | 0 | safe_strerror(errno)); |
322 | 0 | return -1; |
323 | 0 | } |
324 | 0 |
|
325 | 0 | memset(&snl, 0, sizeof(snl)); |
326 | 0 | snl.nl_family = AF_NETLINK; |
327 | 0 | snl.nl_groups = groups; |
328 | 0 |
|
329 | 0 | if (ext_group_size) { |
330 | 0 | uint8_t i; |
331 | 0 |
|
332 | 0 | for (i = 0; i < ext_group_size; i++) { |
333 | 0 | #if defined SOL_NETLINK |
334 | 0 | ret = setsockopt(sock, SOL_NETLINK, |
335 | 0 | NETLINK_ADD_MEMBERSHIP, |
336 | 0 | &ext_groups[i], |
337 | 0 | sizeof(ext_groups[i])); |
338 | 0 | if (ret < 0) { |
339 | 0 | zlog_notice( |
340 | 0 | "can't setsockopt NETLINK_ADD_MEMBERSHIP for group %s(%u), this linux kernel does not support it: %s(%d)", |
341 | 0 | group2str(ext_groups[i]), |
342 | 0 | ext_groups[i], |
343 | 0 | safe_strerror(errno), errno); |
344 | 0 | } |
345 | 0 | #else |
346 | 0 | zlog_notice( |
347 | 0 | "Unable to use NETLINK_ADD_MEMBERSHIP via SOL_NETLINK for %s(%u) since the linux kernel does not support the socket option", |
348 | 0 | group2str(ext_groups[i]), |
349 | 0 | ext_groups[i]); |
350 | 0 | #endif |
351 | 0 | } |
352 | 0 | } |
353 | 0 |
|
354 | 0 | /* Bind the socket to the netlink structure for anything. */ |
355 | 0 | ret = bind(sock, (struct sockaddr *)&snl, sizeof(snl)); |
356 | 0 | } |
357 | 0 |
|
358 | 0 | if (ret < 0) { |
359 | 0 | zlog_err("Can't bind %s socket to group 0x%x: %s", nl->name, |
360 | 0 | snl.nl_groups, safe_strerror(errno)); |
361 | 0 | close(sock); |
362 | 0 | return -1; |
363 | 0 | } |
364 | 0 |
|
365 | 0 | /* multiple netlink sockets will have different nl_pid */ |
366 | 0 | namelen = sizeof(snl); |
367 | 0 | ret = getsockname(sock, (struct sockaddr *)&snl, (socklen_t *)&namelen); |
368 | 0 | if (ret < 0 || namelen != sizeof(snl)) { |
369 | 0 | flog_err_sys(EC_LIB_SOCKET, "Can't get %s socket name: %s", |
370 | 0 | nl->name, safe_strerror(errno)); |
371 | 0 | close(sock); |
372 | 0 | return -1; |
373 | 0 | } |
374 | 0 |
|
375 | 0 | nl->snl = snl; |
376 | 0 | nl->sock = sock; |
377 | 0 | nl->buflen = NL_RCV_PKT_BUF_SIZE; |
378 | 0 | nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen); |
379 | 0 |
|
380 | 0 | return ret; |
381 | 0 | } |
382 | | |
383 | | /* |
384 | | * Dispatch an incoming netlink message; used by the zebra main pthread's |
385 | | * netlink event reader. |
386 | | */ |
387 | | static int netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id, |
388 | | int startup) |
389 | 0 | { |
390 | | /* |
391 | | * When we handle new message types here |
392 | | * because we are starting to install them |
393 | | * then lets check the netlink_install_filter |
394 | | * and see if we should add the corresponding |
395 | | * allow through entry there. |
396 | | * Probably not needed to do but please |
397 | | * think about it. |
398 | | */ |
399 | 0 | switch (h->nlmsg_type) { |
400 | 0 | case RTM_NEWROUTE: |
401 | 0 | return netlink_route_change(h, ns_id, startup); |
402 | 0 | case RTM_DELROUTE: |
403 | 0 | return netlink_route_change(h, ns_id, startup); |
404 | 0 | case RTM_NEWLINK: |
405 | 0 | return netlink_link_change(h, ns_id, startup); |
406 | 0 | case RTM_DELLINK: |
407 | 0 | return netlink_link_change(h, ns_id, startup); |
408 | 0 | case RTM_NEWNEIGH: |
409 | 0 | case RTM_DELNEIGH: |
410 | 0 | case RTM_GETNEIGH: |
411 | 0 | return netlink_neigh_change(h, ns_id); |
412 | 0 | case RTM_NEWRULE: |
413 | 0 | return netlink_rule_change(h, ns_id, startup); |
414 | 0 | case RTM_DELRULE: |
415 | 0 | return netlink_rule_change(h, ns_id, startup); |
416 | 0 | case RTM_NEWNEXTHOP: |
417 | 0 | return netlink_nexthop_change(h, ns_id, startup); |
418 | 0 | case RTM_DELNEXTHOP: |
419 | 0 | return netlink_nexthop_change(h, ns_id, startup); |
420 | 0 | case RTM_NEWQDISC: |
421 | 0 | case RTM_DELQDISC: |
422 | 0 | return netlink_qdisc_change(h, ns_id, startup); |
423 | 0 | case RTM_NEWTCLASS: |
424 | 0 | case RTM_DELTCLASS: |
425 | 0 | return netlink_tclass_change(h, ns_id, startup); |
426 | 0 | case RTM_NEWTFILTER: |
427 | 0 | case RTM_DELTFILTER: |
428 | 0 | return netlink_tfilter_change(h, ns_id, startup); |
429 | 0 | case RTM_NEWVLAN: |
430 | 0 | return netlink_vlan_change(h, ns_id, startup); |
431 | 0 | case RTM_DELVLAN: |
432 | 0 | return netlink_vlan_change(h, ns_id, startup); |
433 | | |
434 | | /* Messages handled in the dplane thread */ |
435 | 0 | case RTM_NEWADDR: |
436 | 0 | case RTM_DELADDR: |
437 | 0 | case RTM_NEWNETCONF: |
438 | 0 | case RTM_DELNETCONF: |
439 | 0 | case RTM_NEWTUNNEL: |
440 | 0 | case RTM_DELTUNNEL: |
441 | 0 | case RTM_GETTUNNEL: |
442 | 0 | return 0; |
443 | 0 | default: |
444 | | /* |
445 | | * If we have received this message then |
446 | | * we have made a mistake during development |
447 | | * and we need to write some code to handle |
448 | | * this message type or not ask for |
449 | | * it to be sent up to us |
450 | | */ |
451 | 0 | flog_err(EC_ZEBRA_UNKNOWN_NLMSG, |
452 | 0 | "Unknown netlink nlmsg_type %s(%d) vrf %u", |
453 | 0 | nl_msg_type_to_str(h->nlmsg_type), h->nlmsg_type, |
454 | 0 | ns_id); |
455 | 0 | break; |
456 | 0 | } |
457 | 0 | return 0; |
458 | 0 | } |
459 | | |
460 | | /* |
461 | | * Dispatch an incoming netlink message; used by the dataplane pthread's |
462 | | * netlink event reader code. |
463 | | */ |
464 | | static int dplane_netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id, |
465 | | int startup) |
466 | 0 | { |
467 | | /* |
468 | | * Dispatch the incoming messages that the dplane pthread handles |
469 | | */ |
470 | 0 | switch (h->nlmsg_type) { |
471 | 0 | case RTM_NEWADDR: |
472 | 0 | case RTM_DELADDR: |
473 | 0 | return netlink_interface_addr_dplane(h, ns_id, startup); |
474 | | |
475 | 0 | case RTM_NEWNETCONF: |
476 | 0 | case RTM_DELNETCONF: |
477 | 0 | return netlink_netconf_change(h, ns_id, startup); |
478 | | |
479 | | /* TODO -- other messages for the dplane socket and pthread */ |
480 | | |
481 | 0 | case RTM_NEWLINK: |
482 | 0 | case RTM_DELLINK: |
483 | |
|
484 | 0 | default: |
485 | 0 | break; |
486 | 0 | } |
487 | | |
488 | 0 | return 0; |
489 | 0 | } |
490 | | |
491 | | #if defined(HANDLE_NETLINK_FUZZING) |
492 | | /* Using globals here to avoid adding function parameters */ |
493 | | |
494 | | /* Keep distinct filenames for netlink fuzzy collection */ |
495 | | static unsigned int netlink_file_counter = 1; |
496 | | |
497 | | /** |
498 | | * netlink_write_incoming() - Writes all data received from netlink to a file |
499 | | * @buf: Data from netlink. |
500 | | * @size: Size of data. |
501 | | * @counter: Counter for keeping filenames distinct. |
502 | | */ |
503 | | static void netlink_write_incoming(const char *buf, const unsigned int size, |
504 | | unsigned int counter) |
505 | | { |
506 | | char fname[MAXPATHLEN]; |
507 | | FILE *f; |
508 | | |
509 | | snprintf(fname, MAXPATHLEN, "%s/%s_%u", frr_vtydir, "netlink", counter); |
510 | | frr_with_privs(&zserv_privs) { |
511 | | f = fopen(fname, "w"); |
512 | | } |
513 | | if (f) { |
514 | | fwrite(buf, 1, size, f); |
515 | | fclose(f); |
516 | | } |
517 | | } |
518 | | |
519 | | #endif /* HANDLE_NETLINK_FUZZING */ |
520 | | |
521 | | static void kernel_read(struct event *thread) |
522 | 0 | { |
523 | 0 | struct zebra_ns *zns = (struct zebra_ns *)EVENT_ARG(thread); |
524 | 0 | struct zebra_dplane_info dp_info; |
525 | 0 |
|
526 | 0 | /* Capture key info from ns struct */ |
527 | 0 | zebra_dplane_info_from_zns(&dp_info, zns, false); |
528 | 0 |
|
529 | 0 | netlink_parse_info(netlink_information_fetch, &zns->netlink, &dp_info, |
530 | 0 | 5, false); |
531 | 0 |
|
532 | 0 | event_add_read(zrouter.master, kernel_read, zns, zns->netlink.sock, |
533 | 0 | &zns->t_netlink); |
534 | 0 | } |
535 | | |
536 | | /* |
537 | | * Called by the dplane pthread to read incoming OS messages and dispatch them. |
538 | | */ |
539 | | int kernel_dplane_read(struct zebra_dplane_info *info) |
540 | 0 | { |
541 | 0 | struct nlsock *nl = kernel_netlink_nlsock_lookup(info->sock); |
542 | |
|
543 | 0 | netlink_parse_info(dplane_netlink_information_fetch, nl, info, 5, |
544 | 0 | false); |
545 | |
|
546 | 0 | return 0; |
547 | 0 | } |
548 | | |
/*
 * Filter out messages from self that occur on listener socket,
 * caused by our actions on the command socket(s)
 *
 * When we add new Netlink message types we probably
 * do not need to add them here, as we are filtering
 * on the routes we actually care to receive (which is rarer
 * than the normal course of operations). We are intentionally
 * allowing some messages from ourselves through
 * ( I'm looking at you Interface based netlink messages )
 * so that we only have to write one way to handle incoming
 * address add/delete and xxxNETCONF changes.
 *
 * sock       - socket the filter is attached to with SO_ATTACH_FILTER
 * pid        - first nlmsg_pid value the filter matches against
 * dplane_pid - second nlmsg_pid value the filter matches against
 *
 * A failure to attach the filter is logged; nothing is returned.
 */
static void netlink_install_filter(int sock, uint32_t pid, uint32_t dplane_pid)
{
	/*
	 * BPF_JUMP instructions and where you jump to are based upon
	 * 0 as being the next statement.  So count from 0.  Writing
	 * this down because every time I look at this I have to
	 * re-remember it.
	 */
	struct sock_filter filter[] = {
		/*
		 * Logic:
		 *  if (nlmsg_pid == pid ||
		 *      nlmsg_pid == dplane_pid) {
		 *      if (the incoming nlmsg_type ==
		 *          RTM_NEWADDR || RTM_DELADDR || RTM_NEWNETCONF ||
		 *          RTM_DELNETCONF)
		 *          keep this message
		 *      else
		 *          skip this message
		 *  } else
		 *      keep this netlink message
		 */
		/*
		 * 0: Load the nlmsg_pid into the BPF register
		 */
		BPF_STMT(BPF_LD | BPF_ABS | BPF_W,
			 offsetof(struct nlmsghdr, nlmsg_pid)),
		/*
		 * 1: Compare to pid
		 *    match: skip one instruction, continue at 3
		 *    no match: fall through to 2
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 1, 0),
		/*
		 * 2: Compare to dplane pid
		 *    match: fall through to 3
		 *    no match: skip six instructions, continue at 9 (keep)
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 6),
		/*
		 * 3: Load the nlmsg_type into BPF register
		 */
		BPF_STMT(BPF_LD | BPF_ABS | BPF_H,
			 offsetof(struct nlmsghdr, nlmsg_type)),
		/*
		 * 4: Compare to RTM_NEWADDR; match jumps to 9 (keep)
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 4, 0),
		/*
		 * 5: Compare to RTM_DELADDR; match jumps to 9 (keep)
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 3, 0),
		/*
		 * 6: Compare to RTM_NEWNETCONF; match jumps to 9 (keep)
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWNETCONF), 2,
			 0),
		/*
		 * 7: Compare to RTM_DELNETCONF; match jumps to 9 (keep),
		 *    no match falls through to 8 (skip)
		 */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELNETCONF), 1,
			 0),
		/*
		 * 8: This is the end state if we want to skip the
		 *    message
		 */
		BPF_STMT(BPF_RET | BPF_K, 0),
		/* 9: This is the end state if we want to keep
		 *     the message
		 */
		BPF_STMT(BPF_RET | BPF_K, 0xffff),
	};

	struct sock_fprog prog = {
		.len = array_size(filter), .filter = filter,
	};

	if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))
	    < 0)
		flog_err_sys(EC_LIB_SOCKET, "Can't install socket filter: %s",
			     safe_strerror(errno));
}
640 | | |
/*
 * Index a run of route attributes by type, ignoring the given flag bits.
 *
 * tb    - output array with room for max + 1 pointers; it is cleared first
 * max   - highest attribute type stored
 * rta   - first attribute of the run
 * len   - length in bytes of the run
 * flags - bits masked out of rta_type before it is used as the index
 *
 * When a type occurs more than once the first occurrence is kept.
 * Types above 'max' are skipped.
 */
void netlink_parse_rtattr_flags(struct rtattr **tb, int max, struct rtattr *rta,
				int len, unsigned short flags)
{
	unsigned short type;

	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		type = rta->rta_type & ~flags;

		/* Out of range, or this slot was already filled. */
		if (type > max || tb[type])
			continue;

		tb[type] = rta;
	}
}
654 | | |
/*
 * Index a run of route attributes by type.
 *
 * tb  - output array with room for max + 1 pointers; it is cleared first
 * max - highest attribute type stored
 * rta - first attribute of the run
 * len - length in bytes of the run
 *
 * When a type occurs more than once the last occurrence wins.
 * Types above 'max' are skipped.
 */
void netlink_parse_rtattr(struct rtattr **tb, int max, struct rtattr *rta,
			  int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		if (rta->rta_type > max)
			continue;

		tb[rta->rta_type] = rta;
	}
}
665 | | |
/**
 * netlink_parse_rtattr_nested() - Parses a nested route attribute
 * @tb:  Pointer to array for storing rtattr in.
 * @max: Max number to store.
 * @rta: Pointer to rtattr to look for nested items in.
 *
 * The payload of @rta is handed to netlink_parse_rtattr() as the attribute
 * run to index.
 */
void netlink_parse_rtattr_nested(struct rtattr **tb, int max,
				 struct rtattr *rta)
{
	struct rtattr *inner = RTA_DATA(rta);
	int inner_len = RTA_PAYLOAD(rta);

	netlink_parse_rtattr(tb, max, inner, inner_len);
}
677 | | |
/*
 * Append raw bytes to the end of a netlink message.
 *
 * n      - message being built; nlmsg_len is advanced on success
 * maxlen - size of the buffer holding 'n'
 * data   - bytes to append
 * len    - number of bytes in 'data'
 *
 * The copied bytes are zero-padded up to the netlink alignment.
 * Returns false (after logging) when the padded data would not fit in
 * 'maxlen', true otherwise.
 */
bool nl_addraw_l(struct nlmsghdr *n, unsigned int maxlen, const void *data,
		 unsigned int len)
{
	if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) {
		/* maxlen is unsigned, so print it with %u */
		zlog_err("ERROR message exceeded bound of %u", maxlen);
		return false;
	}

	memcpy(NLMSG_TAIL(n), data, len);
	/* Zero the alignment padding that follows the payload. */
	memset((uint8_t *)NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len);
	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len);

	return true;
}
692 | | |
/*
 * Append one attribute to the end of a netlink message.
 *
 * n      - message being built; nlmsg_len is advanced on success
 * maxlen - size of the buffer holding 'n'
 * type   - attribute type stored in rta_type
 * data   - attribute payload; may be NULL only when alen is 0 (asserted)
 * alen   - payload length in bytes
 *
 * Returns false, leaving the message untouched, when the aligned attribute
 * would not fit in 'maxlen'; true otherwise.
 */
bool nl_attr_put(struct nlmsghdr *n, unsigned int maxlen, int type,
		 const void *data, unsigned int alen)
{
	const int attr_len = RTA_LENGTH(alen);
	const unsigned int tail_off = NLMSG_ALIGN(n->nlmsg_len);
	struct rtattr *attr;

	if (tail_off + RTA_ALIGN(attr_len) > maxlen)
		return false;

	attr = (struct rtattr *)((char *)n + tail_off);
	attr->rta_type = type;
	attr->rta_len = attr_len;

	if (!data)
		assert(alen == 0);
	else
		memcpy(RTA_DATA(attr), data, alen);

	n->nlmsg_len = tail_off + RTA_ALIGN(attr_len);

	return true;
}
717 | | |
/* Append an attribute with a one-byte payload; see nl_attr_put(). */
bool nl_attr_put8(struct nlmsghdr *n, unsigned int maxlen, int type,
		  uint8_t data)
{
	const uint8_t payload = data;

	return nl_attr_put(n, maxlen, type, &payload, sizeof(payload));
}
723 | | |
/* Append an attribute with a two-byte payload; see nl_attr_put(). */
bool nl_attr_put16(struct nlmsghdr *n, unsigned int maxlen, int type,
		   uint16_t data)
{
	const uint16_t payload = data;

	return nl_attr_put(n, maxlen, type, &payload, sizeof(payload));
}
729 | | |
/* Append an attribute with a four-byte payload; see nl_attr_put(). */
bool nl_attr_put32(struct nlmsghdr *n, unsigned int maxlen, int type,
		   uint32_t data)
{
	const uint32_t payload = data;

	return nl_attr_put(n, maxlen, type, &payload, sizeof(payload));
}
735 | | |
/* Append an attribute with an eight-byte payload; see nl_attr_put(). */
bool nl_attr_put64(struct nlmsghdr *n, unsigned int maxlen, int type,
		   uint64_t data)
{
	const uint64_t payload = data;

	return nl_attr_put(n, maxlen, type, &payload, sizeof(payload));
}
741 | | |
/*
 * Open a nested attribute at the end of a netlink message.
 *
 * An empty attribute of the given type is appended and flagged with
 * NLA_F_NESTED. Returns a pointer to that attribute, or NULL when it does
 * not fit in 'maxlen'. Pair with nl_attr_nest_end().
 */
struct rtattr *nl_attr_nest(struct nlmsghdr *n, unsigned int maxlen, int type)
{
	/* Remember where the new attribute will land before appending it. */
	struct rtattr *nest = NLMSG_TAIL(n);

	if (nl_attr_put(n, maxlen, type, NULL, 0)) {
		nest->rta_type |= NLA_F_NESTED;
		return nest;
	}

	return NULL;
}
752 | | |
/*
 * Close a nested attribute: its length becomes the distance from the nest
 * header to the current end of the message. Returns the message length.
 */
int nl_attr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
{
	const uint8_t *msg_end = (uint8_t *)NLMSG_TAIL(n);

	nest->rta_len = msg_end - (uint8_t *)nest;

	return n->nlmsg_len;
}
758 | | |
/*
 * Append a zeroed struct rtnexthop to the end of a netlink message.
 *
 * n      - message being built; nlmsg_len is advanced on success
 * maxlen - size of the buffer holding 'n'
 *
 * Returns a pointer to the new rtnexthop, or NULL when it does not fit in
 * 'maxlen'. Pair with nl_attr_rtnh_end().
 */
struct rtnexthop *nl_attr_rtnh(struct nlmsghdr *n, unsigned int maxlen)
{
	struct rtnexthop *rtnh = (struct rtnexthop *)NLMSG_TAIL(n);
	/* Use the same (rtnexthop) alignment for the check and the advance. */
	const unsigned int rtnh_space = RTNH_ALIGN(sizeof(struct rtnexthop));

	if (NLMSG_ALIGN(n->nlmsg_len) + rtnh_space > maxlen)
		return NULL;

	memset(rtnh, 0, sizeof(struct rtnexthop));
	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + rtnh_space;

	return rtnh;
}
773 | | |
/*
 * Close an rtnexthop opened with nl_attr_rtnh(): its length becomes the
 * distance from the rtnexthop to the current end of the message.
 */
void nl_attr_rtnh_end(struct nlmsghdr *n, struct rtnexthop *rtnh)
{
	const uint8_t *msg_end = (uint8_t *)NLMSG_TAIL(n);

	rtnh->rtnh_len = msg_end - (uint8_t *)rtnh;
}
778 | | |
/*
 * Append a sub-attribute to the end of an existing attribute.
 *
 * rta    - enclosing attribute; rta_len is advanced on success
 * maxlen - size of the buffer holding 'rta'
 * type   - type of the new sub-attribute
 * data   - payload; not read when alen is 0
 * alen   - payload length in bytes
 *
 * Returns false (after logging) when the aligned sub-attribute would not
 * fit in 'maxlen', true otherwise.
 */
bool nl_rta_put(struct rtattr *rta, unsigned int maxlen, int type,
		const void *data, int alen)
{
	struct rtattr *subrta;
	int len = RTA_LENGTH(alen);

	if (RTA_ALIGN(rta->rta_len) + RTA_ALIGN(len) > maxlen) {
		/* maxlen is unsigned, so print it with %u */
		zlog_err("ERROR max allowed bound %u exceeded for rtattr",
			 maxlen);
		return false;
	}
	subrta = (struct rtattr *)(((char *)rta) + RTA_ALIGN(rta->rta_len));
	subrta->rta_type = type;
	subrta->rta_len = len;
	if (alen)
		memcpy(RTA_DATA(subrta), data, alen);
	/* Attribute lengths use the rtattr alignment, matching the check. */
	rta->rta_len = RTA_ALIGN(rta->rta_len) + RTA_ALIGN(len);

	return true;
}
799 | | |
/* Append a sub-attribute with a two-byte payload; see nl_rta_put(). */
bool nl_rta_put16(struct rtattr *rta, unsigned int maxlen, int type,
		  uint16_t data)
{
	const uint16_t payload = data;

	return nl_rta_put(rta, maxlen, type, &payload, sizeof(payload));
}
805 | | |
/* Append a sub-attribute with an eight-byte payload; see nl_rta_put(). */
bool nl_rta_put64(struct rtattr *rta, unsigned int maxlen, int type,
		  uint64_t data)
{
	const uint64_t payload = data;

	return nl_rta_put(rta, maxlen, type, &payload, sizeof(payload));
}
811 | | |
/*
 * Open a nested sub-attribute at the end of an existing attribute.
 *
 * An empty sub-attribute of the given type is appended to 'rta' and flagged
 * with NLA_F_NESTED. Returns a pointer to it, or NULL when it does not fit
 * in 'maxlen'. Pair with nl_rta_nest_end().
 */
struct rtattr *nl_rta_nest(struct rtattr *rta, unsigned int maxlen, int type)
{
	/* Remember where the new sub-attribute will land before appending. */
	struct rtattr *nest = RTA_TAIL(rta);

	/* nl_rta_put() returns true on success, so NULL is for failure only. */
	if (!nl_rta_put(rta, maxlen, type, NULL, 0))
		return NULL;

	nest->rta_type |= NLA_F_NESTED;

	return nest;
}
823 | | |
/*
 * Close a nested sub-attribute: its length becomes the distance from the
 * nest header to the current end of 'rta'. Returns the length of 'rta'.
 */
int nl_rta_nest_end(struct rtattr *rta, struct rtattr *nest)
{
	const uint8_t *rta_end = (uint8_t *)RTA_TAIL(rta);

	nest->rta_len = rta_end - (uint8_t *)nest;

	return rta->rta_len;
}
830 | | |
831 | | const char *nl_msg_type_to_str(uint16_t msg_type) |
832 | 0 | { |
833 | 0 | return lookup_msg(nlmsg_str, msg_type, ""); |
834 | 0 | } |
835 | | |
836 | | const char *nl_rtproto_to_str(uint8_t rtproto) |
837 | 0 | { |
838 | 0 | return lookup_msg(rtproto_str, rtproto, ""); |
839 | 0 | } |
840 | | |
841 | | const char *nl_family_to_str(uint8_t family) |
842 | 0 | { |
843 | 0 | return lookup_msg(family_str, family, ""); |
844 | 0 | } |
845 | | |
846 | | const char *nl_rttype_to_str(uint8_t rttype) |
847 | 0 | { |
848 | 0 | return lookup_msg(rttype_str, rttype, ""); |
849 | 0 | } |
850 | | |
/*
 * True when 'len' bytes are enough to hold an nlattr header and the
 * attribute's own nla_len is at least a header and at most 'len'.
 */
#define NLA_OK(nla, len)                                                       \
	((len) >= (int)sizeof(struct nlattr)                                   \
	 && (nla)->nla_len >= sizeof(struct nlattr)                            \
	 && (nla)->nla_len <= (len))
/*
 * Step to the attribute after 'nla'; 'attrlen' is reduced by the aligned
 * length of 'nla' as a side effect.
 */
#define NLA_NEXT(nla, attrlen)                                                 \
	((attrlen) -= NLA_ALIGN((nla)->nla_len),                               \
	 (struct nlattr *)(((char *)(nla)) + NLA_ALIGN((nla)->nla_len)))
/* Total attribute length for a payload of 'len' bytes. */
#define NLA_LENGTH(len) (NLA_ALIGN(sizeof(struct nlattr)) + (len))
/* Start of the payload of 'nla' (typed as struct nlattr *; callers cast). */
#define NLA_DATA(nla) ((struct nlattr *)(((char *)(nla)) + NLA_LENGTH(0)))

/*
 * First attribute of an error message: located after the aligned
 * struct nlmsgerr plus 'inner_len' (aligned) further bytes.
 */
#define ERR_NLA(err, inner_len)                                                \
	((struct nlattr *)(((char *)(err))                                     \
			   + NLMSG_ALIGN(sizeof(struct nlmsgerr))              \
			   + NLMSG_ALIGN((inner_len))))
865 | | |
/*
 * Index a run of nlattr attributes by type.
 *
 * tb  - output array with room for max + 1 pointers; NOT cleared here, the
 *       caller provides it initialized
 * max - highest attribute type stored
 * nla - first attribute of the run
 * len - length in bytes of the run
 *
 * When a type occurs more than once the last occurrence wins.
 */
static void netlink_parse_nlattr(struct nlattr **tb, int max,
				 struct nlattr *nla, int len)
{
	for (; NLA_OK(nla, len); nla = NLA_NEXT(nla, len)) {
		if (nla->nla_type > max)
			continue;

		tb[nla->nla_type] = nla;
	}
}
875 | | |
876 | | static void netlink_parse_extended_ack(struct nlmsghdr *h) |
877 | 0 | { |
878 | 0 | struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {}; |
879 | 0 | const struct nlmsgerr *err = (const struct nlmsgerr *)NLMSG_DATA(h); |
880 | 0 | const struct nlmsghdr *err_nlh = NULL; |
881 | | /* Length not including nlmsghdr */ |
882 | 0 | uint32_t len = 0; |
883 | | /* Inner error netlink message length */ |
884 | 0 | uint32_t inner_len = 0; |
885 | 0 | const char *msg = NULL; |
886 | 0 | uint32_t off = 0; |
887 | |
|
888 | 0 | if (!(h->nlmsg_flags & NLM_F_CAPPED)) |
889 | 0 | inner_len = (uint32_t)NLMSG_PAYLOAD(&err->msg, 0); |
890 | |
|
891 | 0 | len = (uint32_t)(NLMSG_PAYLOAD(h, sizeof(struct nlmsgerr)) - inner_len); |
892 | |
|
893 | 0 | netlink_parse_nlattr(tb, NLMSGERR_ATTR_MAX, ERR_NLA(err, inner_len), |
894 | 0 | len); |
895 | |
|
896 | 0 | if (tb[NLMSGERR_ATTR_MSG]) |
897 | 0 | msg = (const char *)NLA_DATA(tb[NLMSGERR_ATTR_MSG]); |
898 | |
|
899 | 0 | if (tb[NLMSGERR_ATTR_OFFS]) { |
900 | 0 | off = *(uint32_t *)NLA_DATA(tb[NLMSGERR_ATTR_OFFS]); |
901 | |
|
902 | 0 | if (off > h->nlmsg_len) { |
903 | 0 | zlog_err("Invalid offset for NLMSGERR_ATTR_OFFS"); |
904 | 0 | } else if (!(h->nlmsg_flags & NLM_F_CAPPED)) { |
905 | | /* |
906 | | * Header of failed message |
907 | | * we are not doing anything currently with it |
908 | | * but noticing it for later. |
909 | | */ |
910 | 0 | err_nlh = &err->msg; |
911 | 0 | zlog_debug("%s: Received %s extended Ack", __func__, |
912 | 0 | nl_msg_type_to_str(err_nlh->nlmsg_type)); |
913 | 0 | } |
914 | 0 | } |
915 | |
|
916 | 0 | if (msg && *msg != '\0') { |
917 | 0 | bool is_err = !!err->error; |
918 | |
|
919 | 0 | if (is_err) |
920 | 0 | zlog_err("Extended Error: %s", msg); |
921 | 0 | else |
922 | 0 | flog_warn(EC_ZEBRA_NETLINK_EXTENDED_WARNING, |
923 | 0 | "Extended Warning: %s", msg); |
924 | 0 | } |
925 | 0 | } |
926 | | |
927 | | /* |
928 | | * netlink_send_msg - send a netlink message of a certain size. |
929 | | * |
930 | | * Returns -1 on error. Otherwise, it returns the number of bytes sent. |
931 | | */ |
932 | | static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf, |
933 | | size_t buflen) |
934 | 0 | { |
935 | 0 | struct sockaddr_nl snl = {}; |
936 | 0 | struct iovec iov = {}; |
937 | 0 | struct msghdr msg = {}; |
938 | 0 | ssize_t status; |
939 | 0 | int save_errno = 0; |
940 | |
|
941 | 0 | iov.iov_base = buf; |
942 | 0 | iov.iov_len = buflen; |
943 | 0 | msg.msg_name = &snl; |
944 | 0 | msg.msg_namelen = sizeof(snl); |
945 | 0 | msg.msg_iov = &iov; |
946 | 0 | msg.msg_iovlen = 1; |
947 | |
|
948 | 0 | snl.nl_family = AF_NETLINK; |
949 | | |
950 | | /* Send message to netlink interface. */ |
951 | 0 | frr_with_privs(&zserv_privs) { |
952 | 0 | status = sendmsg(nl->sock, &msg, 0); |
953 | 0 | save_errno = errno; |
954 | 0 | } |
955 | |
|
956 | 0 | if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) { |
957 | 0 | zlog_debug("%s: >> netlink message dump [sent]", __func__); |
958 | 0 | #ifdef NETLINK_DEBUG |
959 | 0 | nl_dump(buf, buflen); |
960 | | #else |
961 | | zlog_hexdump(buf, buflen); |
962 | | #endif /* NETLINK_DEBUG */ |
963 | 0 | } |
964 | |
|
965 | 0 | if (status == -1) { |
966 | 0 | flog_err_sys(EC_LIB_SOCKET, "%s error: %s", __func__, |
967 | 0 | safe_strerror(save_errno)); |
968 | 0 | return -1; |
969 | 0 | } |
970 | | |
971 | 0 | return status; |
972 | 0 | } |
973 | | |
974 | | /* |
975 | | * netlink_recv_msg - receive a netlink message. |
976 | | * |
977 | | * Returns -1 on error, 0 if read would block or the number of bytes received. |
978 | | */ |
979 | | static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg) |
980 | 0 | { |
981 | 0 | struct iovec iov; |
982 | 0 | int status; |
983 | |
|
984 | 0 | iov.iov_base = nl->buf; |
985 | 0 | iov.iov_len = nl->buflen; |
986 | 0 | msg->msg_iov = &iov; |
987 | 0 | msg->msg_iovlen = 1; |
988 | |
|
989 | 0 | do { |
990 | 0 | int bytes; |
991 | |
|
992 | 0 | bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC); |
993 | |
|
994 | 0 | if (bytes >= 0 && (size_t)bytes > nl->buflen) { |
995 | 0 | nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes); |
996 | 0 | nl->buflen = bytes; |
997 | 0 | iov.iov_base = nl->buf; |
998 | 0 | iov.iov_len = nl->buflen; |
999 | 0 | } |
1000 | |
|
1001 | 0 | status = recvmsg(nl->sock, msg, 0); |
1002 | 0 | } while (status == -1 && errno == EINTR); |
1003 | |
|
1004 | 0 | if (status == -1) { |
1005 | 0 | if (errno == EWOULDBLOCK || errno == EAGAIN) |
1006 | 0 | return 0; |
1007 | 0 | flog_err(EC_ZEBRA_RECVMSG_OVERRUN, "%s recvmsg overrun: %s", |
1008 | 0 | nl->name, safe_strerror(errno)); |
1009 | | /* |
1010 | | * In this case we are screwed. There is no good way to recover |
1011 | | * zebra at this point. |
1012 | | */ |
1013 | 0 | exit(-1); |
1014 | 0 | } |
1015 | | |
1016 | 0 | if (status == 0) { |
1017 | 0 | flog_err_sys(EC_LIB_SOCKET, "%s EOF", nl->name); |
1018 | 0 | return -1; |
1019 | 0 | } |
1020 | | |
1021 | 0 | if (msg->msg_namelen != sizeof(struct sockaddr_nl)) { |
1022 | 0 | flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR, |
1023 | 0 | "%s sender address length error: length %d", nl->name, |
1024 | 0 | msg->msg_namelen); |
1025 | 0 | return -1; |
1026 | 0 | } |
1027 | | |
1028 | 0 | if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) { |
1029 | 0 | zlog_debug("%s: << netlink message dump [recv]", __func__); |
1030 | 0 | #ifdef NETLINK_DEBUG |
1031 | 0 | nl_dump(nl->buf, status); |
1032 | | #else |
1033 | | zlog_hexdump(nl->buf, status); |
1034 | | #endif /* NETLINK_DEBUG */ |
1035 | 0 | } |
1036 | |
|
1037 | | #if defined(HANDLE_NETLINK_FUZZING) |
1038 | | zlog_debug("Writing incoming netlink message"); |
1039 | | netlink_write_incoming(buf, status, netlink_file_counter++); |
1040 | | #endif /* HANDLE_NETLINK_FUZZING */ |
1041 | |
|
1042 | 0 | return status; |
1043 | 0 | } |
1044 | | |
1045 | | /* |
1046 | | * netlink_parse_error - parse a netlink error message |
1047 | | * |
1048 | | * Returns 1 if this message is acknowledgement, 0 if this error should be |
1049 | | * ignored, -1 otherwise. |
1050 | | */ |
1051 | | static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h, |
1052 | | bool is_cmd, bool startup) |
1053 | 0 | { |
1054 | 0 | struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); |
1055 | 0 | int errnum = err->error; |
1056 | 0 | int msg_type = err->msg.nlmsg_type; |
1057 | |
|
1058 | 0 | if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) { |
1059 | 0 | flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR, |
1060 | 0 | "%s error: message truncated", nl->name); |
1061 | 0 | return -1; |
1062 | 0 | } |
1063 | | |
1064 | | /* |
1065 | | * Parse the extended information before we actually handle it. At this |
1066 | | * point in time we do not do anything other than report the issue. |
1067 | | */ |
1068 | 0 | if (h->nlmsg_flags & NLM_F_ACK_TLVS) |
1069 | 0 | netlink_parse_extended_ack(h); |
1070 | | |
1071 | | /* If the error field is zero, then this is an ACK. */ |
1072 | 0 | if (err->error == 0) { |
1073 | 0 | if (IS_ZEBRA_DEBUG_KERNEL) { |
1074 | 0 | zlog_debug("%s: %s ACK: type=%s(%u), seq=%u, pid=%u", |
1075 | 0 | __func__, nl->name, |
1076 | 0 | nl_msg_type_to_str(err->msg.nlmsg_type), |
1077 | 0 | err->msg.nlmsg_type, err->msg.nlmsg_seq, |
1078 | 0 | err->msg.nlmsg_pid); |
1079 | 0 | } |
1080 | |
|
1081 | 0 | return 1; |
1082 | 0 | } |
1083 | | |
1084 | | /* |
1085 | | * Deal with errors that occur because of races in link handling |
1086 | | * or types are not supported in kernel. |
1087 | | */ |
1088 | 0 | if (is_cmd && |
1089 | 0 | ((msg_type == RTM_DELROUTE && |
1090 | 0 | (-errnum == ENODEV || -errnum == ESRCH)) || |
1091 | 0 | (msg_type == RTM_NEWROUTE && |
1092 | 0 | (-errnum == ENETDOWN || -errnum == EEXIST)) || |
1093 | 0 | ((msg_type == RTM_NEWTUNNEL || msg_type == RTM_DELTUNNEL || |
1094 | 0 | msg_type == RTM_GETTUNNEL) && |
1095 | 0 | (-errnum == EOPNOTSUPP)))) { |
1096 | 0 | if (IS_ZEBRA_DEBUG_KERNEL) |
1097 | 0 | zlog_debug("%s: error: %s type=%s(%u), seq=%u, pid=%u", |
1098 | 0 | nl->name, safe_strerror(-errnum), |
1099 | 0 | nl_msg_type_to_str(msg_type), msg_type, |
1100 | 0 | err->msg.nlmsg_seq, err->msg.nlmsg_pid); |
1101 | 0 | return 0; |
1102 | 0 | } |
1103 | | |
1104 | | /* |
1105 | | * We see RTM_DELNEIGH when shutting down an interface with an IPv4 |
1106 | | * link-local. The kernel should have already deleted the neighbor so |
1107 | | * do not log these as an error. |
1108 | | */ |
1109 | 0 | if (msg_type == RTM_DELNEIGH |
1110 | 0 | || (is_cmd && msg_type == RTM_NEWROUTE |
1111 | 0 | && (-errnum == ESRCH || -errnum == ENETUNREACH))) { |
1112 | | /* |
1113 | | * This is known to happen in some situations, don't log as |
1114 | | * error. |
1115 | | */ |
1116 | 0 | if (IS_ZEBRA_DEBUG_KERNEL) |
1117 | 0 | zlog_debug("%s error: %s, type=%s(%u), seq=%u, pid=%u", |
1118 | 0 | nl->name, safe_strerror(-errnum), |
1119 | 0 | nl_msg_type_to_str(msg_type), msg_type, |
1120 | 0 | err->msg.nlmsg_seq, err->msg.nlmsg_pid); |
1121 | 0 | } else { |
1122 | 0 | if ((msg_type != RTM_GETNEXTHOP && msg_type != RTM_GETVLAN) || |
1123 | 0 | !startup) |
1124 | 0 | flog_err(EC_ZEBRA_UNEXPECTED_MESSAGE, |
1125 | 0 | "%s error: %s, type=%s(%u), seq=%u, pid=%u", |
1126 | 0 | nl->name, safe_strerror(-errnum), |
1127 | 0 | nl_msg_type_to_str(msg_type), msg_type, |
1128 | 0 | err->msg.nlmsg_seq, err->msg.nlmsg_pid); |
1129 | 0 | } |
1130 | |
|
1131 | 0 | return -1; |
1132 | 0 | } |
1133 | | |
/*
 * netlink_parse_info
 *
 * Receive messages from the netlink socket and pass each of them to the
 * given filter function.
 *
 * filter  -> Function to call to read the results
 * nl      -> netlink socket information
 * zns     -> The zebra namespace data
 * count   -> How many we should read in, 0 means as much as possible
 * startup -> Are we reading in under startup conditions? passed to
 *            the filter.
 *
 * Returns:
 *   -1 when receiving fails or unparsed bytes remain after a datagram;
 *   the result of netlink_parse_error() (0 or -1) for a non-ACK error;
 *   0 for a non-multipart ACK or once 'count' datagrams have been read;
 *   otherwise 0, or the most recent negative filter result, when
 *   NLMSG_DONE is seen or the read would block.
 */
int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
		       struct nlsock *nl, const struct zebra_dplane_info *zns,
		       int count, bool startup)
{
	int status;
	int ret = 0;
	int error;
	int read_in = 0;

	while (1) {
		/* Filled in by recvmsg() with the sender's address */
		struct sockaddr_nl snl;
		struct msghdr msg = {.msg_name = (void *)&snl,
				     .msg_namelen = sizeof(snl)};
		struct nlmsghdr *h;

		if (count && read_in >= count)
			return 0;

		status = netlink_recv_msg(nl, &msg);
		if (status == -1)
			return -1;
		else if (status == 0)
			break;

		read_in++;
		/*
		 * NLMSG_NEXT() decrements 'status' as it advances, so after
		 * the loop it holds the number of bytes left unparsed.
		 */
		for (h = (struct nlmsghdr *)nl->buf;
		     (status >= 0 && NLMSG_OK(h, (unsigned int)status));
		     h = NLMSG_NEXT(h, status)) {
			/* Finish of reading. */
			if (h->nlmsg_type == NLMSG_DONE)
				return ret;

			/* Error handling. */
			if (h->nlmsg_type == NLMSG_ERROR) {
				int err = netlink_parse_error(
					nl, h, zns->is_cmd, startup);

				/*
				 * An ACK ends the exchange unless it is part
				 * of a multipart reply.
				 */
				if (err == 1) {
					if (!(h->nlmsg_flags & NLM_F_MULTI))
						return 0;
					continue;
				} else
					return err;
			}

			/*
			 * What is the right thing to do?  The kernel
			 * is telling us that the dump request was interrupted
			 * and we more than likely are out of luck and have
			 * missed data from the kernel.  At this point in time
			 * lets just note that this is happening.
			 */
			if (h->nlmsg_flags & NLM_F_DUMP_INTR)
				flog_err(
					EC_ZEBRA_NETLINK_BAD_SEQUENCE,
					"netlink recvmsg: The Dump request was interrupted");

			/* OK we got netlink message. */
			if (IS_ZEBRA_DEBUG_KERNEL)
				zlog_debug(
					"%s: %s type %s(%u), len=%d, seq=%u, pid=%u",
					__func__, nl->name,
					nl_msg_type_to_str(h->nlmsg_type),
					h->nlmsg_type, h->nlmsg_len,
					h->nlmsg_seq, h->nlmsg_pid);


			/*
			 * Ignore messages that may be sent from
			 * other actors besides the kernel.
			 * NOTE(review): this check runs only after the
			 * NLMSG_DONE and NLMSG_ERROR handling above, so those
			 * two message types are acted upon whatever the
			 * sender pid is - confirm this is intended.
			 */
			if (snl.nl_pid != 0) {
				zlog_debug("Ignoring message from pid %u",
					   snl.nl_pid);
				continue;
			}

			/* Remember a filter failure but keep processing */
			error = (*filter)(h, zns->ns_id, startup);
			if (error < 0) {
				zlog_debug("%s filter function error",
					   nl->name);
				ret = error;
			}
		}

		/* After error care. */
		if (msg.msg_flags & MSG_TRUNC) {
			flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
				 "%s error: message truncated", nl->name);
			continue;
		}
		/* Leftover bytes that did not form a complete message */
		if (status) {
			flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
				 "%s error: data remnant size %d", nl->name,
				 status);
			return -1;
		}
	}
	return ret;
}
1247 | | |
1248 | | /* |
1249 | | * netlink_talk_info |
1250 | | * |
1251 | | * sendmsg() to netlink socket then recvmsg(). |
1252 | | * Calls netlink_parse_info to parse returned data |
1253 | | * |
1254 | | * filter -> The filter to read final results from kernel |
1255 | | * nlmsghdr -> The data to send to the kernel |
1256 | | * dp_info -> The dataplane and netlink socket information |
1257 | | * startup -> Are we reading in under startup conditions |
1258 | | * This is passed through eventually to filter. |
1259 | | */ |
1260 | | static int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, |
1261 | | int startup), |
1262 | | struct nlmsghdr *n, |
1263 | | struct zebra_dplane_info *dp_info, bool startup) |
1264 | 0 | { |
1265 | 0 | struct nlsock *nl; |
1266 | |
|
1267 | 0 | nl = kernel_netlink_nlsock_lookup(dp_info->sock); |
1268 | 0 | n->nlmsg_seq = dp_info->seq; |
1269 | 0 | n->nlmsg_pid = nl->snl.nl_pid; |
1270 | |
|
1271 | 0 | if (IS_ZEBRA_DEBUG_KERNEL) |
1272 | 0 | zlog_debug( |
1273 | 0 | "netlink_talk: %s type %s(%u), len=%d seq=%u flags 0x%x", |
1274 | 0 | nl->name, nl_msg_type_to_str(n->nlmsg_type), |
1275 | 0 | n->nlmsg_type, n->nlmsg_len, n->nlmsg_seq, |
1276 | 0 | n->nlmsg_flags); |
1277 | |
|
1278 | 0 | if (netlink_send_msg(nl, n, n->nlmsg_len) == -1) |
1279 | 0 | return -1; |
1280 | | |
1281 | | /* |
1282 | | * Get reply from netlink socket. |
1283 | | * The reply should either be an acknowlegement or an error. |
1284 | | */ |
1285 | 0 | return netlink_parse_info(filter, nl, dp_info, 0, startup); |
1286 | 0 | } |
1287 | | |
1288 | | /* |
1289 | | * Synchronous version of netlink_talk_info. Converts args to suit the |
1290 | | * common version, which is suitable for both sync and async use. |
1291 | | */ |
1292 | | int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), |
1293 | | struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns, |
1294 | | bool startup) |
1295 | 0 | { |
1296 | 0 | struct zebra_dplane_info dp_info; |
1297 | | |
1298 | | /* Increment sequence number before capturing snapshot of ns socket |
1299 | | * info. |
1300 | | */ |
1301 | 0 | nl->seq++; |
1302 | | |
1303 | | /* Capture info in intermediate info struct */ |
1304 | 0 | zebra_dplane_info_from_zns(&dp_info, zns, (nl == &(zns->netlink_cmd))); |
1305 | |
|
1306 | 0 | return netlink_talk_info(filter, n, &dp_info, startup); |
1307 | 0 | } |
1308 | | |
1309 | | /* Issue request message to kernel via netlink socket. GET messages |
1310 | | * are issued through this interface. |
1311 | | */ |
1312 | | int netlink_request(struct nlsock *nl, void *req) |
1313 | 7 | { |
1314 | 7 | struct nlmsghdr *n = (struct nlmsghdr *)req; |
1315 | | |
1316 | | /* Check netlink socket. */ |
1317 | 7 | if (nl->sock < 0) { |
1318 | 7 | flog_err_sys(EC_LIB_SOCKET, "%s socket isn't active.", |
1319 | 7 | nl->name); |
1320 | 7 | return -1; |
1321 | 7 | } |
1322 | | |
1323 | | /* Fill common fields for all requests. */ |
1324 | 0 | n->nlmsg_pid = nl->snl.nl_pid; |
1325 | 0 | n->nlmsg_seq = ++nl->seq; |
1326 | |
|
1327 | 0 | if (netlink_send_msg(nl, req, n->nlmsg_len) == -1) |
1328 | 0 | return -1; |
1329 | | |
1330 | 0 | return 0; |
1331 | 0 | } |
1332 | | |
/*
 * Read one datagram of responses for a batch that has been sent and match
 * each contained message against the contexts queued in bth->ctx_list.
 *
 * Contexts that are passed over or matched are moved to bth->ctx_out_q; a
 * context whose response is an error that netlink_parse_error() rates as -1
 * is marked ZEBRA_DPLANE_REQUEST_FAILURE.
 *
 * Returns -1 or 0 straight from netlink_recv_msg() when nothing was read,
 * and 0 after a datagram has been processed.
 *
 * NOTE(review): the result of kernel_netlink_nlsock_lookup() is used without
 * a NULL check here.
 */
static int nl_batch_read_resp(struct nl_batch *bth)
{
	struct nlmsghdr *h;
	struct sockaddr_nl snl;
	struct msghdr msg = {};
	int status, seq;
	struct nlsock *nl;
	struct zebra_dplane_ctx *ctx;
	bool ignore_msg;

	nl = kernel_netlink_nlsock_lookup(bth->zns->sock);

	msg.msg_name = (void *)&snl;
	msg.msg_namelen = sizeof(snl);

	status = netlink_recv_msg(nl, &msg);
	if (status == -1 || status == 0)
		return status;

	/* NLMSG_NEXT() decrements 'status' while stepping through messages */
	for (h = (struct nlmsghdr *)nl->buf;
	     (status >= 0 && NLMSG_OK(h, (unsigned int)status));
	     h = NLMSG_NEXT(h, status)) {

		ignore_msg = false;
		seq = h->nlmsg_seq;
		/*
		 * Find the corresponding context object. Received responses are
		 * in the same order as requests we sent, so we can simply
		 * iterate over the context list and match responses with
		 * requests at same time.
		 */
		while (true) {
			ctx = dplane_ctx_get_head(&(bth->ctx_list));
			if (ctx == NULL) {
				/*
				 * This is a situation where we have gotten
				 * into a bad spot.  We need to know that
				 * this happens( does it? )
				 */
				zlog_err(
					"%s:WARNING Received netlink Response for an error and no Contexts to associate with it",
					__func__);
				break;
			}

			/*
			 * 'update' context objects take two consecutive
			 * sequence numbers.
			 */
			if (dplane_ctx_is_update(ctx) &&
			    dplane_ctx_get_ns(ctx)->seq + 1 == seq) {
				/*
				 * This is the situation where we get a response
				 * to a message that should be ignored.
				 * The context stays at the head of the list.
				 */
				ignore_msg = true;
				break;
			}

			/* Hand the head context over to the output queue */
			ctx = dplane_ctx_dequeue(&(bth->ctx_list));
			dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx);

			/* We have found corresponding context object. */
			if (dplane_ctx_get_ns(ctx)->seq == seq)
				break;

			if (dplane_ctx_get_ns(ctx)->seq > seq)
				zlog_warn(
					"%s:WARNING Received %u is less than any context on the queue ctx->seq %u",
					__func__, seq,
					dplane_ctx_get_ns(ctx)->seq);
		}

		if (ignore_msg) {
			/*
			 * If we ignore the message due to an update
			 * above we should still decode the
			 * message for our operator to understand
			 * what is going on.
			 * NOTE(review): the message type is not checked to
			 * be NLMSG_ERROR before it is parsed as one here.
			 */
			int err = netlink_parse_error(nl, h, bth->zns->is_cmd,
						      false);

			zlog_debug("%s: netlink error message seq=%d %d",
				   __func__, h->nlmsg_seq, err);
			continue;
		}

		/*
		 * We received a message with the sequence number that isn't
		 * associated with any dplane context object.
		 */
		if (ctx == NULL) {
			if (IS_ZEBRA_DEBUG_KERNEL)
				zlog_debug(
					"%s: skipping unassociated response, seq number %d NS %u",
					__func__, h->nlmsg_seq,
					bth->zns->ns_id);
			continue;
		}

		if (h->nlmsg_type == NLMSG_ERROR) {
			int err = netlink_parse_error(nl, h, bth->zns->is_cmd,
						      false);

			/* ACKs (1) and ignorable errors (0) leave the status */
			if (err == -1)
				dplane_ctx_set_status(
					ctx, ZEBRA_DPLANE_REQUEST_FAILURE);

			if (IS_ZEBRA_DEBUG_KERNEL)
				zlog_debug("%s: netlink error message seq=%d ",
					   __func__, h->nlmsg_seq);
			continue;
		}

		/*
		 * If we get here then we received neither the ack nor
		 * the error and instead received some other message in an
		 * unexpected way.
		 */
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u",
				   __func__, h->nlmsg_type,
				   nl_msg_type_to_str(h->nlmsg_type),
				   bth->zns->ns_id);
	}

	return 0;
}
1462 | | |
1463 | | static void nl_batch_reset(struct nl_batch *bth) |
1464 | 0 | { |
1465 | 0 | bth->buf_head = bth->buf; |
1466 | 0 | bth->curlen = 0; |
1467 | 0 | bth->msgcnt = 0; |
1468 | 0 | bth->zns = NULL; |
1469 | |
|
1470 | 0 | dplane_ctx_q_init(&(bth->ctx_list)); |
1471 | 0 | } |
1472 | | |
1473 | | static void nl_batch_init(struct nl_batch *bth, |
1474 | | struct dplane_ctx_list_head *ctx_out_q) |
1475 | 0 | { |
1476 | | /* |
1477 | | * If the size of the buffer has changed, free and then allocate a new |
1478 | | * one. |
1479 | | */ |
1480 | 0 | size_t bufsize = |
1481 | 0 | atomic_load_explicit(&nl_batch_bufsize, memory_order_relaxed); |
1482 | 0 | if (bufsize != nl_batch_tx_bufsize) { |
1483 | 0 | if (nl_batch_tx_buf) |
1484 | 0 | XFREE(MTYPE_NL_BUF, nl_batch_tx_buf); |
1485 | |
|
1486 | 0 | nl_batch_tx_buf = XCALLOC(MTYPE_NL_BUF, bufsize); |
1487 | 0 | nl_batch_tx_bufsize = bufsize; |
1488 | 0 | } |
1489 | |
|
1490 | 0 | bth->buf = nl_batch_tx_buf; |
1491 | 0 | bth->bufsiz = bufsize; |
1492 | 0 | bth->limit = atomic_load_explicit(&nl_batch_send_threshold, |
1493 | 0 | memory_order_relaxed); |
1494 | |
|
1495 | 0 | bth->ctx_out_q = ctx_out_q; |
1496 | |
|
1497 | 0 | nl_batch_reset(bth); |
1498 | 0 | } |
1499 | | |
1500 | | static void nl_batch_send(struct nl_batch *bth) |
1501 | 0 | { |
1502 | 0 | struct zebra_dplane_ctx *ctx; |
1503 | 0 | bool err = false; |
1504 | |
|
1505 | 0 | if (bth->curlen != 0 && bth->zns != NULL) { |
1506 | 0 | struct nlsock *nl = |
1507 | 0 | kernel_netlink_nlsock_lookup(bth->zns->sock); |
1508 | |
|
1509 | 0 | if (IS_ZEBRA_DEBUG_KERNEL) |
1510 | 0 | zlog_debug("%s: %s, batch size=%zu, msg cnt=%zu", |
1511 | 0 | __func__, nl->name, bth->curlen, |
1512 | 0 | bth->msgcnt); |
1513 | |
|
1514 | 0 | if (netlink_send_msg(nl, bth->buf, bth->curlen) == -1) |
1515 | 0 | err = true; |
1516 | |
|
1517 | 0 | if (!err) { |
1518 | 0 | if (nl_batch_read_resp(bth) == -1) |
1519 | 0 | err = true; |
1520 | 0 | } |
1521 | 0 | } |
1522 | | |
1523 | | /* Move remaining contexts to the outbound queue. */ |
1524 | 0 | while (true) { |
1525 | 0 | ctx = dplane_ctx_dequeue(&(bth->ctx_list)); |
1526 | 0 | if (ctx == NULL) |
1527 | 0 | break; |
1528 | | |
1529 | 0 | if (err) |
1530 | 0 | dplane_ctx_set_status(ctx, |
1531 | 0 | ZEBRA_DPLANE_REQUEST_FAILURE); |
1532 | |
|
1533 | 0 | dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx); |
1534 | 0 | } |
1535 | |
|
1536 | 0 | nl_batch_reset(bth); |
1537 | 0 | } |
1538 | | |
1539 | | enum netlink_msg_status netlink_batch_add_msg( |
1540 | | struct nl_batch *bth, struct zebra_dplane_ctx *ctx, |
1541 | | ssize_t (*msg_encoder)(struct zebra_dplane_ctx *, void *, size_t), |
1542 | | bool ignore_res) |
1543 | 0 | { |
1544 | 0 | int seq; |
1545 | 0 | ssize_t size; |
1546 | 0 | struct nlmsghdr *msgh; |
1547 | 0 | struct nlsock *nl; |
1548 | |
|
1549 | 0 | size = (*msg_encoder)(ctx, bth->buf_head, bth->bufsiz - bth->curlen); |
1550 | | |
1551 | | /* |
1552 | | * If there was an error while encoding the message (other than buffer |
1553 | | * overflow) then return an error. |
1554 | | */ |
1555 | 0 | if (size < 0) |
1556 | 0 | return FRR_NETLINK_ERROR; |
1557 | | |
1558 | | /* |
1559 | | * If the message doesn't fit entirely in the buffer then send the batch |
1560 | | * and retry. |
1561 | | */ |
1562 | 0 | if (size == 0) { |
1563 | 0 | nl_batch_send(bth); |
1564 | 0 | size = (*msg_encoder)(ctx, bth->buf_head, |
1565 | 0 | bth->bufsiz - bth->curlen); |
1566 | | /* |
1567 | | * If the message doesn't fit in the empty buffer then just |
1568 | | * return an error. |
1569 | | */ |
1570 | 0 | if (size <= 0) |
1571 | 0 | return FRR_NETLINK_ERROR; |
1572 | 0 | } |
1573 | | |
1574 | 0 | seq = dplane_ctx_get_ns(ctx)->seq; |
1575 | 0 | nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); |
1576 | |
|
1577 | 0 | if (ignore_res) |
1578 | 0 | seq++; |
1579 | |
|
1580 | 0 | msgh = (struct nlmsghdr *)bth->buf_head; |
1581 | 0 | msgh->nlmsg_seq = seq; |
1582 | 0 | msgh->nlmsg_pid = nl->snl.nl_pid; |
1583 | |
|
1584 | 0 | bth->zns = dplane_ctx_get_ns(ctx); |
1585 | 0 | bth->buf_head = ((char *)bth->buf_head) + size; |
1586 | 0 | bth->curlen += size; |
1587 | 0 | bth->msgcnt++; |
1588 | |
|
1589 | 0 | return FRR_NETLINK_QUEUED; |
1590 | 0 | } |
1591 | | |
1592 | | static enum netlink_msg_status nl_put_msg(struct nl_batch *bth, |
1593 | | struct zebra_dplane_ctx *ctx) |
1594 | 0 | { |
1595 | 0 | if (dplane_ctx_is_skip_kernel(ctx)) |
1596 | 0 | return FRR_NETLINK_SUCCESS; |
1597 | | |
1598 | 0 | switch (dplane_ctx_get_op(ctx)) { |
1599 | | |
1600 | 0 | case DPLANE_OP_ROUTE_INSTALL: |
1601 | 0 | case DPLANE_OP_ROUTE_UPDATE: |
1602 | 0 | case DPLANE_OP_ROUTE_DELETE: |
1603 | 0 | return netlink_put_route_update_msg(bth, ctx); |
1604 | | |
1605 | 0 | case DPLANE_OP_NH_INSTALL: |
1606 | 0 | case DPLANE_OP_NH_UPDATE: |
1607 | 0 | case DPLANE_OP_NH_DELETE: |
1608 | 0 | return netlink_put_nexthop_update_msg(bth, ctx); |
1609 | | |
1610 | 0 | case DPLANE_OP_LSP_INSTALL: |
1611 | 0 | case DPLANE_OP_LSP_UPDATE: |
1612 | 0 | case DPLANE_OP_LSP_DELETE: |
1613 | 0 | return netlink_put_lsp_update_msg(bth, ctx); |
1614 | | |
1615 | 0 | case DPLANE_OP_PW_INSTALL: |
1616 | 0 | case DPLANE_OP_PW_UNINSTALL: |
1617 | 0 | return netlink_put_pw_update_msg(bth, ctx); |
1618 | | |
1619 | 0 | case DPLANE_OP_ADDR_INSTALL: |
1620 | 0 | case DPLANE_OP_ADDR_UNINSTALL: |
1621 | 0 | return netlink_put_address_update_msg(bth, ctx); |
1622 | | |
1623 | 0 | case DPLANE_OP_MAC_INSTALL: |
1624 | 0 | case DPLANE_OP_MAC_DELETE: |
1625 | 0 | return netlink_put_mac_update_msg(bth, ctx); |
1626 | | |
1627 | 0 | case DPLANE_OP_NEIGH_INSTALL: |
1628 | 0 | case DPLANE_OP_NEIGH_UPDATE: |
1629 | 0 | case DPLANE_OP_NEIGH_DELETE: |
1630 | 0 | case DPLANE_OP_VTEP_ADD: |
1631 | 0 | case DPLANE_OP_VTEP_DELETE: |
1632 | 0 | case DPLANE_OP_NEIGH_DISCOVER: |
1633 | 0 | case DPLANE_OP_NEIGH_IP_INSTALL: |
1634 | 0 | case DPLANE_OP_NEIGH_IP_DELETE: |
1635 | 0 | case DPLANE_OP_NEIGH_TABLE_UPDATE: |
1636 | 0 | return netlink_put_neigh_update_msg(bth, ctx); |
1637 | | |
1638 | 0 | case DPLANE_OP_RULE_ADD: |
1639 | 0 | case DPLANE_OP_RULE_DELETE: |
1640 | 0 | case DPLANE_OP_RULE_UPDATE: |
1641 | 0 | return netlink_put_rule_update_msg(bth, ctx); |
1642 | | |
1643 | 0 | case DPLANE_OP_SYS_ROUTE_ADD: |
1644 | 0 | case DPLANE_OP_SYS_ROUTE_DELETE: |
1645 | 0 | case DPLANE_OP_ROUTE_NOTIFY: |
1646 | 0 | case DPLANE_OP_LSP_NOTIFY: |
1647 | 0 | case DPLANE_OP_BR_PORT_UPDATE: |
1648 | 0 | return FRR_NETLINK_SUCCESS; |
1649 | | |
1650 | 0 | case DPLANE_OP_IPTABLE_ADD: |
1651 | 0 | case DPLANE_OP_IPTABLE_DELETE: |
1652 | 0 | case DPLANE_OP_IPSET_ADD: |
1653 | 0 | case DPLANE_OP_IPSET_DELETE: |
1654 | 0 | case DPLANE_OP_IPSET_ENTRY_ADD: |
1655 | 0 | case DPLANE_OP_IPSET_ENTRY_DELETE: |
1656 | 0 | return FRR_NETLINK_ERROR; |
1657 | | |
1658 | 0 | case DPLANE_OP_GRE_SET: |
1659 | 0 | return netlink_put_gre_set_msg(bth, ctx); |
1660 | | |
1661 | 0 | case DPLANE_OP_INTF_ADDR_ADD: |
1662 | 0 | case DPLANE_OP_INTF_ADDR_DEL: |
1663 | 0 | case DPLANE_OP_NONE: |
1664 | 0 | return FRR_NETLINK_ERROR; |
1665 | | |
1666 | 0 | case DPLANE_OP_INTF_NETCONFIG: |
1667 | 0 | return netlink_put_intf_netconfig(bth, ctx); |
1668 | | |
1669 | 0 | case DPLANE_OP_INTF_INSTALL: |
1670 | 0 | case DPLANE_OP_INTF_UPDATE: |
1671 | 0 | case DPLANE_OP_INTF_DELETE: |
1672 | 0 | return netlink_put_intf_update_msg(bth, ctx); |
1673 | | |
1674 | 0 | case DPLANE_OP_TC_QDISC_INSTALL: |
1675 | 0 | case DPLANE_OP_TC_QDISC_UNINSTALL: |
1676 | 0 | return netlink_put_tc_qdisc_update_msg(bth, ctx); |
1677 | 0 | case DPLANE_OP_TC_CLASS_ADD: |
1678 | 0 | case DPLANE_OP_TC_CLASS_DELETE: |
1679 | 0 | case DPLANE_OP_TC_CLASS_UPDATE: |
1680 | 0 | return netlink_put_tc_class_update_msg(bth, ctx); |
1681 | 0 | case DPLANE_OP_TC_FILTER_ADD: |
1682 | 0 | case DPLANE_OP_TC_FILTER_DELETE: |
1683 | 0 | case DPLANE_OP_TC_FILTER_UPDATE: |
1684 | 0 | return netlink_put_tc_filter_update_msg(bth, ctx); |
1685 | 0 | } |
1686 | | |
1687 | 0 | return FRR_NETLINK_ERROR; |
1688 | 0 | } |
1689 | | |
1690 | | void kernel_update_multi(struct dplane_ctx_list_head *ctx_list) |
1691 | 0 | { |
1692 | 0 | struct nl_batch batch; |
1693 | 0 | struct zebra_dplane_ctx *ctx; |
1694 | 0 | struct dplane_ctx_list_head handled_list; |
1695 | 0 | enum netlink_msg_status res; |
1696 | |
|
1697 | 0 | dplane_ctx_q_init(&handled_list); |
1698 | 0 | nl_batch_init(&batch, &handled_list); |
1699 | |
|
1700 | 0 | while (true) { |
1701 | 0 | ctx = dplane_ctx_dequeue(ctx_list); |
1702 | 0 | if (ctx == NULL) |
1703 | 0 | break; |
1704 | | |
1705 | 0 | if (batch.zns != NULL |
1706 | 0 | && batch.zns->ns_id != dplane_ctx_get_ns(ctx)->ns_id) |
1707 | 0 | nl_batch_send(&batch); |
1708 | | |
1709 | | /* |
1710 | | * Assume all messages will succeed and then mark only the ones |
1711 | | * that failed. |
1712 | | */ |
1713 | 0 | dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); |
1714 | |
|
1715 | 0 | res = nl_put_msg(&batch, ctx); |
1716 | |
|
1717 | 0 | dplane_ctx_enqueue_tail(&(batch.ctx_list), ctx); |
1718 | 0 | if (res == FRR_NETLINK_ERROR) |
1719 | 0 | dplane_ctx_set_status(ctx, |
1720 | 0 | ZEBRA_DPLANE_REQUEST_FAILURE); |
1721 | |
|
1722 | 0 | if (batch.curlen > batch.limit) |
1723 | 0 | nl_batch_send(&batch); |
1724 | 0 | } |
1725 | |
|
1726 | 0 | nl_batch_send(&batch); |
1727 | |
|
1728 | 0 | dplane_ctx_q_init(ctx_list); |
1729 | 0 | dplane_ctx_list_append(ctx_list, &handled_list); |
1730 | 0 | } |
1731 | | |
1732 | | struct nlsock *kernel_netlink_nlsock_lookup(int sock) |
1733 | 0 | { |
1734 | 0 | struct nlsock lookup, *retval; |
1735 | |
|
1736 | 0 | lookup.sock = sock; |
1737 | |
|
1738 | 0 | NLSOCK_LOCK(); |
1739 | 0 | retval = hash_lookup(nlsock_hash, &lookup); |
1740 | 0 | NLSOCK_UNLOCK(); |
1741 | |
|
1742 | 0 | return retval; |
1743 | 0 | } |
1744 | | |
1745 | | /* Insert nlsock entry into hash */ |
1746 | | static void kernel_netlink_nlsock_insert(struct nlsock *nls) |
1747 | 0 | { |
1748 | 0 | NLSOCK_LOCK(); |
1749 | 0 | (void)hash_get(nlsock_hash, nls, hash_alloc_intern); |
1750 | 0 | NLSOCK_UNLOCK(); |
1751 | 0 | } |
1752 | | |
1753 | | /* Remove nlsock entry from hash */ |
1754 | | static void kernel_netlink_nlsock_remove(struct nlsock *nls) |
1755 | 0 | { |
1756 | 0 | NLSOCK_LOCK(); |
1757 | 0 | (void)hash_release(nlsock_hash, nls); |
1758 | 0 | NLSOCK_UNLOCK(); |
1759 | 0 | } |
1760 | | |
1761 | | static uint32_t kernel_netlink_nlsock_key(const void *arg) |
1762 | 0 | { |
1763 | 0 | const struct nlsock *nl = arg; |
1764 | |
|
1765 | 0 | return nl->sock; |
1766 | 0 | } |
1767 | | |
1768 | | static bool kernel_netlink_nlsock_hash_equal(const void *arg1, const void *arg2) |
1769 | 0 | { |
1770 | 0 | const struct nlsock *nl1 = arg1; |
1771 | 0 | const struct nlsock *nl2 = arg2; |
1772 | |
|
1773 | 0 | if (nl1->sock == nl2->sock) |
1774 | 0 | return true; |
1775 | | |
1776 | 0 | return false; |
1777 | 0 | } |
1778 | | |
1779 | | /* Exported interface function. This function simply calls |
1780 | | netlink_socket (). */ |
1781 | | void kernel_init(struct zebra_ns *zns) |
1782 | 1 | { |
1783 | 1 | uint32_t groups, dplane_groups, ext_groups; |
1784 | 1 | #if defined SOL_NETLINK |
1785 | 1 | int one, ret, grp; |
1786 | 1 | #endif |
1787 | | |
1788 | | /* |
1789 | | * Initialize netlink sockets |
1790 | | * |
1791 | | * If RTMGRP_XXX exists use that, but at some point |
1792 | | * I think the kernel developers realized that |
1793 | | * keeping track of all the different values would |
1794 | | * lead to confusion, so we need to convert the |
1795 | | * RTNLGRP_XXX to a bit position for ourself |
1796 | | * |
1797 | | * |
1798 | | * NOTE: If the bit is >= 32, you must use setsockopt(). Those |
1799 | | * groups are added further below after SOL_NETLINK is verified to |
1800 | | * exist. |
1801 | | */ |
1802 | 1 | groups = RTMGRP_LINK | |
1803 | 1 | RTMGRP_IPV4_ROUTE | |
1804 | 1 | RTMGRP_IPV4_IFADDR | |
1805 | 1 | RTMGRP_IPV6_ROUTE | |
1806 | 1 | RTMGRP_IPV6_IFADDR | |
1807 | 1 | RTMGRP_IPV4_MROUTE | |
1808 | 1 | RTMGRP_NEIGH | |
1809 | 1 | ((uint32_t) 1 << (RTNLGRP_IPV4_RULE - 1)) | |
1810 | 1 | ((uint32_t) 1 << (RTNLGRP_IPV6_RULE - 1)) | |
1811 | 1 | ((uint32_t) 1 << (RTNLGRP_NEXTHOP - 1)) | |
1812 | 1 | ((uint32_t) 1 << (RTNLGRP_TC - 1)); |
1813 | | |
1814 | 1 | dplane_groups = (RTMGRP_LINK | |
1815 | 1 | RTMGRP_IPV4_IFADDR | |
1816 | 1 | RTMGRP_IPV6_IFADDR | |
1817 | 1 | ((uint32_t) 1 << (RTNLGRP_IPV4_NETCONF - 1)) | |
1818 | 1 | ((uint32_t) 1 << (RTNLGRP_IPV6_NETCONF - 1)) | |
1819 | 1 | ((uint32_t) 1 << (RTNLGRP_MPLS_NETCONF - 1))); |
1820 | | |
1821 | | /* Use setsockopt for > 31 group */ |
1822 | 1 | ext_groups = RTNLGRP_TUNNEL; |
1823 | | |
1824 | 1 | snprintf(zns->netlink.name, sizeof(zns->netlink.name), |
1825 | 1 | "netlink-listen (NS %u)", zns->ns_id); |
1826 | 1 | zns->netlink.sock = -1; |
1827 | | #ifndef FUZZING |
1828 | | if (netlink_socket(&zns->netlink, groups, &ext_groups, 1, zns->ns_id) < |
1829 | | 0) { |
1830 | | zlog_err("Failure to create %s socket", |
1831 | | zns->netlink.name); |
1832 | | exit(-1); |
1833 | | } |
1834 | | |
1835 | | kernel_netlink_nlsock_insert(&zns->netlink); |
1836 | | #endif |
1837 | 1 | snprintf(zns->netlink_cmd.name, sizeof(zns->netlink_cmd.name), |
1838 | 1 | "netlink-cmd (NS %u)", zns->ns_id); |
1839 | 1 | zns->netlink_cmd.sock = -1; |
1840 | | #ifndef FUZZING |
1841 | | if (netlink_socket(&zns->netlink_cmd, 0, 0, 0, zns->ns_id) < 0) { |
1842 | | zlog_err("Failure to create %s socket", |
1843 | | zns->netlink_cmd.name); |
1844 | | exit(-1); |
1845 | | } |
1846 | | |
1847 | | kernel_netlink_nlsock_insert(&zns->netlink_cmd); |
1848 | | #endif |
1849 | | /* Outbound socket for dplane programming of the host OS. */ |
1850 | 1 | snprintf(zns->netlink_dplane_out.name, |
1851 | 1 | sizeof(zns->netlink_dplane_out.name), "netlink-dp (NS %u)", |
1852 | 1 | zns->ns_id); |
1853 | 1 | zns->netlink_dplane_out.sock = -1; |
1854 | | #ifndef FUZZING |
1855 | | if (netlink_socket(&zns->netlink_dplane_out, 0, 0, 0, zns->ns_id) < 0) { |
1856 | | zlog_err("Failure to create %s socket", |
1857 | | zns->netlink_dplane_out.name); |
1858 | | exit(-1); |
1859 | | } |
1860 | | |
1861 | | kernel_netlink_nlsock_insert(&zns->netlink_dplane_out); |
1862 | | |
1863 | | /* Inbound socket for OS events coming to the dplane. */ |
1864 | | snprintf(zns->netlink_dplane_in.name, |
1865 | | sizeof(zns->netlink_dplane_in.name), "netlink-dp-in (NS %u)", |
1866 | | zns->ns_id); |
1867 | | zns->netlink_dplane_in.sock = -1; |
1868 | | if (netlink_socket(&zns->netlink_dplane_in, dplane_groups, 0, 0, |
1869 | | zns->ns_id) < 0) { |
1870 | | zlog_err("Failure to create %s socket", |
1871 | | zns->netlink_dplane_in.name); |
1872 | | exit(-1); |
1873 | | } |
1874 | | |
1875 | | kernel_netlink_nlsock_insert(&zns->netlink_dplane_in); |
1876 | | #endif |
1877 | | #ifndef FUZZING |
1878 | | /* |
1879 | | * SOL_NETLINK is not available on all platforms yet |
1880 | | * apparently. It's in bits/socket.h which I am not |
1881 | | * sure that we want to pull into our build system. |
1882 | | */ |
1883 | | #if defined SOL_NETLINK |
1884 | | |
1885 | | /* |
1886 | | * setsockopt multicast group subscriptions that don't fit in nl_groups |
1887 | | */ |
1888 | | grp = RTNLGRP_BRVLAN; |
1889 | | ret = setsockopt(zns->netlink.sock, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, |
1890 | | &grp, sizeof(grp)); |
1891 | | |
1892 | | if (ret < 0) |
1893 | | zlog_notice( |
1894 | | "Registration for RTNLGRP_BRVLAN Membership failed : %d %s", |
1895 | | errno, safe_strerror(errno)); |
1896 | | /* |
1897 | | * Let's tell the kernel that we want to receive extended |
1898 | | * ACKS over our command socket(s) |
1899 | | */ |
1900 | | one = 1; |
1901 | | ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK, |
1902 | | &one, sizeof(one)); |
1903 | | |
1904 | | if (ret < 0) |
1905 | | zlog_notice("Registration for extended cmd ACK failed : %d %s", |
1906 | | errno, safe_strerror(errno)); |
1907 | | |
1908 | | one = 1; |
1909 | | ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK, |
1910 | | NETLINK_EXT_ACK, &one, sizeof(one)); |
1911 | | |
1912 | | if (ret < 0) |
1913 | | zlog_notice("Registration for extended dp ACK failed : %d %s", |
1914 | | errno, safe_strerror(errno)); |
1915 | | |
1916 | | /* |
1917 | | * Trim off the payload of the original netlink message in the |
1918 | | * acknowledgment. This option is available since Linux 4.2, so if |
1919 | | * setsockopt fails, ignore the error. |
1920 | | */ |
1921 | | one = 1; |
1922 | | ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK, |
1923 | | NETLINK_CAP_ACK, &one, sizeof(one)); |
1924 | | if (ret < 0) |
1925 | | zlog_notice( |
1926 | | "Registration for reduced ACK packet size failed, probably running an early kernel"); |
1927 | | #endif |
1928 | | |
1929 | | /* Register kernel socket. */ |
1930 | | if (fcntl(zns->netlink.sock, F_SETFL, O_NONBLOCK) < 0) |
1931 | | flog_err_sys(EC_LIB_SOCKET, "Can't set %s socket flags: %s", |
1932 | | zns->netlink.name, safe_strerror(errno)); |
1933 | | |
1934 | | if (fcntl(zns->netlink_cmd.sock, F_SETFL, O_NONBLOCK) < 0) |
1935 | | zlog_err("Can't set %s socket error: %s(%d)", |
1936 | | zns->netlink_cmd.name, safe_strerror(errno), errno); |
1937 | | |
1938 | | if (fcntl(zns->netlink_dplane_out.sock, F_SETFL, O_NONBLOCK) < 0) |
1939 | | zlog_err("Can't set %s socket error: %s(%d)", |
1940 | | zns->netlink_dplane_out.name, safe_strerror(errno), |
1941 | | errno); |
1942 | | |
1943 | | if (fcntl(zns->netlink_dplane_in.sock, F_SETFL, O_NONBLOCK) < 0) |
1944 | | zlog_err("Can't set %s socket error: %s(%d)", |
1945 | | zns->netlink_dplane_in.name, safe_strerror(errno), |
1946 | | errno); |
1947 | | |
1948 | | /* Set receive buffer size if it's set from command line */ |
1949 | | if (rcvbufsize) { |
1950 | | netlink_recvbuf(&zns->netlink, rcvbufsize); |
1951 | | #ifndef FUZZING |
1952 | | netlink_recvbuf(&zns->netlink_cmd, rcvbufsize); |
1953 | | netlink_recvbuf(&zns->netlink_dplane_out, rcvbufsize); |
1954 | | netlink_recvbuf(&zns->netlink_dplane_in, rcvbufsize); |
1955 | | #endif |
1956 | | } |
1957 | | |
1958 | | /* Set filter for inbound sockets, to exclude events we've generated |
1959 | | * ourselves. |
1960 | | */ |
1961 | | netlink_install_filter(zns->netlink.sock, zns->netlink_cmd.snl.nl_pid, |
1962 | | zns->netlink_dplane_out.snl.nl_pid); |
1963 | | |
1964 | | netlink_install_filter(zns->netlink_dplane_in.sock, |
1965 | | zns->netlink_cmd.snl.nl_pid, |
1966 | | zns->netlink_dplane_out.snl.nl_pid); |
1967 | | |
1968 | | #endif /* FUZZING */ |
1969 | 1 | zns->t_netlink = NULL; |
1970 | | |
1971 | 1 | event_add_read(zrouter.master, kernel_read, zns, zns->netlink.sock, |
1972 | 1 | &zns->t_netlink); |
1973 | | |
1974 | 1 | rt_netlink_init(); |
1975 | 1 | } |
1976 | | |
1977 | | /* Helper to clean up an nlsock */ |
1978 | | static void kernel_nlsock_fini(struct nlsock *nls) |
1979 | 0 | { |
1980 | 0 | if (nls && nls->sock >= 0) { |
1981 | 0 | kernel_netlink_nlsock_remove(nls); |
1982 | 0 | close(nls->sock); |
1983 | 0 | nls->sock = -1; |
1984 | 0 | XFREE(MTYPE_NL_BUF, nls->buf); |
1985 | 0 | nls->buflen = 0; |
1986 | 0 | } |
1987 | 0 | } |
1988 | | |
1989 | | void kernel_terminate(struct zebra_ns *zns, bool complete) |
1990 | 0 | { |
1991 | 0 | EVENT_OFF(zns->t_netlink); |
1992 | |
|
1993 | 0 | kernel_nlsock_fini(&zns->netlink); |
1994 | |
|
1995 | 0 | kernel_nlsock_fini(&zns->netlink_cmd); |
1996 | |
|
1997 | 0 | kernel_nlsock_fini(&zns->netlink_dplane_in); |
1998 | | |
1999 | | /* During zebra shutdown, we need to leave the dataplane socket |
2000 | | * around until all work is done. |
2001 | | */ |
2002 | 0 | if (complete) |
2003 | 0 | kernel_nlsock_fini(&zns->netlink_dplane_out); |
2004 | 0 | } |
2005 | | |
2006 | | /* |
2007 | | * Global init for platform-/OS-specific things |
2008 | | */ |
2009 | | void kernel_router_init(void) |
2010 | 1 | { |
2011 | | /* Init nlsock hash and lock */ |
2012 | 1 | pthread_mutex_init(&nlsock_mutex, NULL); |
2013 | 1 | nlsock_hash = hash_create_size(8, kernel_netlink_nlsock_key, |
2014 | 1 | kernel_netlink_nlsock_hash_equal, |
2015 | 1 | "Netlink Socket Hash"); |
2016 | 1 | } |
2017 | | |
2018 | | /* |
2019 | | * Global deinit for platform-/OS-specific things |
2020 | | */ |
2021 | | void kernel_router_terminate(void) |
2022 | 0 | { |
2023 | 0 | pthread_mutex_destroy(&nlsock_mutex); |
2024 | |
|
2025 | 0 | hash_free(nlsock_hash); |
2026 | 0 | nlsock_hash = NULL; |
2027 | 0 | } |
2028 | | |
#ifdef FUZZING
/*
 * Fuzzing entry point.
 *
 * Interprets 'data' as a netlink message header. If NLMSG_OK() accepts
 * that header against the 'size' bytes available, the message is passed
 * to netlink_information_fetch() for NS_DEFAULT; otherwise the input is
 * silently dropped.
 */
void netlink_fuzz(const uint8_t *data, size_t size)
{
	struct nlmsghdr *hdr = (struct nlmsghdr *)data;

	if (NLMSG_OK(hdr, size))
		netlink_information_fetch(hdr, NS_DEFAULT, 0);
}
#endif /* FUZZING */
2040 | | |
2041 | | |
2042 | | |
2043 | | #endif /* HAVE_NETLINK */ |